/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.attribute;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.ObjectInputStream;
import java.util.Enumeration;
import java.util.Vector;
import weka.clusterers.AbstractClusterer;
import weka.clusterers.Clusterer;
import weka.clusterers.SimpleKMeans;
import weka.core.AbstractInstance;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.DenseInstance;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.WekaException;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;
import weka.filters.unsupervised.attribute.Remove;

/**
 * Filter that appends a nominal attribute named "cluster" to every instance,
 * holding the cluster the instance is assigned to by a clusterer.
 * <p>
 * The clusterer is either built from the first batch of data or read from a
 * serialized model file. Attributes listed in the ignore range, and the class
 * attribute if one is set, are removed from the data before it is handed to
 * the clusterer; they are kept in the filter's output.
 */
public class AddCluster
extends Filter
implements UnsupervisedFilter,
OptionHandler {
    /** For serialization. */
    static final long serialVersionUID = 7414280611943807337L;

    /** The clusterer template configured by the user; copied before being built. */
    protected Clusterer m_Clusterer = new SimpleKMeans();

    /**
     * The serialized model file to load the clusterer from. A directory
     * (the default is the "user.dir" directory) means "no serialized model".
     */
    protected File m_SerializedClustererFile = new File(System.getProperty("user.dir"));

    /** The clusterer actually used for assigning clusters (built copy or deserialized model). */
    protected Clusterer m_ActualClusterer = null;

    /** The attributes the clusterer should ignore, or null if none were specified. */
    protected Range m_IgnoreAttributesRange = null;

    /** Filter that strips ignored attributes and the class attribute; null when nothing is stripped. */
    protected Filter m_removeAttributes = new Remove();

    /**
     * Returns the capabilities of this filter for the given data, evaluated on
     * an empty copy of the data whose class index has been unset.
     *
     * @param data the data to use for customization
     * @return the capabilities as computed by the superclass for the class-less header
     */
    @Override
    public Capabilities getCapabilities(Instances data) {
        Instances newData = new Instances(data, 0);
        newData.setClassIndex(-1);
        return super.getCapabilities(newData);
    }

    /**
     * Returns the capabilities of the configured clusterer, with the minimum
     * number of instances set to 0.
     *
     * @return the capabilities of this filter
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = this.m_Clusterer.getCapabilities();
        result.setMinimumNumberInstances(0);
        return result;
    }

    /**
     * Tests the input format against the capabilities, after the ignored
     * attributes and the class attribute have been removed from it.
     *
     * @param instanceInfo the format of the data to test
     * @throws Exception if the reduced data fails the capabilities test
     */
    @Override
    protected void testInputFormat(Instances instanceInfo) throws Exception {
        this.getCapabilities(instanceInfo).testWithFail(this.removeIgnored(instanceInfo));
    }

    /**
     * Sets the format of the input instances.
     *
     * @param instanceInfo header describing the incoming instances
     * @return always false: the output format is only set once the first batch has finished
     * @throws Exception if the superclass rejects the input format
     */
    @Override
    public boolean setInputFormat(Instances instanceInfo) throws Exception {
        super.setInputFormat(instanceInfo);
        // Discard any previous removal filter; removeIgnored() creates a fresh
        // one when there is something to strip.
        this.m_removeAttributes = null;
        return false;
    }

    /**
     * Removes the attributes in the ignore range and the class attribute (if
     * set) from the given data. As a side effect, {@link #m_removeAttributes}
     * is replaced by the freshly configured Remove filter whenever there is
     * anything to remove; otherwise it is left untouched.
     *
     * @param data the data to process
     * @return the reduced data, or {@code data} itself if nothing had to be removed
     * @throws Exception if configuring or applying the Remove filter fails
     */
    protected Instances removeIgnored(Instances data) throws Exception {
        boolean hasIgnoreRange = this.m_IgnoreAttributesRange != null;
        int classIndex = data.classIndex();
        if (!hasIgnoreRange && classIndex < 0) {
            return data;
        }

        Remove remove = new Remove();
        this.m_removeAttributes = remove;

        // Build the 1-based index list: user-supplied ranges first, then the class attribute.
        StringBuilder indices = new StringBuilder();
        if (hasIgnoreRange) {
            indices.append(this.m_IgnoreAttributesRange.getRanges());
        }
        if (classIndex >= 0) {
            if (indices.length() > 0) {
                indices.append(',');
            }
            indices.append(classIndex + 1);
        }

        remove.setAttributeIndices(indices.toString());
        remove.setInvertSelection(false);
        remove.setInputFormat(data);
        return Filter.useFilter(data, remove);
    }

    /**
     * Signals the end of a batch. On the first batch the clusterer is obtained
     * (deserialized from the model file, or built on the buffered data) and
     * the output format is set to the input format plus the nominal "cluster"
     * attribute. All buffered instances are then converted and queued.
     *
     * @return true if there are instances pending output
     * @throws IllegalStateException if no input format has been defined
     * @throws Exception if loading or building the clusterer, or converting an instance, fails
     */
    @Override
    public boolean batchFinished() throws Exception {
        Instances toFilter = this.getInputFormat();
        if (toFilter == null) {
            throw new IllegalStateException("No input instance format defined");
        }

        if (!this.isFirstBatchDone()) {
            Instances clustererData = this.removeIgnored(toFilter);

            // A regular file means "use this serialized model"; a directory
            // (the default) or null means "build the clusterer on the data".
            File file = this.getSerializedClustererFile();
            if (file != null && !file.isDirectory()) {
                this.m_ActualClusterer = this.loadSerializedClusterer(file, clustererData);
            } else {
                this.m_ActualClusterer = AbstractClusterer.makeCopy(this.m_Clusterer);
                this.m_ActualClusterer.buildClusterer(clustererData);
            }

            // Output format = input format + nominal attribute with labels cluster1..clusterN.
            Instances filtered = new Instances(toFilter, 0);
            int numClusters = this.m_ActualClusterer.numberOfClusters();
            FastVector<String> labels = new FastVector<String>(numClusters);
            for (int i = 0; i < numClusters; ++i) {
                labels.addElement("cluster" + (i + 1));
            }
            filtered.insertAttributeAt(new Attribute("cluster", labels), filtered.numAttributes());
            this.setOutputFormat(filtered);
        }

        for (int i = 0; i < toFilter.numInstances(); ++i) {
            this.convertInstance(toFilter.instance(i));
        }
        this.flushInput();
        this.m_NewBatch = true;
        this.m_FirstBatchDone = true;
        return this.numPendingOutput() != 0;
    }

    /**
     * Reads a clusterer, optionally followed by its training header, from a
     * serialized model file. The underlying stream is always closed, also when
     * reading fails.
     *
     * @param file the serialized model file
     * @param expectedHeader the header the stored training header (if any) must match
     * @return the deserialized clusterer
     * @throws WekaException if a stored header exists and does not match {@code expectedHeader}
     * @throws Exception if the file cannot be opened or the clusterer cannot be read
     */
    private Clusterer loadSerializedClusterer(File file, Instances expectedHeader) throws Exception {
        Clusterer clusterer;
        Instances header = null;
        FileInputStream fis = new FileInputStream(file);
        try {
            // NOTE(security): this is native Java deserialization of a
            // user-supplied file; only load model files from trusted sources.
            ObjectInputStream ois = new ObjectInputStream(fis);
            clusterer = (Clusterer) ois.readObject();
            try {
                header = (Instances) ois.readObject();
            } catch (Exception ignored) {
                // The training header is optional: a model file without a
                // (readable) header is accepted and simply not verified.
            }
        } finally {
            // Closing the file stream releases the descriptor even if
            // constructing the object stream or reading from it failed.
            fis.close();
        }

        if (header != null && !header.equalHeaders(expectedHeader)) {
            throw new WekaException("Training header of clusterer and filter dataset don't match:\n" + header.equalHeadersMsg(expectedHeader));
        }
        return clusterer;
    }

    /**
     * Inputs an instance for filtering. Once the output format is available
     * the instance is converted immediately; before that it is buffered until
     * {@link #batchFinished()} is called.
     *
     * @param instance the input instance
     * @return true if the converted instance was queued for output, false if it was buffered
     * @throws IllegalStateException if no input format has been defined
     * @throws Exception if converting the instance fails
     */
    @Override
    public boolean input(Instance instance) throws Exception {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_NewBatch) {
            this.resetQueue();
            this.m_NewBatch = false;
        }
        if (this.outputFormatPeek() == null) {
            this.bufferInput(instance);
            return false;
        }
        this.convertInstance(instance);
        return true;
    }

    /**
     * Converts a single instance by appending its cluster assignment and
     * pushes the result onto the output queue. The clusterer sees the instance
     * with the ignored attributes removed; the output keeps all original values.
     *
     * @param instance the instance to convert
     * @throws Exception if removing the ignored attributes or copying values fails
     */
    protected void convertInstance(Instance instance) throws Exception {
        int numAtts = instance.numAttributes();
        double[] values = new double[numAtts + 1];
        for (int j = 0; j < numAtts; ++j) {
            values[j] = instance.value(j);
        }

        // Hand the clusterer only the attributes it was built on.
        Instance forClusterer = instance;
        if (this.m_removeAttributes != null) {
            this.m_removeAttributes.input(instance);
            forClusterer = this.m_removeAttributes.output();
        }

        try {
            values[numAtts] = this.m_ActualClusterer.clusterInstance(forClusterer);
        } catch (Exception e) {
            // Best effort: an instance the clusterer cannot assign gets a
            // missing cluster value instead of aborting the whole batch.
            values[numAtts] = Utils.missingValue();
        }

        // Preserve the sparse/dense representation of the input instance.
        Instance processed;
        if (instance instanceof SparseInstance) {
            processed = new SparseInstance(instance.weight(), values);
        } else {
            processed = new DenseInstance(instance.weight(), values);
        }
        processed.setDataset(instance.dataset());
        this.copyValues(processed, false, instance.dataset(), this.getOutputFormat());
        processed.setDataset(this.getOutputFormat());
        this.push(processed);
    }

    /**
     * Returns an enumeration describing the available options
     * (-W, -serialized and -I).
     *
     * @return an enumeration of {@link Option} objects
     */
    @Override
    public Enumeration listOptions() {
        Vector<Option> result = new Vector<Option>();
        result.addElement(new Option("\tFull class name of clusterer to use, followed\n\tby scheme options. eg:\n\t\t\"weka.clusterers.SimpleKMeans -N 3\"\n\t(default: weka.clusterers.SimpleKMeans)", "W", 1, "-W <clusterer specification>"));
        result.addElement(new Option("\tInstead of building a clusterer on the data, one can also provide\n\ta serialized model and use that for adding the clusters.", "serialized", 1, "-serialized <file>"));
        result.addElement(new Option("\tThe range of attributes the clusterer should ignore.\n", "I", 1, "-I <att1,att2-att4,...>"));
        return result.elements();
    }

    /**
     * Parses the given options. If -serialized names an existing regular file
     * it is used as the model and -W is not parsed; otherwise the clusterer is
     * instantiated from -W (default: SimpleKMeans). -I sets the ignored
     * attribute range.
     *
     * @param options the list of options as an array of strings
     * @throws FileNotFoundException if the serialized file does not exist or is a directory
     * @throws Exception if the clusterer specification is invalid or unknown options remain
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String serializedPath = Utils.getOption("serialized", options);
        boolean useSerializedModel = serializedPath.length() != 0;

        if (useSerializedModel) {
            File file = new File(serializedPath);
            if (!file.exists()) {
                throw new FileNotFoundException("File '" + file.getAbsolutePath() + "' not found!");
            }
            if (file.isDirectory()) {
                throw new FileNotFoundException("'" + file.getAbsolutePath() + "' points to a directory not a file!");
            }
            this.setSerializedClustererFile(file);
        } else {
            // Resets the file to the "no serialized model" default.
            this.setSerializedClustererFile(null);

            String clustererSpec = Utils.getOption('W', options);
            if (clustererSpec.length() == 0) {
                clustererSpec = SimpleKMeans.class.getName();
            }
            String[] clustererOptions = Utils.splitOptions(clustererSpec);
            if (clustererOptions.length == 0) {
                throw new Exception("Invalid clusterer specification string");
            }
            // First token is the class name; blank it so only scheme options remain.
            String clustererName = clustererOptions[0];
            clustererOptions[0] = "";
            this.setClusterer(AbstractClusterer.forName(clustererName, clustererOptions));
        }

        this.setIgnoredAttributeIndices(Utils.getOption('I', options));
        Utils.checkForRemainingOptions(options);
    }

    /**
     * Gets the current settings of the filter: either -serialized with the
     * model file or -W with the clusterer specification, plus -I if an ignore
     * range is set.
     *
     * @return an array of strings suitable for passing to {@link #setOptions(String[])}
     */
    @Override
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();

        File file = this.getSerializedClustererFile();
        if (file != null && !file.isDirectory()) {
            result.add("-serialized");
            result.add(file.getAbsolutePath());
        } else {
            result.add("-W");
            result.add(this.getClustererSpec());
        }

        String ignored = this.getIgnoredAttributeIndices();
        if (!ignored.equals("")) {
            result.add("-I");
            result.add(ignored);
        }
        return result.toArray(new String[result.size()]);
    }

    /**
     * Returns a string describing this filter.
     *
     * @return a description of the filter
     */
    public String globalInfo() {
        return "A filter that adds a new nominal attribute representing the cluster assigned to each instance by the specified clustering algorithm.\nEither the clustering algorithm gets built with the first batch of data or one specifies are serialized clusterer model file to use instead.";
    }

    /**
     * Returns the tip text for the clusterer property.
     *
     * @return the tip text
     */
    public String clustererTipText() {
        return "The clusterer to assign clusters with.";
    }

    /**
     * Sets the clusterer to assign clusters with.
     *
     * @param clusterer the clusterer
     */
    public void setClusterer(Clusterer clusterer) {
        this.m_Clusterer = clusterer;
    }

    /**
     * Gets the clusterer used by the filter.
     *
     * @return the configured clusterer
     */
    public Clusterer getClusterer() {
        return this.m_Clusterer;
    }

    /**
     * Gets the clusterer specification string: the class name, followed by the
     * clusterer's options if it is an {@link OptionHandler}.
     *
     * @return the clusterer specification
     */
    protected String getClustererSpec() {
        Clusterer c = this.getClusterer();
        String className = c.getClass().getName();
        if (c instanceof OptionHandler) {
            return className + " " + Utils.joinOptions(((OptionHandler) c).getOptions());
        }
        return className;
    }

    /**
     * Returns the tip text for the ignored attribute indices property.
     *
     * @return the tip text
     */
    public String ignoredAttributeIndicesTipText() {
        return "The range of attributes to be ignored by the clusterer. eg: first-3,5,9-last";
    }

    /**
     * Gets the ranges of attributes to be ignored.
     *
     * @return the range string, or an empty string if no range is set
     */
    public String getIgnoredAttributeIndices() {
        if (this.m_IgnoreAttributesRange == null) {
            return "";
        }
        return this.m_IgnoreAttributesRange.getRanges();
    }

    /**
     * Sets the ranges of attributes to be ignored.
     *
     * @param rangeList the range string; null or empty clears the range
     */
    public void setIgnoredAttributeIndices(String rangeList) {
        if (rangeList == null || rangeList.length() == 0) {
            this.m_IgnoreAttributesRange = null;
            return;
        }
        Range range = new Range();
        range.setRanges(rangeList);
        this.m_IgnoreAttributesRange = range;
    }

    /**
     * Gets the file containing the serialized clusterer.
     *
     * @return the model file; a directory means no serialized model is in use
     */
    public File getSerializedClustererFile() {
        return this.m_SerializedClustererFile;
    }

    /**
     * Sets the file containing the serialized clusterer. A null or
     * non-existent file is replaced by the "user.dir" directory, which the
     * filter treats as "no serialized model".
     *
     * @param value the model file, or null to clear it
     */
    public void setSerializedClustererFile(File value) {
        if (value == null || !value.exists()) {
            value = new File(System.getProperty("user.dir"));
        }
        this.m_SerializedClustererFile = value;
    }

    /**
     * Returns the tip text for the serialized clusterer file property.
     *
     * @return the tip text
     */
    public String serializedClustererFileTipText() {
        return "A file containing the serialized model of a built clusterer.";
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 8034 $");
    }

    /**
     * Runs the filter from the command line.
     *
     * @param argv the command-line arguments, passed on to runFilter
     */
    public static void main(String[] argv) {
        AddCluster.runFilter(new AddCluster(), argv);
    }
}

