/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.attribute;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.CapabilitiesHandler;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.SingleIndex;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.SimpleStreamFilter;
import weka.filters.UnsupervisedFilter;
import weka.filters.unsupervised.attribute.DownSample;

public class SpellChecker
extends SimpleStreamFilter
implements UnsupervisedFilter {
    private static final long serialVersionUID = 5944266872914953692L;
    protected SingleIndex m_AttributeIndex = new SingleIndex("last");
    protected String[] m_Incorrect = new String[0];
    protected String m_Correct = "";
    protected HashSet<String> m_IncorrectCache;

    public String globalInfo() {
        return "A simple filter that merges misspelled labels into a single correct one.";
    }

    protected void reset() {
        super.reset();
        this.m_IncorrectCache = new HashSet();
    }

    public Enumeration listOptions() {
        Vector result = new Vector();
        Enumeration enm = super.listOptions();
        while (enm.hasMoreElements()) {
            result.add(enm.nextElement());
        }
        result.addElement(new Option("\tThe index of the attribute to process.\n\t(default: last).", "C", 1, "-C <col>"));
        result.addElement(new Option("\tThe incorrectly spelled labels.\n\t(default: none).", "incorrect", 1, "-incorrect <blank separated labels>"));
        result.addElement(new Option("\tThe correct spelling for the labels.\n\t(default: correct).", "correct", 1, "-correct <label>"));
        return result.elements();
    }

    public void setOptions(String[] options) throws Exception {
        this.reset();
        String tmpStr = Utils.getOption((String)"C", (String[])options);
        if (tmpStr.length() > 0) {
            this.setAttributeIndex(tmpStr);
        } else {
            this.setAttributeIndex("last");
        }
        tmpStr = Utils.getOption((String)"incorrect", (String[])options);
        if (tmpStr.length() > 0) {
            this.setIncorrect(tmpStr);
        } else {
            this.setIncorrect("");
        }
        tmpStr = Utils.getOption((String)"correct", (String[])options);
        if (tmpStr.length() > 0) {
            this.setCorrect(tmpStr);
        } else {
            this.setCorrect("correct");
        }
        super.setOptions(options);
    }

    public String[] getOptions() {
        Vector<String> result = new Vector<String>(Arrays.asList(super.getOptions()));
        result.add("-C");
        result.add(this.getAttributeIndex());
        result.add("-incorrect");
        result.add(this.getIncorrect());
        result.add("-correct");
        result.add(this.getCorrect());
        return result.toArray(new String[result.size()]);
    }

    public void setAttributeIndex(String value) {
        this.m_AttributeIndex.setSingleIndex(value);
        this.reset();
    }

    public String getAttributeIndex() {
        return this.m_AttributeIndex.getSingleIndex();
    }

    public String attributeIndexTipText() {
        return "The 1-based index of the attribute to process; 'first' and 'last' are accepted as well.";
    }

    public void setIncorrect(String value) throws Exception {
        this.m_Incorrect = Utils.splitOptions((String)value);
        this.reset();
    }

    public String getIncorrect() {
        return Utils.joinOptions((String[])this.m_Incorrect);
    }

    public String incorrectTipText() {
        return "The incorrect labels that get replaced by a single correct one (blank-separated list).";
    }

    public void setCorrect(String value) {
        this.m_Correct = value;
        this.reset();
    }

    public String getCorrect() {
        return this.m_Correct;
    }

    public String correctTipText() {
        return "The correct label replacing the incorrect ones.";
    }

    public Capabilities getCapabilities() {
        Capabilities result = new Capabilities((CapabilitiesHandler)this);
        result.enableAllAttributes();
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enableAllClasses();
        result.enable(Capabilities.Capability.NO_CLASS);
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        result.setMinimumNumberInstances(0);
        return result;
    }

    protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
        int i;
        if (this.m_Incorrect.length == 0) {
            throw new IllegalStateException("No incorrect labels provided!");
        }
        for (i = 0; i < this.m_Incorrect.length; ++i) {
            if (this.m_Incorrect[i].length() != 0) continue;
            throw new IllegalStateException("Incorrect label #" + (i + 1) + " has length 0!");
        }
        if (this.m_Correct.length() == 0) {
            throw new IllegalStateException("Correct label has length 0!");
        }
        this.m_AttributeIndex.setUpper(inputFormat.numAttributes() - 1);
        int index = this.m_AttributeIndex.getIndex();
        if (!inputFormat.attribute(index).isNominal()) {
            throw new IllegalStateException("Attribute #" + this.m_AttributeIndex.getSingleIndex() + " is not nominal!");
        }
        this.m_IncorrectCache = new HashSet<String>(Arrays.asList(this.m_Incorrect));
        Attribute attOld = inputFormat.attribute(index);
        ArrayList<String> labels = new ArrayList<String>();
        labels.add(this.m_Correct);
        for (i = 0; i < attOld.numValues(); ++i) {
            if (this.m_IncorrectCache.contains(attOld.value(i)) || labels.contains(attOld.value(i))) continue;
            labels.add(attOld.value(i));
        }
        Collections.sort(labels);
        Attribute attNew = new Attribute(attOld.name(), labels);
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        for (i = 0; i < inputFormat.numAttributes(); ++i) {
            if (i == index) {
                atts.add(attNew);
                continue;
            }
            atts.add((Attribute)inputFormat.attribute(i).copy());
        }
        Instances result = new Instances(inputFormat.relationName(), atts, 0);
        result.setClassIndex(inputFormat.classIndex());
        return result;
    }

    protected Instance process(Instance instance) throws Exception {
        Object result;
        int index = this.m_AttributeIndex.getIndex();
        if (instance.isMissing(index)) {
            result = (Instance)instance.copy();
        } else {
            double[] values = instance.toDoubleArray();
            String label = instance.stringValue(index);
            values[index] = this.m_IncorrectCache.contains(label) ? (double)this.outputFormatPeek().attribute(index).indexOfValue(this.m_Correct) : (double)this.outputFormatPeek().attribute(index).indexOfValue(label);
            result = instance instanceof SparseInstance ? new SparseInstance(instance.weight(), values) : new DenseInstance(instance.weight(), values);
        }
        this.copyValues(instance, false, instance.dataset(), this.getOutputFormat());
        return result;
    }

    public String getRevision() {
        return RevisionUtils.extract((String)"$Revision: 4521 $");
    }

    public static void main(String[] args) {
        SpellChecker.runFilter((Filter)new DownSample(), (String[])args);
    }
}

