/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.instance;

import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.AttributeStats;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SingleIndex;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

/**
 * Unsupervised instance filter that keeps only those instances whose value for
 * one chosen nominal attribute belongs to a selected set of values. The set is
 * made up of the most frequent values of that attribute (or the least frequent
 * ones, see {@link #setUseLeastValues(boolean)}).
 *
 * <p>Command line options handled by {@link #setOptions(String[])}:
 * <ul>
 *   <li>{@code -C <num>} attribute index to select on (default {@code last})</li>
 *   <li>{@code -N <num>} number of values to retain (default 2)</li>
 *   <li>{@code -L} retain the least frequent values instead of the most frequent</li>
 *   <li>{@code -H} remove references to excluded values from the output header</li>
 *   <li>{@code -V} invert matching sense</li>
 * </ul>
 *
 * <p>The values are determined from the instances buffered during the first
 * batch, when {@link #batchFinished()} is called.
 */
public class RemoveFrequentValues
extends Filter
implements OptionHandler,
UnsupervisedFilter {
    static final long serialVersionUID = -2447432930070059511L;

    /** Index of the attribute the selection is based on. */
    private SingleIndex m_AttIndex = new SingleIndex("last");

    /** Number of attribute values to retain. */
    protected int m_NumValues = 2;

    /** If true, the least frequent values are retained instead of the most frequent. */
    protected boolean m_LeastValues = false;

    /** If true, the matching sense is inverted (see {@link #determineValues(Instances)}). */
    protected boolean m_Invert = false;

    /** If true, excluded values are removed from the attribute in the output header. */
    protected boolean m_ModifyHeader = false;

    /**
     * Maps an old nominal value index to its index in the modified header,
     * or -1 if the value was dropped. Only filled by {@link #modifyHeader(Instances)}.
     */
    protected int[] m_NominalMapping;

    /** Labels of the retained values; null until they have been determined. */
    protected HashSet m_Values = null;

    /**
     * Returns a description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Determines which values (frequent or infrequent ones) of an (nominal) attribute are retained and filters the instances accordingly. In case of values with the same frequency, they are kept in the way they appear in the original instances object. E.g. if you have the values \"1,2,3,4\" with the frequencies \"10,5,5,3\" and you chose to keep the 2 most common values, the values \"1,2\" would be returned, since the value \"2\" comes before \"3\", even though they have the same frequency.";
    }

    /**
     * Lists the command line options understood by this filter.
     *
     * @return an enumeration of {@link Option} objects
     */
    @Override
    public Enumeration listOptions() {
        Vector<Option> newVector = new Vector<Option>(5);
        newVector.addElement(new Option("\tChoose attribute to be used for selection.", "C", 1, "-C <num>"));
        newVector.addElement(new Option("\tNumber of values to retain for the sepcified attribute, \n\ti.e. the ones with the most instances (default 2).", "N", 1, "-N <num>"));
        newVector.addElement(new Option("\tInstead of values with the most instances the ones with the \n\tleast are retained.\n", "L", 0, "-L"));
        newVector.addElement(new Option("\tWhen selecting on nominal attributes, removes header\n\treferences to excluded values.", "H", 0, "-H"));
        newVector.addElement(new Option("\tInvert matching sense.", "V", 0, "-V"));
        return newVector.elements();
    }

    /**
     * Parses the given option array and configures the filter. Options that are
     * absent are reset to their defaults ({@code -C last}, {@code -N 2}, flags off).
     * If an input format is already set, it is re-applied afterwards.
     *
     * @param options the options to parse
     * @throws Exception if an option cannot be parsed or the input format is rejected
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String attIndex = Utils.getOption('C', options);
        this.setAttributeIndex(attIndex.length() != 0 ? attIndex : "last");

        String numValues = Utils.getOption('N', options);
        this.setNumValues(numValues.length() != 0 ? Integer.parseInt(numValues) : 2);

        this.setUseLeastValues(Utils.getFlag('L', options));
        this.setModifyHeader(Utils.getFlag('H', options));
        this.setInvertSelection(Utils.getFlag('V', options));

        if (this.getInputFormat() != null) {
            this.setInputFormat(this.getInputFormat());
        }
    }

    /**
     * Returns the current configuration as an option array that can be passed
     * back to {@link #setOptions(String[])}. The array always has seven
     * entries; unused trailing entries are empty strings.
     *
     * @return the current option settings
     */
    @Override
    public String[] getOptions() {
        String[] options = new String[7];
        int current = 0;
        options[current++] = "-C";
        options[current++] = this.getAttributeIndex();
        options[current++] = "-N";
        options[current++] = "" + this.getNumValues();
        if (this.getUseLeastValues()) {
            // Must be "-L": this is the flag setOptions() reads for "least values".
            options[current++] = "-L";
        }
        if (this.getModifyHeader()) {
            options[current++] = "-H";
        }
        if (this.getInvertSelection()) {
            options[current++] = "-V";
        }
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    /**
     * Returns the tip text for the attribute index property.
     *
     * @return the tip text
     */
    public String attributeIndexTipText() {
        return "Choose attribute to be used for selection (default last).";
    }

    /**
     * Returns the index of the attribute used for selection.
     *
     * @return the index as a string
     */
    public String getAttributeIndex() {
        return this.m_AttIndex.getSingleIndex();
    }

    /**
     * Sets the index of the attribute used for selection.
     *
     * @param attIndex the index as a string (e.g. "last")
     */
    public void setAttributeIndex(String attIndex) {
        this.m_AttIndex.setSingleIndex(attIndex);
    }

    /**
     * Returns the tip text for the number-of-values property.
     *
     * @return the tip text
     */
    public String numValuesTipText() {
        return "The number of values to retain.";
    }

    /**
     * Returns the number of values to retain.
     *
     * @return the number of values
     */
    public int getNumValues() {
        return this.m_NumValues;
    }

    /**
     * Sets the number of values to retain.
     *
     * @param numValues the number of values
     */
    public void setNumValues(int numValues) {
        this.m_NumValues = numValues;
    }

    /**
     * Returns the tip text for the use-least-values property.
     *
     * @return the tip text
     */
    public String useLeastValuesTipText() {
        return "Retains values with least instance instead of most.";
    }

    /**
     * Returns whether the least frequent values are retained.
     *
     * @return true if the least frequent values are retained
     */
    public boolean getUseLeastValues() {
        return this.m_LeastValues;
    }

    /**
     * Sets whether the least frequent values are retained instead of the most frequent.
     *
     * @param leastValues true to retain the least frequent values
     */
    public void setUseLeastValues(boolean leastValues) {
        this.m_LeastValues = leastValues;
    }

    /**
     * Returns the tip text for the modify-header property.
     *
     * @return the tip text
     */
    public String modifyHeaderTipText() {
        return "When selecting on nominal attributes, removes header references to excluded values.";
    }

    /**
     * Returns whether the output header is modified.
     *
     * @return true if excluded values are removed from the header
     */
    public boolean getModifyHeader() {
        return this.m_ModifyHeader;
    }

    /**
     * Sets whether excluded values are removed from the output header.
     *
     * @param newModifyHeader true to modify the header
     */
    public void setModifyHeader(boolean newModifyHeader) {
        this.m_ModifyHeader = newModifyHeader;
    }

    /**
     * Returns the tip text for the invert-selection property.
     *
     * @return the tip text
     */
    public String invertSelectionTipText() {
        return "Invert matching sense.";
    }

    /**
     * Returns whether the matching sense is inverted.
     *
     * @return true if inverted
     */
    public boolean getInvertSelection() {
        return this.m_Invert;
    }

    /**
     * Sets whether the matching sense is inverted.
     *
     * @param invert true to invert
     */
    public void setInvertSelection(boolean invert) {
        this.m_Invert = invert;
    }

    /**
     * Tells whether the selected attribute of the current input format is nominal.
     *
     * @return false if no input format is set, otherwise whether the attribute is nominal
     */
    public boolean isNominal() {
        if (this.getInputFormat() == null) {
            return false;
        }
        return this.getInputFormat().attribute(this.m_AttIndex.getIndex()).isNominal();
    }

    /**
     * Determines the set of attribute values to retain and stores their labels
     * in {@link #m_Values}.
     *
     * <p>The number of retained values is {@code m_NumValues}, or, when the
     * selection is inverted, the number of attribute values minus
     * {@code m_NumValues}; it is clamped to the range 1..number of values.
     * Values strictly more frequent (or strictly less frequent, with
     * {@code m_LeastValues}) than the boundary frequency are always retained;
     * values tied at the boundary are then added in header order until the
     * quota is reached.
     *
     * @param inst the data to compute the value frequencies from; if null, the
     *             selection is left empty
     */
    public void determineValues(Instances inst) {
        HashSet<String> selected = new HashSet<String>();
        this.m_Values = selected;
        // Check for null before touching the dataset (previously it was dereferenced first).
        if (inst == null) {
            return;
        }
        this.m_AttIndex.setUpper(inst.numAttributes() - 1);
        int attIdx = this.m_AttIndex.getIndex();

        int[] counts = inst.attributeStats(attIdx).nominalCounts;
        if (counts.length == 0) {
            // Attribute declares no values: nothing can be selected.
            return;
        }

        int count = this.m_Invert ? counts.length - this.m_NumValues : this.m_NumValues;
        // At least one value, at most all existing values.
        count = Math.max(1, Math.min(count, counts.length));

        // Sort a copy so that 'counts' stays aligned with the value indices.
        int[] sorted = counts.clone();
        Arrays.sort(sorted);
        // Frequency of the count-th least / count-th most frequent value.
        int boundary = this.m_LeastValues ? sorted[count - 1] : sorted[sorted.length - count];

        Attribute att = inst.attribute(attIdx);
        // Pass 1: values strictly beyond the boundary always belong to the selection
        // (there are always fewer than 'count' of them).
        for (int i = 0; i < counts.length; i++) {
            boolean beyondBoundary = this.m_LeastValues ? counts[i] < boundary : counts[i] > boundary;
            if (beyondBoundary) {
                selected.add(att.value(i));
            }
        }
        // Pass 2: fill the remaining slots with boundary ties, in header order.
        for (int i = 0; i < counts.length && selected.size() < count; i++) {
            if (counts[i] == boundary) {
                selected.add(att.value(i));
            }
        }
    }

    /**
     * Builds an empty copy of the input format in which the selected attribute
     * only declares the retained values, and fills {@link #m_NominalMapping}
     * with the old-index to new-index mapping (-1 for dropped values).
     *
     * @param instanceInfo ignored; the header is always derived from the current input format
     * @return the modified, empty header
     */
    protected Instances modifyHeader(Instances instanceInfo) {
        instanceInfo = new Instances(this.getInputFormat(), 0);
        int attIdx = this.m_AttIndex.getIndex();
        Attribute oldAtt = instanceInfo.attribute(attIdx);

        // Old value indices of the retained labels, in the set's iteration order.
        int[] selection = new int[this.m_Values.size()];
        int pos = 0;
        for (Object value : this.m_Values) {
            selection[pos++] = oldAtt.indexOfValue(value.toString());
        }

        FastVector<String> newVals = new FastVector<String>();
        for (int i = 0; i < selection.length; i++) {
            newVals.addElement(oldAtt.value(selection[i]));
        }

        // Replace the attribute in place with one declaring only the retained values.
        instanceInfo.deleteAttributeAt(attIdx);
        instanceInfo.insertAttributeAt(new Attribute(oldAtt.name(), newVals), attIdx);

        // Dropped values map to -1, retained ones to their position in the new attribute.
        this.m_NominalMapping = new int[oldAtt.numValues()];
        Arrays.fill(this.m_NominalMapping, -1);
        for (int j = 0; j < selection.length; j++) {
            this.m_NominalMapping[selection[j]] = j;
        }
        return instanceInfo;
    }

    /**
     * Returns the capabilities of this filter: all attribute and class types,
     * missing values, missing class values and datasets without a class.
     *
     * @return the capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enableAllAttributes();
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enableAllClasses();
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        result.enable(Capabilities.Capability.NO_CLASS);
        return result;
    }

    /**
     * Sets the format of the input instances and resets the determined values.
     *
     * @param instanceInfo the header of the input data
     * @return always false: the output format is only available after the first batch
     * @throws UnsupportedAttributeTypeException if the selected attribute is not nominal
     * @throws Exception if the superclass rejects the input format
     */
    @Override
    public boolean setInputFormat(Instances instanceInfo) throws Exception {
        super.setInputFormat(instanceInfo);
        this.m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
        if (!this.isNominal()) {
            throw new UnsupportedAttributeTypeException("Can only handle nominal attributes.");
        }
        this.m_Values = null;
        return false;
    }

    /**
     * Sets the output format and pushes every buffered input instance whose
     * value for the selected attribute is among the retained values. When the
     * header is modified, the instance's value index is remapped via
     * {@link #m_NominalMapping} before it is pushed. If no values have been
     * determined yet, the output format is set to null.
     */
    protected void setOutputFormat() {
        if (this.m_Values == null) {
            this.setOutputFormat(null);
            return;
        }
        Instances header = this.getModifyHeader() ? this.modifyHeader(this.getInputFormat()) : new Instances(this.getInputFormat(), 0);
        this.setOutputFormat(header);

        int attIdx = this.m_AttIndex.getIndex();
        Instances buffered = this.getInputFormat();
        for (int i = 0; i < buffered.numInstances(); i++) {
            Instance instance = buffered.instance(i);
            if (!this.m_Values.contains(instance.stringValue(attIdx))) {
                continue;
            }
            if (this.getModifyHeader()) {
                // Translate the old value index into the index used by the reduced header.
                instance.setValue(attIdx, (double)this.m_NominalMapping[(int)instance.value(attIdx)]);
            }
            this.push(instance);
        }
    }

    /**
     * Inputs an instance for filtering. During the first batch the instance is
     * only buffered; once the first batch is done, instances are pushed
     * straight to the output queue.
     *
     * <p>NOTE(review): instances of later batches are pushed as-is here, without
     * the value check or index remapping done in {@link #setOutputFormat()};
     * confirm this is intended.
     *
     * @param instance the input instance
     * @return true if the instance was made available for collection, false if it was buffered
     * @throws IllegalStateException if no input format has been set
     */
    @Override
    public boolean input(Instance instance) {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_NewBatch) {
            this.resetQueue();
            this.m_NewBatch = false;
        }
        if (this.isFirstBatchDone()) {
            this.push(instance);
            return true;
        }
        this.bufferInput(instance);
        return false;
    }

    /**
     * Signals the end of a batch. If the retained values have not been
     * determined yet, they are computed from the buffered input and the
     * matching instances are pushed to the output queue.
     *
     * @return true if there are instances pending output
     * @throws IllegalStateException if no input format has been set
     */
    @Override
    public boolean batchFinished() {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_Values == null) {
            this.determineValues(this.getInputFormat());
            this.setOutputFormat();
        }
        this.flushInput();
        this.m_NewBatch = true;
        this.m_FirstBatchDone = true;
        return this.numPendingOutput() != 0;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 5499 $");
    }

    /**
     * Runs the filter from the command line.
     *
     * @param argv the command line arguments
     */
    public static void main(String[] argv) {
        RemoveFrequentValues.runFilter(new RemoveFrequentValues(), argv);
    }
}

