/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.attribute;

import java.util.Enumeration;
import java.util.Vector;
import weka.core.AbstractInstance;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.DenseInstance;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.filters.UnsupervisedFilter;
import weka.filters.unsupervised.attribute.PotentialClassIgnorer;

/**
 * Unsupervised instance filter that turns numeric attributes into nominal ones
 * by simple binning. Three strategies are available: equal-width binning (the
 * default), equal-frequency binning, and equal-width binning where the number
 * of bins is chosen by a leave-one-out entropy estimate. The class attribute,
 * if one is set, is never discretized.
 *
 * <p>Cut points are learned from the first batch of instances; once they are
 * known, further instances are converted immediately as they are input.
 */
public class Discretize
extends PotentialClassIgnorer
implements UnsupervisedFilter,
WeightedInstancesHandler {
    static final long serialVersionUID = -1358531742174527279L;

    /** Selection of the attribute indices that should be discretized. */
    protected Range m_DiscretizeCols = new Range();

    /** (Maximum) number of bins to split each numeric attribute into. */
    protected int m_NumBins = 10;

    /** Desired weight of instances per bin for equal-frequency binning; non-positive means "use m_NumBins". */
    protected double m_DesiredWeightOfInstancesPerInterval = -1.0;

    /** Cut points per input attribute; an entry is null when the attribute has no cut points. */
    protected double[][] m_CutPoints = null;

    /** Whether each discretized attribute is emitted as a set of binary attributes. */
    protected boolean m_MakeBinary = false;

    /** Whether the number of equal-width bins is optimized via leave-one-out. */
    protected boolean m_FindNumBins = false;

    /** Whether equal-frequency binning is used instead of equal-width binning. */
    protected boolean m_UseEqualFrequency = false;

    /** Attribute range that is applied when no -R option is supplied. */
    protected String m_DefaultCols;

    /**
     * Creates a filter that acts on all attributes ("first-last").
     */
    public Discretize() {
        this.m_DefaultCols = "first-last";
        this.setAttributeIndices("first-last");
    }

    /**
     * Creates a filter that acts on the given attribute range by default.
     *
     * @param cols the default range of attribute indices, e.g. "first-3,5"
     */
    public Discretize(String cols) {
        this.m_DefaultCols = cols;
        this.setAttributeIndices(cols);
    }

    /**
     * Lists the command-line options of the superclass followed by the
     * options specific to this filter.
     *
     * @return an enumeration of the available options
     */
    @Override
    public Enumeration listOptions() {
        Vector<Object> result = new Vector<Object>();
        Enumeration enm = super.listOptions();
        while (enm.hasMoreElements()) {
            result.add(enm.nextElement());
        }
        result.addElement(new Option("\tSpecifies the (maximum) number of bins to divide numeric attributes into.\n\t(default = 10)", "B", 1, "-B <num>"));
        result.addElement(new Option("\tSpecifies the desired weight of instances per bin for\n\tequal-frequency binning. If this is set to a positive\n\tnumber then the -B option will be ignored.\n\t(default = -1)", "M", 1, "-M <num>"));
        result.addElement(new Option("\tUse equal-frequency instead of equal-width discretization.", "F", 0, "-F"));
        result.addElement(new Option("\tOptimize number of bins using leave-one-out estimate\n\tof estimated entropy (for equal-width discretization).\n\tIf this is set then the -B option will be ignored.", "O", 0, "-O"));
        result.addElement(new Option("\tSpecifies list of columns to Discretize. First and last are valid indexes.\n\t(default: first-last)", "R", 1, "-R <col1,col2-col4,...>"));
        result.addElement(new Option("\tInvert matching sense of column indexes.", "V", 0, "-V"));
        result.addElement(new Option("\tOutput binary attributes for discretized attributes.", "D", 0, "-D"));
        return result.elements();
    }

    /**
     * Parses the given options. Flags that are absent are switched off and
     * valued options that are absent fall back to their defaults (-M -1,
     * -B 10, -R the default column range). If an input format has already
     * been set it is re-applied so that the new settings take effect.
     *
     * @param options the option list as an array of strings
     * @throws Exception if an option is not supported or cannot be parsed
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        super.setOptions(options);
        this.setMakeBinary(Utils.getFlag('D', options));
        this.setUseEqualFrequency(Utils.getFlag('F', options));
        this.setFindNumBins(Utils.getFlag('O', options));
        this.setInvertSelection(Utils.getFlag('V', options));

        String weight = Utils.getOption('M', options);
        if (weight.length() != 0) {
            this.setDesiredWeightOfInstancesPerInterval(Double.parseDouble(weight));
        } else {
            this.setDesiredWeightOfInstancesPerInterval(-1.0);
        }

        String numBins = Utils.getOption('B', options);
        if (numBins.length() != 0) {
            this.setBins(Integer.parseInt(numBins));
        } else {
            this.setBins(10);
        }

        String convertList = Utils.getOption('R', options);
        if (convertList.length() != 0) {
            this.setAttributeIndices(convertList);
        } else {
            this.setAttributeIndices(this.m_DefaultCols);
        }

        if (this.getInputFormat() != null) {
            this.setInputFormat(this.getInputFormat());
        }
    }

    /**
     * Returns the current settings as an option array: the superclass
     * options, then the flags that are switched on, then -B, -M and (when the
     * range is non-empty) -R.
     *
     * @return the options describing the current configuration
     */
    @Override
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();
        for (String option : super.getOptions()) {
            result.add(option);
        }
        if (this.getMakeBinary()) {
            result.add("-D");
        }
        if (this.getUseEqualFrequency()) {
            result.add("-F");
        }
        if (this.getFindNumBins()) {
            result.add("-O");
        }
        if (this.getInvertSelection()) {
            result.add("-V");
        }
        result.add("-B");
        result.add("" + this.getBins());
        result.add("-M");
        result.add("" + this.getDesiredWeightOfInstancesPerInterval());
        if (!this.getAttributeIndices().equals("")) {
            result.add("-R");
            result.add(this.getAttributeIndices());
        }
        return result.toArray(new String[result.size()]);
    }

    /**
     * Returns the capabilities of this filter: all attribute and class types
     * including missing values. Data without a class is only accepted when
     * binary output is switched off.
     *
     * @return the capabilities of this filter
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enableAllAttributes();
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enableAllClasses();
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        if (!this.getMakeBinary()) {
            result.enable(Capabilities.Capability.NO_CLASS);
        }
        return result;
    }

    /**
     * Sets the format of the input instances and discards any previously
     * computed cut points.
     *
     * @param instanceInfo the structure of the incoming instances
     * @return always false: the output format is only known after the first
     *         batch has been processed
     * @throws IllegalArgumentException if binary output is combined with
     *         ignoring the class, or bin-number optimization is combined with
     *         equal-frequency binning
     * @throws Exception if the superclass rejects the input format
     */
    @Override
    public boolean setInputFormat(Instances instanceInfo) throws Exception {
        if (this.m_MakeBinary && this.m_IgnoreClass) {
            throw new IllegalArgumentException("Can't ignore class when changing the number of attributes!");
        }
        super.setInputFormat(instanceInfo);
        this.m_DiscretizeCols.setUpper(instanceInfo.numAttributes() - 1);
        this.m_CutPoints = null;
        if (this.getFindNumBins() && this.getUseEqualFrequency()) {
            throw new IllegalArgumentException("Bin number optimization in conjunction with equal-frequency binning not implemented.");
        }
        return false;
    }

    /**
     * Inputs an instance. While no cut points exist the instance is only
     * buffered; afterwards it is converted and queued for output right away.
     *
     * @param instance the instance to process
     * @return true if the converted instance can be collected from the output
     * @throws IllegalStateException if no input format has been defined
     */
    @Override
    public boolean input(Instance instance) {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_NewBatch) {
            this.resetQueue();
            this.m_NewBatch = false;
        }
        if (this.m_CutPoints != null) {
            this.convertInstance(instance);
            return true;
        }
        this.bufferInput(instance);
        return false;
    }

    /**
     * Signals the end of a batch. On the first batch this computes the cut
     * points, fixes the output format and converts every buffered instance.
     *
     * @return true if there are instances pending output
     * @throws IllegalStateException if no input format has been defined
     */
    @Override
    public boolean batchFinished() {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_CutPoints == null) {
            this.calculateCutPoints();
            this.setOutputFormat();
            for (int i = 0; i < this.getInputFormat().numInstances(); i++) {
                this.convertInstance(this.getInputFormat().instance(i));
            }
        }
        this.flushInput();
        this.m_NewBatch = true;
        return this.numPendingOutput() != 0;
    }

    /**
     * Returns a short description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "An instance filter that discretizes a range of numeric attributes in the dataset into nominal attributes. Discretization is by simple binning. Skips the class attribute if set.";
    }

    /**
     * Returns the tip text for the findNumBins property.
     *
     * @return the tip text
     */
    public String findNumBinsTipText() {
        return "Optimize number of equal-width bins using leave-one-out. Doesn't work for equal-frequency binning";
    }

    /**
     * @return true if the number of bins is optimized via leave-one-out
     */
    public boolean getFindNumBins() {
        return this.m_FindNumBins;
    }

    /**
     * @param newFindNumBins true if the number of bins should be optimized
     */
    public void setFindNumBins(boolean newFindNumBins) {
        this.m_FindNumBins = newFindNumBins;
    }

    /**
     * Returns the tip text for the makeBinary property.
     *
     * @return the tip text
     */
    public String makeBinaryTipText() {
        return "Make resulting attributes binary.";
    }

    /**
     * @return true if discretized attributes are output as binary attributes
     */
    public boolean getMakeBinary() {
        return this.m_MakeBinary;
    }

    /**
     * @param makeBinary true if discretized attributes should be output as
     *        binary attributes
     */
    public void setMakeBinary(boolean makeBinary) {
        this.m_MakeBinary = makeBinary;
    }

    /**
     * Returns the tip text for the desiredWeightOfInstancesPerInterval property.
     *
     * @return the tip text
     */
    public String desiredWeightOfInstancesPerIntervalTipText() {
        return "Sets the desired weight of instances per interval for equal-frequency binning.";
    }

    /**
     * @return the desired weight of instances per interval; a non-positive
     *         value means the bin count is used instead
     */
    public double getDesiredWeightOfInstancesPerInterval() {
        return this.m_DesiredWeightOfInstancesPerInterval;
    }

    /**
     * @param newDesiredNumber the desired weight of instances per interval
     */
    public void setDesiredWeightOfInstancesPerInterval(double newDesiredNumber) {
        this.m_DesiredWeightOfInstancesPerInterval = newDesiredNumber;
    }

    /**
     * Returns the tip text for the useEqualFrequency property.
     *
     * @return the tip text
     */
    public String useEqualFrequencyTipText() {
        return "If set to true, equal-frequency binning will be used instead of equal-width binning.";
    }

    /**
     * @return true if equal-frequency binning is used
     */
    public boolean getUseEqualFrequency() {
        return this.m_UseEqualFrequency;
    }

    /**
     * @param newUseEqualFrequency true if equal-frequency binning should be used
     */
    public void setUseEqualFrequency(boolean newUseEqualFrequency) {
        this.m_UseEqualFrequency = newUseEqualFrequency;
    }

    /**
     * Returns the tip text for the bins property.
     *
     * @return the tip text
     */
    public String binsTipText() {
        return "Number of bins.";
    }

    /**
     * @return the (maximum) number of bins
     */
    public int getBins() {
        return this.m_NumBins;
    }

    /**
     * @param numBins the (maximum) number of bins
     */
    public void setBins(int numBins) {
        this.m_NumBins = numBins;
    }

    /**
     * Returns the tip text for the invertSelection property.
     *
     * @return the tip text
     */
    public String invertSelectionTipText() {
        return "Set attribute selection mode. If false, only selected (numeric) attributes in the range will be discretized; if true, only non-selected attributes will be discretized.";
    }

    /**
     * @return true if the attribute selection is inverted
     */
    public boolean getInvertSelection() {
        return this.m_DiscretizeCols.getInvert();
    }

    /**
     * @param invert true if the attribute selection should be inverted
     */
    public void setInvertSelection(boolean invert) {
        this.m_DiscretizeCols.setInvert(invert);
    }

    /**
     * Returns the tip text for the attributeIndices property.
     *
     * @return the tip text
     */
    public String attributeIndicesTipText() {
        return "Specify range of attributes to act on. This is a comma separated list of attribute indices, with \"first\" and \"last\" valid values. Specify an inclusive range with \"-\". E.g: \"first-3,5,6-10,last\".";
    }

    /**
     * @return the current attribute range as a string
     */
    public String getAttributeIndices() {
        return this.m_DiscretizeCols.getRanges();
    }

    /**
     * @param rangeList the attribute range to act on, e.g. "first-3,5,6-last"
     */
    public void setAttributeIndices(String rangeList) {
        this.m_DiscretizeCols.setRanges(rangeList);
    }

    /**
     * Sets the attributes to act on from an array of indices, which is
     * converted to a range list via {@code Range.indicesToRangeList}.
     *
     * @param attributes the attribute indices to act on
     */
    public void setAttributeIndicesArray(int[] attributes) {
        this.setAttributeIndices(Range.indicesToRangeList(attributes));
    }

    /**
     * Returns the cut points computed for an attribute. The returned array is
     * the filter's internal array, not a copy.
     *
     * @param attributeIndex the index of the attribute in the input format
     * @return the cut points, or null if no cut points have been computed yet
     *         or the attribute has none
     */
    public double[] getCutPoints(int attributeIndex) {
        if (this.m_CutPoints == null) {
            return null;
        }
        return this.m_CutPoints[attributeIndex];
    }

    /**
     * Tells whether the attribute at the given input index is discretized:
     * it must be selected by the column range, be numeric, and not be the
     * class attribute.
     */
    private boolean shouldDiscretize(int attIndex) {
        return this.m_DiscretizeCols.isInRange(attIndex)
            && this.getInputFormat().attribute(attIndex).isNumeric()
            && this.getInputFormat().classIndex() != attIndex;
    }

    /**
     * Computes the cut points of every attribute that is to be discretized,
     * using the configured binning strategy. Bin-number optimization takes
     * precedence over the equal-width / equal-frequency choice.
     */
    protected void calculateCutPoints() {
        this.m_CutPoints = new double[this.getInputFormat().numAttributes()][];
        for (int i = this.getInputFormat().numAttributes() - 1; i >= 0; i--) {
            if (!this.shouldDiscretize(i)) {
                continue;
            }
            if (this.m_FindNumBins) {
                this.findNumBins(i);
            } else if (!this.m_UseEqualFrequency) {
                this.calculateCutPointsByEqualWidthBinning(i);
            } else {
                this.calculateCutPointsByEqualFrequencyBinning(i);
            }
        }
    }

    /**
     * Computes cut points that split the observed value range of an attribute
     * into {@code m_NumBins} bins of equal width. No cut points (null) are
     * stored when fewer than two bins are requested or the range is empty.
     *
     * @param index the index of the attribute to process
     */
    protected void calculateCutPointsByEqualWidthBinning(int index) {
        Instances data = this.getInputFormat();
        // max < min acts as the "no value seen yet" marker; the first
        // non-missing value initializes both bounds.
        double max = 0.0;
        double min = 1.0;
        for (int i = 0; i < data.numInstances(); i++) {
            Instance currentInstance = data.instance(i);
            if (currentInstance.isMissing(index)) {
                continue;
            }
            double currentVal = currentInstance.value(index);
            if (max < min) {
                max = min = currentVal;
            }
            if (currentVal > max) {
                max = currentVal;
            }
            if (currentVal < min) {
                min = currentVal;
            }
        }

        double binWidth = (max - min) / (double)this.m_NumBins;
        double[] cutPoints = null;
        if (this.m_NumBins > 1 && binWidth > 0.0) {
            cutPoints = new double[this.m_NumBins - 1];
            for (int i = 1; i < this.m_NumBins; i++) {
                cutPoints[i - 1] = min + binWidth * (double)i;
            }
        }
        this.m_CutPoints[index] = cutPoints;
    }

    /**
     * Computes cut points such that each bin holds roughly the same weight of
     * instances. The target weight per bin is either the desired weight (if
     * positive) or the total weight divided by {@code m_NumBins}. Cut points
     * are only placed between two distinct consecutive values. No cut points
     * (null) are stored when none could be placed, or when the bin count
     * drives the layout and fewer than two bins are requested.
     *
     * @param index the index of the attribute to process
     */
    protected void calculateCutPointsByEqualFrequencyBinning(int index) {
        boolean useDesiredWeight = this.getDesiredWeightOfInstancesPerInterval() > 0.0;
        if (!useDesiredWeight && this.m_NumBins < 2) {
            // A single bin needs no cut points; this also avoids allocating
            // an array of negative size for non-positive bin counts.
            this.m_CutPoints[index] = null;
            return;
        }

        // Work on a sorted copy so the buffered input keeps its order.
        Instances data = new Instances(this.getInputFormat());
        data.sort(index);

        // Total weight of the non-missing values; the loop stops at the first
        // missing value (presumably sort() places missing values last — the
        // original code relies on the same assumption).
        double sumOfWeights = 0.0;
        for (int i = 0; i < data.numInstances(); i++) {
            if (data.instance(i).isMissing(index)) {
                break;
            }
            sumOfWeights += data.instance(i).weight();
        }

        double freq;
        double[] cutPoints;
        if (useDesiredWeight) {
            freq = this.getDesiredWeightOfInstancesPerInterval();
            cutPoints = new double[(int)(sumOfWeights / freq)];
        } else {
            freq = sumOfWeights / (double)this.m_NumBins;
            cutPoints = new double[this.m_NumBins - 1];
        }

        double counter = 0.0;   // weight accumulated in the current bin
        double last = 0.0;      // weight accumulated up to the last possible cut
        int cpindex = 0;        // number of cut points placed so far
        int lastIndex = -1;     // instance index of the last possible cut, -1 if none
        // Stop as soon as every slot is filled: a further cut could not be
        // stored and nothing computed afterwards is used.
        for (int i = 0; i < data.numInstances() - 1 && cpindex < cutPoints.length; i++) {
            if (data.instance(i).isMissing(index)) {
                break;
            }
            counter += data.instance(i).weight();
            sumOfWeights -= data.instance(i).weight();

            // A cut is only possible between two different values.
            if (data.instance(i).value(index) < data.instance(i + 1).value(index)) {
                if (counter >= freq) {
                    if (freq - last < counter - freq && lastIndex != -1) {
                        // The previous candidate is closer to the target weight.
                        cutPoints[cpindex] = (data.instance(lastIndex).value(index) + data.instance(lastIndex + 1).value(index)) / 2.0;
                        counter -= last;
                        last = counter;
                        lastIndex = i;
                    } else {
                        cutPoints[cpindex] = (data.instance(i).value(index) + data.instance(i + 1).value(index)) / 2.0;
                        counter = 0.0;
                        last = 0.0;
                        lastIndex = -1;
                    }
                    ++cpindex;
                    // Spread the remaining weight over the remaining bins.
                    freq = (sumOfWeights + counter) / (double)(cutPoints.length + 1 - cpindex);
                } else {
                    lastIndex = i;
                    last = counter;
                }
            }
        }

        // Use the last candidate if there is still room for a cut point.
        if (cpindex < cutPoints.length && lastIndex != -1) {
            cutPoints[cpindex] = (data.instance(lastIndex).value(index) + data.instance(lastIndex + 1).value(index)) / 2.0;
            ++cpindex;
        }

        if (cpindex == 0) {
            this.m_CutPoints[index] = null;
        } else {
            // Trim the array to the cut points actually placed.
            double[] cp = new double[cpindex];
            System.arraycopy(cutPoints, 0, cp, 0, cpindex);
            this.m_CutPoints[index] = cp;
        }
    }

    /**
     * Chooses the number of equal-width bins (between 1 and {@code m_NumBins})
     * that minimizes a leave-one-out entropy estimate and stores the matching
     * cut points. A candidate is rejected when any of its bins holds a weight
     * below 2. No cut points (null) are stored when a single bin is best or
     * the value range is empty.
     *
     * @param index the index of the attribute to process
     */
    protected void findNumBins(int index) {
        Instances data = this.getInputFormat();
        double min = Double.MAX_VALUE;
        double max = -Double.MAX_VALUE;
        double bestEntropy = Double.MAX_VALUE;
        int bestNumBins = 1;

        // Determine the observed value range.
        for (int i = 0; i < data.numInstances(); i++) {
            Instance currentInstance = data.instance(i);
            if (currentInstance.isMissing(index)) {
                continue;
            }
            double currentVal = currentInstance.value(index);
            if (currentVal > max) {
                max = currentVal;
            }
            if (currentVal < min) {
                min = currentVal;
            }
        }

        // Evaluate every candidate bin count.
        for (int numBins = 1; numBins <= this.m_NumBins; numBins++) {
            double[] distribution = new double[numBins];
            double candidateWidth = (max - min) / (double)numBins;

            // Weight that falls into each bin.
            for (int j = 0; j < data.numInstances(); j++) {
                Instance currentInstance = data.instance(j);
                if (currentInstance.isMissing(index)) {
                    continue;
                }
                for (int k = 0; k < numBins; k++) {
                    if (currentInstance.value(index) <= min + ((double)k + 1.0) * candidateWidth) {
                        distribution[k] = distribution[k] + currentInstance.weight();
                        break;
                    }
                }
            }

            double entropy = 0.0;
            for (int k = 0; k < numBins; k++) {
                if (distribution[k] < 2.0) {
                    // Leave-one-out is undefined for such a sparse bin.
                    entropy = Double.MAX_VALUE;
                    break;
                }
                entropy -= distribution[k] * Math.log((distribution[k] - 1.0) / candidateWidth);
            }

            if (entropy < bestEntropy) {
                bestEntropy = entropy;
                bestNumBins = numBins;
            }
        }

        // The cut points must use the width that belongs to the selected bin
        // count, not the width of the last candidate that was evaluated.
        double binWidth = (max - min) / (double)bestNumBins;
        double[] cutPoints = null;
        if (bestNumBins > 1 && binWidth > 0.0) {
            cutPoints = new double[bestNumBins - 1];
            for (int i = 1; i < bestNumBins; i++) {
                cutPoints[i - 1] = min + binWidth * (double)i;
            }
        }
        this.m_CutPoints[index] = cutPoints;
    }

    /**
     * Builds the output format from the computed cut points. A discretized
     * attribute becomes either one nominal attribute with a label per bin or,
     * in binary mode, one two-valued attribute per cut point (named
     * {@code <name>_<n>}); an attribute without cut points becomes a nominal
     * attribute with the single label 'All'. All other attributes are copied.
     * The class index is shifted to account for attributes inserted before it.
     */
    protected void setOutputFormat() {
        if (this.m_CutPoints == null) {
            this.setOutputFormat(null);
            return;
        }
        Instances input = this.getInputFormat();
        FastVector<Attribute> attributes = new FastVector<Attribute>(input.numAttributes());
        int classIndex = input.classIndex();
        for (int i = 0; i < input.numAttributes(); i++) {
            if (!this.shouldDiscretize(i)) {
                attributes.addElement((Attribute)input.attribute(i).copy());
                continue;
            }
            double[] cuts = this.m_CutPoints[i];
            if (!this.m_MakeBinary) {
                FastVector<String> attribValues = new FastVector<String>(1);
                if (cuts == null) {
                    attribValues.addElement("'All'");
                } else {
                    for (int j = 0; j <= cuts.length; j++) {
                        if (j == 0) {
                            attribValues.addElement("'(-inf-" + Utils.doubleToString(cuts[j], 6) + "]'");
                        } else if (j == cuts.length) {
                            attribValues.addElement("'(" + Utils.doubleToString(cuts[j - 1], 6) + "-inf)'");
                        } else {
                            attribValues.addElement("'(" + Utils.doubleToString(cuts[j - 1], 6) + "-" + Utils.doubleToString(cuts[j], 6) + "]'");
                        }
                    }
                }
                attributes.addElement(new Attribute(input.attribute(i).name(), attribValues));
            } else if (cuts == null) {
                FastVector<String> attribValues = new FastVector<String>(1);
                attribValues.addElement("'All'");
                attributes.addElement(new Attribute(input.attribute(i).name(), attribValues));
            } else {
                // One attribute is replaced by cuts.length attributes, so a
                // class located after it moves by cuts.length - 1 positions.
                if (i < input.classIndex()) {
                    classIndex += cuts.length - 1;
                }
                for (int j = 0; j < cuts.length; j++) {
                    FastVector<String> attribValues = new FastVector<String>(2);
                    attribValues.addElement("'(-inf-" + Utils.doubleToString(cuts[j], 6) + "]'");
                    attribValues.addElement("'(" + Utils.doubleToString(cuts[j], 6) + "-inf)'");
                    attributes.addElement(new Attribute(input.attribute(i).name() + "_" + (j + 1), attribValues));
                }
            }
        }
        Instances outputFormat = new Instances(input.relationName(), attributes, 0);
        outputFormat.setClassIndex(classIndex);
        this.setOutputFormat(outputFormat);
    }

    /**
     * Converts an instance to the output format and pushes it onto the output
     * queue. Values of discretized attributes are replaced by the index of
     * the bin they fall into (or by one 0/1 value per cut point in binary
     * mode); missing values stay missing; other values are copied.
     *
     * @param instance the instance to convert
     */
    protected void convertInstance(Instance instance) {
        int index = 0;
        double[] vals = new double[this.outputFormatPeek().numAttributes()];
        for (int i = 0; i < this.getInputFormat().numAttributes(); i++) {
            if (!this.shouldDiscretize(i)) {
                vals[index] = instance.value(i);
                ++index;
                continue;
            }
            double currentVal = instance.value(i);
            double[] cuts = this.m_CutPoints[i];
            if (cuts == null) {
                // Single bin: every non-missing value maps to label 0 ('All').
                vals[index] = instance.isMissing(i) ? Utils.missingValue() : 0.0;
                ++index;
            } else if (!this.m_MakeBinary) {
                if (instance.isMissing(i)) {
                    vals[index] = Utils.missingValue();
                } else {
                    // Index of the first cut point that is not below the value.
                    int j;
                    for (j = 0; j < cuts.length; j++) {
                        if (currentVal <= cuts[j]) {
                            break;
                        }
                    }
                    vals[index] = j;
                }
                ++index;
            } else {
                for (int j = 0; j < cuts.length; j++) {
                    if (instance.isMissing(i)) {
                        vals[index] = Utils.missingValue();
                    } else {
                        vals[index] = currentVal <= cuts[j] ? 0.0 : 1.0;
                    }
                    ++index;
                }
            }
        }

        Instance inst;
        if (instance instanceof SparseInstance) {
            inst = new SparseInstance(instance.weight(), vals);
        } else {
            inst = new DenseInstance(instance.weight(), vals);
        }
        inst.setDataset(this.getOutputFormat());
        this.copyValues(inst, false, instance.dataset(), this.getOutputFormat());
        inst.setDataset(this.getOutputFormat());
        this.push(inst);
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 6567 $");
    }

    /**
     * Runs the filter from the command line.
     *
     * @param argv the command-line options
     */
    public static void main(String[] argv) {
        Discretize.runFilter(new Discretize(), argv);
    }
}

