/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.attribute;

import adams.core.base.BaseRegExp;
import gnu.trove.list.array.TDoubleArrayList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;
import weka.core.Instances;
import weka.core.WekaOptionUtils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.InterquartileRange;

/**
 * Sampling variant of {@link InterquartileRange}: instead of deriving the
 * quartiles from all values of an attribute at once, the values are cut into
 * consecutive samples of {@link #getSampleSize()} values, quartiles/median are
 * computed per sample, and the medians of those per-sample statistics are used
 * to derive the outlier/extreme value thresholds.
 * <p>
 * Attributes whose name matches {@link #getIgnoredAttributes()} are excluded.
 * Attributes for which no sample statistics could be collected (fewer than
 * {@link #getMinSamples()} values) keep the array default of 0.0 for all
 * thresholds.
 */
public class InterquartileRangeSamp
extends InterquartileRange {
    protected static final long serialVersionUID = 3811630774543798261L;

    /** Option name for the sample size. */
    public static final String SAMPLE_SIZE = "sample-size";

    /** Option name for the minimum number of samples. */
    public static final String MIN_SAMPLES = "min-samples";

    /** Option name for the regular expression of ignored attributes. */
    public static final String IGNORED_ATTRIBUTES = "ignored-attributes";

    /** Scratch buffer: position in m_AttributeIndices -> values of the sample currently being filled. */
    protected Hashtable<Integer, TDoubleArrayList> m_AttValues = new Hashtable<Integer, TDoubleArrayList>();

    /** Scratch buffer: position in m_AttributeIndices -> statistics of all completed samples. */
    protected Hashtable<Integer, List<IQRs>> m_IQRs = new Hashtable<Integer, List<IQRs>>();

    /** Number of values per sample. */
    protected int m_SampleSize = this.getDefaultSampleSize();

    /** Minimum number of values a sample needs before statistics are computed for it. */
    protected int m_MinSamples = this.getDefaultMinSamples();

    /** Attributes with a matching name are skipped. */
    protected BaseRegExp m_IgnoredAttributes = this.getDefaultIgnoredAttributes();

    /**
     * Returns a description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "A sampling filter for detecting outliers and extreme values based on "
            + "interquartile ranges. The filter skips the class attribute.\n\n"
            + "Outliers:\n"
            + "  Q3 + OF*IQR < x <= Q3 + EVF*IQR\n"
            + "  or\n"
            + "  Q1 - EVF*IQR <= x < Q1 - OF*IQR\n\n"
            + "Extreme values:\n"
            + "  x > Q3 + EVF*IQR\n"
            + "  or\n"
            + "  x < Q1 - EVF*IQR\n\n"
            + "Key:\n"
            + "  Q1  = 25% quartile\n"
            + "  Q3  = 75% quartile\n"
            + "  IQR = Interquartile Range, difference between Q1 and Q3\n"
            + "  OF  = Outlier Factor\n"
            + "  EVF = Extreme Value Factor";
    }

    /**
     * Returns the default sample size.
     *
     * @return the default (150)
     */
    protected int getDefaultSampleSize() {
        return 150;
    }

    /**
     * Sets the sample size. Values that are not positive are rejected with a
     * message on stderr and the current setting is kept.
     *
     * @param value the number of values per sample, must be &gt; 0
     */
    public void setSampleSize(int value) {
        if (value > 0) {
            this.m_SampleSize = value;
        } else {
            System.err.println("SampleSize must meet >0, provided: " + value);
        }
    }

    /**
     * Returns the sample size.
     *
     * @return the number of values per sample
     */
    public int getSampleSize() {
        return this.m_SampleSize;
    }

    /**
     * Returns the tip text for the sample size property.
     *
     * @return the tip text
     */
    public String sampleSizeTipText() {
        return "The sample size to use.";
    }

    /**
     * Returns the default minimum number of samples.
     *
     * @return the default (5)
     */
    protected int getDefaultMinSamples() {
        return 5;
    }

    /**
     * Sets the minimum number of values required for computing statistics.
     * Values that are not positive are rejected with a message on stderr and
     * the current setting is kept.
     *
     * @param value the minimum, must be &gt; 0
     */
    public void setMinSamples(int value) {
        if (value > 0) {
            this.m_MinSamples = value;
        } else {
            System.err.println("MinSamples must meet >0, provided: " + value);
        }
    }

    /**
     * Returns the minimum number of values required for computing statistics.
     *
     * @return the minimum
     */
    public int getMinSamples() {
        return this.m_MinSamples;
    }

    /**
     * Returns the tip text for the minimum samples property.
     *
     * @return the tip text
     */
    public String minSamplesTipText() {
        return "The minimum number of samples that are required for calculating IQR stats.";
    }

    /**
     * Returns the default expression for ignored attributes.
     *
     * @return the default, matching names that end in "_id"
     */
    protected BaseRegExp getDefaultIgnoredAttributes() {
        return new BaseRegExp("^.*_id$");
    }

    /**
     * Sets the regular expression for attributes to skip.
     *
     * @param value the expression matched against attribute names
     */
    public void setIgnoredAttributes(BaseRegExp value) {
        this.m_IgnoredAttributes = value;
    }

    /**
     * Returns the regular expression for attributes to skip.
     *
     * @return the expression
     */
    public BaseRegExp getIgnoredAttributes() {
        return this.m_IgnoredAttributes;
    }

    /**
     * Returns the tip text for the ignored attributes property.
     *
     * @return the tip text
     */
    public String ignoredAttributesTipText() {
        return "The regular expression for attributes to ignore/skip.";
    }

    /**
     * Lists the options of this filter followed by those of the superclass.
     *
     * @return the enumeration of options
     */
    public Enumeration listOptions() {
        Vector result = new Vector();
        WekaOptionUtils.addOption(result, this.sampleSizeTipText(), "" + this.getDefaultSampleSize(), SAMPLE_SIZE);
        WekaOptionUtils.addOption(result, this.minSamplesTipText(), "" + this.getDefaultMinSamples(), MIN_SAMPLES);
        WekaOptionUtils.addOption(result, this.ignoredAttributesTipText(), "" + this.getDefaultIgnoredAttributes(), IGNORED_ATTRIBUTES);
        WekaOptionUtils.add(result, super.listOptions());
        return WekaOptionUtils.toEnumeration(result);
    }

    /**
     * Parses this filter's options and then hands the array to the superclass.
     * Options that are absent fall back to their defaults.
     *
     * @param options the command-line options
     * @throws Exception if parsing fails
     */
    public void setOptions(String[] options) throws Exception {
        this.setSampleSize(WekaOptionUtils.parse(options, SAMPLE_SIZE, this.getDefaultSampleSize()));
        this.setMinSamples(WekaOptionUtils.parse(options, MIN_SAMPLES, this.getDefaultMinSamples()));
        this.setIgnoredAttributes(new BaseRegExp(WekaOptionUtils.parse(options, IGNORED_ATTRIBUTES, this.getDefaultIgnoredAttributes().getValue())));
        super.setOptions(options);
    }

    /**
     * Returns the current option settings, this filter's options first,
     * followed by those of the superclass.
     *
     * @return the options as array
     */
    public String[] getOptions() {
        ArrayList<String> result = new ArrayList<String>();
        WekaOptionUtils.add(result, SAMPLE_SIZE, this.getSampleSize());
        WekaOptionUtils.add(result, MIN_SAMPLES, this.getMinSamples());
        WekaOptionUtils.add(result, IGNORED_ATTRIBUTES, this.getIgnoredAttributes().getValue());
        WekaOptionUtils.add(result, super.getOptions());
        return WekaOptionUtils.toArray(result);
    }

    /**
     * Computes quartiles, median and maximum of the given sample and appends
     * them to the statistics stored under the key. Samples with fewer than
     * {@link #getMinSamples()} values are silently skipped.
     *
     * @param key the position of the attribute in m_AttributeIndices
     * @param v the values of the sample; not modified
     */
    protected void addIQR(Integer key, TDoubleArrayList v) {
        if (v.size() < this.m_MinSamples) {
            return;
        }
        // toArray() yields a copy, so sorting does not disturb the caller's list
        double[] sorted = v.toArray();
        Arrays.sort(sorted);
        double q3val = this.valueAtPct(sorted, 0.75);
        double q1val = this.valueAtPct(sorted, 0.25);
        double med = this.valueAtPct(sorted, 0.5);
        double max = sorted[sorted.length - 1];

        List<IQRs> stats = this.m_IQRs.get(key);
        if (stats == null) {
            stats = new ArrayList<IQRs>();
            this.m_IQRs.put(key, stats);
        }
        stats.add(new IQRs(q1val, q3val, max, med));
    }

    /**
     * Returns the value at the given fraction of a sorted array, linearly
     * interpolating between the two neighbouring elements when the position
     * {@code pct * length} is not a whole number. Positions outside the array
     * are clamped to the first/last element, so short arrays and
     * {@code pct = 1.0} no longer run past the end of the array.
     *
     * @param sorted_arr the values in ascending order, must not be empty
     * @param pct the fraction, e.g. 0.25 for the first quartile
     * @return the (interpolated) value at that position
     * @throws IllegalArgumentException if the array is empty
     */
    protected double valueAtPct(double[] sorted_arr, double pct) {
        if (sorted_arr.length == 0) {
            throw new IllegalArgumentException("Cannot determine value at " + pct + " for an empty array");
        }
        int last = sorted_arr.length - 1;
        double qindex = pct * (double)sorted_arr.length;
        int iqindex = (int)Math.floor(qindex);
        if (iqindex < 0) {
            return sorted_arr[0];
        }
        if (iqindex >= last) {
            // no upper neighbour left to interpolate with
            return sorted_arr[last];
        }
        if ((double)iqindex == qindex) {
            return sorted_arr[iqindex];
        }
        double lower = sorted_arr[iqindex];
        double upper = sorted_arr[iqindex + 1];
        double fraction = qindex - (double)iqindex;
        return lower + (upper - lower) * fraction;
    }

    /**
     * Handles attributes that never filled a complete sample: for every
     * attribute without any stored statistics, the values still sitting in its
     * buffer are offered to {@link #addIQR(Integer, TDoubleArrayList)}.
     * Left-over values of attributes that already have statistics are dropped.
     */
    protected void clearRemainder() {
        for (Integer key : this.m_AttValues.keySet()) {
            if (this.m_IQRs.get(key) != null) {
                continue;
            }
            this.addIQR(key, this.m_AttValues.get(key));
        }
    }

    /**
     * Computes median, IQR and the outlier/extreme value thresholds for all
     * attributes referenced by m_AttributeIndices. Entries of -1 are skipped;
     * attributes matching the ignore expression are set to -1 and skipped too.
     * The threshold arrays are indexed by position in m_AttributeIndices.
     *
     * @param instances the data to compute the thresholds from
     */
    protected void computeThresholds(Instances instances) {
        int numSelected = this.m_AttributeIndices.length;
        this.m_UpperExtremeValue = new double[numSelected];
        this.m_UpperOutlier = new double[numSelected];
        this.m_LowerOutlier = new double[numSelected];
        this.m_LowerExtremeValue = new double[numSelected];
        this.m_Median = new double[numSelected];
        this.m_IQR = new double[numSelected];

        // phase 1: cut each attribute into samples and collect per-sample statistics
        for (int i = 0; i < numSelected; ++i) {
            int attIndex = this.m_AttributeIndices[i];
            if (attIndex == -1) {
                if (this.getDebug()) {
                    // NOTE(review): the real attribute index has been replaced by -1, so the
                    // name is looked up by position; presumably only exact when position and
                    // attribute index coincide - confirm against the superclass.
                    System.out.println("Skipping non-numeric attribute: " + instances.attribute(i).name());
                }
                continue;
            }
            // use the same index for the name as for the values below
            String name = instances.attribute(attIndex).name();
            if (this.m_IgnoredAttributes.isMatch(name)) {
                this.m_AttributeIndices[i] = -1;
                if (this.getDebug()) {
                    System.out.println("Ignored attribute: " + name);
                }
                continue;
            }
            double[] values = instances.attributeToDoubleArray(attIndex);
            TDoubleArrayList sample = new TDoubleArrayList();
            this.m_AttValues.put(i, sample);
            for (int j = 0; j < values.length; ++j) {
                sample.add(values[j]);
                if (sample.size() == this.m_SampleSize) {
                    this.addIQR(i, sample);
                    sample.clear();
                }
            }
        }
        // one pass is enough: buffers of finished attributes are not touched again
        this.clearRemainder();

        // phase 2: combine the per-sample statistics into the thresholds
        for (Integer key : this.m_IQRs.keySet()) {
            List<IQRs> stats = this.m_IQRs.get(key);
            int count = stats.size();
            if (count == 0) {
                continue;
            }
            double[] q1s = new double[count];
            double[] q3s = new double[count];
            double[] meds = new double[count];
            for (int k = 0; k < count; ++k) {
                IQRs iqrs = stats.get(k);
                q1s[k] = iqrs.quartile1;
                q3s[k] = iqrs.quartile3;
                meds[k] = iqrs.median;
            }
            Arrays.sort(q1s);
            Arrays.sort(q3s);
            Arrays.sort(meds);

            double q1;
            double q2;
            double q3;
            if (count > 1) {
                q3 = this.valueAtPct(q3s, 0.5);
                q1 = this.valueAtPct(q1s, 0.5);
                q2 = this.valueAtPct(meds, 0.5);
            } else {
                q3 = q3s[0];
                q1 = q1s[0];
                q2 = meds[0];
            }

            int idx = key.intValue();
            double iqr = q3 - q1;
            this.m_Median[idx] = q2;
            this.m_IQR[idx] = iqr;
            this.m_UpperExtremeValue[idx] = q3 + this.getExtremeValuesFactor() * iqr;
            this.m_UpperOutlier[idx] = q3 + this.getOutlierFactor() * iqr;
            this.m_LowerOutlier[idx] = q1 - this.getOutlierFactor() * iqr;
            this.m_LowerExtremeValue[idx] = q1 - this.getExtremeValuesFactor() * iqr;
        }

        // release the scratch buffers
        this.m_AttValues = new Hashtable<Integer, TDoubleArrayList>();
        this.m_IQRs = new Hashtable<Integer, List<IQRs>>();
    }

    /**
     * Runs the filter from the command line.
     *
     * @param args the command-line options
     */
    public static void main(String[] args) {
        InterquartileRangeSamp.runFilter((Filter)new InterquartileRangeSamp(), args);
    }

    /**
     * Container for the statistics of a single sample.
     */
    public static class IQRs {
        /** The 25% quartile of the sample. */
        public double quartile1;

        /** The median of the sample. */
        public double median;

        /** The 75% quartile of the sample. */
        public double quartile3;

        /** The largest value of the sample. */
        public double maxval;

        /**
         * Initializes the container. Note the argument order: the median
         * comes last.
         *
         * @param q1 the 25% quartile
         * @param q3 the 75% quartile
         * @param mval the maximum value
         * @param med the median
         */
        public IQRs(double q1, double q3, double mval, double med) {
            this.quartile1 = q1;
            this.quartile3 = q3;
            this.maxval = mval;
            this.median = med;
        }
    }
}

