/*
 * Decompiled with CFR 0.152.
 */
package adams.tools;

import adams.core.Index;
import adams.core.Range;
import adams.core.io.PlaceholderFile;
import adams.data.io.output.CsvSpreadSheetWriter;
import adams.data.spreadsheet.DefaultSpreadSheet;
import adams.data.spreadsheet.HeaderRow;
import adams.data.spreadsheet.SpreadSheet;
import adams.data.statistics.StatUtils;
import adams.tools.AbstractTool;
import adams.tools.OutputFileGenerator;
import java.util.Hashtable;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils;

public class CompareDatasets
extends AbstractTool
implements OutputFileGenerator {
    private static final long serialVersionUID = -1399473007795695271L;
    protected PlaceholderFile m_Dataset1;
    protected Range m_Range1;
    protected Index m_RowAttribute1;
    protected PlaceholderFile m_Dataset2;
    protected Range m_Range2;
    protected Index m_RowAttribute2;
    protected PlaceholderFile m_OutputFile;
    protected PlaceholderFile m_Missing;
    protected Instances m_Data1;
    protected Instances m_Data2;
    protected Boolean m_UseRowAttribute;
    protected boolean m_RowAttributeIsString;
    protected int[] m_Indices1;
    protected int[] m_Indices2;
    protected Hashtable<String, Integer> m_Lookup2;
    protected double m_Threshold;

    public String globalInfo() {
        return "Compares two datasets, either row-by-row or using a row attribute listing a unique ID for matching the rows, outputting the correlation coefficient of the numeric attributes found in the ranges defined by the user.\nIn order to trim down the number of generated rows, a threshold can be specified. Only rows are output which correlation coefficient is below that threshold.";
    }

    public void defineOptions() {
        super.defineOptions();
        this.m_OptionManager.add("dataset1", "dataset1", (Object)new PlaceholderFile("."));
        this.m_OptionManager.add("range1", "range1", (Object)new Range("first-last"));
        this.m_OptionManager.add("row1", "rowAttribute1", (Object)"");
        this.m_OptionManager.add("dataset2", "dataset2", (Object)new PlaceholderFile("."));
        this.m_OptionManager.add("range2", "range2", (Object)new Range("first-last"));
        this.m_OptionManager.add("row2", "rowAttribute2", (Object)"");
        this.m_OptionManager.add("output", "outputFile", (Object)new PlaceholderFile("output.csv"));
        this.m_OptionManager.add("missing", "missing", (Object)new PlaceholderFile("missing.csv"));
        this.m_OptionManager.add("threshold", "threshold", (Object)0.0, (Number)0.0, (Number)1.0);
    }

    protected void initialize() {
        super.initialize();
        this.m_Range1 = new Range();
        this.m_Range2 = new Range();
        this.m_RowAttribute1 = new Index();
        this.m_RowAttribute2 = new Index();
    }

    public void setDataset1(PlaceholderFile value) {
        this.m_Dataset1 = value;
        this.reset();
    }

    public PlaceholderFile getDataset1() {
        return this.m_Dataset1;
    }

    public String dataset1TipText() {
        return "The first dataset in the comparison.";
    }

    public void setDataset2(PlaceholderFile value) {
        this.m_Dataset2 = value;
        this.reset();
    }

    public PlaceholderFile getDataset2() {
        return this.m_Dataset2;
    }

    public String dataset2TipText() {
        return "The second dataset in the comparison.";
    }

    public void setRange1(Range value) {
        this.m_Range1 = value;
        this.reset();
    }

    public Range getRange1() {
        return this.m_Range1;
    }

    public String range1TipText() {
        return "The range of attributes of the first dataset.";
    }

    public void setRange2(Range value) {
        this.m_Range2 = value;
        this.reset();
    }

    public Range getRange2() {
        return this.m_Range2;
    }

    public String range2TipText() {
        return "The range of attributes of the second dataset.";
    }

    public void setRowAttribute1(String value) {
        this.m_RowAttribute1.setIndex(value);
        this.reset();
    }

    public String getRowAttribute1() {
        return this.m_RowAttribute1.getIndex();
    }

    public String rowAttribute1TipText() {
        return "The index for the attribute used for identifying rows to compare; if not provided, then the comparison is performed row-by-row (first dataset).";
    }

    public void setRowAttribute2(String value) {
        this.m_RowAttribute2.setIndex(value);
        this.reset();
    }

    public String getRowAttribute2() {
        return this.m_RowAttribute2.getIndex();
    }

    public String rowAttribute2TipText() {
        return "The index for the attribute used for identifying rows to compare; if not provided, then the comparison is performed row-by-row (second dataset).";
    }

    public void setOutputFile(PlaceholderFile value) {
        this.m_OutputFile = value;
        this.reset();
    }

    public PlaceholderFile getOutputFile() {
        return this.m_OutputFile;
    }

    public String outputFileTipText() {
        return "The file to save the comparison result in (CSV format).";
    }

    public void setMissing(PlaceholderFile value) {
        this.m_Missing = value;
        this.reset();
    }

    public PlaceholderFile getMissing() {
        return this.m_Missing;
    }

    public String missingTipText() {
        return "The file to save the information about missing rows to (CSV format).";
    }

    public void setThreshold(double value) {
        if (value >= 0.0 && value <= 1.0) {
            this.m_Threshold = value;
            this.reset();
        } else {
            this.getLogger().severe("Threshold has to satisfy 0<=x<=1.0, provided: " + value);
        }
    }

    public double getThreshold() {
        return this.m_Threshold;
    }

    public String thresholdTipText() {
        return "The threshold for the correlation coefficient; only if the coefficient is below that threshold, it will get output; 0.0 turns the threshold off.";
    }

    protected void preRun() {
        super.preRun();
        if (!this.m_Dataset1.exists()) {
            throw new IllegalArgumentException("Input file 1 '" + this.m_Dataset1 + "' does not exist?");
        }
        if (!this.m_Dataset2.exists()) {
            throw new IllegalArgumentException("Input file 2 '" + this.m_Dataset2 + "' does not exist?");
        }
        if (this.m_Dataset1.isDirectory()) {
            throw new IllegalArgumentException("Input 1 '" + this.m_Dataset1 + "' is a directory!");
        }
        if (this.m_Dataset2.isDirectory()) {
            throw new IllegalArgumentException("Input 2 '" + this.m_Dataset2 + "' is a directory!");
        }
        if (this.m_OutputFile.isDirectory()) {
            throw new IllegalArgumentException("Output '" + this.m_OutputFile + "' is pointing to a directory!");
        }
        try {
            this.m_Data1 = ConverterUtils.DataSource.read((String)this.m_Dataset1.getAbsolutePath());
            this.m_Data2 = ConverterUtils.DataSource.read((String)this.m_Dataset2.getAbsolutePath());
        }
        catch (Exception e) {
            throw new IllegalArgumentException(e);
        }
        this.m_Range1.setMax(this.m_Data1.numAttributes());
        this.m_Range2.setMax(this.m_Data2.numAttributes());
        if (this.m_Range1.getIntIndices().length != this.m_Range2.getIntIndices().length) {
            throw new IllegalArgumentException("Different range of attributes: " + this.m_Range1.getIntIndices().length + " != " + this.m_Range2.getIntIndices().length);
        }
        this.m_Indices1 = this.m_Range1.getIntIndices();
        this.m_Indices2 = this.m_Range2.getIntIndices();
        this.m_RowAttribute1.setMax(this.m_Data1.numAttributes());
        this.m_RowAttribute2.setMax(this.m_Data2.numAttributes());
        this.m_UseRowAttribute = null;
        this.m_Lookup2 = null;
        if (this.getUseRowAttribute()) {
            if (this.m_Data1.attribute(this.m_RowAttribute1.getIntIndex()).type() != this.m_Data2.attribute(this.m_RowAttribute2.getIntIndex()).type()) {
                throw new IllegalArgumentException("The attributes types of the two row attributes differ: " + Attribute.typeToString((Attribute)this.m_Data1.attribute(this.m_RowAttribute1.getIntIndex())) + " != " + Attribute.typeToString((Attribute)this.m_Data2.attribute(this.m_RowAttribute2.getIntIndex())));
            }
            this.m_RowAttributeIsString = this.m_Data1.attribute(this.m_RowAttribute1.getIntIndex()).isNominal() || this.m_Data1.attribute(this.m_RowAttribute1.getIntIndex()).isString();
        }
    }

    protected boolean getUseRowAttribute() {
        if (this.m_UseRowAttribute == null) {
            this.m_UseRowAttribute = this.m_RowAttribute1.getIndex().length() > 0 && this.m_RowAttribute2.getIndex().length() > 0;
        }
        return this.m_UseRowAttribute;
    }

    protected String getRowID(int index) {
        String result = this.getUseRowAttribute() ? (this.m_RowAttributeIsString ? this.m_Data1.instance(index).stringValue(this.m_RowAttribute1.getIntIndex()) : "" + this.m_Data1.instance(index).value(this.m_RowAttribute1.getIntIndex())) : "" + (index + 1);
        return result;
    }

    protected Instance[] nextByIndex(int index) {
        Instance[] result = null;
        if (index < this.m_Data1.numInstances() && index < this.m_Data2.numInstances()) {
            result = new Instance[]{this.m_Data1.instance(index), this.m_Data2.instance(index)};
        }
        return result;
    }

    protected void initLookup() {
        if (this.m_Lookup2 == null) {
            this.m_Lookup2 = new Hashtable();
            int attIndex = this.m_RowAttribute2.getIntIndex();
            for (int i = 0; i < this.m_Data2.numInstances(); ++i) {
                if (this.m_RowAttributeIsString) {
                    this.m_Lookup2.put(this.m_Data2.instance(i).stringValue(attIndex), i);
                    continue;
                }
                this.m_Lookup2.put("" + this.m_Data2.instance(i).value(attIndex), i);
            }
        }
    }

    protected Instance[] nextByRowAttribute(int index) {
        Instance[] result = null;
        if (index < this.m_Data1.numInstances() && index < this.m_Data2.numInstances()) {
            this.initLookup();
            int attIndex = this.m_RowAttribute1.getIntIndex();
            Integer rowIndex = this.m_RowAttributeIsString ? this.m_Lookup2.get(this.m_Data1.instance(index).stringValue(attIndex)) : this.m_Lookup2.get("" + this.m_Data1.instance(index).value(attIndex));
            if (rowIndex != null) {
                result = new Instance[]{this.m_Data1.instance(index), this.m_Data2.instance(rowIndex.intValue())};
            }
        }
        return result;
    }

    protected Instance[] next(int index) {
        if (this.getUseRowAttribute()) {
            return this.nextByRowAttribute(index);
        }
        return this.nextByIndex(index);
    }

    protected double getCorrelation(Instance first, Instance second) {
        double[] val1 = new double[this.m_Indices1.length];
        double[] val2 = new double[this.m_Indices2.length];
        for (int i = 0; i < val1.length; ++i) {
            if (first.attribute(this.m_Indices1[i]).isNumeric()) {
                val1[i] = first.value(this.m_Indices1[i]);
            }
            if (!second.attribute(this.m_Indices2[i]).isNumeric()) continue;
            val2[i] = second.value(this.m_Indices2[i]);
        }
        return StatUtils.correlationCoefficient((double[])val1, (double[])val2);
    }

    protected void doRun() {
        DefaultSpreadSheet output = new DefaultSpreadSheet();
        HeaderRow row = output.getHeaderRow();
        if (this.getUseRowAttribute()) {
            row.addCell("ID").setContent("ID");
        } else {
            row.addCell("ID").setContent("Index");
        }
        row.addCell("Correlation").setContent("Correlation");
        DefaultSpreadSheet missing = null;
        if (!this.m_Missing.isDirectory()) {
            missing = new DefaultSpreadSheet();
            row = missing.getHeaderRow();
            if (this.getUseRowAttribute()) {
                row.addCell("ID").setContent("ID");
            } else {
                row.addCell("ID").setContent("Index");
            }
        }
        for (int i = 0; i < this.m_Data1.numInstances(); ++i) {
            Instance[] pair = this.next(i);
            if (pair != null) {
                double correlation = this.getCorrelation(pair[0], pair[1]);
                if (this.m_Threshold == 0.0 || this.m_Threshold > 0.0 && correlation < this.m_Threshold) {
                    row = output.addRow("" + (i + 1));
                    row.addCell("ID").setContent(this.getRowID(i));
                    row.addCell("Correlation").setContent(Double.valueOf(correlation));
                }
            } else if (missing != null) {
                row = missing.addRow("" + (i + 1));
                row.addCell("ID").setContent(this.getRowID(i));
            }
            if (!this.isLoggingEnabled() || i % 100 != 0) continue;
            this.getLogger().info("Processed " + i + "/" + this.m_Data1.numInstances());
        }
        if (!new CsvSpreadSheetWriter().write((SpreadSheet)output, this.m_OutputFile.getAbsolutePath())) {
            this.getLogger().severe("Failed to write output to '" + this.m_OutputFile + "'!");
        } else {
            this.getLogger().info("Output written to '" + this.m_OutputFile + "'!");
        }
        if (missing != null) {
            if (!new CsvSpreadSheetWriter().write((SpreadSheet)missing, this.m_Missing.getAbsolutePath())) {
                this.getLogger().severe("Failed to write missing data to '" + this.m_Missing + "'!");
            } else {
                this.getLogger().info("Missing data written to '" + this.m_Missing + "'!");
            }
        }
    }

    public void cleanUp() {
        super.cleanUp();
        this.m_Data1 = null;
        this.m_Data2 = null;
        this.m_Indices1 = null;
        this.m_Indices2 = null;
    }
}

