package adams.tools;

import adams.core.Index;
import adams.core.Range;
import adams.core.io.CsvSpreadSheetWriter;
import adams.core.io.PlaceholderFile;
import adams.core.io.SpreadSheet;
import adams.data.statistics.StatUtils;
import java.util.Hashtable;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils;

/* loaded from: input_file:adams/tools/CompareDatasets.class */
/**
 * Tool that loads two datasets and, for each pair of matching rows, computes
 * the correlation coefficient over the numeric attributes selected by two
 * user-defined attribute ranges.
 * <p>
 * Rows are matched either by position (row-by-row) or, if both row attribute
 * indices are set, by the value of an ID attribute. The results are written
 * to a CSV file; rows of the first dataset without a partner in the second
 * dataset are written to a separate CSV file (unless that file points to a
 * directory).
 */
public class CompareDatasets extends AbstractTool implements OutputFileGenerator {
    /** Serialization ID. */
    private static final long serialVersionUID = -1399473007795695271L;

    /** The file of the first dataset. */
    protected PlaceholderFile m_Dataset1;

    /** The attribute range to use from the first dataset. */
    protected Range m_Range1;

    /** The index of the row ID attribute in the first dataset (empty = row-by-row). */
    protected Index m_RowAttribute1;

    /** The file of the second dataset. */
    protected PlaceholderFile m_Dataset2;

    /** The attribute range to use from the second dataset. */
    protected Range m_Range2;

    /** The index of the row ID attribute in the second dataset (empty = row-by-row). */
    protected Index m_RowAttribute2;

    /** The CSV file receiving the correlation results. */
    protected PlaceholderFile m_OutputFile;

    /** The CSV file receiving the rows that had no partner; skipped if a directory. */
    protected PlaceholderFile m_Missing;

    /** The first dataset, loaded in {@link #preRun()}. */
    protected Instances m_Data1;

    /** The second dataset, loaded in {@link #preRun()}. */
    protected Instances m_Data2;

    /** Cached decision whether rows are matched via row attributes; null = not yet determined. */
    protected Boolean m_UseRowAttribute;

    /** Whether the row attribute of the first dataset is nominal or string (compared via string value). */
    protected boolean m_RowAttributeIsString;

    /** The attribute indices obtained from the first range. */
    protected int[] m_Indices1;

    /** The attribute indices obtained from the second range. */
    protected int[] m_Indices2;

    /** Lazily built lookup: row ID in the second dataset -> row index in the second dataset. */
    protected Hashtable<String, Integer> m_Lookup2;

    /** The correlation threshold; 0.0 turns the threshold off. */
    protected double m_Threshold;

    /**
     * Returns a description of this tool.
     *
     * @return the description
     */
    public String globalInfo() {
        return "Compares two datasets, either row-by-row or using a row attribute listing a unique ID for matching the rows, outputting the correlation coefficient of the numeric attributes found in the ranges defined by the user.\nIn order to trim down the number of generated rows, a threshold can be specified. Only rows are output which correlation coefficient is below that threshold.";
    }

    /**
     * Registers the options of this tool with the option manager.
     */
    public void defineOptions() {
        super.defineOptions();
        this.m_OptionManager.add("dataset1", "dataset1", new PlaceholderFile("."));
        this.m_OptionManager.add("range1", "range1", new Range("first-last"));
        this.m_OptionManager.add("row1", "rowAttribute1", "");
        this.m_OptionManager.add("dataset2", "dataset2", new PlaceholderFile("."));
        this.m_OptionManager.add("range2", "range2", new Range("first-last"));
        this.m_OptionManager.add("row2", "rowAttribute2", "");
        this.m_OptionManager.add("output", "outputFile", new PlaceholderFile("output.csv"));
        this.m_OptionManager.add("missing", "missing", new PlaceholderFile("missing.csv"));
        this.m_OptionManager.add("threshold", "threshold", Double.valueOf(0.0d), Double.valueOf(0.0d), Double.valueOf(1.0d));
    }

    /**
     * Creates the range and index members, so that the string-based setters
     * of the row attributes have an object to operate on.
     */
    protected void initialize() {
        super.initialize();
        this.m_Range1 = new Range();
        this.m_Range2 = new Range();
        this.m_RowAttribute1 = new Index();
        this.m_RowAttribute2 = new Index();
    }

    /**
     * Sets the first dataset and calls {@code reset()}.
     *
     * @param placeholderFile the file of the first dataset
     */
    public void setDataset1(PlaceholderFile placeholderFile) {
        this.m_Dataset1 = placeholderFile;
        reset();
    }

    /**
     * Returns the first dataset.
     *
     * @return the file of the first dataset
     */
    public PlaceholderFile getDataset1() {
        return this.m_Dataset1;
    }

    /**
     * Returns the tip text for the first dataset option.
     *
     * @return the tip text
     */
    public String dataset1TipText() {
        return "The first dataset in the comparison.";
    }

    /**
     * Sets the second dataset and calls {@code reset()}.
     *
     * @param placeholderFile the file of the second dataset
     */
    public void setDataset2(PlaceholderFile placeholderFile) {
        this.m_Dataset2 = placeholderFile;
        reset();
    }

    /**
     * Returns the second dataset.
     *
     * @return the file of the second dataset
     */
    public PlaceholderFile getDataset2() {
        return this.m_Dataset2;
    }

    /**
     * Returns the tip text for the second dataset option.
     *
     * @return the tip text
     */
    public String dataset2TipText() {
        return "The second dataset in the comparison.";
    }

    /**
     * Sets the attribute range of the first dataset and calls {@code reset()}.
     *
     * @param range the range
     */
    public void setRange1(Range range) {
        this.m_Range1 = range;
        reset();
    }

    /**
     * Returns the attribute range of the first dataset.
     *
     * @return the range
     */
    public Range getRange1() {
        return this.m_Range1;
    }

    /**
     * Returns the tip text for the first range option.
     *
     * @return the tip text
     */
    public String range1TipText() {
        return "The range of attributes of the first dataset.";
    }

    /**
     * Sets the attribute range of the second dataset and calls {@code reset()}.
     *
     * @param range the range
     */
    public void setRange2(Range range) {
        this.m_Range2 = range;
        reset();
    }

    /**
     * Returns the attribute range of the second dataset.
     *
     * @return the range
     */
    public Range getRange2() {
        return this.m_Range2;
    }

    /**
     * Returns the tip text for the second range option.
     *
     * @return the tip text
     */
    public String range2TipText() {
        return "The range of attributes of the second dataset.";
    }

    /**
     * Sets the index of the row ID attribute of the first dataset and calls
     * {@code reset()}.
     *
     * @param str the index string; empty for row-by-row comparison
     */
    public void setRowAttribute1(String str) {
        this.m_RowAttribute1.setIndex(str);
        reset();
    }

    /**
     * Returns the index of the row ID attribute of the first dataset.
     *
     * @return the index string
     */
    public String getRowAttribute1() {
        return this.m_RowAttribute1.getIndex();
    }

    /**
     * Returns the tip text for the first row attribute option.
     *
     * @return the tip text
     */
    public String rowAttribute1TipText() {
        return "The index for the attribute used for identifying rows to compare; if not provided, then the comparison is performed row-by-row (first dataset).";
    }

    /**
     * Sets the index of the row ID attribute of the second dataset and calls
     * {@code reset()}.
     *
     * @param str the index string; empty for row-by-row comparison
     */
    public void setRowAttribute2(String str) {
        this.m_RowAttribute2.setIndex(str);
        reset();
    }

    /**
     * Returns the index of the row ID attribute of the second dataset.
     *
     * @return the index string
     */
    public String getRowAttribute2() {
        return this.m_RowAttribute2.getIndex();
    }

    /**
     * Returns the tip text for the second row attribute option.
     *
     * @return the tip text
     */
    public String rowAttribute2TipText() {
        return "The index for the attribute used for identifying rows to compare; if not provided, then the comparison is performed row-by-row (second dataset).";
    }

    /**
     * Sets the file for the comparison results and calls {@code reset()}.
     *
     * @param placeholderFile the output file
     */
    public void setOutputFile(PlaceholderFile placeholderFile) {
        this.m_OutputFile = placeholderFile;
        reset();
    }

    /**
     * Returns the file for the comparison results.
     *
     * @return the output file
     */
    public PlaceholderFile getOutputFile() {
        return this.m_OutputFile;
    }

    /**
     * Returns the tip text for the output file option.
     *
     * @return the tip text
     */
    public String outputFileTipText() {
        return "The file to save the comparison result in (CSV format).";
    }

    /**
     * Sets the file for the missing-rows report and calls {@code reset()}.
     *
     * @param placeholderFile the file; a directory disables the report
     */
    public void setMissing(PlaceholderFile placeholderFile) {
        this.m_Missing = placeholderFile;
        reset();
    }

    /**
     * Returns the file for the missing-rows report.
     *
     * @return the file
     */
    public PlaceholderFile getMissing() {
        return this.m_Missing;
    }

    /**
     * Returns the tip text for the missing file option.
     *
     * @return the tip text
     */
    public String missingTipText() {
        return "The file to save the information about missing rows to (CSV format).";
    }

    /**
     * Sets the correlation threshold. Values outside [0, 1] are rejected with
     * a message on the error stream and leave the current value untouched.
     *
     * @param d the threshold, 0.0 turns it off
     */
    public void setThreshold(double d) {
        if (d < 0.0d || d > 1.0d) {
            getSystemErr().println("Threshold has to satisfy 0<=x<=1.0, provided: " + d);
        } else {
            this.m_Threshold = d;
            reset();
        }
    }

    /**
     * Returns the correlation threshold.
     *
     * @return the threshold
     */
    public double getThreshold() {
        return this.m_Threshold;
    }

    /**
     * Returns the tip text for the threshold option.
     *
     * @return the tip text
     */
    public String thresholdTipText() {
        return "The threshold for the correlation coefficient; only if the coefficient is below that threshold, it will get output; 0.0 turns the threshold off.";
    }

    /**
     * Validates the input/output files, loads both datasets and resolves the
     * attribute ranges and row attributes against the loaded data.
     *
     * @throws IllegalArgumentException if a file check fails, the two ranges
     *         select a different number of attributes, the row attributes
     *         have different types, or loading the data fails
     */
    protected void preRun() {
        super.preRun();
        if (!this.m_Dataset1.exists()) {
            throw new IllegalArgumentException("Input file 1 '" + this.m_Dataset1 + "' does not exist?");
        }
        if (!this.m_Dataset2.exists()) {
            throw new IllegalArgumentException("Input file 2 '" + this.m_Dataset2 + "' does not exist?");
        }
        if (this.m_Dataset1.isDirectory()) {
            throw new IllegalArgumentException("Input 1 '" + this.m_Dataset1 + "' is a directory!");
        }
        if (this.m_Dataset2.isDirectory()) {
            throw new IllegalArgumentException("Input 2 '" + this.m_Dataset2 + "' is a directory!");
        }
        if (this.m_OutputFile.isDirectory()) {
            throw new IllegalArgumentException("Output '" + this.m_OutputFile + "' is pointing to a directory!");
        }
        try {
            this.m_Data1 = ConverterUtils.DataSource.read(this.m_Dataset1.getAbsolutePath());
            this.m_Data2 = ConverterUtils.DataSource.read(this.m_Dataset2.getAbsolutePath());

            // the ranges can only be resolved once the number of attributes is known
            this.m_Range1.setMax(this.m_Data1.numAttributes());
            this.m_Range2.setMax(this.m_Data2.numAttributes());
            int[] indices1 = this.m_Range1.getIntIndices();
            int[] indices2 = this.m_Range2.getIntIndices();
            if (indices1.length != indices2.length) {
                throw new IllegalArgumentException("Different range of attributes: " + indices1.length + " != " + indices2.length);
            }
            this.m_Indices1 = indices1;
            this.m_Indices2 = indices2;

            this.m_RowAttribute1.setMax(this.m_Data1.numAttributes());
            this.m_RowAttribute2.setMax(this.m_Data2.numAttributes());

            // discard cached state of a previous run
            this.m_UseRowAttribute = null;
            this.m_Lookup2 = null;

            if (getUseRowAttribute()) {
                Attribute rowAtt1 = this.m_Data1.attribute(this.m_RowAttribute1.getIntIndex());
                Attribute rowAtt2 = this.m_Data2.attribute(this.m_RowAttribute2.getIntIndex());
                if (rowAtt1.type() != rowAtt2.type()) {
                    throw new IllegalArgumentException("The attributes types of the two row attributes differ: " + Attribute.typeToString(rowAtt1) + " != " + Attribute.typeToString(rowAtt2));
                }
                this.m_RowAttributeIsString = rowAtt1.isNominal() || rowAtt1.isString();
            }
        } catch (IllegalArgumentException e) {
            // already carries a meaningful message, don't bury it in a wrapper
            throw e;
        } catch (Exception e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Returns whether rows are matched via the row attributes. This is the
     * case if both row attribute indices are non-empty; the decision is
     * cached until {@link #preRun()} clears it.
     *
     * @return true if row attributes are used for matching
     */
    protected boolean getUseRowAttribute() {
        if (this.m_UseRowAttribute == null) {
            boolean hasRow1 = this.m_RowAttribute1.getIndex().length() > 0;
            boolean hasRow2 = this.m_RowAttribute2.getIndex().length() > 0;
            this.m_UseRowAttribute = Boolean.valueOf(hasRow1 && hasRow2);
        }
        return this.m_UseRowAttribute.booleanValue();
    }

    /**
     * Returns the ID of a row of the first dataset: the value of the row
     * attribute if row attributes are used, otherwise the 1-based row index.
     *
     * @param i the 0-based row index in the first dataset
     * @return the row ID
     */
    protected String getRowID(int i) {
        if (!getUseRowAttribute()) {
            return "" + (i + 1);
        }
        int col = this.m_RowAttribute1.getIntIndex();
        Instance row = this.m_Data1.instance(i);
        if (this.m_RowAttributeIsString) {
            return row.stringValue(col);
        }
        return "" + row.value(col);
    }

    /**
     * Returns the pair of rows at the same position in both datasets.
     *
     * @param i the 0-based row index
     * @return the rows (first dataset, second dataset), or null if either
     *         dataset has no row at that position
     */
    protected Instance[] nextByIndex(int i) {
        if (i >= this.m_Data1.numInstances() || i >= this.m_Data2.numInstances()) {
            return null;
        }
        return new Instance[]{this.m_Data1.instance(i), this.m_Data2.instance(i)};
    }

    /**
     * Builds the ID-to-row-index lookup for the second dataset, if not yet
     * present. If an ID occurs more than once, the last row wins.
     */
    protected void initLookup() {
        if (this.m_Lookup2 != null) {
            return;
        }
        this.m_Lookup2 = new Hashtable<>();
        int col = this.m_RowAttribute2.getIntIndex();
        for (int i = 0; i < this.m_Data2.numInstances(); i++) {
            Instance row = this.m_Data2.instance(i);
            String key = this.m_RowAttributeIsString ? row.stringValue(col) : "" + row.value(col);
            this.m_Lookup2.put(key, Integer.valueOf(i));
        }
    }

    /**
     * Returns the row of the first dataset at the given position together
     * with the row of the second dataset carrying the same row ID.
     *
     * @param i the 0-based row index in the first dataset
     * @return the rows (first dataset, second dataset), or null if the index
     *         is beyond the first dataset or the ID is not present in the
     *         second dataset
     */
    protected Instance[] nextByRowAttribute(int i) {
        // Only the first dataset bounds the index: the partner row is located
        // via its ID, so the size of the second dataset is irrelevant here.
        if (i >= this.m_Data1.numInstances()) {
            return null;
        }
        initLookup();
        int col = this.m_RowAttribute1.getIntIndex();
        Instance row = this.m_Data1.instance(i);
        String key = this.m_RowAttributeIsString ? row.stringValue(col) : "" + row.value(col);
        Integer other = this.m_Lookup2.get(key);
        if (other == null) {
            return null;
        }
        return new Instance[]{row, this.m_Data2.instance(other.intValue())};
    }

    /**
     * Returns the pair of rows to compare for the given row of the first
     * dataset, using either ID-based or position-based matching.
     *
     * @param i the 0-based row index in the first dataset
     * @return the rows (first dataset, second dataset), or null if there is
     *         no partner row
     */
    protected Instance[] next(int i) {
        return getUseRowAttribute() ? nextByRowAttribute(i) : nextByIndex(i);
    }

    /**
     * Computes the correlation coefficient between two rows over the
     * selected attribute ranges. Only positions where the attribute is
     * numeric in both rows contribute; other positions are skipped rather
     * than being padded with zeros, which would distort the coefficient.
     *
     * @param instance the row of the first dataset
     * @param instance2 the row of the second dataset
     * @return the correlation coefficient; NaN if the ranges contain no
     *         attribute pair that is numeric on both sides
     */
    protected double getCorrelation(Instance instance, Instance instance2) {
        double[] values1 = new double[this.m_Indices1.length];
        double[] values2 = new double[this.m_Indices2.length];
        int count = 0;
        for (int i = 0; i < this.m_Indices1.length; i++) {
            int col1 = this.m_Indices1[i];
            int col2 = this.m_Indices2[i];
            if (instance.attribute(col1).isNumeric() && instance2.attribute(col2).isNumeric()) {
                values1[count] = instance.value(col1);
                values2[count] = instance2.value(col2);
                count++;
            }
        }
        if (count == 0) {
            // nothing to correlate, the coefficient is undefined
            return Double.NaN;
        }
        if (count < values1.length) {
            // drop the unused tail left behind by skipped non-numeric attributes
            double[] trimmed1 = new double[count];
            double[] trimmed2 = new double[count];
            System.arraycopy(values1, 0, trimmed1, 0, count);
            System.arraycopy(values2, 0, trimmed2, 0, count);
            values1 = trimmed1;
            values2 = trimmed2;
        }
        return StatUtils.correlationCoefficient(values1, values2);
    }

    /**
     * Creates a spreadsheet with the ID header cell and, optionally, the
     * correlation header cell.
     *
     * @param withCorrelation whether to add the "Correlation" column
     * @return the new spreadsheet
     */
    private SpreadSheet newSheet(boolean withCorrelation) {
        SpreadSheet sheet = new SpreadSheet();
        SpreadSheet.Row header = sheet.getHeaderRow();
        if (getUseRowAttribute()) {
            header.addCell(adams.data.instance.Instance.REPORT_ID).setContent(adams.data.instance.Instance.REPORT_ID);
        } else {
            header.addCell(adams.data.instance.Instance.REPORT_ID).setContent("Index");
        }
        if (withCorrelation) {
            header.addCell("Correlation").setContent("Correlation");
        }
        return sheet;
    }

    /**
     * Performs the comparison: iterates over the rows of the first dataset,
     * records the correlation of every matched pair (subject to the
     * threshold) and the IDs of the rows without a partner, then writes both
     * spreadsheets as CSV.
     */
    protected void doRun() {
        SpreadSheet results = newSheet(true);
        // a directory as "missing" file disables the missing-rows report
        SpreadSheet missing = this.m_Missing.isDirectory() ? null : newSheet(false);

        int numRows = this.m_Data1.numInstances();
        for (int i = 0; i < numRows; i++) {
            Instance[] pair = next(i);
            if (pair != null) {
                double correlation = getCorrelation(pair[0], pair[1]);
                // threshold of 0 outputs everything, otherwise only coefficients below it
                if (this.m_Threshold == 0.0d || (this.m_Threshold > 0.0d && correlation < this.m_Threshold)) {
                    SpreadSheet.Row row = results.addRow("" + (i + 1));
                    row.addCell(adams.data.instance.Instance.REPORT_ID).setContent(getRowID(i));
                    row.addCell("Correlation").setContent(Double.valueOf(correlation));
                }
            } else if (missing != null) {
                missing.addRow("" + (i + 1)).addCell(adams.data.instance.Instance.REPORT_ID).setContent(getRowID(i));
            }
            if (isDebugOn() && i % 100 == 0) {
                debug("Processed " + i + "/" + numRows);
            }
        }

        if (new CsvSpreadSheetWriter().write(results, this.m_OutputFile.getAbsolutePath())) {
            getSystemOut().println("Output written to '" + this.m_OutputFile + "'!");
        } else {
            getSystemErr().println("Failed to write output to '" + this.m_OutputFile + "'!");
        }
        if (missing != null) {
            if (new CsvSpreadSheetWriter().write(missing, this.m_Missing.getAbsolutePath())) {
                getSystemOut().println("Missing data written to '" + this.m_Missing + "'!");
            } else {
                getSystemErr().println("Failed to write missing data to '" + this.m_Missing + "'!");
            }
        }
    }

    /**
     * Releases the loaded datasets and everything derived from them.
     */
    public void cleanUp() {
        super.cleanUp();
        this.m_Data1 = null;
        this.m_Data2 = null;
        this.m_Indices1 = null;
        this.m_Indices2 = null;
        // the lookup was built from m_Data2 and is useless without it
        this.m_Lookup2 = null;
    }
}
