/*
 * Decompiled with CFR 0.152.
 */
package mulan.data;

import java.util.Arrays;
import java.util.Random;
import java.util.logging.Level;
import java.util.logging.Logger;
import mulan.data.InvalidDataFormatException;
import mulan.data.MultiLabelInstances;
import mulan.data.Stratification;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;

/**
 * Splits a multi-label dataset into folds with the iterative stratification
 * procedure cited in {@link #getTechnicalInformation()}.
 *
 * <p>Labels are processed one at a time, starting with the label that has the
 * fewest remaining positive examples. Each instance carrying that label is
 * placed in the fold that still "wants" the most examples of the label; ties
 * are broken by the number of instances a fold still wants and then at random.
 * Instances left over after all labels were processed are distributed by the
 * desired instance count alone.
 *
 * <p>Internally several helpers return an {@code int[]} "candidate list" whose
 * element 0 is the number of candidates and whose elements 1..count are fold
 * indices.
 */
public class IterativeStratification
implements Stratification,
TechnicalInformationHandler {
    /** Seed for the random generator used to break ties between folds. */
    private final long seed;

    /** Creates a stratifier that uses seed {@code 0}. */
    public IterativeStratification() {
        this(0L);
    }

    /**
     * Creates a stratifier with the given seed.
     *
     * @param seed seed for the random generator used to break ties
     */
    public IterativeStratification(long seed) {
        this.seed = seed;
    }

    /**
     * Returns the bibliographic reference of the paper describing the method.
     *
     * @return the technical information about this class
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(TechnicalInformation.Type.CONFERENCE);
        result.setValue(TechnicalInformation.Field.AUTHOR, "Sechidis, Konstantinos and Tsoumakas, Grigorios and Vlahavas, Ioannis");
        result.setValue(TechnicalInformation.Field.TITLE, "On the stratification of multi-label data");
        result.setValue(TechnicalInformation.Field.BOOKTITLE, "Proceedings of the 2011 European conference on Machine learning and knowledge discovery in databases - Volume Part III");
        result.setValue(TechnicalInformation.Field.SERIES, "ECML PKDD'11");
        result.setValue(TechnicalInformation.Field.YEAR, "2011");
        result.setValue(TechnicalInformation.Field.ISBN, "978-3-642-23807-9");
        result.setValue(TechnicalInformation.Field.LOCATION, "Athens, Greece");
        result.setValue(TechnicalInformation.Field.PAGES, "145--158");
        result.setValue(TechnicalInformation.Field.PUBLISHER, "Springer-Verlag");
        result.setValue(TechnicalInformation.Field.ADDRESS, "Berlin, Heidelberg");
        return result;
    }

    /**
     * Splits {@code data} into {@code folds} segments of equal desired size.
     *
     * <p>The instances of {@code data} are not removed from it: the splitting
     * works on a copy of the underlying instance set.
     *
     * @param data  the dataset to split
     * @param folds the number of segments to create, at least 1
     * @return an array of length {@code folds}; an element is {@code null} if
     *         wrapping that segment raised an {@link InvalidDataFormatException}
     *         (the failure is logged at {@code SEVERE})
     * @throws IllegalArgumentException if {@code folds} is smaller than 1
     */
    @Override
    public MultiLabelInstances[] stratify(MultiLabelInstances data, int folds) {
        if (folds < 1) {
            throw new IllegalArgumentException("Number of folds must be at least 1, but was " + folds);
        }
        MultiLabelInstances[] segments = new MultiLabelInstances[folds];
        double[] splitRatio = new double[folds];
        Arrays.fill(splitRatio, 1.0 / (double)folds);
        // foldsCreation deletes instances from the set it is given, so hand it a
        // copy instead of the object returned by data.getDataSet().
        Instances workingCopy = new Instances(data.getDataSet());
        Instances[] singleSegments = this.foldsCreation(workingCopy, new Random(this.seed), splitRatio, data.getNumLabels(), data.getLabelIndices(), data.getNumInstances());
        for (int i = 0; i < folds; ++i) {
            try {
                segments[i] = new MultiLabelInstances(singleSegments[i], data.getLabelsMetaData());
            }
            catch (InvalidDataFormatException ex) {
                Logger.getLogger(IterativeStratification.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        return segments;
    }

    /**
     * Distributes the instances of {@code workingSet} over the folds.
     *
     * <p>Note: {@code workingSet} is consumed; every instance is deleted from it
     * as it is assigned to a fold.
     *
     * @param workingSet             instances to distribute (emptied by this call)
     * @param random                 generator used to break ties between folds
     * @param splitRatio             fraction of the data desired for each fold
     * @param numLabels              number of labels
     * @param labelIndices           attribute index of each label
     * @param totalNumberOfInstances number of instances used to compute the
     *                               desired fold sizes
     * @return one instance set per fold
     */
    private Instances[] foldsCreation(Instances workingSet, Random random, double[] splitRatio, int numLabels, int[] labelIndices, int totalNumberOfInstances) {
        int numFolds = splitRatio.length;
        Instances[] instancesOnSplits = new Instances[numFolds];
        for (int fold = 0; fold < numFolds; ++fold) {
            // Empty set sharing the header of the working set.
            instancesOnSplits[fold] = new Instances(workingSet, 0);
        }
        int[] frequenciesOnDataset = this.calculatingTheFrequencies(workingSet, numLabels, labelIndices);
        double[][] desiredSplit = this.calculatingTheDesiredSplits(frequenciesOnDataset, splitRatio, numLabels, totalNumberOfInstances);
        int[] smallestFreqLabel = this.takingTheSmallestIndexAndNumberInVector(frequenciesOnDataset, totalNumberOfInstances);

        // One pass per label: each pass removes every remaining instance of the
        // currently rarest label from the working set.
        for (int lab = 0; lab < numLabels; ++lab) {
            Instances[] partition = this.takeTheInstancesOfTheLabel(workingSet, numLabels, labelIndices, smallestFreqLabel);
            Instances filteredInstancesForLabel = partition[0];
            workingSet = partition[1];
            for (int i = 0; i < filteredInstancesForLabel.numInstances(); ++i) {
                Instance filteredInstance = filteredInstancesForLabel.instance(i);
                boolean[] trueLabels = this.getTrueLabels(filteredInstance, numLabels, labelIndices);
                int[] possibleSplits = this.findThePossibleSpit(desiredSplit, smallestFreqLabel[0], numFolds);
                int splitToBeInserted = this.chooseSplit(possibleSplits, random);
                instancesOnSplits[splitToBeInserted].add(filteredInstance);
                desiredSplit[splitToBeInserted] = this.updateDesiredSplitStatistics(desiredSplit[splitToBeInserted], trueLabels);
            }
            frequenciesOnDataset = this.calculatingTheFrequencies(workingSet, numLabels, labelIndices);
            smallestFreqLabel = this.takingTheSmallestIndexAndNumberInVector(frequenciesOnDataset, totalNumberOfInstances);
        }

        // Whatever is left is distributed by the desired instance count only.
        while (workingSet.numInstances() != 0) {
            int[] possibleSplits = this.returnPossibleSplitsForNotAnnotated(desiredSplit);
            Instance remainingInstance = workingSet.instance(0);
            int splitToBeInserted = this.chooseSplit(possibleSplits, random);
            instancesOnSplits[splitToBeInserted].add(remainingInstance);
            int instanceCountColumn = desiredSplit[splitToBeInserted].length - 1;
            desiredSplit[splitToBeInserted][instanceCountColumn] = desiredSplit[splitToBeInserted][instanceCountColumn] - 1.0;
            workingSet.delete(0);
        }
        return instancesOnSplits;
    }

    /**
     * Picks one fold from a candidate list.
     *
     * <p>The random generator is only consulted when the list does not hold
     * exactly one candidate.
     *
     * @param possibleSplits candidate list (count at index 0, folds at 1..count)
     * @param random         generator used to pick among several candidates
     * @return the index of the chosen fold
     */
    private int chooseSplit(int[] possibleSplits, Random random) {
        if (possibleSplits[0] != 1) {
            return possibleSplits[random.nextInt(possibleSplits[0]) + 1];
        }
        return possibleSplits[1];
    }

    /**
     * Counts, for every label, the instances of {@code dataSet} that carry it.
     *
     * @param dataSet      instances to inspect
     * @param numLabels    number of labels
     * @param labelIndices attribute index of each label
     * @return array of length {@code numLabels} with the positive count per label
     */
    private int[] calculatingTheFrequencies(Instances dataSet, int numLabels, int[] labelIndices) {
        int[] vectorSumOfLabels = new int[numLabels];
        int numInstances = dataSet.numInstances();
        for (int instanceIndex = 0; instanceIndex < numInstances; ++instanceIndex) {
            boolean[] trueLabels = this.getTrueLabels(dataSet.instance(instanceIndex), numLabels, labelIndices);
            for (int lab = 0; lab < numLabels; ++lab) {
                if (trueLabels[lab]) {
                    vectorSumOfLabels[lab] = vectorSumOfLabels[lab] + 1;
                }
            }
        }
        return vectorSumOfLabels;
    }

    /**
     * Computes how many examples each fold should receive.
     *
     * @param frequenciesOnDataset   positive count per label
     * @param splitRatio             fraction of the data desired for each fold
     * @param numLabels              number of labels
     * @param totalNumberOfInstances total number of instances
     * @return matrix indexed {@code [fold][column]}; columns
     *         {@code 0..numLabels-1} hold the desired positive count per label
     *         and column {@code numLabels} the desired instance count
     */
    private double[][] calculatingTheDesiredSplits(int[] frequenciesOnDataset, double[] splitRatio, int numLabels, int totalNumberOfInstances) {
        double[][] desiredSplit = new double[splitRatio.length][numLabels + 1];
        for (int fold = 0; fold < splitRatio.length; ++fold) {
            for (int lab = 0; lab < numLabels; ++lab) {
                desiredSplit[fold][lab] = splitRatio[fold] * (double)frequenciesOnDataset[lab];
            }
            desiredSplit[fold][numLabels] = splitRatio[fold] * (double)totalNumberOfInstances;
        }
        return desiredSplit;
    }

    /**
     * Finds the label with the smallest non-zero frequency.
     *
     * <p>Only frequencies strictly below {@code totalNumberOfInstances} are
     * considered; on ties the lowest label index wins. If no label qualifies the
     * result is {@code {0, totalNumberOfInstances}}.
     *
     * @param vectorSumOfLabels      positive count per label
     * @param totalNumberOfInstances upper bound used as the initial minimum
     * @return two-element array: the label index and its frequency
     */
    private int[] takingTheSmallestIndexAndNumberInVector(int[] vectorSumOfLabels, int totalNumberOfInstances) {
        int smallestIndex = 0;
        int smallestValue = totalNumberOfInstances;
        for (int index = 0; index < vectorSumOfLabels.length; ++index) {
            int frequency = vectorSumOfLabels[index];
            if (frequency != 0 && frequency < smallestValue) {
                smallestIndex = index;
                smallestValue = frequency;
            }
        }
        return new int[]{smallestIndex, smallestValue};
    }

    /**
     * Moves every instance carrying the desired label out of the working set.
     *
     * <p>The relative order of instances is kept in both resulting sets. Only
     * {@code desiredLabel[0]} (the label index) is read.
     *
     * @param workingSet   instances to filter; matching instances are deleted
     *                     from it
     * @param numLabels    number of labels
     * @param labelIndices attribute index of each label
     * @param desiredLabel two-element array whose first element is the index of
     *                     the label to extract
     * @return two-element array: the extracted instances and the (shrunken)
     *         working set
     */
    private Instances[] takeTheInstancesOfTheLabel(Instances workingSet, int numLabels, int[] labelIndices, int[] desiredLabel) {
        Instances filteredInstancesOfLabel = new Instances(workingSet, 0);
        int instanceIndex = 0;
        while (instanceIndex < workingSet.numInstances()) {
            Instance instance = workingSet.instance(instanceIndex);
            boolean[] trueLabels = this.getTrueLabels(instance, numLabels, labelIndices);
            if (trueLabels[desiredLabel[0]]) {
                filteredInstancesOfLabel.add(instance);
                // Deleting shifts the following instances down, so the index
                // must not advance here.
                workingSet.delete(instanceIndex);
            } else {
                ++instanceIndex;
            }
        }
        return new Instances[]{filteredInstancesOfLabel, workingSet};
    }

    /**
     * Lists the folds that most want another example of label {@code lab}.
     *
     * <p>Candidates are the folds with the largest desired count for the label;
     * among those only the folds with the largest desired instance count are
     * kept.
     *
     * @param desiredSplit desired counts indexed {@code [fold][column]}, last
     *                     column being the desired instance count
     * @param lab          index of the label being distributed
     * @param numFolds     number of folds
     * @return candidate list (count at index 0, folds at 1..count)
     */
    private int[] findThePossibleSpit(double[][] desiredSplit, int lab, int numFolds) {
        int[] possibleSplits = new int[numFolds + 1];
        int instanceCountColumn = desiredSplit[0].length - 1;
        int maxIndex = 0;
        double maxValue = -1.0;
        // Largest desired count for the label.
        for (int fold = 0; fold < numFolds; ++fold) {
            if (desiredSplit[fold][lab] > maxValue) {
                maxIndex = fold;
                maxValue = desiredSplit[fold][lab];
            }
        }
        // Among the folds sharing that count, the largest desired instance count.
        for (int fold = 0; fold < numFolds; ++fold) {
            if (desiredSplit[fold][lab] == maxValue && desiredSplit[fold][instanceCountColumn] > desiredSplit[maxIndex][instanceCountColumn]) {
                maxIndex = fold;
            }
        }
        double maxInstanceCount = desiredSplit[maxIndex][instanceCountColumn];
        int count = 0;
        for (int fold = 0; fold < numFolds; ++fold) {
            if (desiredSplit[fold][lab] == maxValue && desiredSplit[fold][instanceCountColumn] == maxInstanceCount) {
                possibleSplits[++count] = fold;
            }
        }
        possibleSplits[0] = count;
        return possibleSplits;
    }

    /**
     * Returns a copy of a fold's desired counts after one instance was added.
     *
     * @param desiredSplit desired counts of one fold (labels, then instance count)
     * @param trueLabels   labels carried by the instance that was added
     * @return new array with 1 subtracted from the count of every carried label
     *         and from the instance count
     */
    private double[] updateDesiredSplitStatistics(double[] desiredSplit, boolean[] trueLabels) {
        int instanceCountColumn = desiredSplit.length - 1;
        double[] returnedArray = new double[desiredSplit.length];
        for (int lab = 0; lab < instanceCountColumn; ++lab) {
            returnedArray[lab] = trueLabels[lab] ? desiredSplit[lab] - 1.0 : desiredSplit[lab];
        }
        returnedArray[instanceCountColumn] = desiredSplit[instanceCountColumn] - 1.0;
        return returnedArray;
    }

    /**
     * Lists the folds with the largest desired instance count.
     *
     * @param desiredSplit desired counts indexed {@code [fold][column]}, last
     *                     column being the desired instance count
     * @return candidate list (count at index 0, folds at 1..count)
     */
    private int[] returnPossibleSplitsForNotAnnotated(double[][] desiredSplit) {
        int numFolds = desiredSplit.length;
        int instanceCountColumn = desiredSplit[0].length - 1;
        int[] possibleSplits = new int[numFolds + 1];
        int maxIndex = 0;
        for (int fold = 0; fold < numFolds; ++fold) {
            if (desiredSplit[fold][instanceCountColumn] > desiredSplit[maxIndex][instanceCountColumn]) {
                maxIndex = fold;
            }
        }
        double maxInstanceCount = desiredSplit[maxIndex][instanceCountColumn];
        int count = 0;
        for (int fold = 0; fold < numFolds; ++fold) {
            if (desiredSplit[fold][instanceCountColumn] == maxInstanceCount) {
                possibleSplits[++count] = fold;
            }
        }
        possibleSplits[0] = count;
        return possibleSplits;
    }

    /**
     * Reads the label assignments of an instance.
     *
     * <p>A label counts as present when the nominal value of its attribute is
     * the string {@code "1"}.
     *
     * @param instance     the instance to inspect
     * @param numLabels    number of labels
     * @param labelIndices attribute index of each label
     * @return array of length {@code numLabels}; {@code true} where the label is
     *         present
     */
    private boolean[] getTrueLabels(Instance instance, int numLabels, int[] labelIndices) {
        boolean[] trueLabels = new boolean[numLabels];
        for (int counter = 0; counter < numLabels; ++counter) {
            int classIdx = labelIndices[counter];
            String classValue = instance.attribute(classIdx).value((int)instance.value(classIdx));
            trueLabels[counter] = classValue.equals("1");
        }
        return trueLabels;
    }
}

