/*
 * Decompiled with CFR 0.152.
 */
package jsat.io;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import jsat.DataSet;
import jsat.classifiers.CategoricalData;
import jsat.classifiers.ClassificationDataSet;
import jsat.classifiers.DataPoint;
import jsat.datatransform.DenseSparceTransform;
import jsat.io.DataWriter;
import jsat.linear.IndexValue;
import jsat.linear.SparseVector;
import jsat.linear.Vec;
import jsat.regression.RegressionDataSet;
import jsat.utils.DoubleList;
import jsat.utils.StringUtils;

public class LIBSVMLoader {
    // NOTE(review): this flag is not read or written anywhere in the visible
    // part of this class — confirm whether it is still needed.
    private static boolean fastLoad = true;

    /**
     * Not instantiable: every method visible in this class is static.
     */
    private LIBSVMLoader() {
    }

    /**
     * Loads a regression data set from a LIBSVM formatted file, using a
     * sparse ratio of {@code 0.5}.
     *
     * @param file the file to read
     * @return the regression data set read from the file
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    public static RegressionDataSet loadR(File file) throws FileNotFoundException, IOException {
        final double defaultSparseRatio = 0.5;
        return loadR(file, defaultSparseRatio);
    }

    /**
     * Loads a regression data set from a LIBSVM formatted file without a
     * caller-specified vector length ({@code -1} is passed on, so the length
     * observed in the data is used).
     *
     * @param file the file to read
     * @param sparseRatio forwarded unchanged to the three-argument overload
     * @return the regression data set read from the file
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    public static RegressionDataSet loadR(File file, double sparseRatio) throws FileNotFoundException, IOException {
        final int unspecifiedLength = -1;
        return loadR(file, sparseRatio, unspecifiedLength);
    }

    /**
     * Loads a regression data set from a LIBSVM formatted file.
     * <p>
     * The reader opened on the file is owned by this method and is closed
     * before returning, whether parsing succeeds or fails.
     *
     * @param file the file to read
     * @param sparseRatio forwarded unchanged to the reader based overload
     * @param vectorLength the vector length to use, or a non-positive value
     *        to use the length observed in the data
     * @return the regression data set read from the file
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading or closing the file fails
     */
    public static RegressionDataSet loadR(File file, double sparseRatio, int vectorLength) throws FileNotFoundException, IOException {
        Reader reader = new FileReader(file);
        try {
            return LIBSVMLoader.loadR(reader, sparseRatio, vectorLength);
        } finally {
            // The parser consumes the reader up to end of input before it
            // returns, so nothing still needs the file handle at this point.
            reader.close();
        }
    }

    /**
     * Loads a regression data set from LIBSVM formatted text supplied by the
     * given reader, without a caller-specified vector length ({@code -1} is
     * passed on, so the length observed in the data is used).
     *
     * @param isr the source of the LIBSVM formatted text
     * @param sparseRatio forwarded unchanged to the three-argument overload
     * @return the regression data set read from the reader
     * @throws IOException if reading fails
     */
    public static RegressionDataSet loadR(InputStreamReader isr, double sparseRatio) throws IOException {
        final int unspecifiedLength = -1;
        return loadR(isr, sparseRatio, unspecifiedLength);
    }

    /**
     * Loads a regression data set from LIBSVM formatted text supplied by the
     * given reader. The reader is handed to the shared parser with the
     * classification flag turned off; this method does not close it.
     *
     * @param reader the source of the LIBSVM formatted text
     * @param sparseRatio forwarded unchanged to the shared parser
     * @param vectorLength the vector length to use, or a non-positive value
     *        to use the length observed in the data
     * @return the regression data set read from the reader
     * @throws IOException if reading fails
     */
    public static RegressionDataSet loadR(Reader reader, double sparseRatio, int vectorLength) throws IOException {
        DataSet parsed = loadG(reader, sparseRatio, vectorLength, false);
        return (RegressionDataSet) parsed;
    }

    /**
     * Loads a classification data set from a LIBSVM formatted file, using a
     * sparse ratio of {@code 0.5}.
     * <p>
     * Delegates to the file based overloads (mirroring {@code loadR(File)})
     * instead of opening its own reader, so that the handling of the
     * underlying file lives in a single place.
     *
     * @param file the file to read
     * @return the classification data set read from the file
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    public static ClassificationDataSet loadC(File file) throws FileNotFoundException, IOException {
        return LIBSVMLoader.loadC(file, 0.5);
    }

    /**
     * Loads a classification data set from a LIBSVM formatted file without a
     * caller-specified vector length ({@code -1} is passed on, so the length
     * observed in the data is used).
     *
     * @param file the file to read
     * @param sparseRatio forwarded unchanged to the three-argument overload
     * @return the classification data set read from the file
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    public static ClassificationDataSet loadC(File file, double sparseRatio) throws FileNotFoundException, IOException {
        final int unspecifiedLength = -1;
        return loadC(file, sparseRatio, unspecifiedLength);
    }

    /**
     * Loads a classification data set from a LIBSVM formatted file.
     * <p>
     * The reader opened on the file is owned by this method and is closed
     * before returning, whether parsing succeeds or fails.
     *
     * @param file the file to read
     * @param sparseRatio forwarded unchanged to the reader based overload
     * @param vectorLength the vector length to use, or a non-positive value
     *        to use the length observed in the data
     * @return the classification data set read from the file
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading or closing the file fails
     */
    public static ClassificationDataSet loadC(File file, double sparseRatio, int vectorLength) throws FileNotFoundException, IOException {
        Reader reader = new FileReader(file);
        try {
            return LIBSVMLoader.loadC(reader, sparseRatio, vectorLength);
        } finally {
            // The parser consumes the reader up to end of input before it
            // returns, so nothing still needs the file handle at this point.
            reader.close();
        }
    }

    /**
     * Loads a classification data set from LIBSVM formatted text supplied by
     * the given reader, without a caller-specified vector length ({@code -1}
     * is passed on, so the length observed in the data is used).
     *
     * @param isr the source of the LIBSVM formatted text
     * @param sparseRatio forwarded unchanged to the three-argument overload
     * @return the classification data set read from the reader
     * @throws IOException if reading fails
     */
    public static ClassificationDataSet loadC(InputStreamReader isr, double sparseRatio) throws IOException {
        final int unspecifiedLength = -1;
        return loadC(isr, sparseRatio, unspecifiedLength);
    }

    /**
     * Loads a classification data set from LIBSVM formatted text supplied by
     * the given reader. The reader is handed to the shared parser with the
     * classification flag turned on; this method does not close it.
     *
     * @param reader the source of the LIBSVM formatted text
     * @param sparseRatio forwarded unchanged to the shared parser
     * @param vectorLength the vector length to use, or a non-positive value
     *        to use the length observed in the data
     * @return the classification data set read from the reader
     * @throws IOException if reading fails
     */
    public static ClassificationDataSet loadC(Reader reader, double sparseRatio, int vectorLength) throws IOException {
        DataSet parsed = loadG(reader, sparseRatio, vectorLength, true);
        return (ClassificationDataSet) parsed;
    }

    /**
     * Shared parser behind the {@code loadR} and {@code loadC} methods.
     * <p>
     * The input is consumed one character at a time by a small state machine
     * (see {@link STATE}). Each line is expected to hold a numeric label,
     * followed by whitespace separated {@code index:value} pairs. Indices in
     * the text are one-based and are shifted to zero-based positions.
     * Consecutive line terminators are skipped. The reader is read until end
     * of input but is not closed here.
     *
     * @param reader the source of the LIBSVM formatted text
     * @param sparseRatio passed to the {@link DenseSparceTransform} that is
     *        applied to the finished data set
     * @param vectorLength if positive, the length every vector is set to;
     *        otherwise the largest index observed in the data is used
     * @param classification {@code true} to build a
     *        {@link ClassificationDataSet}, {@code false} to build a
     *        {@link RegressionDataSet}
     * @return the parsed data set
     * @throws IOException if reading from {@code reader} fails
     * @throws RuntimeException if the text is malformed, ends in the middle
     *         of a feature index (or before any line was started), or if a
     *         positive {@code vectorLength} is smaller than the observed length
     */
    private static DataSet loadG(Reader reader, double sparseRatio, int vectorLength, boolean classification) throws IOException {
        // Characters of the token (label, index or value) currently being read.
        StringBuilder processBuffer = new StringBuilder(20);
        // Characters read from the reader that have not been fully consumed yet.
        StringBuilder charBuffer = new StringBuilder(1024);
        char[] buffer = new char[1024];
        ArrayList<SparseVector> sparceVecs = new ArrayList<SparseVector>();
        DoubleList labelVals = new DoubleList();
        HashMap<Double, Integer> possibleCats = new HashMap<Double, Integer>();
        int maxLen = 1;
        STATE state = STATE.INITIAL;
        int position = 0;
        // Scratch vector for the line being parsed; cloned when the line ends.
        SparseVector tempVec = new SparseVector(1, 1);
        int indexProcessing = -1;
        while (true) {
            // Refill when at most one unconsumed character is left.
            if (charBuffer.length() - position <= 1) {
                charBuffer.delete(0, position);
                position = 0;
                int read = reader.read(buffer);
                if (read >= 0) {
                    charBuffer.append(buffer, 0, read);
                    continue;
                }
            }
            // Nothing left to consume and the reader is exhausted: finish the
            // line that was in progress (if any) and stop.
            if (charBuffer.length() - position == 0) {
                if (state == STATE.LABEL) {
                    // Input ended right after a label with no features.
                    double label = Double.parseDouble(processBuffer.toString());
                    if (!possibleCats.containsKey(label) && classification) {
                        possibleCats.put(label, possibleCats.size());
                    }
                    labelVals.add(Double.valueOf(label));
                    sparceVecs.add(new SparseVector(maxLen, 0));
                    break;
                }
                if (state == STATE.WHITESPACE_AFTER_LABEL) {
                    // The label was already recorded; the line has no features.
                    sparceVecs.add(new SparseVector(maxLen, 0));
                    break;
                }
                if (state == STATE.FEATURE_VALUE || state == STATE.WHITESPACE_AFTER_FEATURE) {
                    // Only FEATURE_VALUE still has an unparsed value pending.
                    // In WHITESPACE_AFTER_FEATURE the value was stored when the
                    // FEATURE_VALUE state ended and the token buffer is empty,
                    // so it must not be parsed a second time.
                    if (state == STATE.FEATURE_VALUE) {
                        double value = StringUtils.parseDouble(processBuffer, 0, processBuffer.length());
                        processBuffer.delete(0, processBuffer.length());
                        maxLen = Math.max(maxLen, indexProcessing + 1);
                        tempVec.setLength(maxLen);
                        if (value != 0.0) {
                            tempVec.set(indexProcessing, value);
                        }
                    }
                    sparceVecs.add(tempVec.clone());
                    break;
                }
                if (state == STATE.NEWLINE) break;
                throw new RuntimeException("Invalid LIBSVM file: unexpected end of input in state " + state);
            }
            char ch = charBuffer.charAt(position);
            switch (state) {
                case INITIAL: {
                    // Nothing is consumed; the same character is re-examined as a label.
                    state = STATE.LABEL;
                    break;
                }
                case LABEL: {
                    if (Character.isDigit(ch) || ch == '.' || ch == 'E' || ch == 'e' || ch == '-' || ch == '+') {
                        processBuffer.append(ch);
                        ++position;
                        break;
                    }
                    if (Character.isWhitespace(ch)) {
                        double label = Double.parseDouble(processBuffer.toString());
                        if (!possibleCats.containsKey(label) && classification) {
                            // Provisional id; ids are reassigned in sorted order later.
                            possibleCats.put(label, possibleCats.size());
                        }
                        labelVals.add(Double.valueOf(label));
                        processBuffer.delete(0, processBuffer.length());
                        tempVec.zeroOut();
                        if (ch == '\n' || ch == '\r') {
                            // Label only line: record an empty vector for it.
                            sparceVecs.add(new SparseVector(maxLen, 0));
                            state = STATE.NEWLINE;
                            break;
                        }
                        state = STATE.WHITESPACE_AFTER_LABEL;
                        break;
                    }
                    throw new RuntimeException("Invalid LIBSVM file");
                }
                case WHITESPACE_AFTER_LABEL: {
                    if (Character.isDigit(ch)) {
                        state = STATE.FEATURE_INDEX;
                        break;
                    }
                    if (Character.isWhitespace(ch)) {
                        if (ch == '\n' || ch == '\r') {
                            tempVec.zeroOut();
                            sparceVecs.add(new SparseVector(maxLen, 0));
                            state = STATE.NEWLINE;
                            break;
                        }
                        ++position;
                        break;
                    }
                    throw new RuntimeException("Invalid LIBSVM file: unexpected character '" + ch + "' after a label");
                }
                case FEATURE_INDEX: {
                    if (Character.isDigit(ch)) {
                        processBuffer.append(ch);
                        ++position;
                        break;
                    }
                    if (ch == ':') {
                        // Indices in the text are one-based.
                        indexProcessing = StringUtils.parseInt(processBuffer, 0, processBuffer.length()) - 1;
                        processBuffer.delete(0, processBuffer.length());
                        state = STATE.FEATURE_VALUE;
                        ++position;
                        break;
                    }
                    throw new RuntimeException("Invalid LIBSVM file: unexpected character '" + ch + "' in a feature index");
                }
                case FEATURE_VALUE: {
                    if (Character.isDigit(ch) || ch == '.' || ch == 'E' || ch == 'e' || ch == '-' || ch == '+') {
                        processBuffer.append(ch);
                        ++position;
                        break;
                    }
                    double value = StringUtils.parseDouble(processBuffer, 0, processBuffer.length());
                    processBuffer.delete(0, processBuffer.length());
                    maxLen = Math.max(maxLen, indexProcessing + 1);
                    tempVec.setLength(maxLen);
                    if (value != 0.0) {
                        tempVec.set(indexProcessing, value);
                    }
                    if (Character.isWhitespace(ch)) {
                        // The whitespace itself is consumed by the next state.
                        state = STATE.WHITESPACE_AFTER_FEATURE;
                        break;
                    }
                    throw new RuntimeException("Invalid LIBSVM file: unexpected character '" + ch + "' in a feature value");
                }
                case WHITESPACE_AFTER_FEATURE: {
                    if (Character.isDigit(ch)) {
                        state = STATE.FEATURE_INDEX;
                        break;
                    }
                    if (!Character.isWhitespace(ch)) {
                        // Neither the position nor the state would change for
                        // such a character, so the loop could never make
                        // progress; reject the input instead of spinning.
                        throw new RuntimeException("Invalid LIBSVM file: unexpected character '" + ch + "' after a feature value");
                    }
                    if (ch == '\n' || ch == '\r') {
                        sparceVecs.add(tempVec.clone());
                        tempVec.zeroOut();
                        state = STATE.NEWLINE;
                        break;
                    }
                    ++position;
                    break;
                }
                case NEWLINE: {
                    if (ch == '\n' || ch == '\r') {
                        ++position;
                        break;
                    }
                    // First character of the next line; re-examined as a label.
                    state = STATE.LABEL;
                }
            }
        }
        if (vectorLength > 0) {
            if (maxLen > vectorLength) {
                throw new RuntimeException("Length given was " + vectorLength + ", but observed length was " + maxLen);
            }
            maxLen = vectorLength;
        }
        if (classification) {
            CategoricalData predicting = new CategoricalData(possibleCats.size());
            // Reassign class ids so that they follow the ascending order of
            // the label values rather than their order of appearance.
            DoubleList allCatKeys = new DoubleList(possibleCats.keySet());
            Collections.sort(allCatKeys);
            for (int i = 0; i < allCatKeys.size(); ++i) {
                possibleCats.put((Double)allCatKeys.get(i), i);
            }
            ClassificationDataSet cds = new ClassificationDataSet(maxLen, new CategoricalData[0], predicting);
            for (int i = 0; i < labelVals.size(); ++i) {
                SparseVector vec = (SparseVector)sparceVecs.get(i);
                // Earlier lines may have been stored before the final length was known.
                vec.setLength(maxLen);
                cds.addDataPoint((Vec)vec, new int[0], (Integer)possibleCats.get(labelVals.get(i)));
            }
            cds.applyTransform(new DenseSparceTransform(sparseRatio));
            return cds;
        }
        RegressionDataSet rds = new RegressionDataSet(maxLen, new CategoricalData[0]);
        for (int i = 0; i < sparceVecs.size(); ++i) {
            SparseVector sv = (SparseVector)sparceVecs.get(i);
            // Earlier lines may have been stored before the final length was known.
            sv.setLength(maxLen);
            rds.addDataPoint(sv, new int[0], (Double)labelVals.get(i));
        }
        rds.applyTransform(new DenseSparceTransform(sparseRatio));
        return rds;
    }

    /**
     * Writes a classification data set to the given stream as LIBSVM text:
     * one line per data point, holding the category index followed by
     * {@code index:value} pairs with one-based indices.
     * <p>
     * Whole-number values are written without a fractional part. The stream
     * is flushed and closed when writing is done.
     *
     * @param data the data set to write
     * @param os the stream to write to; closed by this method
     */
    public static void write(ClassificationDataSet data, OutputStream os) {
        PrintWriter writer = new PrintWriter(os);
        for (int i = 0; i < data.getSampleSize(); ++i) {
            int pred = data.getDataPointCategory(i);
            Vec vals = data.getDataPoint(i).getNumericalValues();
            writer.write(pred + " ");
            for (IndexValue iv : vals) {
                double val = iv.getValue();
                // rint(val) == val also holds for infinities and for magnitudes
                // of 2^63 and above, where the cast to long saturates and would
                // write the wrong number. Use the integer form only when the
                // cast is exact.
                boolean exactLong = Math.rint(val) == val && Math.abs(val) < Long.MAX_VALUE;
                String text = exactLong ? Long.toString((long) val) : Double.toString(val);
                writer.write((iv.getIndex() + 1) + ":" + text + " ");
            }
            writer.write("\n");
        }
        writer.flush();
        writer.close();
    }

    /**
     * Writes a regression data set to the given stream as LIBSVM text: one
     * line per data point, holding the target value followed by
     * {@code index:value} pairs with one-based indices.
     * <p>
     * Whole-number feature values are written without a fractional part. The
     * stream is flushed and closed when writing is done.
     *
     * @param data the data set to write
     * @param os the stream to write to; closed by this method
     */
    public static void write(RegressionDataSet data, OutputStream os) {
        PrintWriter writer = new PrintWriter(os);
        for (int i = 0; i < data.getSampleSize(); ++i) {
            double pred = data.getTargetValue(i);
            Vec vals = data.getDataPoint(i).getNumericalValues();
            writer.write(pred + " ");
            for (IndexValue iv : vals) {
                double val = iv.getValue();
                // rint(val) == val also holds for infinities and for magnitudes
                // of 2^63 and above, where the cast to long saturates and would
                // write the wrong number. Use the integer form only when the
                // cast is exact.
                boolean exactLong = Math.rint(val) == val && Math.abs(val) < Long.MAX_VALUE;
                String text = exactLong ? Long.toString((long) val) : Double.toString(val);
                writer.write((iv.getIndex() + 1) + ":" + text + " ");
            }
            writer.write("\n");
        }
        writer.flush();
        writer.close();
    }

    /**
     * Creates a {@link DataWriter} that emits data points as LIBSVM text.
     * <p>
     * No header is written. Each point becomes one line: a label token
     * followed by {@code index:value} pairs with one-based indices. The label
     * token is the label itself for regression, the label truncated to an
     * {@code int} for classification, and the constant {@code 0} for simple
     * data sets.
     *
     * @param out the stream the writer is constructed with
     * @param dim the dimension the writer is constructed with
     * @param type the kind of data set being written
     * @return the new writer
     * @throws IOException declared for the {@link DataWriter} construction
     */
    public static DataWriter getWriter(OutputStream out, int dim, DataWriter.DataSetType type) throws IOException {
        return new DataWriter(out, new CategoricalData[0], dim, type){

            @Override
            protected void writeHeader(CategoricalData[] catInfo, int dim, DataWriter.DataSetType type, OutputStream out) {
                // Intentionally empty: no header is emitted for this format.
            }

            @Override
            protected void pointToBytes(DataPoint dp, double label, ByteArrayOutputStream byteOut) {
                PrintWriter writer = new PrintWriter(byteOut);
                if (this.type == DataWriter.DataSetType.REGRESSION) {
                    writer.write(label + " ");
                } else if (this.type == DataWriter.DataSetType.CLASSIFICATION) {
                    writer.write((int)label + " ");
                } else if (this.type == DataWriter.DataSetType.SIMPLE) {
                    writer.write("0 ");
                }
                Vec vals = dp.getNumericalValues();
                for (IndexValue iv : vals) {
                    double val = iv.getValue();
                    // rint(val) == val also holds for infinities and for
                    // magnitudes of 2^63 and above, where the cast to long
                    // saturates and would write the wrong number. Use the
                    // integer form only when the cast is exact.
                    boolean exactLong = Math.rint(val) == val && Math.abs(val) < Long.MAX_VALUE;
                    String text = exactLong ? Long.toString((long) val) : Double.toString(val);
                    writer.write((iv.getIndex() + 1) + ":" + text + " ");
                }
                writer.write("\n");
                // Flush only: the byte buffer belongs to the caller.
                writer.flush();
            }
        };
    }

    /**
     * States of the character-level parser in {@code loadG}.
     */
    private static enum STATE {
        /** Start state; moves to {@link #LABEL} without consuming a character. */
        INITIAL,
        /** Collecting the characters of a line's label. */
        LABEL,
        /** Skipping whitespace between the label and the first feature, if any. */
        WHITESPACE_AFTER_LABEL,
        /** Collecting the digits of a feature index, up to the {@code ':'}. */
        FEATURE_INDEX,
        /** Collecting the characters of a feature value. */
        FEATURE_VALUE,
        /** Skipping whitespace after a feature value, before the next feature or the line end. */
        WHITESPACE_AFTER_FEATURE,
        /** Skipping line terminator characters until the next line starts. */
        NEWLINE;

    }
}

