/*
 * Decompiled with CFR 0.152.
 */
package jsat;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jsat.DataSet;
import jsat.SimpleDataSet;
import jsat.classifiers.CategoricalData;
import jsat.classifiers.ClassificationDataSet;
import jsat.classifiers.DataPoint;
import jsat.linear.DenseVector;
import jsat.linear.Vec;
import jsat.regression.RegressionDataSet;

/**
 * Static helpers for reading and writing data sets in the ARFF text format.
 *
 * <p>The reader lower-cases the whole header and every nominal value, so
 * attribute names and option names in a loaded data set are always lower case.
 * Attributes declared as {@code real}, {@code numeric} or {@code integer...}
 * are treated as numeric; every other declaration is treated as a nominal
 * attribute whose options are the comma separated tokens of the declaration.
 */
public class ARFFLoader {
    private static final Logger LOGGER = Logger.getLogger(ARFFLoader.class.getName());

    /** Matches a single-quoted attribute name such as {@code 'petal length'}. */
    private static final Pattern QUOTED_NAME = Pattern.compile("'.+?'");

    /** Matches the optional trailing instance weight, e.g. {@code {2}} or {@code {0.5}}. */
    private static final Pattern WEIGHT_TOKEN = Pattern.compile("\\{\\d+(\\.\\d+)?\\}");

    /**
     * Loads an ARFF file from disk.
     *
     * @param file the file to read
     * @return the parsed data set, or {@code null} if the file could not be opened
     *         (the failure is logged)
     * @throws RuntimeException if the file content is malformed, see
     *         {@link #loadArffFile(Reader)}
     */
    public static SimpleDataSet loadArffFile(File file) {
        FileReader reader = null;
        try {
            reader = new FileReader(file);
            return ARFFLoader.loadArffFile(reader);
        }
        catch (FileNotFoundException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return null;
        }
        finally {
            // This method opened the reader, so it is responsible for releasing it.
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException ex) {
                    LOGGER.log(Level.WARNING, null, ex);
                }
            }
        }
    }

    /**
     * Parses ARFF content from the given reader. The reader is not closed.
     *
     * <p>Blank lines and lines starting with {@code %} are skipped. Before the
     * {@code @data} directive only {@code @attribute} lines are interpreted;
     * after it every remaining line is parsed as one data row. A row may carry
     * one extra trailing column of the form {@code {w}} giving the row weight.
     * A value of {@code ?} is read as {@code NaN} for numeric attributes and
     * as {@code -1} for nominal attributes.
     *
     * <p>If reading fails with an {@link IOException} the error is logged and
     * the rows parsed so far are returned.
     *
     * @param input the source of the ARFF text
     * @return a data set holding every parsed row, with numeric variable names
     *         taken from the header
     * @throws RuntimeException if an attribute declaration or data row is malformed
     * @throws NumberFormatException if a numeric value cannot be parsed
     */
    public static SimpleDataSet loadArffFile(Reader input) {
        ArrayList<DataPoint> points = new ArrayList<DataPoint>();
        BufferedReader br = new BufferedReader(input);
        // The three lists below are parallel: one entry per declared attribute.
        List<Boolean> isReal = new ArrayList<Boolean>();
        List<String> variableNames = new ArrayList<String>();
        // null for a numeric attribute, otherwise option name -> option index
        List<Map<String, Integer>> catVals = new ArrayList<Map<String, Integer>>();
        int numReal = 0;
        CategoricalData[] categoricalData = null;
        try {
            boolean atData = false;
            String line;
            while ((line = br.readLine()) != null) {
                // Trim before testing for a comment so indented '%' lines are skipped too.
                line = line.trim();
                if (line.isEmpty() || line.startsWith("%")) {
                    continue;
                }
                if (atData) {
                    points.add(ARFFLoader.parseDataLine(line, isReal, catVals, numReal, categoricalData));
                    continue;
                }
                if (!line.startsWith("@")) {
                    continue;
                }
                // Locale.ROOT keeps keyword detection independent of the default
                // locale (e.g. Turkish maps 'I' to a dotless 'i').
                String directive = line.substring(1).toLowerCase(Locale.ROOT);
                if (directive.startsWith("data")) {
                    categoricalData = ARFFLoader.buildCategoricalData(variableNames, catVals);
                    numReal = isReal.size() - categoricalData.length;
                    atData = true;
                } else if (directive.startsWith("attribute")) {
                    ARFFLoader.parseAttribute(directive.substring("attribute".length()), variableNames, isReal, catVals);
                }
            }
        }
        catch (IOException ex) {
            // Best effort: report the failure but still return what was read.
            LOGGER.log(Level.SEVERE, null, ex);
        }
        SimpleDataSet dataSet = new SimpleDataSet(points);
        int numericIndex = 0;
        for (int i = 0; i < isReal.size(); ++i) {
            if (!isReal.get(i)) {
                continue;
            }
            // variableNames is indexed by attribute position, not by numeric position.
            dataSet.setNumericName(variableNames.get(i), numericIndex++);
        }
        return dataSet;
    }

    /**
     * Parses the remainder of an {@code @attribute} line (already lower-cased,
     * keyword removed) and appends one entry to each of the three parallel lists.
     */
    private static void parseAttribute(String declaration, List<String> variableNames, List<Boolean> isReal, List<Map<String, Integer>> catVals) {
        String decl = declaration.trim().replace("\t", " ");
        String variableName;
        if (decl.startsWith("'")) {
            Matcher m = QUOTED_NAME.matcher(decl);
            if (!m.find()) {
                throw new RuntimeException("Could not find a quoted attribute name in: " + decl);
            }
            variableName = ARFFLoader.nameTrim(m.group());
            // Swap the quoted name for a whitespace-free token so the split
            // below separates the name from the type.
            decl = m.replaceFirst("placeHolder");
        } else {
            variableName = ARFFLoader.nameTrim(decl.replaceAll("\\s+.*", ""));
        }
        String[] nameAndType = decl.split("\\s+", 2);
        if (nameAndType.length < 2) {
            throw new RuntimeException("Attribute " + variableName + " has no type declaration");
        }
        String type = nameAndType[1].trim();
        variableNames.add(variableName);
        if (type.equals("real") || type.equals("numeric") || type.startsWith("integer")) {
            isReal.add(true);
            catVals.add(null);
            return;
        }
        String cats = type.replace("{", "").replace("}", "").trim();
        if (cats.endsWith(",")) {
            cats = cats.substring(0, cats.length() - 1);
        }
        String[] rawOptions = cats.split(",");
        Map<String, Integer> options = new HashMap<String, Integer>();
        for (int i = 0; i < rawOptions.length; ++i) {
            options.put(ARFFLoader.nameTrim(rawOptions[i]), i);
        }
        isReal.add(false);
        catVals.add(options);
    }

    /**
     * Creates one {@link CategoricalData} per nominal attribute, in declaration order.
     */
    private static CategoricalData[] buildCategoricalData(List<String> variableNames, List<Map<String, Integer>> catVals) {
        List<CategoricalData> built = new ArrayList<CategoricalData>();
        for (int i = 0; i < catVals.size(); ++i) {
            Map<String, Integer> options = catVals.get(i);
            if (options == null) {
                continue;
            }
            CategoricalData cd = new CategoricalData(options.size());
            cd.setCategoryName(variableNames.get(i));
            for (Map.Entry<String, Integer> entry : options.entrySet()) {
                cd.setOptionName(entry.getKey(), entry.getValue());
            }
            built.add(cd);
        }
        return built.toArray(new CategoricalData[built.size()]);
    }

    /**
     * Converts one comma separated data row into a {@link DataPoint}.
     */
    private static DataPoint parseDataLine(String line, List<Boolean> isReal, List<Map<String, Integer>> catVals, int numReal, CategoricalData[] categoricalData) {
        int numAttributes = isReal.size();
        String[] tokens = line.split(",");
        double weight = 1.0;
        if (tokens.length == numAttributes + 1) {
            String s = tokens[numAttributes].trim();
            if (!WEIGHT_TOKEN.matcher(s).matches()) {
                throw new RuntimeException("extra column must indicate a data point weigh in the form of \"{#}\", instead bad token " + s + " was found");
            }
            weight = Double.parseDouble(s.substring(1, s.length() - 1));
        } else if (tokens.length != numAttributes) {
            // Covers both too many and too few values.
            throw new RuntimeException("Column had " + tokens.length + " values instead of " + numAttributes);
        }
        DenseVector vec = new DenseVector(numReal);
        int[] cats = new int[numAttributes - numReal];
        int catsSeen = 0;
        for (int i = 0; i < numAttributes; ++i) {
            if (isReal.get(i)) {
                String valString = tokens[i].trim();
                // i - catsSeen is the position among the numeric attributes only.
                vec.set(i - catsSeen, valString.equals("?") ? Double.NaN : Double.parseDouble(valString));
                continue;
            }
            // Header options were lower-cased with Locale.ROOT, so do the same here.
            String option = ARFFLoader.nameTrim(tokens[i]).toLowerCase(Locale.ROOT);
            if (option.equals("?")) {
                cats[catsSeen++] = -1;
                continue;
            }
            Integer optionIndex = catVals.get(i).get(option);
            if (optionIndex == null) {
                throw new RuntimeException("Value " + option + " is not a declared option of attribute number " + i);
            }
            cats[catsSeen++] = optionIndex;
        }
        return new DataPoint(vec, cats, categoricalData, weight);
    }

    /**
     * Writes the data set in ARFF format using the relation name
     * {@code Default_Relation}.
     *
     * @param data the data set to write
     * @param os the stream to write to; it is flushed but not closed
     */
    public static void writeArffFile(DataSet data, OutputStream os) {
        ARFFLoader.writeArffFile(data, os, "Default_Relation");
    }

    /**
     * Writes the data set in ARFF format. Categorical attributes are written
     * first, then numeric attributes, then the classification target (for a
     * {@link ClassificationDataSet}) or the regression target (for a
     * {@link RegressionDataSet}). Negative categorical values and {@code NaN}
     * numeric values are written as {@code ?}.
     *
     * @param data the data set to write
     * @param os the stream to write to; it is flushed but not closed
     * @param relation the relation name for the {@code @relation} line
     */
    public static void writeArffFile(DataSet data, OutputStream os, String relation) {
        PrintWriter writer = new PrintWriter(os);
        writer.write(String.format("@relation %s\n", ARFFLoader.addQuotes(relation)));
        CategoricalData[] catInfo = data.getCategories();
        for (CategoricalData cate : catInfo) {
            ARFFLoader.writeCatVar(writer, cate);
        }
        for (int i = 0; i < data.getNumNumericalVars(); ++i) {
            String name = data.getNumericName(i);
            writer.write("@attribute " + (name == null ? "num" + i : name.replaceAll("\\s+", "-")) + " NUMERIC\n");
        }
        if (data instanceof ClassificationDataSet) {
            ARFFLoader.writeCatVar(writer, ((ClassificationDataSet)data).getPredicting());
        }
        if (data instanceof RegressionDataSet) {
            writer.write("@ATTRIBUTE target NUMERIC\n");
        }
        writer.write("@DATA\n");
        for (int row = 0; row < data.getSampleSize(); ++row) {
            DataPoint dp = data.getDataPoint(row);
            boolean firstFeature = true;
            for (int i = 0; i < catInfo.length; ++i) {
                if (!firstFeature) {
                    writer.write(",");
                }
                firstFeature = false;
                int catVal = dp.getCategoricalValue(i);
                if (catVal < 0) {
                    writer.write("?");
                } else {
                    writer.write(ARFFLoader.addQuotes(catInfo[i].getOptionName(catVal)));
                }
            }
            Vec v = dp.getNumericalValues();
            for (int i = 0; i < v.length(); ++i) {
                if (!firstFeature) {
                    writer.write(",");
                }
                firstFeature = false;
                double val = v.get(i);
                if (Double.isNaN(val)) {
                    writer.write("?");
                } else if (Math.rint(val) == val && Math.abs(val) < Long.MAX_VALUE) {
                    // Integral and inside the long range: drop the ".0" suffix.
                    // The range test keeps infinities and huge values from
                    // being saturated to Long.MAX_VALUE by the cast.
                    writer.write(Long.toString((long)val));
                } else {
                    writer.write(Double.toString(val));
                }
            }
            if (data instanceof ClassificationDataSet) {
                if (!firstFeature) {
                    writer.write(",");
                }
                firstFeature = false;
                ClassificationDataSet cdata = (ClassificationDataSet)data;
                writer.write(ARFFLoader.addQuotes(cdata.getPredicting().getOptionName(cdata.getDataPointCategory(row))));
            }
            if (data instanceof RegressionDataSet) {
                if (!firstFeature) {
                    writer.write(",");
                }
                firstFeature = false;
                writer.write(Double.toString(((RegressionDataSet)data).getTargetValue(row)));
            }
            writer.write("\n");
        }
        writer.flush();
    }

    /**
     * Wraps the string in double quotes when it contains a space; otherwise
     * returns it unchanged.
     */
    private static String addQuotes(String string) {
        if (string.contains(" ")) {
            return "\"" + string + "\"";
        }
        return string;
    }

    /**
     * Writes the {@code @ATTRIBUTE} declaration of a nominal variable. Runs of
     * whitespace in the variable name are replaced by {@code -}.
     */
    private static void writeCatVar(PrintWriter writer, CategoricalData cate) {
        writer.write("@ATTRIBUTE " + cate.getCategoryName().replaceAll("\\s+", "-") + " {");
        for (int i = 0; i < cate.getNumOfCategories(); ++i) {
            if (i != 0) {
                writer.write(",");
            }
            writer.write(ARFFLoader.addQuotes(cate.getOptionName(i)));
        }
        writer.write("}\n");
    }

    /**
     * Trims surrounding whitespace and removes one leading and one trailing
     * quote character (single or double) if present.
     */
    private static String nameTrim(String in) {
        in = in.trim();
        if (in.startsWith("'") || in.startsWith("\"")) {
            in = in.substring(1);
        }
        if (in.endsWith("'") || in.endsWith("\"")) {
            in = in.substring(0, in.length() - 1);
        }
        return in.trim();
    }
}

