/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    IterativeEvaluationExistingPropArffs.java
 *    Copyright (C) 2010 Stefan Mutter
 *
 */

package weka.utils;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.evaluation.NominalPrediction;
import weka.classifiers.meta.CVParameterSelectionAUC;
import weka.classifiers.meta.CVParameterSelectionAUCMulti;
import weka.core.FastVector;
import weka.core.Instances;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.Utils;
import weka.core.converters.ArffLoader;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

/**
 * Simulates a 10 fold cross-validation run for saved arff files generated by an IterativeHMMPropositionalizer.
 * <p>
 * For every directory given with <code>-D</code> and every selected mode digit <code>m</code> the files
 * <code>&lt;dir&gt;/&lt;m&gt;/model_prop_&lt;n&gt;_&lt;m&gt;.arff</code> (training) and
 * <code>&lt;dir&gt;/&lt;m&gt;/test_model_prop_&lt;n&gt;_&lt;m&gt;.arff</code> (test) are read, their last
 * attribute is removed, the remaining attributes are suffixed with the directory index and all parts
 * are merged attribute-wise. The last attribute of the very first file read is appended again at the end
 * and used as the class attribute.
 * <p>
 * Command line options (see {@link #setOptions(String[])}):
 * <ul>
 * <li><code>-F &lt;num&gt;</code> number of folds (default 10)</li>
 * <li><code>-O &lt;num&gt;</code> file number offset (default 0)</li>
 * <li><code>-U &lt;num&gt;</code> largest file number an iteration may start at (default 0)</li>
 * <li><code>-Z</code> read <code>.arff.gz</code> instead of <code>.arff</code> files</li>
 * <li><code>-M &lt;range&gt;</code> range list of propositionalisation modes (required)</li>
 * <li><code>-D &lt;dir,dir,...&gt;</code> comma separated list of directories</li>
 * <li><code>-L &lt;num,num,...&gt;</code> per-directory upper limits for the file number</li>
 * <li><code>-S &lt;num&gt;</code> iteration steps (default 1)</li>
 * <li><code>-W &lt;class&gt;</code> classifier class name (required); options after <code>--</code>
 * are handed to the classifier</li>
 * </ul>
 *
 * @author Stefan Mutter (pHMM4weka@gmail.com)
 * @author FracPete (fracpete at waikato dot ac dot nz)
 */
public class IterativeEvaluationExistingPropArffs {

	/** Prefix of the files holding the training part of a fold. */
	private static final String TRAINING_FILE_START = "model_prop_";

	/** Prefix of the files holding the test part of a fold. */
	private static final String TEST_FILE_START = "test_model_prop_";

	/** Extension of uncompressed input files. */
	private static final String ARFF_EXTENSION = ".arff";

	/** Extension of gzip compressed input files. */
	private static final String GZIP_ARFF_EXTENSION = ".arff.gz";

	/** Distance between the first file numbers of two consecutive iterations. */
	private static final int FILES_PER_ITERATION = 10;

	/** Number of folds used when <code>-F</code> is not given. */
	private static final int DEFAULT_FOLDS = 10;

	/** Upper bound handed to the mode range before the selection is read. */
	private static final int MODE_RANGE_UPPER = 6;

	/** Extension appended to every file name, kept in sync with {@link #useGZipFile}. */
	private String fileExtension;

	private Classifier m_classifier;

	private int m_folds;

	//absolute path to the directory that saves the prop. arffs, e.g /research/smm52/HMMProps_models/Pro/propArffs/
	//while data is loaded this holds the directory that is currently being read
	private String dirName;

	/** All directories given with <code>-D</code>, each non-empty entry ending in "/". */
	private String[] dirNames;

	/** Concatenation of the selected mode numbers (1-based), one character per mode. */
	private String modeString;

	private int fileNumberOffset;

	private int maxOffset;

	private int m_iterationSteps;

	/** Range list of the selected modes as given with <code>-M</code>. */
	private final Range modeRange;

	/** Filter removing the last attribute of a dataset. */
	private final Remove cutClass;

	private boolean useGZipFile;

	/** Whether file numbers are clamped per directory using {@link #limits}. */
	private boolean adjustIterationNumbers;

	/** Per-directory upper limits for the file number, parallel to {@link #dirNames}. */
	private int[] limits;

	/** Class name of the classifier as given with <code>-W</code>. */
	public String classifierName;

	/**
	 * Creates an evaluator with 10 folds, no offsets, one iteration step and
	 * uncompressed arff files as input.
	 */
	public IterativeEvaluationExistingPropArffs() {
		m_iterationSteps = 1;
		m_folds = DEFAULT_FOLDS;
		fileNumberOffset = 0;
		maxOffset = 0;
		modeRange = new Range();
		cutClass = new Remove();
		cutClass.setAttributeIndices("last");
		useGZipFile = false;
		fileExtension = ARFF_EXTENSION;
		adjustIterationNumbers = false;
	}

	/**
	 * Returns the current settings in the form understood by {@link #setOptions(String[])}.
	 * Options that have no value yet (mode range, directories, limits, classifier) are left out.
	 * Classifier specific options are appended after a <code>--</code> separator.
	 *
	 * @return the option array
	 * @throws Exception declared for compatibility with existing callers
	 */
	public String[] getOptions() throws Exception {
		List<String> result = new ArrayList<String>();

		result.add("-F");
		result.add("" + getFolds());

		result.add("-O");
		result.add("" + getFileNumberOffset());

		result.add("-U");
		result.add("" + getMaxOffset());

		if (isUseGZipFile()) {
			result.add("-Z");
		}

		// emit the range list, not the derived digit string, so that setOptions can parse it again
		String modeList = modeRange.getRanges();
		if (modeList != null && modeList.length() != 0) {
			result.add("-M");
			result.add(modeList);
		}

		String directories = directoryOptionValue();
		if (directories != null) {
			result.add("-D");
			result.add(directories);
		}

		if (adjustIterationNumbers && limits != null) {
			StringBuilder limitList = new StringBuilder();
			for (int i = 0; i < limits.length; i++) {
				if (i > 0) {
					limitList.append(',');
				}
				limitList.append(limits[i]);
			}
			result.add("-L");
			result.add(limitList.toString());
		}

		result.add("-S");
		result.add("" + getIterationSteps());

		Classifier classifier = getClassifier();
		if (classifier != null) {
			result.add("-W");
			result.add("" + classifier.getClass().getName());

			if (classifier instanceof OptionHandler) {
				String[] classifierOptions = ((OptionHandler) classifier).getOptions();
				if (classifierOptions.length > 0) {
					// setOptions extracts the classifier options with Utils.partitionOptions,
					// which only returns what follows the "--" marker
					result.add("--");
					for (int i = 0; i < classifierOptions.length; i++) {
						result.add(classifierOptions[i]);
					}
				}
			}
		}

		return result.toArray(new String[result.size()]);
	}

	/**
	 * Builds the value of the <code>-D</code> option.
	 *
	 * @return the comma separated directory list, the single directory set via
	 *         {@link #setDirName(String)} if no list is available, or <code>null</code> if neither is set
	 */
	private String directoryOptionValue() {
		if (dirNames == null) {
			return dirName;
		}
		StringBuilder joined = new StringBuilder();
		for (int i = 0; i < dirNames.length; i++) {
			if (i > 0) {
				joined.append(',');
			}
			joined.append(dirNames[i]);
		}
		return joined.toString();
	}

	/**
	 * Parses the given options. Numeric options that are missing fall back to their defaults;
	 * <code>-M</code> and <code>-W</code> are required.
	 *
	 * @param options the option array; everything after <code>--</code> is passed to the classifier
	 * @throws IllegalArgumentException if no mode or no classifier is given, or if fewer limits
	 *         than directories are given
	 * @throws Exception if an option cannot be parsed or the classifier cannot be created
	 */
	public void setOptions(String[] options) throws Exception {
		m_folds = parseIntOption('F', options, DEFAULT_FOLDS);
		fileNumberOffset = parseIntOption('O', options, 0);
		maxOffset = parseIntOption('U', options, 0);

		setUseGZipFile(Utils.getFlag('Z', options));

		String modeList = Utils.getOption('M', options);
		if (modeList.length() == 0) {
			throw new IllegalArgumentException("No propositionalisation mode provided");
		}
		setAttributeIndices(modeList);
		getPropSettings();

		dirNames = Utils.getOption('D', options).split(",");
		for (int i = 0; i < dirNames.length; i++) {
			// an empty entry is deliberately left untouched so that it is not turned into "/"
			if (dirNames[i].length() > 0 && !dirNames[i].endsWith("/")) {
				dirNames[i] = dirNames[i] + "/";
			}
		}

		String limitString = Utils.getOption('L', options);
		if (limitString.length() != 0) {
			String[] individualLimits = limitString.split(",");
			if (individualLimits.length < dirNames.length) {
				throw new IllegalArgumentException("Only " + individualLimits.length
						+ " limit(s) given for " + dirNames.length + " directories");
			}
			int[] parsedLimits = new int[individualLimits.length];
			for (int i = 0; i < individualLimits.length; i++) {
				parsedLimits[i] = Integer.parseInt(individualLimits[i].trim());
			}
			limits = parsedLimits;
			adjustIterationNumbers = true;
		} else {
			// reset, so that a second call without -L does not keep stale limits
			limits = null;
			adjustIterationNumbers = false;
		}

		m_iterationSteps = parseIntOption('S', options, 1);

		classifierName = Utils.getOption('W', options);
		if (classifierName.length() == 0) {
			throw new IllegalArgumentException("no classifier given");
		}
		// This is just to set the classifier in case the option
		// parsing fails.
		setClassifier(AbstractClassifier.forName(classifierName, null));
		setClassifier(AbstractClassifier.forName(classifierName,
				Utils.partitionOptions(options)));
	}

	/**
	 * Reads an integer valued option.
	 *
	 * @param flag the option character
	 * @param options the option array
	 * @param defaultValue the value to use if the option is not present
	 * @return the parsed value or the default
	 * @throws Exception if the option has no argument or the argument is not an integer
	 */
	private static int parseIntOption(char flag, String[] options, int defaultValue) throws Exception {
		String value = Utils.getOption(flag, options);
		if (value.length() == 0) {
			return defaultValue;
		}
		return Integer.parseInt(value);
	}

	/**
	 * Set the base learner.
	 *
	 * @param newClassifier the classifier to use.
	 */
	public void setClassifier(Classifier newClassifier) {
		m_classifier = newClassifier;
	}

	/**
	 * Get the classifier used as the base learner.
	 *
	 * @return the classifier used as the classifier
	 */
	public Classifier getClassifier() {
		return m_classifier;
	}

	/**
	 * Loads training and test files for the given file number and returns them as one dataset
	 * (training instances first, test instances appended).
	 *
	 * @param fileNumber the number used in the file names
	 * @return the merged dataset, class attribute last
	 * @throws Exception if a file cannot be read or the parts cannot be merged
	 */
	private Instances getFullSet(int fileNumber) throws Exception {
		return assembleDataset(fileNumber, true, true);
	}

	/**
	 * Loads the training files for the given file number.
	 *
	 * @param fileNumber the number used in the file names
	 * @return the merged training data, class attribute last
	 * @throws Exception if a file cannot be read or the parts cannot be merged
	 */
	private Instances getTrainingSet(int fileNumber) throws Exception {
		return assembleDataset(fileNumber, true, false);
	}

	/**
	 * Loads the test files for the given file number.
	 *
	 * @param fileNumber the number used in the file names
	 * @return the merged test data, class attribute last
	 * @throws Exception if a file cannot be read or the parts cannot be merged
	 */
	private Instances getTestSet(int fileNumber) throws Exception {
		return assembleDataset(fileNumber, false, true);
	}

	/**
	 * Reads one part per directory and mode, strips the last attribute of each part, suffixes the
	 * remaining attribute names with "_&lt;directory index&gt;" and merges everything attribute-wise.
	 * The last attribute of the first part is appended at the end.
	 *
	 * @param fileNumber the requested file number; clamped separately for every directory if limits are set
	 * @param includeTraining whether the training file of each part is read
	 * @param includeTest whether the test file of each part is read (appended after the training instances)
	 * @return the merged dataset
	 * @throws IllegalStateException if no directories or no modes are configured
	 * @throws Exception if a file cannot be read or the parts cannot be merged
	 */
	private Instances assembleDataset(int fileNumber, boolean includeTraining, boolean includeTest)
			throws Exception {
		if (dirNames == null || dirNames.length == 0) {
			throw new IllegalStateException("No directories with propositionalised arff files set");
		}
		if (modeString == null || modeString.length() == 0) {
			throw new IllegalStateException("No propositionalisation mode set");
		}

		Instances merged = null;
		Instances classColumn = null;
		for (int j = 0; j < dirNames.length; j++) {
			// always start from the requested number: a clamp applied for one directory
			// must not leak into the directories that follow
			int dirFileNumber = adjustIterationNumbers ? adjustFileNumber(j, fileNumber) : fileNumber;
			dirName = dirNames[j];
			for (int i = 0; i < modeString.length(); i++) {
				String mode = String.valueOf(modeString.charAt(i));
				Instances part = loadPart(dirName, mode, dirFileNumber, includeTraining, includeTest);
				if (classColumn == null) {
					// the class values are taken from the very first part only
					classColumn = keepLastAttribute(part);
				}
				cutClass.setInputFormat(part);
				part = Filter.useFilter(part, cutClass);
				for (int k = 0; k < part.numAttributes(); k++) {
					part.renameAttribute(k, part.attribute(k).name() + "_" + j);
				}
				merged = (merged == null) ? part : Instances.mergeInstances(merged, part);
			}
		}
		return Instances.mergeInstances(merged, classColumn);
	}

	/**
	 * Loads the training and/or test file of one directory and mode.
	 *
	 * @param dir the directory, ending in "/" unless empty
	 * @param mode the mode digit, used as sub-directory name and file name suffix
	 * @param fileNumber the number used in the file names
	 * @param includeTraining whether the training file is read
	 * @param includeTest whether the test file is read
	 * @return the loaded instances; test instances follow the training instances if both are read
	 * @throws Exception if a file cannot be read
	 */
	private Instances loadPart(String dir, String mode, int fileNumber, boolean includeTraining,
			boolean includeTest) throws Exception {
		Instances data = null;
		if (includeTraining) {
			data = loadArff(dir, mode, TRAINING_FILE_START, fileNumber);
		}
		if (includeTest) {
			Instances testData = loadArff(dir, mode, TEST_FILE_START, fileNumber);
			if (data == null) {
				data = testData;
			} else {
				for (int k = 0; k < testData.numInstances(); k++) {
					data.add(testData.instance(k));
				}
			}
		}
		return data;
	}

	/**
	 * Loads <code>&lt;dir&gt;&lt;mode&gt;/&lt;prefix&gt;&lt;fileNumber&gt;_&lt;mode&gt;&lt;extension&gt;</code>.
	 *
	 * @param dir the directory, ending in "/" unless empty
	 * @param mode the mode digit
	 * @param prefix the file name prefix
	 * @param fileNumber the number used in the file name
	 * @return the content of the file
	 * @throws Exception if the file cannot be read
	 */
	private Instances loadArff(String dir, String mode, String prefix, int fileNumber) throws Exception {
		String fileName = dir + mode + "/" + prefix + fileNumber + "_" + mode + fileExtension;
		ArffLoader loader = new ArffLoader();
		loader.setFile(new File(fileName));
		return loader.getDataSet();
	}

	/**
	 * Returns a copy of the data that only contains its last attribute.
	 *
	 * @param data the dataset to copy
	 * @return the copy reduced to the last attribute
	 */
	private static Instances keepLastAttribute(Instances data) {
		Instances lastOnly = new Instances(data);
		int toDelete = data.numAttributes() - 1;
		for (int k = 0; k < toDelete; k++) {
			lastOnly.deleteAttributeAt(0);
		}
		return lastOnly;
	}

	/**
	 * Clamps a file number to the limit of a directory. Numbers up to the limit are returned
	 * unchanged. Larger numbers are mapped into the block of ten that ends at the limit, keeping
	 * the last decimal digit of the requested number (a last digit of 0 maps to the limit itself).
	 *
	 * @param j index of the directory in {@link #dirNames} and {@link #limits}
	 * @param fileNumber the requested file number
	 * @return the file number to use for directory <code>j</code>
	 */
	private int adjustFileNumber(int j, int fileNumber) {
		int limit = limits[j];
		if (fileNumber <= limit) {
			return fileNumber;
		}
		int remainder = fileNumber % FILES_PER_ITERATION;
		if (remainder == 0) {
			return limit;
		}
		return limit - FILES_PER_ITERATION + remainder;
	}

	/**
	 * Runs the evaluation. Iteration <code>i</code> (starting at 0) uses the file numbers
	 * <code>i * 10 + 1 + offset + n</code> for the folds <code>n = 0 .. folds - 1</code> and is
	 * executed as long as its first file number does not exceed the max offset. Per iteration the
	 * evaluation summary is printed; the scores (first entry of the predicted distribution) of all
	 * instances are printed for the first and the last iteration.
	 *
	 * @param args the command line options, see {@link #setOptions(String[])}
	 * @throws Exception if the options are invalid, a file cannot be read or the classifier fails
	 */
	public static void main(String[] args) throws Exception {

		IterativeEvaluationExistingPropArffs itEval = new IterativeEvaluationExistingPropArffs();
		itEval.setOptions(args);

		Classifier actualClassifier = itEval.getClassifier();
		System.out.println("Classifier: "+actualClassifier.toString());

		StringBuilder options = new StringBuilder();
		if (actualClassifier instanceof OptionHandler) {
			String[] classifierOptions = ((OptionHandler) actualClassifier).getOptions();
			for (int i = 0; i < classifierOptions.length; i++) {
				options.append(classifierOptions[i]).append(' ');
			}
		}

		int folds = itEval.getFolds();
		int fileNumberOffset = itEval.getFileNumberOffset();
		int maxOffset = itEval.getMaxOffset();

		// one classifier instance per fold, re-built in every iteration
		List<Classifier> classifiersForAllFolds = new ArrayList<Classifier>();
		for (int i = 0; i < folds; i++) {
			classifiersForAllFolds.add(AbstractClassifier.makeCopy(actualClassifier));
		}

		long timeStart = System.currentTimeMillis();
		int iteration = 0;

		List<Object> pos = null;
		List<Object> neg = null;

		while ((iteration * FILES_PER_ITERATION) + 1 + fileNumberOffset <= maxOffset) {
			int fileNumber = (iteration * FILES_PER_ITERATION) + 1 + fileNumberOffset;

			// the full set is only needed to set up the evaluation (and for the dataset name)
			Instances fullSet = itEval.getFullSet(fileNumber);
			fullSet.setClassIndex(fullSet.numAttributes()-1);
			Evaluation eval = new Evaluation(fullSet);
			if (iteration == 0) {
				System.out.println("Dataset: "+fullSet.relationName());
				System.out.println("Mode: "+itEval.modeString);
			}

			// perform cross-validation
			for (int n = 0; n < folds; n++) {
				int fileFoldNumber = fileNumber + n;
				Instances train = itEval.getTrainingSet(fileFoldNumber);
				train.setClassIndex(train.numAttributes()-1);
				Instances test = itEval.getTestSet(fileFoldNumber);
				test.setClassIndex(test.numAttributes()-1);

				Classifier trainNow = classifiersForAllFolds.get(n);
				trainNow.buildClassifier(train);
				eval.evaluateModel(trainNow, test);

				if (trainNow instanceof CVParameterSelectionAUC) {
					System.out.println(((CVParameterSelectionAUC)trainNow).toSummaryString());
				}
				if (trainNow instanceof CVParameterSelectionAUCMulti) {
					System.out.println(((CVParameterSelectionAUCMulti)trainNow).toSummaryString());
				}
			}

			FastVector pred = eval.predictions();
			pos = new ArrayList<Object>();
			neg = new ArrayList<Object>();
			// the first entry of both lists identifies the classifier set-up
			String classifierInfo = itEval.classifierName+" "+options+" "+itEval.modeString;
			classifierInfo = classifierInfo.replaceAll(" +", "_");
			pos.add(classifierInfo);
			neg.add(classifierInfo);
			for (int i = 0; i < pred.size(); i++) {
				NominalPrediction pred4Instance = (NominalPrediction)pred.elementAt(i);
				double prob = (pred4Instance.distribution())[0];
				// instances whose actual class has index 0 are treated as positives
				if (pred4Instance.actual() == 0.0) {
					pos.add(prob);
				} else {
					neg.add(prob);
				}
			}

			if (iteration == 0) {
				printScores(">", pos, neg);
			}

			System.out.println("Iterative Evaluation for Iteration: "+(iteration+(fileNumberOffset/FILES_PER_ITERATION)+1));
			System.out.println(eval.toSummaryString());
			System.out.println(eval.toClassDetailsString()+"\n\n");

			iteration++;
		}

		if (iteration != 0) {
			printScores(">>", pos, neg);
		}

		long timeEnd = System.currentTimeMillis();
		System.out.println("Time elapsed in milliseconds: "+(timeEnd-timeStart));
	}

	/**
	 * Prints the collected scores, positives first, separated by blank lines.
	 *
	 * @param marker text printed in front of every line
	 * @param pos entries collected for the positive instances
	 * @param neg entries collected for the negative instances
	 */
	private static void printScores(String marker, List<Object> pos, List<Object> neg) {
		for (int i = 0; i < pos.size(); i++) {
			System.out.println(marker+"score for positiv instance "+i+"\t"+pos.get(i));
		}
		System.out.println("\n\n");
		for (int i = 0; i < neg.size(); i++) {
			System.out.println(marker+"score for negative instance "+i+"\t"+neg.get(i));
		}
	}

	/**
	 * @return the number of folds
	 */
	public int getFolds() {
		return m_folds;
	}

	/**
	 * @param folds the number of folds
	 */
	public void setFolds(int folds) {
		this.m_folds = folds;
	}

	/**
	 * @return the directory set via {@link #setDirName(String)} or, after data has been loaded,
	 *         the directory that was read last
	 */
	public String getDirName() {
		return dirName;
	}

	/**
	 * Sets the current directory. Loading data overwrites this value with the directories
	 * given with <code>-D</code>.
	 *
	 * @param dirName the directory
	 */
	public void setDirName(String dirName) {
		this.dirName = dirName;
	}

	/**
	 * @return the iteration steps
	 */
	public int getIterationSteps() {
		return m_iterationSteps;
	}

	/**
	 * @param steps the iteration steps
	 */
	public void setIterationSteps(int steps) {
		m_iterationSteps = steps;
	}

	/**
	 * @return the offset added to every file number
	 */
	public int getFileNumberOffset() {
		return fileNumberOffset;
	}

	/**
	 * @param fileNumberOffset the offset added to every file number
	 */
	public void setFileNumberOffset(int fileNumberOffset) {
		this.fileNumberOffset = fileNumberOffset;
	}

	/**
	 * @return the largest file number an iteration may start at
	 */
	public int getMaxOffset() {
		return maxOffset;
	}

	/**
	 * @param maxOffset the largest file number an iteration may start at
	 */
	public void setMaxOffset(int maxOffset) {
		this.maxOffset = maxOffset;
	}

	/**
	 * Sets the range list of the modes. The mode string used for the file names is only
	 * derived from it by {@link #setOptions(String[])}.
	 *
	 * @param rangeList the range list
	 */
	public void setAttributeIndices(String rangeList) {
		modeRange.setRanges(rangeList);
	}

	/**
	 * Derives the mode string from the mode range: the selected indices are converted to
	 * 1-based numbers and concatenated without separator.
	 */
	private void getPropSettings() {
		modeString = "";

		modeRange.setUpper(MODE_RANGE_UPPER);
		int[] selectedMode = modeRange.getSelection();
		StringBuilder modes = new StringBuilder();
		for (int i = 0; i < selectedMode.length; i++) {
			modes.append(selectedMode[i] + 1);
		}
		modeString = modes.toString();
	}

	/**
	 * @return whether gzip compressed arff files are read
	 */
	public boolean isUseGZipFile() {
		return useGZipFile;
	}

	/**
	 * Chooses between plain and gzip compressed input files and adapts the file extension.
	 *
	 * @param useGZipFile <code>true</code> to read <code>.arff.gz</code> files
	 */
	public void setUseGZipFile(boolean useGZipFile) {
		this.useGZipFile = useGZipFile;
		this.fileExtension = useGZipFile ? GZIP_ARFF_EXTENSION : ARFF_EXTENSION;
	}


}
