/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    LogStatesScores2Probabilities.java
 *    Copyright (C) 2010 Stefan Mutter
 *
 */
package weka.filters.supervised.attribute;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.SupervisedFilter;

/**
<!-- globalinfo-start -->
* Converts the log scores of each state in both PHMMs of a binary PHMM, as stored in ARFF files created in mode -M 4, into probabilities (mode -M 5).
* The original ARFF file contains the log score of each state of both PHMMs of the binary PHMM (except the first and last match state, as their log score is always 0 and thus their probability always 1) and the class attribute. The resulting ARFF file contains the normalised probabilities (normalised separately for each PHMM) and the class attribute.
* The class attribute must be the last attribute in the original file!
* <p/>
<!-- globalinfo-end -->
*
<!-- options-start -->
* no options
*
<!-- options-end -->
*
* @author Stefan Mutter (pHMM4weka@gmail.com)
* @version $Revision: 6 $
*/
public class LogStatesScores2Probabilities extends Filter implements SupervisedFilter{

  /** Version identifier used by Java serialization. */
  private static final long serialVersionUID = -7027709161454713739L;

  /**
   * Returns the revision string of this filter.
   *
   * @return the fixed revision "1.0"
   */
  public String getRevision() {
    return "1.0";
  }

  /**
   * Determines the number of HMMs from the name of the last state attribute,
   * i.e. the attribute directly in front of the last attribute of the data.
   * Only the final character of that name is evaluated, so at most a single
   * digit is read.
   *
   * @param instances the data whose attribute names are inspected
   * @return the number encoded by the last character of the attribute name
   * @throws IllegalArgumentException if there are fewer than two attributes
   *         or the inspected attribute name is empty
   * @throws NumberFormatException if the last character is not a digit
   */
  protected int determineHMMNumber(Instances instances){
    int numAttributes = instances.numAttributes();
    if (numAttributes < 2) {
      throw new IllegalArgumentException(
        "Expected at least one state attribute followed by a class attribute, but found "
        + numAttributes + " attribute(s)");
    }

    String attName = instances.attribute(numAttributes - 2).name();
    if (attName.length() == 0) {
      throw new IllegalArgumentException(
        "Attribute " + (numAttributes - 2) + " has an empty name; cannot determine the HMM number");
    }

    String suffix = attName.substring(attName.length() - 1);
    try {
      return Integer.parseInt(suffix);
    } catch (NumberFormatException e) {
      // keep the exception type, but tell the user which attribute is at fault
      NumberFormatException detailed = new NumberFormatException(
        "Attribute name '" + attName + "' does not end in a digit; cannot determine the HMM number");
      detailed.initCause(e);
      throw detailed;
    }
  }


  /**
   * Returns a short description of this filter.
   *
   * @return the description text
   */
  public String globalInfo() {
    return   "A batch filter that converts arff files created in Mode 4 to probablities.";
  }

  /**
   * Returns the capabilities of this filter: numeric attributes, all class
   * types and data without a class attribute.
   *
   * @return the capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.enable(Capability.NUMERIC_CLASS);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enableAllClasses();
    result.enable(Capability.NO_CLASS);  // filter doesn't need class to be set; treats last attribute as class
    return result;
  }

  /**
   * Sets the format of the input instances. The output format is an empty
   * copy of the input format, so the attribute layout is not changed.
   *
   * @param instanceInfo the structure of the incoming data
   * @return always true, as the output format is immediately available
   * @throws Exception if the superclass rejects the input format
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {
    super.setInputFormat(instanceInfo);

    setOutputFormat(new Instances(instanceInfo, 0));

    return true;  // output format is immediately available
  }

  /**
   * Signals the end of a batch. For every instance held by the input format
   * the log scores of each HMM are converted to probabilities and normalised
   * separately per HMM; the instance is then pushed to the output queue. The
   * last attribute is never touched.
   *
   * @return true if there are instances pending output
   * @throws NullPointerException if no input format has been defined
   * @throws Exception if the attribute layout cannot be interpreted
   */
  public boolean batchFinished() throws Exception {
    if (getInputFormat() == null) {
      throw new NullPointerException("No input instance format defined");
    }

    Instances buffered = getInputFormat();
    int hmmNumber = this.determineHMMNumber(buffered);
    // lastStateIndices[h] is the index of the last state attribute of HMM h
    int[] lastStateIndices = this.determineStateNumbers(buffered, hmmNumber);

    for (int i = 0; i < buffered.numInstances(); i++) {
      Instance instance = buffered.instance(i);
      int firstStateIndex = 0;
      for (int hmm = 0; hmm < hmmNumber; hmm++) {
        convertRange(instance, firstStateIndex, lastStateIndices[hmm]);
        // the next HMM starts right behind the one just processed
        firstStateIndex = lastStateIndices[hmm] + 1;
      }
      push(instance);
    }

    flushInput();
    m_NewBatch = true;
    m_FirstBatchDone = true;
    return (numPendingOutput() != 0);
  }

  /**
   * Replaces the log scores stored in the attributes first..last (inclusive)
   * of the given instance by normalised probabilities.
   *
   * @param instance the instance to modify in place
   * @param first index of the first attribute of the range
   * @param last index of the last attribute of the range
   * @throws IllegalArgumentException if the range is empty
   */
  private void convertRange(Instance instance, int first, int last) {
    if (last < first) {
      throw new IllegalArgumentException(
        "Empty state range (" + first + " to " + last
        + "); attribute names do not match the detected number of HMMs");
    }

    double[] logScores = new double[last + 1 - first];
    for (int k = first; k <= last; k++) {
      logScores[k - first] = instance.value(k);
    }

    double[] probs = Utils.logs2probs(logScores);
    Utils.normalize(probs);

    for (int k = first; k <= last; k++) {
      instance.setValue(k, probs[k - first]);
    }
  }

  /**
   * Returns the final character of the name of the given attribute.
   *
   * @param instances the data containing the attribute
   * @param attIndex the index of the attribute
   * @return the last character of the attribute name
   * @throws IllegalArgumentException if the attribute name is empty
   */
  private static char nameSuffix(Instances instances, int attIndex) {
    String attName = instances.attribute(attIndex).name();
    if (attName.length() == 0) {
      throw new IllegalArgumentException(
        "Attribute " + attIndex + " has an empty name; cannot detect HMM boundaries");
    }
    return attName.charAt(attName.length() - 1);
  }

  /**
   * Determines, for each HMM, the index of its last state attribute. A
   * boundary between two HMMs is assumed wherever the last character of two
   * neighbouring attribute names differs. The entry of the last HMM is always
   * the attribute directly in front of the last attribute of the data.
   *
   * @param instances the data whose attribute names are inspected
   * @param hmmNumber the number of HMMs, i.e. the length of the result
   * @return an array of length hmmNumber with the last state index per HMM
   * @throws IllegalArgumentException if hmmNumber is smaller than 1, an
   *         attribute name is empty, or the names contain more boundaries
   *         than hmmNumber allows
   */
  protected int[] determineStateNumbers(Instances instances, int hmmNumber) {
    if (hmmNumber < 1) {
      throw new IllegalArgumentException(
        "Number of HMMs must be at least 1, but was " + hmmNumber);
    }

    int[] maxIndex = new int[hmmNumber];
    int index = 0;
    char current = nameSuffix(instances, 0);
    // the last attribute is skipped: it is treated as the class attribute
    for (int i = 1; i < instances.numAttributes() - 1; i++) {
      char next = nameSuffix(instances, i);
      if (current != next) {
        if (index >= hmmNumber) {
          throw new IllegalArgumentException(
            "Attribute names indicate more than " + hmmNumber
            + " HMM(s); boundary found at attribute " + i);
        }
        current = next;
        maxIndex[index] = i - 1;
        index++;
      }
    }
    maxIndex[hmmNumber - 1] = instances.numAttributes() - 2;
    return maxIndex;
  }

  /**
   * Main method for testing this class.
   *
   * @param argv the command line arguments passed on to the filter
   */
  public static void main(String [] argv) {
    runFilter(new LogStatesScores2Probabilities(), argv);
  }

}
