/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    BackgroundDistNullModel.java
 *    Copyright (C) 2010 Stefan Mutter
 *
 */
package weka.classifiers.sequence.core;

import weka.core.Utils;

/**
<!-- globalinfo-start -->
* Constructs a null model whose probabilities reflect the background distribution of the alphabet symbols observed in the sequences.
<!-- globalinfo-end -->
* 
* @author Stefan Mutter (pHMM4weka@gmail.com)
* @version $Revision: 4 $
*/
public class BackgroundDistNullModel extends NullModel {

  /** Version identifier used for serialization. */
  private static final long serialVersionUID = -2547427871055879202L;

  /**
   * Minimal constructor that only forwards its arguments to the superclass.
   * No background distribution is estimated here; use the constructor that
   * additionally takes the sequences instead.
   *
   * @param useLogSpace flag indicating whether calculations are in log space
   * @param alphabet the alphabet under consideration
   */
  public BackgroundDistNullModel(boolean useLogSpace, Alphabet alphabet) {
    super(useLogSpace, alphabet);
  }

  /**
   * Builds a null model from the background frequencies of the symbols that
   * occur in the given set of sequences.
   * <p>
   * Every symbol of the alphabet starts with a count of one; each character
   * of each sequence then increments the count of its symbol. The counts are
   * normalized with {@link Utils#normalize(double[])}, converted to log
   * values when {@code useLogSpace} is set, and stored in a
   * {@link SimpleDistribution}.
   *
   * @param useLogSpace flag indicating whether calculations are in log space
   * @param alphabet the alphabet under consideration
   * @param sequences the sequences whose symbols are counted
   * @throws IllegalSymbolException declared for symbols that are not part of
   *           the alphabet (presumably raised by the alphabet lookup)
   * @throws NumericStabilityException if background distribution is not a
   *           real probability distribution
   */
  public BackgroundDistNullModel(boolean useLogSpace, Alphabet alphabet, String[] sequences)
      throws IllegalSymbolException, NumericStabilityException {
    super(useLogSpace, alphabet);
    distribution = new SimpleDistribution(alphabet, useLogSpace);

    double[] background = new double[alphabet.alphabetSize()];

    // Each symbol starts with a count of one rather than zero.
    for (int s = 0; s < background.length; s++) {
      background[s] = 1;
    }

    // Tally every character of every sequence under its alphabet index.
    for (String seq : sequences) {
      for (int pos = 0; pos < seq.length(); pos++) {
        int symbolIndex = alphabet.indexOfAlphabetSymbol(String.valueOf(seq.charAt(pos)));
        background[symbolIndex]++;
      }
    }

    Utils.normalize(background);

    if (useLogSpace) {
      for (int s = 0; s < background.length; s++) {
        background[s] = toLogSpace(background[s]);
      }
    }

    ((SimpleDistribution) distribution).setProbWithArray(background);
  }

  /**
   * Converts a single probability to its natural logarithm, mapping the
   * boundary values 0 and 1 explicitly to negative infinity and 0.
   *
   * @param probability the value to convert
   * @return the log-space representation of {@code probability}
   */
  private static double toLogSpace(double probability) {
    if (probability == 0) {
      return Double.NEGATIVE_INFINITY;
    }
    if (probability == 1) {
      return 0;
    }
    return Math.log(probability);
  }

}
