/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * WekaInstanceBuffer.java
 * Copyright (C) 2009-2011 University of Waikato, Hamilton, New Zealand
 */

package adams.flow.transformer;

import java.util.Hashtable;
import java.util.Iterator;

import weka.core.BinarySparseInstance;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.SparseInstance;
import adams.flow.core.Token;
import adams.flow.provenance.ActorType;
import adams.flow.provenance.Provenance;
import adams.flow.provenance.ProvenanceContainer;
import adams.flow.provenance.ProvenanceInformation;
import adams.flow.provenance.ProvenanceSupporter;

/**
 <!-- globalinfo-start -->
 * Can act in two different ways:<br/>
 * 1. Instance -&gt; Instances (row -&gt; dataset)<br/>
 * Buffers weka.core.Instance objects and outputs a weka.core.Instances object with each arriving weka.core.Instance object.<br/>
 * 2. Instances -&gt; Instance (dataset -&gt; row)<br/>
 * Outputs all the weka.core.Instance objects that the incoming weka.core.Instances object contains.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- flow-summary-start -->
 * Input&#47;output:<br/>
 * - accepts:<br/>
 * &nbsp;&nbsp;&nbsp;weka.core.Instance<br/>
 * &nbsp;&nbsp;&nbsp;weka.core.Instance[]<br/>
 * - generates:<br/>
 * &nbsp;&nbsp;&nbsp;weka.core.Instances<br/>
 * <p/>
 <!-- flow-summary-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre>-D &lt;int&gt; (property: debugLevel)
 * &nbsp;&nbsp;&nbsp;The greater the number the more additional info the scheme may output to
 * &nbsp;&nbsp;&nbsp;the console (0 = off).
 * &nbsp;&nbsp;&nbsp;default: 0
 * &nbsp;&nbsp;&nbsp;minimum: 0
 * </pre>
 *
 * <pre>-name &lt;java.lang.String&gt; (property: name)
 * &nbsp;&nbsp;&nbsp;The name of the actor.
 * &nbsp;&nbsp;&nbsp;default: InstanceBuffer
 * </pre>
 *
 * <pre>-annotation &lt;adams.core.base.BaseText&gt; (property: annotations)
 * &nbsp;&nbsp;&nbsp;The annotations to attach to this actor.
 * &nbsp;&nbsp;&nbsp;default:
 * </pre>
 *
 * <pre>-skip (property: skip)
 * &nbsp;&nbsp;&nbsp;If set to true, transformation is skipped and the input token is just forwarded
 * &nbsp;&nbsp;&nbsp;as it is.
 * </pre>
 *
 * <pre>-stop-flow-on-error (property: stopFlowOnError)
 * &nbsp;&nbsp;&nbsp;If set to true, the flow gets stopped in case this actor encounters an error;
 * &nbsp;&nbsp;&nbsp; useful for critical actors.
 * </pre>
 *
 * <pre>-operation &lt;INSTANCES_TO_INSTANCE|INSTANCE_TO_INSTANCES&gt; (property: operation)
 * &nbsp;&nbsp;&nbsp;The way the buffer operates, 'dataset -&gt; row' or 'row -&gt; dataset'.
 * &nbsp;&nbsp;&nbsp;default: INSTANCE_TO_INSTANCES
 * </pre>
 *
 * <pre>-check (property: checkHeader)
 * &nbsp;&nbsp;&nbsp;Whether to check the headers - if the headers change, the Instance object
 * &nbsp;&nbsp;&nbsp;gets dumped into a new file (in case of INSTANCE_TO_INSTANCES).
 * </pre>
 *
 <!-- options-end -->
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 4584 $
 */
public class WekaInstanceBuffer
  extends AbstractTransformer
  implements ProvenanceSupporter {

  /** for serialization. */
  private static final long serialVersionUID = 6774529845778672623L;

  /** the key for storing the current buffer in the backup. */
  public final static String BACKUP_BUFFER = "buffer";

  /** the key for storing the current iterator in the backup. */
  public final static String BACKUP_ITERATOR = "iterator";

  /**
   * Defines how the buffer actor operates.
   *
   * @author  fracpete (fracpete at waikato dot ac dot nz)
   * @version $Revision: 4584 $
   */
  public enum Operation {
    /** Instances -&gt; Instance (a dataset gets split into its rows). */
    INSTANCES_TO_INSTANCE,
    /** Instance -&gt; Instances (rows get collected in a dataset). */
    INSTANCE_TO_INSTANCES,
  }

  /** the currently buffered data. */
  protected Instances m_Buffer;

  /** the iterator for broadcasting Instance objects. */
  protected Iterator<Instance> m_Iterator;

  /** the way the buffer operates. */
  protected Operation m_Operation;

  /** whether to check the header. */
  protected boolean m_CheckHeader;

  /**
   * Returns a string describing the object.
   *
   * @return 			a description suitable for displaying in the gui
   */
  public String globalInfo() {
    return
        "Can act in two different ways:\n"
      + "1. Instance -> Instances (row -> dataset)\n"
      + "Buffers weka.core.Instance objects and outputs a weka.core.Instances "
      + "object with each arriving weka.core.Instance object.\n"
      + "2. Instances -> Instance (dataset -> row)\n"
      + "Outputs all the weka.core.Instance objects that the incoming "
      + "weka.core.Instances object contains.";
  }

  /**
   * Adds options to the internal list of options: the "operation"
   * (default: INSTANCE_TO_INSTANCES) and the "check" flag for the header
   * check (default: off).
   */
  public void defineOptions() {
    super.defineOptions();

    m_OptionManager.add(
        "operation", "operation",
        Operation.INSTANCE_TO_INSTANCES);

    m_OptionManager.add(
        "check", "checkHeader",
        false);
  }

  /**
   * Returns a quick info about the actor, which will be displayed in the GUI.
   * Shows the variable attached to the "operation" property if there is one,
   * otherwise the current operation; a suffix is appended when the header
   * check is enabled.
   *
   * @return		null if no info available, otherwise short string
   */
  public String getQuickInfo() {
    String	result;
    String	variable;

    variable = getOptionManager().getVariableForProperty("operation");
    result   = (variable != null) ? variable : m_Operation.toString();

    if (m_CheckHeader)
      result += " [checking header]";

    return result;
  }

  /**
   * Sets the way the buffer operates. Resets the actor, i.e., any buffered
   * data and pending iterator get discarded.
   *
   * @param value	the operation
   */
  public void setOperation(Operation value) {
    m_Operation = value;
    reset();
  }

  /**
   * Returns the way the buffer operates.
   *
   * @return 		the operation
   */
  public Operation getOperation() {
    return m_Operation;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return		tip text for this property suitable for
   *             	displaying in the GUI or for listing the options.
   */
  public String operationTipText() {
    return "The way the buffer operates, 'dataset -> row' or 'row -> dataset'.";
  }

  /**
   * Sets whether to check the header or not. Resets the actor, i.e., any
   * buffered data and pending iterator get discarded.
   *
   * @param value	if true then the headers get checked
   */
  public void setCheckHeader(boolean value) {
    m_CheckHeader = value;
    reset();
  }

  /**
   * Returns whether the header gets checked or not.
   *
   * @return		true if the header gets checked
   */
  public boolean getCheckHeader() {
    return m_CheckHeader;
  }

  /**
   * Returns the tip text for this property.
   * <p/>
   * NOTE(review): the text talks about dumping into a "new file", but this
   * actor does not write files; on a header change it merely starts a new
   * buffer (see {@link #doExecute()}). The wording is kept as is, since the
   * generated option documentation mirrors it.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String checkHeaderTipText() {
    return
        "Whether to check the headers - if the headers change, the Instance "
      + "object gets dumped into a new file (in case of " + Operation.INSTANCE_TO_INSTANCES + ").";
  }

  /**
   * Returns the class that the consumer accepts. Depends on the operation:
   * single rows or arrays of rows for INSTANCE_TO_INSTANCES, a dataset for
   * INSTANCES_TO_INSTANCE.
   *
   * @return		<!-- flow-accepts-start -->weka.core.Instance.class, weka.core.Instance[].class<!-- flow-accepts-end -->
   * @throws IllegalStateException	if the operation is not handled
   */
  public Class[] accepts() {
    if (m_Operation == Operation.INSTANCE_TO_INSTANCES)
      return new Class[]{Instance.class, Instance[].class};
    else if (m_Operation == Operation.INSTANCES_TO_INSTANCE)
      return new Class[]{Instances.class};
    else
      throw new IllegalStateException("Unhandled operation: " + m_Operation);
  }

  /**
   * Returns the class of objects that it generates. Depends on the operation:
   * a dataset for INSTANCE_TO_INSTANCES, single rows for
   * INSTANCES_TO_INSTANCE.
   *
   * @return		<!-- flow-generates-start -->weka.core.Instances.class<!-- flow-generates-end -->
   * @throws IllegalStateException	if the operation is not handled
   */
  public Class[] generates() {
    if (m_Operation == Operation.INSTANCE_TO_INSTANCES)
      return new Class[]{Instances.class};
    else if (m_Operation == Operation.INSTANCES_TO_INSTANCE)
      return new Class[]{Instance.class};
    else
      throw new IllegalStateException("Unhandled operation: " + m_Operation);
  }

  /**
   * Removes entries from the backup: the buffer and the iterator, in
   * addition to whatever the super class prunes.
   */
  protected void pruneBackup() {
    super.pruneBackup();

    pruneBackup(BACKUP_BUFFER);
    pruneBackup(BACKUP_ITERATOR);
  }

  /**
   * Backs up the current state of the actor before update the variables.
   * Buffer and iterator are only stored if they are not null.
   *
   * @return		the backup
   */
  protected Hashtable<String,Object> backupState() {
    Hashtable<String,Object>	result;

    result = super.backupState();

    if (m_Buffer != null)
      result.put(BACKUP_BUFFER, m_Buffer);
    if (m_Iterator != null)
      result.put(BACKUP_ITERATOR, m_Iterator);

    return result;
  }

  /**
   * Restores the state of the actor before the variables got updated.
   * The entries handled here get removed from the backup before it is
   * handed on to the super class.
   *
   * @param state	the backup of the state to restore from
   */
  protected void restoreState(Hashtable<String,Object> state) {
    if (state.containsKey(BACKUP_BUFFER)) {
      m_Buffer = (Instances) state.get(BACKUP_BUFFER);
      state.remove(BACKUP_BUFFER);
    }
    if (state.containsKey(BACKUP_ITERATOR)) {
      // only backupState() stores something under this key, and it always
      // stores m_Iterator, hence the cast is safe
      @SuppressWarnings("unchecked")
      Iterator<Instance> iterator = (Iterator<Instance>) state.get(BACKUP_ITERATOR);
      m_Iterator = iterator;
      state.remove(BACKUP_ITERATOR);
    }

    super.restoreState(state);
  }

  /**
   * Resets the scheme, discarding the buffer and the iterator.
   */
  protected void reset() {
    super.reset();

    m_Buffer   = null;
    m_Iterator = null;
  }

  /**
   * Executes the flow item.
   * <p/>
   * INSTANCE_TO_INSTANCES: appends the incoming row(s) to the buffer and
   * creates an output token holding the buffer.<br/>
   * INSTANCES_TO_INSTANCE: stores the incoming dataset and creates the
   * iterator that {@link #output()} draws the rows from.
   *
   * @return		null if everything is fine, otherwise error message
   * @throws IllegalStateException	if the operation is not handled
   */
  protected String doExecute() {
    Instance[]	insts;

    if (m_Operation == Operation.INSTANCE_TO_INSTANCES) {
      // a single row gets treated like an array with one element
      if (m_InputToken.getPayload() instanceof Instance)
        insts = new Instance[]{(Instance) m_InputToken.getPayload()};
      else
        insts = (Instance[]) m_InputToken.getPayload();

      for (Instance inst: insts)
        bufferInstance(inst);

      m_OutputToken = new Token(m_Buffer);
    }
    else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) {
      m_Buffer   = (Instances) m_InputToken.getPayload();
      m_Iterator = m_Buffer.iterator();
    }
    else {
      throw new IllegalStateException("Unhandled operation: " + m_Operation);
    }

    return null;
  }

  /**
   * Appends a single row to the buffer. A new, empty buffer based on the
   * row's dataset gets created if there is none yet, or if the header check
   * is enabled and the row's header differs from the buffer's.
   *
   * @param inst	the row to buffer
   */
  private void bufferInstance(Instance inst) {
    double[]	values;
    boolean	updated;
    int		i;

    if ((m_Buffer != null) && m_CheckHeader) {
      if (!m_Buffer.equalHeaders(inst.dataset())) {
        debug("Header changed, resetting buffer");
        m_Buffer = null;
      }
    }

    if (m_Buffer == null)
      m_Buffer = new Instances(inst.dataset(), 0);

    // we need to make sure that string and relational values are in our
    // buffer header and update the current Instance accordingly before
    // buffering it
    values  = inst.toDoubleArray();
    updated = false;
    for (i = 0; i < values.length; i++) {
      if (inst.isMissing(i))
        continue;
      if (inst.attribute(i).isString()) {
        values[i] = m_Buffer.attribute(i).addStringValue(inst.stringValue(i));
        updated   = true;
      }
      else if (inst.attribute(i).isRelationValued()) {
        values[i] = m_Buffer.attribute(i).addRelation(inst.relationalValue(i));
        updated   = true;
      }
    }

    if (updated)
      inst = rebuildInstance(inst, values);
    else
      inst = (Instance) inst.copy();

    m_Buffer.add(inst);
  }

  /**
   * Creates a new row from the (re-indexed) values, using the weight of the
   * original row.
   * <p/>
   * {@link BinarySparseInstance} is a subclass of {@link SparseInstance},
   * so binary sparse rows end up as plain {@link SparseInstance} objects.
   * The former dedicated branch for binary sparse rows was placed after the
   * sparse check and therefore could never be reached; it has been dropped.
   * Rebuilding as {@link SparseInstance} is kept on purpose: the re-indexed
   * string/relational values are arbitrary indices, which a binary (0/1
   * only) row could not hold.
   *
   * @param inst	the original row, supplies weight and type
   * @param values	the values for the new row
   * @return		the new row
   */
  private Instance rebuildInstance(Instance inst, double[] values) {
    if (inst instanceof SparseInstance)
      return new SparseInstance(inst.weight(), values);

    if (!(inst instanceof DenseInstance)) {
      getSystemErr().println(
          "Unhandled instance class (" + inst.getClass().getName() + "), "
          + "defaulting to " + DenseInstance.class.getName());
    }

    return new DenseInstance(inst.weight(), values);
  }

  /**
   * Checks whether there is pending output to be collected after
   * executing the flow item. For INSTANCES_TO_INSTANCE this is the case
   * as long as the iterator has rows left.
   *
   * @return		true if there is pending output
   * @throws IllegalStateException	if the operation is not handled
   */
  public boolean hasPendingOutput() {
    if (m_Operation == Operation.INSTANCE_TO_INSTANCES)
      return super.hasPendingOutput();
    else if (m_Operation == Operation.INSTANCES_TO_INSTANCE)
      return ((m_Iterator != null) && m_Iterator.hasNext());
    else
      throw new IllegalStateException("Unhandled operation: " + m_Operation);
  }

  /**
   * Returns the generated token. For INSTANCE_TO_INSTANCES the stored
   * output token gets handed out (and cleared), for INSTANCES_TO_INSTANCE
   * the next row of the iterator gets wrapped in a new token. Callers are
   * expected to check {@link #hasPendingOutput()} first, the iterator is not
   * checked in here.
   *
   * @return		the generated token
   * @throws IllegalStateException	if the operation is not handled
   */
  public Token output() {
    Token	result;

    if (m_Operation == Operation.INSTANCE_TO_INSTANCES) {
      result        = m_OutputToken;
      m_OutputToken = null;
    }
    else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) {
      result = new Token(m_Iterator.next());
    }
    else {
      throw new IllegalStateException("Unhandled operation: " + m_Operation);
    }

    updateProvenance(result);

    return result;
  }

  /**
   * Updates the provenance information in the provided container; does
   * nothing if provenance is not enabled. The container takes over the
   * provenance of the current input token and gets an entry for this actor
   * added.
   *
   * @param cont	the provenance container to update; gets cast to a
   * 			{@link Token} in order to determine the payload class
   */
  public void updateProvenance(ProvenanceContainer cont) {
    if (Provenance.getSingleton().isEnabled()) {
      cont.setProvenance(m_InputToken.getProvenance());
      cont.addProvenance(
          new ProvenanceInformation(
              ActorType.PREPROCESSOR,
              m_InputToken.getPayload().getClass(),
              this,
              ((Token) cont).getPayload().getClass()));
    }
  }

  /**
   * Cleans up after the execution has finished, discarding the iterator
   * and the buffer.
   */
  public void wrapUp() {
    m_Iterator = null;
    m_Buffer   = null;

    super.wrapUp();
  }
}
