/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    AttributeSelection.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters.supervised.attribute;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.attributeSelection.ASEvaluation;
import weka.attributeSelection.ASSearch;
import weka.attributeSelection.AttributeEvaluator;
import weka.attributeSelection.AttributeTransformer;
import weka.attributeSelection.BestFirst;
import weka.attributeSelection.CfsSubsetEval;
import weka.attributeSelection.Ranker;
import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.SupervisedFilter;

/**
 <!-- globalinfo-start --> 
 * A supervised attribute filter that can be used to
 * select attributes. It is very flexible and allows various search and
 * evaluation methods to be combined.
 * <p/>
 <!-- globalinfo-end -->
 * 
 <!-- options-start --> 
 * Valid options are:
 * <p/>
 * 
 * <pre>
 * -S &lt;"Name of search class [search options]"&gt;
 *  Sets search method for subset evaluators.
 *  eg. -S "weka.attributeSelection.BestFirst -S 8"
 * </pre>
 * 
 * <pre>
 * -E &lt;"Name of attribute/subset evaluation class [evaluator options]"&gt;
 *  Sets attribute/subset evaluator.
 *  eg. -E "weka.attributeSelection.CfsSubsetEval -L"
 * </pre>
 * 
 * <pre>
 * Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
 * </pre>
 * 
 * <pre>
 * -M
 *  Treat missing values as a seperate value.
 * </pre>
 * 
 * <pre>
 * -L
 *  Don't include locally predictive attributes.
 * </pre>
 * 
 * <pre>
 * Options specific to search weka.attributeSelection.BestFirst:
 * </pre>
 * 
 * <pre>
 * -P &lt;start set&gt;
 *  Specify a starting set of attributes.
 *  Eg. 1,3,5-7.
 * </pre>
 * 
 * <pre>
 * -D &lt;0 = backward | 1 = forward | 2 = bi-directional&gt;
 *  Direction of search. (default = 1).
 * </pre>
 * 
 * <pre>
 * -N &lt;num&gt;
 *  Number of non-improving nodes to
 *  consider before terminating search.
 * </pre>
 * 
 * <pre>
 * -S &lt;num&gt;
 *  Size of lookup cache for evaluated subsets.
 *  Expressed as a multiple of the number of
 *  attributes in the data set. (default = 1)
 * </pre>
 * 
 <!-- options-end -->
 * 
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 14508 $
 */
public class AttributeSelection extends Filter implements SupervisedFilter,
  OptionHandler, WeightedAttributesHandler, WeightedInstancesHandler {

  /** for serialization */
  static final long serialVersionUID = -296211247688169716L;

  /**
   * the attribute selection evaluation object; it is handed the evaluator and
   * search method and run over the buffered data in batchFinished()
   */
  private weka.attributeSelection.AttributeSelection m_trainSelector;

  /** the attribute evaluator to use */
  private ASEvaluation m_ASEvaluator;

  /** the search method if any */
  private ASSearch m_ASSearch;

  /**
   * holds the indices of the selected attributes; null until the first batch
   * has been processed (and again after resetOptions())
   */
  private int[] m_SelectedAttributes;

  /** True if a class attribute is set in the data */
  protected boolean m_hasClass;

  /**
   * Returns a string describing this filter
   * 
   * @return a description of the filter suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String globalInfo() {

    return "A supervised attribute filter that can be used to select "
      + "attributes. It is very flexible and allows various search "
      + "and evaluation methods to be combined.";
  }

  /**
   * Constructor. Initialises the evaluator and search method to their
   * defaults via resetOptions().
   */
  public AttributeSelection() {

    resetOptions();
  }

  /**
   * Returns an enumeration describing the available options. The filter's own
   * -S and -E options come first, followed by the options of the current
   * evaluator and of the current search method when these are OptionHandlers.
   * 
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration<Option> listOptions() {

    Vector<Option> newVector = new Vector<Option>(6);

    newVector.addElement(new Option(
      "\tSets search method for subset evaluators.\n"
        + "\teg. -S \"weka.attributeSelection.BestFirst -S 8\"", "S", 1,
      "-S <\"Name of search class [search options]\">"));

    newVector
      .addElement(new Option("\tSets attribute/subset evaluator.\n"
        + "\teg. -E \"weka.attributeSelection.CfsSubsetEval -L\"", "E", 1,
        "-E <\"Name of attribute/subset evaluation class [evaluator options]\">"));

    // instanceof is false for null, so no separate null check is required
    if (m_ASEvaluator instanceof OptionHandler) {

      newVector.addElement(new Option("", "", 0, "\nOptions specific to "
        + "evaluator " + m_ASEvaluator.getClass().getName() + ":"));

      newVector.addAll(Collections.list(((OptionHandler) m_ASEvaluator)
        .listOptions()));
    }

    if (m_ASSearch instanceof OptionHandler) {

      newVector.addElement(new Option("", "", 0, "\nOptions specific to "
        + "search " + m_ASSearch.getClass().getName() + ":"));

      newVector.addAll(Collections.list(((OptionHandler) m_ASSearch)
        .listOptions()));
    }
    return newVector.elements();
  }

  /**
   * Parses a given list of options. All settings are first reset to their
   * defaults. If the resulting evaluator is an AttributeEvaluator the search
   * method is switched to Ranker before an explicit -S option (if any) is
   * applied.
   * <p/>
   * 
   <!-- options-start --> 
   * Valid options are:
   * <p/>
   * 
   * <pre>
   * -S &lt;"Name of search class [search options]"&gt;
   *  Sets search method for subset evaluators.
   *  eg. -S "weka.attributeSelection.BestFirst -S 8"
   * </pre>
   * 
   * <pre>
   * -E &lt;"Name of attribute/subset evaluation class [evaluator options]"&gt;
   *  Sets attribute/subset evaluator.
   *  eg. -E "weka.attributeSelection.CfsSubsetEval -L"
   * </pre>
   * 
   * <pre>
   * Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
   * </pre>
   * 
   * <pre>
   * -M
   *  Treat missing values as a seperate value.
   * </pre>
   * 
   * <pre>
   * -L
   *  Don't include locally predictive attributes.
   * </pre>
   * 
   * <pre>
   * Options specific to search weka.attributeSelection.BestFirst:
   * </pre>
   * 
   * <pre>
   * -P &lt;start set&gt;
   *  Specify a starting set of attributes.
   *  Eg. 1,3,5-7.
   * </pre>
   * 
   * <pre>
   * -D &lt;0 = backward | 1 = forward | 2 = bi-directional&gt;
   *  Direction of search. (default = 1).
   * </pre>
   * 
   * <pre>
   * -N &lt;num&gt;
   *  Number of non-improving nodes to
   *  consider before terminating search.
   * </pre>
   * 
   * <pre>
   * -S &lt;num&gt;
   *  Size of lookup cache for evaluated subsets.
   *  Expressed as a multiple of the number of
   *  attributes in the data set. (default = 1)
   * </pre>
   * 
   <!-- options-end -->
   * 
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    resetOptions();

    if (Utils.getFlag('X', options)) {
      throw new Exception("Cross validation is not a valid option"
        + " when using attribute selection as a Filter.");
    }

    String evaluatorSpec = Utils.getOption('E', options);
    if (evaluatorSpec.length() != 0) {
      evaluatorSpec = evaluatorSpec.trim();
      setEvaluator(ASEvaluation.forName(specClassName(evaluatorSpec),
        specOptions(evaluatorSpec)));
    }

    // single-attribute evaluators are paired with Ranker by default; an
    // explicit -S below still overrides this choice
    if (m_ASEvaluator instanceof AttributeEvaluator) {
      setSearch(new Ranker());
    }

    String searchSpec = Utils.getOption('S', options);
    if (searchSpec.length() != 0) {
      searchSpec = searchSpec.trim();
      setSearch(ASSearch.forName(specClassName(searchSpec),
        specOptions(searchSpec)));
    }

    Utils.checkForRemainingOptions(options);
  }

  /**
   * Extracts the class name from a (trimmed) specification of the form
   * "classname [options]".
   * 
   * @param spec the trimmed specification string
   * @return everything up to the first space, or the whole string if it
   *         contains no space
   */
  private static String specClassName(String spec) {
    int breakLoc = spec.indexOf(' ');
    return (breakLoc == -1) ? spec : spec.substring(0, breakLoc);
  }

  /**
   * Extracts the options from a (trimmed) specification of the form
   * "classname [options]".
   * 
   * @param spec the trimmed specification string
   * @return the split options following the class name, or null if the
   *         specification consists of a class name only
   * @throws Exception if the option string cannot be split
   */
  private static String[] specOptions(String spec) throws Exception {
    int breakLoc = spec.indexOf(' ');
    if (breakLoc == -1) {
      return null;
    }
    return Utils.splitOptions(spec.substring(breakLoc).trim());
  }

  /**
   * Gets the current settings for the attribute selection (search, evaluator)
   * etc. An evaluator or search method that has been set to null is omitted
   * from the result rather than causing a NullPointerException.
   * 
   * @return an array of strings suitable for passing to setOptions()
   */
  @Override
  public String[] getOptions() {
    ArrayList<String> options = new ArrayList<String>(4);

    ASEvaluation evaluator = getEvaluator();
    if (evaluator != null) {
      String[] evaluatorOptions = new String[0];
      if (evaluator instanceof OptionHandler) {
        evaluatorOptions = ((OptionHandler) evaluator).getOptions();
      }
      options.add("-E");
      options.add(evaluator.getClass().getName() + " "
        + Utils.joinOptions(evaluatorOptions));
    }

    ASSearch search = getSearch();
    if (search != null) {
      String[] searchOptions = new String[0];
      if (search instanceof OptionHandler) {
        searchOptions = ((OptionHandler) search).getOptions();
      }
      options.add("-S");
      options.add(search.getClass().getName() + " "
        + Utils.joinOptions(searchOptions));
    }

    // exact-size result; no padding with empty strings
    return options.toArray(new String[options.size()]);
  }

  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String evaluatorTipText() {

    return "Determines how attributes/attribute subsets are evaluated.";
  }

  /**
   * set attribute/subset evaluator
   * 
   * @param evaluator the evaluator to use
   */
  public void setEvaluator(ASEvaluation evaluator) {
    m_ASEvaluator = evaluator;
  }

  /**
   * Returns the tip text for this property
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String searchTipText() {

    return "Determines the search method.";
  }

  /**
   * Set search class
   * 
   * @param search the search class to use
   */
  public void setSearch(ASSearch search) {
    m_ASSearch = search;
  }

  /**
   * Get the attribute/subset evaluator
   * 
   * @return the attribute/subset evaluator currently in use
   */
  public ASEvaluation getEvaluator() {

    return m_ASEvaluator;
  }

  /**
   * Get the search method
   * 
   * @return the search method currently in use
   */
  public ASSearch getSearch() {

    return m_ASSearch;
  }

  /**
   * Returns the Capabilities of this filter. These are the capabilities of the
   * current evaluator with NO_CLASS additionally enabled; if no evaluator is
   * set, all capabilities are disabled.
   * 
   * @return the capabilities of this object
   * @see Capabilities
   */
  @Override
  public Capabilities getCapabilities() {
    Capabilities result;

    if (m_ASEvaluator == null) {
      result = super.getCapabilities();
      result.disableAll();
    } else {
      result = m_ASEvaluator.getCapabilities();
      // class index will be set if necessary, so we always allow the dataset
      // to have no class attribute set. see the following method:
      // weka.attributeSelection.AttributeSelection.SelectAttributes(Instances)
      result.enable(Capability.NO_CLASS);
    }

    result.setMinimumNumberInstances(0);

    return result;
  }

  /**
   * Input an instance for filtering. Once the output format has been
   * determined (i.e. after the first batch) the instance is converted and made
   * available for output immediately; before that it is only buffered.
   * 
   * @param instance the input instance
   * @return true if the filtered instance may now be collected with output().
   * @throws IllegalStateException if no input format has been defined.
   * @throws Exception if the input instance was not of the correct format or if
   *           there was a problem with the filtering.
   */
  @Override
  public boolean input(Instance instance) throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    if (isOutputFormatDefined()) {
      convertInstance(instance);
      return true;
    }

    bufferInput(instance);
    return false;
  }

  /**
   * Signify that this batch of input to the filter is finished. On the first
   * batch the attribute selection is performed on the buffered instances, the
   * output format is determined and the buffered instances are converted.
   * 
   * @return true if there are instances pending output.
   * @throws IllegalStateException if no input structure has been defined.
   * @throws Exception if there is a problem during the attribute selection.
   */
  @Override
  public boolean batchFinished() throws Exception {

    Instances buffered = getInputFormat();
    if (buffered == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    if (!isOutputFormatDefined()) {
      // recorded before the selection runs: per the note in
      // getCapabilities(), the selector may set a class index itself
      m_hasClass = (buffered.classIndex() >= 0);

      m_trainSelector.setEvaluator(m_ASEvaluator);
      m_trainSelector.setSearch(m_ASSearch);
      m_trainSelector.SelectAttributes(buffered);

      m_SelectedAttributes = m_trainSelector.selectedAttributes();
      if (m_SelectedAttributes == null) {
        throw new Exception("No selected attributes\n");
      }

      setOutputFormat();

      // Convert pending input instances
      for (int i = 0; i < buffered.numInstances(); i++) {
        convertInstance(buffered.instance(i));
      }
      flushInput();
    }

    m_NewBatch = true;
    return (numPendingOutput() != 0);
  }

  /**
   * Set the output format. Builds a header from copies of the selected
   * attributes (taken from the evaluator's transformed header if the evaluator
   * is an AttributeTransformer, otherwise from the input format) and calls
   * setOutputFormat(Instances) with it. If no attributes have been selected
   * yet, the output format is set to null.
   * 
   * @throws Exception if something goes wrong
   */
  protected void setOutputFormat() throws Exception {

    if (m_SelectedAttributes == null) {
      setOutputFormat(null);
      return;
    }

    Instances informat;
    if (m_ASEvaluator instanceof AttributeTransformer) {
      informat = ((AttributeTransformer) m_ASEvaluator).transformedHeader();
    } else {
      informat = getInputFormat();
    }

    ArrayList<Attribute> attributes =
      new ArrayList<Attribute>(m_SelectedAttributes.length);
    for (int i = 0; i < m_SelectedAttributes.length; i++) {
      attributes.add((Attribute) informat.attribute(m_SelectedAttributes[i])
        .copy());
    }

    Instances outputFormat =
      new Instances(getInputFormat().relationName(), attributes, 0);

    // when the input had a class attribute, the last selected attribute is
    // made the class of the output
    if (m_hasClass) {
      outputFormat.setClassIndex(m_SelectedAttributes.length - 1);
    }

    setOutputFormat(outputFormat);
  }

  /**
   * Convert a single instance over. Selected attributes only are transfered.
   * The converted instance is added to the end of the output queue. The
   * instance weight is preserved, and a SparseInstance input yields a
   * SparseInstance output.
   * 
   * @param instance the instance to convert
   * @throws Exception if something goes wrong
   */
  protected void convertInstance(Instance instance) throws Exception {
    double[] newVals = new double[getOutputFormat().numAttributes()];

    // transforming evaluators supply the values via their own conversion of
    // the instance; otherwise values are read from the input instance directly
    Instance source = instance;
    if (m_ASEvaluator instanceof AttributeTransformer) {
      source =
        ((AttributeTransformer) m_ASEvaluator).convertInstance(instance);
    }

    for (int i = 0; i < m_SelectedAttributes.length; i++) {
      newVals[i] = source.value(m_SelectedAttributes[i]);
    }

    if (instance instanceof SparseInstance) {
      push(new SparseInstance(instance.weight(), newVals));
    } else {
      push(new DenseInstance(instance.weight(), newVals));
    }
  }

  /**
   * set options to their default values: a fresh selection object,
   * CfsSubsetEval as evaluator, BestFirst as search method and no selected
   * attributes
   */
  protected void resetOptions() {

    m_trainSelector = new weka.attributeSelection.AttributeSelection();
    setEvaluator(new CfsSubsetEval());
    setSearch(new BestFirst());
    m_SelectedAttributes = null;
  }

  /**
   * Returns the revision string.
   * 
   * @return the revision
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 14508 $");
  }

  /**
   * Main method for testing this class.
   * 
   * @param argv should contain arguments to the filter: use -h for help
   */
  public static void main(String[] argv) {
    runFilter(new AttributeSelection(), argv);
  }
}
