/**
 * SpreadSheetToWekaInstances.java
 * Copyright (C) 2011 University of Waikato, Hamilton, New Zealand
 */
package adams.data.conversion;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;
import adams.core.io.SpreadSheet;
import adams.core.io.SpreadSheet.Cell;
import adams.core.io.SpreadSheet.Row;
import adams.env.Environment;

/**
 <!-- globalinfo-start -->
 * Generates a weka.core.Instances object from a SpreadSheet object.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre>-D &lt;int&gt; (property: debugLevel)
 * &nbsp;&nbsp;&nbsp;The greater the number the more additional info the scheme may output to
 * &nbsp;&nbsp;&nbsp;the console (0 = off).
 * &nbsp;&nbsp;&nbsp;default: 0
 * &nbsp;&nbsp;&nbsp;minimum: 0
 * </pre>
 *
 <!-- options-end -->
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 3743 $
 */
public class SpreadSheetToWekaInstances
  extends AbstractConversion {

  /** for serialization. */
  private static final long serialVersionUID = 867886761713927179L;

  /**
   * Returns a string describing the object.
   *
   * @return 			a description suitable for displaying in the gui
   */
  public String globalInfo() {
    return "Generates a weka.core.Instances object from a SpreadSheet object.";
  }

  /**
   * Returns the class that is accepted as input.
   *
   * @return		the class, i.e., {@link SpreadSheet}
   */
  public Class accepts() {
    return adams.core.io.SpreadSheet.class;
  }

  /**
   * Returns the class that is generated as output.
   *
   * @return		the class, i.e., {@link Instances}
   */
  public Class generates() {
    return weka.core.Instances.class;
  }

  /**
   * Performs the actual conversion: turns the spreadsheet stored in
   * <code>m_Input</code> into a dataset with one attribute per column and
   * one instance per row.
   * <p/>
   * Columns that the spreadsheet reports as numeric become numeric
   * attributes. All other columns become nominal attributes whose labels
   * are the sorted, unique, non-missing cell contents of that column.
   * Cells that are absent or flagged as missing are stored as missing values.
   *
   * @return		the converted data
   * @throws Exception	if something goes wrong with the conversion, e.g.,
   * 			a cell of a numeric column cannot be parsed as double
   */
  protected Object doConvert() throws Exception {
    Instances			result;
    SpreadSheet			sheet;
    ArrayList<Attribute>	atts;
    HashSet<String>		unique;
    ArrayList<String>		labels;
    Row				row;
    Cell			cell;
    String			name;
    int				i;
    int				n;
    int				numRows;
    int				numAtts;
    double[]			values;

    sheet   = (SpreadSheet) m_Input;
    numRows = sheet.getRowCount();

    // create header
    atts = new ArrayList<Attribute>();
    for (i = 0; i < sheet.getColumnCount(); i++) {
      name = sheet.getHeaderRow().getCell(i).getContent();
      if (sheet.isNumeric(i)) {
        atts.add(new Attribute(name));
      }
      else {
        // nominal attribute: collect the distinct, non-missing cell contents
        unique = new HashSet<String>();
        for (n = 0; n < numRows; n++) {
          cell = sheet.getRow(n).getCell(i);
          if ((cell != null) && !cell.isMissing())
            unique.add(cell.getContent());
        }
        // sort labels to obtain a deterministic label order
        labels = new ArrayList<String>(unique);
        Collections.sort(labels);
        atts.add(new Attribute(name, labels));
      }
    }
    result = new Instances(Environment.getInstance().getProject(), atts, numRows);
    if (sheet.hasName())
      result.setRelationName(sheet.getName());

    // add data
    numAtts = result.numAttributes();
    for (n = 0; n < numRows; n++) {
      row    = sheet.getRow(n);
      values = new double[numAtts];
      for (i = 0; i < numAtts; i++) {
        cell      = row.getCell(i);
        // default to missing, only overwritten if the cell holds a value
        values[i] = weka.core.Utils.missingValue();
        if ((cell != null) && !cell.isMissing()) {
          if (result.attribute(i).isNumeric())
            values[i] = Double.parseDouble(cell.getContent());
          else
            values[i] = result.attribute(i).indexOfValue(cell.getContent());
        }
      }
      // add exactly one instance per row, once all its values have been
      // determined (adding inside the attribute loop would duplicate the row)
      result.add(new DenseInstance(1.0, values));
    }

    return result;
  }
}
