/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * ExcelSpreadSheetReader.java
 * Copyright (C) 2010-2012 University of Waikato, Hamilton, New Zealand
 */
package adams.core.io;

import java.io.InputStream;

import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;

import adams.core.DateFormat;
import adams.core.DateUtils;
import adams.core.Index;
import adams.core.Range;

/**
 <!-- globalinfo-start -->
 * Reads MS Excel files.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre>-D &lt;int&gt; (property: debugLevel)
 * &nbsp;&nbsp;&nbsp;The greater the number the more additional info the scheme may output to
 * &nbsp;&nbsp;&nbsp;the console (0 = off).
 * &nbsp;&nbsp;&nbsp;default: 0
 * &nbsp;&nbsp;&nbsp;minimum: 0
 * </pre>
 *
 * <pre>-missing &lt;java.lang.String&gt; (property: missingValue)
 * &nbsp;&nbsp;&nbsp;The placeholder for missing values.
 * &nbsp;&nbsp;&nbsp;default:
 * </pre>
 *
 * <pre>-sheet &lt;java.lang.String&gt; (property: sheetIndex)
 * &nbsp;&nbsp;&nbsp;The index of the sheet to load; An index is a number starting with 1; the
 * &nbsp;&nbsp;&nbsp;following placeholders can be used as well: first, second, third, last_2,
 * &nbsp;&nbsp;&nbsp; last_1, last
 * &nbsp;&nbsp;&nbsp;default: first
 * </pre>
 *
 * <pre>-no-auto-extend-header (property: autoExtendHeader)
 * &nbsp;&nbsp;&nbsp;If enabled, the header gets automatically extended if rows have more cells
 * &nbsp;&nbsp;&nbsp;than the header.
 * </pre>
 *
 * <pre>-text-columns &lt;java.lang.String&gt; (property: textColumns)
 * &nbsp;&nbsp;&nbsp;The range of columns to treat as text; A range is a comma-separated list
 * &nbsp;&nbsp;&nbsp;of single 1-based indices or sub-ranges of indices ('start-end'); 'inv(..
 * &nbsp;&nbsp;&nbsp;.)' inverts the range '...'; the following placeholders can be used as well:
 * &nbsp;&nbsp;&nbsp; first, second, third, last_2, last_1, last
 * &nbsp;&nbsp;&nbsp;default:
 * </pre>
 *
 <!-- options-end -->
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 5563 $
 */
public class ExcelSpreadSheetReader
  extends AbstractSpreadSheetReader {

  /** for serialization. */
  private static final long serialVersionUID = 4755872204697328246L;

  /** the sheet to load. */
  protected Index m_SheetIndex;

  /** whether to automatically extend the header if rows have more cells than header. */
  protected boolean m_AutoExtendHeader;

  /** the range of columns to force to be text. */
  protected Range m_TextColumns;

  /**
   * Returns a string describing the object.
   *
   * @return 			a description suitable for displaying in the gui
   */
  public String globalInfo() {
    return "Reads MS Excel files.";
  }

  /**
   * Adds options to the internal list of options.
   */
  public void defineOptions() {
    super.defineOptions();

    m_OptionManager.add(
	    "sheet", "sheetIndex",
	    new Index(Index.FIRST));

    m_OptionManager.add(
	    "no-auto-extend-header", "autoExtendHeader",
	    true);

    m_OptionManager.add(
	    "text-columns", "textColumns",
	    "");
  }

  /**
   * Initializes the members.
   */
  protected void initialize() {
    super.initialize();

    m_SheetIndex  = new Index();
    m_TextColumns = new Range();
  }

  /**
   * Returns a string describing the format (used in the file chooser).
   *
   * @return 			a description suitable for displaying in the
   * 				file chooser
   */
  public String getFormatDescription() {
    return "MS Excel spreadsheets";
  }

  /**
   * Returns the extension(s) of the format.
   *
   * @return 			the extension (without the dot!)
   */
  public String[] getFormatExtensions() {
    return new String[]{"xls", "xlsx"};
  }

  /**
   * Sets the index of the sheet to load.
   *
   * @param value	the index (1-based)
   */
  public void setSheetIndex(Index value) {
    m_SheetIndex = value;
    reset();
  }

  /**
   * Returns the index of the sheet to load.
   *
   * @return		the index (1-based)
   */
  public Index getSheetIndex() {
    return m_SheetIndex;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   *         		displaying in the explorer/experimenter gui
   */
  public String sheetIndexTipText() {
    return "The index of the sheet to load; " + m_SheetIndex.getExample();
  }

  /**
   * Sets whether to extend the header if rows have more cells than the header.
   *
   * @param value	if true then the header gets extended if necessary
   */
  public void setAutoExtendHeader(boolean value) {
    m_AutoExtendHeader = value;
    reset();
  }

  /**
   * Returns whether to extend the header if rows have more cells than the header.
   *
   * @return		true if the header gets extended if necessary
   */
  public boolean getAutoExtendHeader() {
    return m_AutoExtendHeader;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   *         		displaying in the explorer/experimenter gui
   */
  public String autoExtendHeaderTipText() {
    return "If enabled, the header gets automatically extended if rows have more cells than the header.";
  }

  /**
   * Sets the range of columns to treat as text.
   *
   * @param value	the range of columns
   */
  public void setTextColumns(String value) {
    m_TextColumns.setRange(value);
    reset();
  }

  /**
   * Returns the range of columns to treat as text.
   *
   * @return		the range of columns
   */
  public String getTextColumns() {
    return m_TextColumns.getRange();
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   *         		displaying in the explorer/experimenter gui
   */
  public String textColumnsTipText() {
    return "The range of columns to treat as text; " + m_TextColumns.getExample();
  }

  /**
   * Returns whether to read from an InputStream rather than a Reader when
   * using a file name.
   *
   * @return		true if to read from an InputStream
   */
  protected boolean getUseInputStream() {
    return true;
  }

  /**
   * Turns a numeric cell into a string. Uses the "long" representation
   * whenever the cell's value has no fractional part, otherwise the
   * default string representation of the double value.
   *
   * @param cell	the cell to process
   * @return		the string representation
   */
  protected String numericToString(Cell cell) {
    double	dbl;
    long	lng;

    dbl = cell.getNumericCellValue();
    lng = (long) dbl;
    if (dbl == lng)
      return "" + lng;
    else
      return "" + dbl;
  }

  /**
   * Reads the spreadsheet content from the specified stream. The first row
   * of the selected sheet is used as header, all subsequent rows (up to and
   * including the last row of the sheet) are added as data rows.
   * <p/>
   * If the sheet reports 0 as index of its last row, an empty spreadsheet
   * is returned and a message is printed; this also applies to sheets that
   * only consist of a single row.
   *
   * @param in		the input stream to read from
   * @return		the spreadsheet or null in case of an error
   */
  protected SpreadSheet doRead(InputStream in) {
    SpreadSheet		result;
    Workbook		workbook;
    Sheet 		sheet;
    Row 		exRow;
    Cell 		exCell;
    SpreadSheet.Row	spRow;
    int 		i;
    int			n;
    int			lastRow;
    int			cellType;
    DateFormat		dformat;
    boolean		numeric;

    result = new SpreadSheet();

    dformat = DateUtils.getTimestampFormatter();
    try {
      workbook = WorkbookFactory.create(in);
      m_SheetIndex.setMax(workbook.getNumberOfSheets());
      sheet   = workbook.getSheetAt(m_SheetIndex.getIntIndex());
      // 0-based index of the last row in the sheet
      lastRow = sheet.getLastRowNum();
      if (lastRow == 0) {
        getSystemErr().println("No rows in sheet #" + m_SheetIndex.getIndex());
        return result;
      }
      result.setName(sheet.getSheetName());

      // header
      exRow = sheet.getRow(0);
      spRow = result.getHeaderRow();
      m_TextColumns.setMax(exRow.getLastCellNum());
      for (i = 0; i < exRow.getLastCellNum(); i++) {
        exCell = exRow.getCell(i);
        if (exCell == null) {
          spRow.addCell("" + (i + 1)).setContent(SpreadSheet.MISSING_VALUE);
          continue;
        }
        // formulas get evaluated via their cached result, just like the
        // data cells below; otherwise a formula with a numeric result
        // would be accessed as string
        cellType = exCell.getCellType();
        if (cellType == Cell.CELL_TYPE_FORMULA)
          cellType = exCell.getCachedFormulaResultType();
        numeric = !m_TextColumns.isInRange(i);
        switch (cellType) {
          case Cell.CELL_TYPE_BLANK:
          case Cell.CELL_TYPE_ERROR:
            // no usable name, generate one from the 1-based column index
            spRow.addCell("" + (i + 1)).setContent("column-" + (i+1));
            break;
          case Cell.CELL_TYPE_NUMERIC:
            if (HSSFDateUtil.isCellDateFormatted(exCell))
              spRow.addCell("" + (i + 1)).setContent(dformat.format(HSSFDateUtil.getJavaDate(exCell.getNumericCellValue())));
            else if (numeric)
              spRow.addCell("" + (i + 1)).setContent(exCell.getNumericCellValue());
            else
              spRow.addCell("" + (i + 1)).setContent(numericToString(exCell), false);
            break;
          default:
            spRow.addCell("" + (i + 1)).setContent(exCell.getStringCellValue(), false);
        }
      }

      // data
      // NB: lastRow is the index of the last row, hence it has to be
      // included in the iteration ("<=")
      for (i = 1; i <= lastRow; i++) {
        spRow = result.addRow("" + result.getRowCount());
        exRow = sheet.getRow(i);
        // undefined row in the sheet, results in a row without cells
        if (exRow == null)
          continue;
        for (n = 0; n < exRow.getLastCellNum(); n++) {
          // too few columns in header?
          // the cell keys are the 1-based column indices, using just "n"
          // would refer to the key of the preceding column
          if ((n >= result.getHeaderRow().getCellCount()) && m_AutoExtendHeader)
            result.getHeaderRow().addCell("" + (n + 1)).setContent("");

          m_TextColumns.setMax(result.getHeaderRow().getCellCount());
          exCell = exRow.getCell(n);
          if (exCell == null) {
            spRow.addCell("" + (n + 1)).setContent(SpreadSheet.MISSING_VALUE);
            continue;
          }
          cellType = exCell.getCellType();
          if (cellType == Cell.CELL_TYPE_FORMULA)
            cellType = exCell.getCachedFormulaResultType();
          numeric = !m_TextColumns.isInRange(n);
          switch (cellType) {
            case Cell.CELL_TYPE_BLANK:
            case Cell.CELL_TYPE_ERROR:
              // blank cells only count as missing if the placeholder for
              // missing values is the empty string
              if ((m_MissingValue.length() == 0))
                spRow.addCell("" + (n + 1)).setContent(SpreadSheet.MISSING_VALUE);
              else
                spRow.addCell("" + (n + 1)).setContent("");
              break;
            case Cell.CELL_TYPE_NUMERIC:
              if (HSSFDateUtil.isCellDateFormatted(exCell))
                spRow.addCell("" + (n + 1)).setContent(dformat.format(HSSFDateUtil.getJavaDate(exCell.getNumericCellValue())));
              else if (numeric)
                spRow.addCell("" + (n + 1)).setContent(exCell.getNumericCellValue());
              else
                spRow.addCell("" + (n + 1)).setContent(numericToString(exCell), false);
              break;
            default:
              if (exCell.getStringCellValue().equals(m_MissingValue))
                spRow.addCell("" + (n + 1)).setContent(SpreadSheet.MISSING_VALUE);
              else
                spRow.addCell("" + (n + 1)).setContent(exCell.getStringCellValue(), false);
          }
        }
      }
    }
    catch (Exception ioe) {
      // any failure (workbook, header or data) ends up here
      getSystemErr().println("Failed to read header:");
      getSystemErr().printStackTrace(ioe);
      result = null;
    }

    return result;
  }
}
