/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * CsvSpreadSheetReader.java
 * Copyright (C) 2010 University of Waikato, Hamilton, New Zealand
 */
package adams.core.io;

import java.io.BufferedReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import adams.core.Utils;
import adams.core.io.SpreadSheet.Row;

/**
 <!-- globalinfo-start -->
 * Reads CSV files.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre>-D &lt;int&gt; (property: debugLevel)
 * &nbsp;&nbsp;&nbsp;The greater the number the more additional info the scheme may output to
 * &nbsp;&nbsp;&nbsp;the console (0 = off).
 * &nbsp;&nbsp;&nbsp;default: 0
 * &nbsp;&nbsp;&nbsp;minimum: 0
 * </pre>
 *
 <!-- options-end -->
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 4584 $
 */
public class CsvSpreadSheetReader
  extends AbstractSpreadSheetReader {

  /** for serialization. */
  private static final long serialVersionUID = 4461796269354230002L;

  /**
   * Returns a string describing the object.
   *
   * @return 			a description suitable for displaying in the gui
   */
  public String globalInfo() {
    return "Reads CSV files.";
  }

  /**
   * Returns a string describing the format (used in the file chooser).
   *
   * @return 			a description suitable for displaying in the
   * 				file chooser
   */
  public String getFormatDescription() {
    return "CSV (comma-separated values)";
  }

  /**
   * Returns the extension(s) of the format.
   *
   * @return 			the extension (without the dot!)
   */
  public String[] getFormatExtensions() {
    return new String[]{"csv"};
  }

  /**
   * Returns whether to read from an InputStream rather than a Reader.
   *
   * @return		true if to read from an InputStream
   */
  protected boolean getUseInputStream() {
    return false;
  }

  /**
   * Unquotes the given string.
   *
   * @param s		the string to unquote, if necessary
   * @return		the processed string
   */
  protected String unquote(String s) {
    String	result;

    result = Utils.unquote(s);
    result = Utils.unDoubleQuote(result);

    return result;
  }

  /**
   * Breaks up a line from a CSV file into its cells.
   *
   * @param line	the row to break up
   * @return		the cells
   */
  protected String[] breakUp(String line) {
    Vector<String>	result;
    int			i;
    StringBuffer	current;
    boolean		escaped;
    char		escapeChr;
    char		chr;

    result    = new Vector<String>();
    current   = new StringBuffer();
    escaped   = false;
    escapeChr = '\0';

    for (i = 0; i < line.length(); i++) {
      chr = line.charAt(i);

      if ((chr == ',') || (chr == '\t')) {
	if (escaped) {
	  current.append(chr);
	}
	else {
	  result.add(unquote(current.toString()));
	  current = new StringBuffer();
	}
      }
      else if ((chr == '\'') || (chr == '"')) {
	if ((i > 0) && (line.charAt(i - 1) == '\\')) {
	  current.append(chr);
	}
	else {
	  if (escaped && (escapeChr == chr)) {
	    escaped = false;
	    current.append(chr);
	  }
	  else {
	    escaped   = true;
	    escapeChr = chr;
	    current.append(chr);
	  }
	}
      }
      else {
	current.append(chr);
      }
    }

    // add last cell
    result.add(unquote(current.toString()));

    return result.toArray(new String[result.size()]);
  }

  /**
   * Reads the spreadsheet content from the specified file.
   *
   * @param r		the reader to read from
   * @return		the spreadsheet or null in case of an error
   */
  protected SpreadSheet doRead(Reader r) {
    SpreadSheet		result;
    String		line;
    String[]		cells;
    String[]		headerCells;
    boolean		comments;
    Row			row;
    BufferedReader	reader;
    int			i;

    if (r instanceof BufferedReader)
      reader = (BufferedReader) r;
    else
      reader = new BufferedReader(r);

    try {
      result = new SpreadSheet();

      comments    = true;
      headerCells = null;
      while ((line = reader.readLine()) != null) {
	// still in comments section?
	if (line.startsWith(SpreadSheet.COMMENT)) {
	  if (comments)
	    result.addComment(line.substring(1).trim());
	  continue;
	}

	// actual data
	comments = false;
	if (line.trim().length() == 0)
	  continue;
	cells = breakUp(line);
	if (headerCells == null) {
	  headerCells = cells;
	  row         = result.getHeaderRow();
	}
	else {
	  row = result.addRow("" + result.getRowCount());
	}
	for (i = 0; (i < headerCells.length) && (i < cells.length); i++) {
	  if (cells[i].equals(m_MissingValue))
	    row.addCell("" + i).setContent(SpreadSheet.MISSING_VALUE);
	  else
	    row.addCell("" + i).setContent(cells[i]);
	}
      }
    }
    catch (Exception e) {
      result = null;
      e.printStackTrace();
    }

    return result;
  }
}
