/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * TwitterFilter.java
 * Copyright (C) 2010-2012 University of Waikato, Hamilton, New Zealand
 */

package adams.flow.transformer;

import java.util.HashMap;

import twitter4j.Status;
import twitter4j.Tweet;
import adams.flow.core.Token;
import adams.parser.GrammarSupplier;
import adams.parser.twitterfilter.Symbols;

/**
 <!-- globalinfo-start -->
 * Filters Twitter objects (tweet&#47;status) according to the provided filter expression. Only objects that match the filter expression are passed on.<br/>
 * <br/>
 * The expressions use the following grammar:<br/>
 * <br/>
 * expr_list ::= expr_list expr | expr;<br/>
 * expr      ::=   ( expr )<br/>
 *               | boolexpr<br/>
 *               ;<br/>
 * <br/>
 * boolexpr ::=    BOOLEAN<br/>
 *               | ( boolexpr )<br/>
 *               | not boolexpr<br/>
 *               | boolexpr and boolexpr<br/>
 *               | boolexpr or boolexpr<br/>
 *               | boolexpr xor boolexpr<br/>
 * <br/>
 *               | langcode &lt;match&gt; pattern<br/>
 *               | country &lt;match&gt; pattern<br/>
 *               | countrycode &lt;match&gt; pattern<br/>
 *               | place &lt;match&gt; pattern<br/>
 *               | source &lt;match&gt; pattern<br/>
 *               | text &lt;match&gt; pattern<br/>
 *               | user &lt;match&gt; pattern<br/>
 * <br/>
 *               | ifelse ( boolexpr:test , boolexpr:test_true , boolexpr:test_false )<br/>
 *               ;<br/>
 * <br/>
 * The '&lt;match&gt;' operator can be one of the following:<br/>
 * 1. '=' - exact match (the twitter field must be the exact 'pattern' string)<br/>
 * 2. ':' - substring match (the 'pattern' can occur anywhere in the twitter field)<br/>
 * 3. '~' - regular expression match (the 'pattern' is a regular expression that the twitter field must match)<br/>
 * <br/>
 * Please note, all strings are converted to lower case before the filter is applied.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- flow-summary-start -->
 * Input&#47;output:<br/>
 * - accepts:<br/>
 * &nbsp;&nbsp;&nbsp;twitter4j.Tweet<br/>
 * &nbsp;&nbsp;&nbsp;twitter4j.Status<br/>
 * - generates:<br/>
 * &nbsp;&nbsp;&nbsp;twitter4j.Tweet<br/>
 * &nbsp;&nbsp;&nbsp;twitter4j.Status<br/>
 * <p/>
 <!-- flow-summary-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre>-D &lt;int&gt; (property: debugLevel)
 * &nbsp;&nbsp;&nbsp;The greater the number the more additional info the scheme may output to
 * &nbsp;&nbsp;&nbsp;the console (0 = off).
 * &nbsp;&nbsp;&nbsp;default: 0
 * &nbsp;&nbsp;&nbsp;minimum: 0
 * </pre>
 *
 * <pre>-name &lt;java.lang.String&gt; (property: name)
 * &nbsp;&nbsp;&nbsp;The name of the actor.
 * &nbsp;&nbsp;&nbsp;default: TwitterFilter
 * </pre>
 *
 * <pre>-annotation &lt;adams.core.base.BaseText&gt; (property: annotations)
 * &nbsp;&nbsp;&nbsp;The annotations to attach to this actor.
 * &nbsp;&nbsp;&nbsp;default:
 * </pre>
 *
 * <pre>-skip (property: skip)
 * &nbsp;&nbsp;&nbsp;If set to true, transformation is skipped and the input token is just forwarded
 * &nbsp;&nbsp;&nbsp;as it is.
 * </pre>
 *
 * <pre>-stop-flow-on-error (property: stopFlowOnError)
 * &nbsp;&nbsp;&nbsp;If set to true, the flow gets stopped in case this actor encounters an error;
 * &nbsp;&nbsp;&nbsp; useful for critical actors.
 * </pre>
 *
 * <pre>-expression &lt;java.lang.String&gt; (property: expression)
 * &nbsp;&nbsp;&nbsp;The filter expression to use.
 * &nbsp;&nbsp;&nbsp;default: text~\".*\"
 * </pre>
 *
 <!-- options-end -->
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 5867 $
 */
public class TwitterFilter
  extends AbstractTransformer
  implements GrammarSupplier {

  /** for serialization. */
  private static final long serialVersionUID = -449062766931736640L;

  /** the filter expression. */
  protected String m_Expression;

  /**
   * Returns a string describing the object.
   *
   * @return 			a description suitable for displaying in the gui
   */
  @Override
  public String globalInfo() {
    return
        "Filters Twitter objects (tweet/status) according to the provided "
      + "filter expression. Only objects that match the filter expression "
      + "are passed on.\n"
      + "\n"
      + "The expressions use the following grammar:\n\n"
      + getGrammar();
  }

  /**
   * Returns a string representation of the grammar, as supplied by the
   * underlying {@link adams.parser.TwitterFilter} parser.
   *
   * @return		the grammar, null if not available
   */
  public String getGrammar() {
    return new adams.parser.TwitterFilter().getGrammar();
  }

  /**
   * Adds options to the internal list of options.
   */
  @Override
  public void defineOptions() {
    super.defineOptions();

    m_OptionManager.add(
        "expression", "expression",
        "text~\".*\"");
  }

  /**
   * Sets the filter expression to evaluate against each incoming
   * tweet/status. Resets the actor afterwards.
   *
   * @param value	the filter expression
   */
  public void setExpression(String value) {
    m_Expression = value;
    reset();
  }

  /**
   * Returns the filter expression currently in use.
   *
   * @return		the filter expression
   */
  public String getExpression() {
    return m_Expression;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String expressionTipText() {
    return "The filter expression to use.";
  }

  /**
   * Returns a quick info about the actor, which will be displayed in the GUI.
   * A variable attached to the "expression" property takes precedence over
   * the expression itself.
   *
   * @return		null if no info available, otherwise short string
   */
  @Override
  public String getQuickInfo() {
    String	variable;

    variable = getOptionManager().getVariableForProperty("expression");

    if (variable != null)
      return variable;
    else if ((m_Expression != null) && (m_Expression.length() > 0))
      return m_Expression;
    else
      return null;
  }

  /**
   * Returns the class that the consumer accepts.
   *
   * @return		<!-- flow-accepts-start -->twitter4j.Tweet.class, twitter4j.Status.class<!-- flow-accepts-end -->
   */
  public Class[] accepts() {
    return new Class[]{Tweet.class, Status.class};
  }

  /**
   * Returns the class of objects that it generates.
   *
   * @return		<!-- flow-generates-start -->twitter4j.Tweet.class, twitter4j.Status.class<!-- flow-generates-end -->
   */
  public Class[] generates() {
    return new Class[]{Tweet.class, Status.class};
  }

  /**
   * Stores the lower-cased value under the given symbol, skipping the entry
   * altogether if the value is null. This way a missing field simply results
   * in an undefined symbol rather than a NullPointerException.
   *
   * @param symbols	the symbol table to update
   * @param key		the symbol to store the value under
   * @param value	the raw field value, can be null
   */
  // raw HashMap is kept on purpose: the protected process methods expose it
  @SuppressWarnings({"rawtypes", "unchecked"})
  private static void putLowerCase(HashMap symbols, Object key, String value) {
    if (value != null)
      symbols.put(key, value.toLowerCase());
  }

  /**
   * Processes the specified tweet. Fields that are not available (null) are
   * left out of the returned symbol table.
   *
   * @param tweet	the tweet to process
   * @return		the hashmap for the parser
   */
  protected HashMap processTweet(Tweet tweet) {
    HashMap	result;

    result = new HashMap();

    putLowerCase(result, Symbols.USER, tweet.getFromUser());
    putLowerCase(result, Symbols.SOURCE, tweet.getSource());
    putLowerCase(result, Symbols.TEXT, tweet.getText());
    putLowerCase(result, Symbols.LANGUAGE_CODE, tweet.getIsoLanguageCode());

    return result;
  }

  /**
   * Processes the specified status. Fields that are not available (null),
   * including a missing user or place, are left out of the returned symbol
   * table, consistent with how tweets are processed.
   *
   * @param status	the status to process
   * @return		the association between fields and status values
   */
  protected HashMap processStatus(Status status) {
    HashMap	result;

    result = new HashMap();

    // the user object is not guaranteed to be attached to a status
    if (status.getUser() != null)
      putLowerCase(result, Symbols.USER, status.getUser().getName());
    putLowerCase(result, Symbols.SOURCE, status.getSource());
    putLowerCase(result, Symbols.TEXT, status.getText());
    if (status.getPlace() != null) {
      putLowerCase(result, Symbols.COUNTRY, status.getPlace().getCountry());
      putLowerCase(result, Symbols.COUNTRY_CODE, status.getPlace().getCountryCode());
      putLowerCase(result, Symbols.PLACE, status.getPlace().getName());
    }

    return result;
  }

  /**
   * Executes the flow item: turns the incoming tweet/status into a symbol
   * table, evaluates the filter expression against it and only generates an
   * output token if the expression matches. Any exception raised while
   * processing or evaluating discards the output token and gets turned into
   * an error message.
   *
   * @return		null if everything is fine, otherwise error message
   */
  @Override
  protected String doExecute() {
    String	result;
    HashMap	symbols;
    boolean	match;
    String	exp;
    Object	payload;

    result = null;

    exp = m_Expression;
    try {
      // get input
      payload = m_InputToken.getPayload();
      if (payload instanceof Tweet)
        symbols = processTweet((Tweet) payload);
      else
        symbols = processStatus((Status) payload);

      // evaluate the expression
      match = adams.parser.TwitterFilter.evaluate(exp, symbols);
      if (match)
        m_OutputToken = new Token(payload);
    }
    catch (Exception e) {
      m_OutputToken = null;
      result = handleException("Error evaluating: " + exp, e);
    }

    return result;
  }
}
