/*
 * TwitterFilter.java
 * Copyright (C) 2010 University of Waikato, Hamilton, New Zealand
 */

package adams.parser;

import java.io.ByteArrayInputStream;
import java.util.HashMap;

import java_cup.runtime.DefaultSymbolFactory;
import java_cup.runtime.SymbolFactory;
import adams.parser.twitterfilter.Parser;
import adams.parser.twitterfilter.Scanner;

/**
 <!-- globalinfo-start -->
 * Evaluates Twitter filter expressions.<br/>
 * <br/>
 * The expressions use the following grammar:<br/>
 * <br/>
 * expr_list ::= expr_list expr | expr;<br/>
 * expr      ::=   ( expr )<br/>
 *               | boolexpr<br/>
 *               ;<br/>
 * <br/>
 * boolexpr ::=    BOOLEAN<br/>
 *               | ( boolexpr )<br/>
 *               | not boolexpr<br/>
 *               | boolexpr and boolexpr<br/>
 *               | boolexpr or boolexpr<br/>
 *               | boolexpr xor boolexpr<br/>
 * <br/>
 *               | langcode &lt;match&gt; pattern<br/>
 *               | country &lt;match&gt; pattern<br/>
 *               | countrycode &lt;match&gt; pattern<br/>
 *               | place &lt;match&gt; pattern<br/>
 *               | source &lt;match&gt; pattern<br/>
 *               | text &lt;match&gt; pattern<br/>
 *               | user &lt;match&gt; pattern<br/>
 * <br/>
 *               | ifelse ( boolexpr:test , boolexpr:test_true , boolexpr:test_false )<br/>
 *               ;<br/>
 * <br/>
 * The '&lt;match&gt;' operator can be one of the following:<br/>
 * 1. '=' - exact match (the twitter field must be the exact 'pattern' string)<br/>
 * 2. ':' - substring match (the 'pattern' can occur anywhere in the twitter field)<br/>
 * 3. '~' - regular expression match (the 'pattern' is a regular expression that the twitter field must match)<br/>
 * <br/>
 * Please note, all strings are converted to lower case before the filter is applied.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre>-D &lt;int&gt; (property: debugLevel)
 * &nbsp;&nbsp;&nbsp;The greater the number the more additional info the scheme may output to
 * &nbsp;&nbsp;&nbsp;the console (0 = off).
 * &nbsp;&nbsp;&nbsp;default: 0
 * &nbsp;&nbsp;&nbsp;minimum: 0
 * </pre>
 *
 * <pre>-env &lt;java.lang.String&gt; (property: environment)
 * &nbsp;&nbsp;&nbsp;The class to use for determining the environment.
 * &nbsp;&nbsp;&nbsp;default: adams.env.Environment
 * </pre>
 *
 * <pre>-expression &lt;java.lang.String&gt; (property: expression)
 * &nbsp;&nbsp;&nbsp;The filter expression to evaluate.
 * &nbsp;&nbsp;&nbsp;default: text~\".*\"
 * </pre>
 *
 * <pre>-symbol &lt;adams.core.base.BaseString&gt; [-symbol ...] (property: symbols)
 * &nbsp;&nbsp;&nbsp;The symbols to initialize the parser with, key-value pairs: name=value.
 * &nbsp;&nbsp;&nbsp;default:
 * </pre>
 *
 <!-- options-end -->
 *
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 3136 $
 */
public class TwitterFilter
  extends AbstractSymbolEvaluator<Boolean> {

  /** for serialization. */
  private static final long serialVersionUID = -1217454324054448107L;

  /** the production rules of the filter grammar (first part of the grammar text). */
  private static final String GRAMMAR_RULES =
        "expr_list ::= expr_list expr | expr;\n"
      + "expr      ::=   ( expr )\n"
      + "              | boolexpr\n"
      + "              ;\n"
      + "\n"
      + "boolexpr ::=    BOOLEAN\n"
      + "              | ( boolexpr )\n"
      + "              | not boolexpr\n"
      + "              | boolexpr and boolexpr\n"
      + "              | boolexpr or boolexpr\n"
      + "              | boolexpr xor boolexpr\n"
      + "\n"
      + "              | langcode <match> pattern\n"
      + "              | country <match> pattern\n"
      + "              | countrycode <match> pattern\n"
      + "              | place <match> pattern\n"
      + "              | source <match> pattern\n"
      + "              | text <match> pattern\n"
      + "              | user <match> pattern\n"
      + "\n"
      + "              | ifelse ( boolexpr:test , boolexpr:test_true , boolexpr:test_false )\n"
      + "              ;\n"
      + "\n";

  /** the explanation of the match operators (second part of the grammar text). */
  private static final String MATCH_OPERATOR_NOTES =
        "The '<match>' operator can be one of the following:\n"
      + "1. '=' - exact match (the twitter field must be the exact 'pattern' string)\n"
      + "2. ':' - substring match (the 'pattern' can occur anywhere in the twitter field)\n"
      + "3. '~' - regular expression match (the 'pattern' is a regular expression that the twitter field must match)\n"
      + "\n"
      + "Please note, all strings are converted to lower case before the filter is applied.";

  /**
   * Builds the general description of this evaluator: a short introduction
   * followed by the text returned by {@link #getGrammar()}.
   *
   * @return 			the description text
   */
  public String globalInfo() {
    StringBuilder	info;

    info = new StringBuilder();
    info.append("Evaluates Twitter filter expressions.\n");
    info.append("\n");
    info.append("The expressions use the following grammar:\n\n");
    info.append(getGrammar());

    return info.toString();
  }

  /**
   * Returns the textual description of the filter grammar, i.e., the
   * production rules followed by the notes on the match operators.
   *
   * @return		the grammar text
   */
  public String getGrammar() {
    // both parts are compile-time constants, so this is a constant as well
    return GRAMMAR_RULES + MATCH_OPERATOR_NOTES;
  }

  /**
   * Returns the expression that is used by default: a regular expression
   * match of the "text" field against the pattern ".*".
   *
   * @return		the default expression
   */
  protected String getDefaultExpression() {
    return "text~\".*\"";
  }

  /**
   * Returns the tip text for the expression property.
   *
   * @return 		the tip text
   */
  public String expressionTipText() {
    return "The filter expression to evaluate.";
  }

  /**
   * Turns the string representation of a symbol into the object that gets
   * used as symbol value. Here, the string is used as is.
   *
   * @param name	the name of the symbol (not used)
   * @param value	the string representation of the symbol
   * @return		the unmodified value string
   */
  protected Object initializeSymbol(String name, String value) {
    return value;
  }

  /**
   * Evaluates the current expression (m_Expression, which is not declared in
   * this class; presumably inherited) with the supplied symbols by delegating
   * to {@link #evaluate(String, HashMap)}.
   *
   * @param symbols	the symbols to use
   * @return		the result of the evaluation
   * @throws Exception	if evaluation fails
   */
  protected Boolean doEvaluate(HashMap symbols) throws Exception {
    boolean	result;

    result = evaluate(m_Expression, symbols);

    return result;
  }

  /**
   * Feeds the expression through the scanner/parser pair for Twitter filter
   * expressions and returns the result that the parser reports after parsing.
   *
   * @param expr	the expression to evaluate
   * @param symbols	the symbol/value mapping handed to the parser
   * @return		the result obtained from the parser
   * @throws Exception	if something goes wrong
   */
  public static boolean evaluate(String expr, HashMap symbols) throws Exception {
    SymbolFactory factory = new DefaultSymbolFactory();

    // NOTE(review): getBytes() without an argument encodes with the platform
    // default charset; how the scanner decodes the stream is not visible
    // here -- confirm before relying on non-ASCII expressions.
    Scanner scanner = new Scanner(new ByteArrayInputStream(expr.getBytes()), factory);

    Parser parser = new Parser(scanner, factory);
    parser.setSymbols(symbols);
    parser.parse();

    return parser.getResult();
  }

  /**
   * Command-line entry point: hands this class and the arguments over to
   * runEvaluator.
   *
   * @param args	the command-line arguments
   */
  public static void main(String[] args) {
    runEvaluator(TwitterFilter.class, args);
  }
}
