MathExpressionClassifier |
Simple classifier that uses a pre-defined formula that can make use of attribute values using their names.
Grammar:
expr_list ::= '=' expr_list expr_part | expr_part ;
expr_part ::= expr ;
expr ::= ( expr )
# data types
| number
| string
| boolean
| date
# constants
| true
| false
| pi
| e
| now()
| today()
# negating numeric value
| -expr
# comparisons
| expr < expr
| expr <= expr
| expr > expr
| expr >= expr
| expr = expr
| expr != expr (or: expr <> expr)
# boolean operations
| ! expr (or: not expr)
| expr & expr (or: expr and expr)
| expr | expr (or: expr or expr)
| if[else] ( expr , expr (if true) , expr (if false) )
| ifmissing ( variable , expr (default value if variable is missing) )
| isNaN ( expr )
# arithmetics
| expr + expr
| expr - expr
| expr * expr
| expr / expr
| expr ^ expr (power of)
| expr % expr (modulo)
;
# numeric functions
| abs ( expr )
| sqrt ( expr )
| cbrt ( expr )
| log ( expr )
| log10 ( expr )
| exp ( expr )
| sin ( expr )
| sinh ( expr )
| cos ( expr )
| cosh ( expr )
| tan ( expr )
| tanh ( expr )
| atan ( expr )
| atan2 ( exprY , exprX )
| hypot ( exprX , exprY )
| signum ( expr )
| rint ( expr )
| floor ( expr )
| pow[er] ( expr , expr )
| ceil ( expr )
| min ( expr1 , expr2 )
| max ( expr1 , expr2 )
| year ( expr )
| month ( expr )
| day ( expr )
| hour ( expr )
| minute ( expr )
| second ( expr )
| weekday ( expr )
| weeknum ( expr )
# string functions
| substr ( expr , start [, end] )
| left ( expr , len )
| mid ( expr , start , len )
| right ( expr , len )
| rept ( expr , count )
| concatenate ( expr1 , expr2 [, expr3-5] )
| lower[case] ( expr )
| upper[case] ( expr )
| trim ( expr )
| matches ( expr , regexp )
| trim ( expr )
| len[gth] ( str )
| find ( search , expr [, pos] )
| replace ( str , pos , len , newstr )
| substitute ( str , find , replace [, occurrences] )
;
Notes:
- Variables are either all upper case letters (e.g., "ABC") or any character apart from "]" enclosed by "[" and "]" (e.g., "[Hello World]").
- 'start' and 'end' for function 'substr' are indices that start at 1.
- Index 'end' for function 'substr' is excluded (like Java's 'String.substring(int,int)' method)
- Line comments start with '#'.
- Semi-colons (';') or commas (',') can be used as separator in the formulas,
e.g., 'pow(2,2)' is equivalent to 'pow(2;2)'
- dates have to be of format 'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss'
- times have to be of format 'HH:mm:ss' or 'yyyy-MM-dd HH:mm:ss'
- the characters in square brackets in function names are optional:
e.g.
|