/*
 * Decompiled with CFR 0.152.
 */
package weka.core.tokenizers;

import cmu.arktweetnlp.Twokenize;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Vector;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.WekaOptionUtils;
import weka.core.tokenizers.Tokenizer;
import weka.core.tokenizers.cleaners.PassThrough;
import weka.core.tokenizers.cleaners.TokenCleaner;

public class TwitterNLPTokenizer
extends Tokenizer {
    private static final long serialVersionUID = 4352757127093531518L;
    public static final String CLEANER = "cleaner";
    public static final String USE_LOWER_CASE = "use-lower-case";
    protected transient Iterator<String> m_TokenIterator;
    protected boolean m_UseLowerCase = false;
    protected TokenCleaner m_Cleaner = this.getDefaultCleaner();

    public String globalInfo() {
        return "Tokenizer based on TwitterNLP's Twokenize.\n\nFor more information see:\nhttp://www.ark.cs.cmu.edu/TweetNLP/\n\nOriginal code from:\nhttps://github.com/felipebravom/SentimentDomain/blob/master/src/weka/core/tokenizers/TwitterNLPTokenizer.java";
    }

    public Enumeration listOptions() {
        Vector result = new Vector();
        WekaOptionUtils.addFlag(result, (String)this.useLowerCaseTipText(), (String)USE_LOWER_CASE);
        WekaOptionUtils.addOption(result, (String)this.cleanerTipText(), (String)this.getDefaultCleaner().getClass().getName(), (String)CLEANER);
        WekaOptionUtils.add(result, (Enumeration)super.listOptions());
        return WekaOptionUtils.toEnumeration(result);
    }

    public void setOptions(String[] options) throws Exception {
        this.setUseLowerCase(Utils.getFlag((String)USE_LOWER_CASE, (String[])options));
        this.setCleaner((TokenCleaner)WekaOptionUtils.parse((String[])options, (String)CLEANER, (OptionHandler)this.getDefaultCleaner()));
        super.setOptions(options);
    }

    public String[] getOptions() {
        ArrayList result = new ArrayList();
        WekaOptionUtils.add(result, (String)USE_LOWER_CASE, (boolean)this.getUseLowerCase());
        WekaOptionUtils.add(result, (String)CLEANER, (OptionHandler)this.getCleaner());
        WekaOptionUtils.add(result, (String[])super.getOptions());
        return WekaOptionUtils.toArray(result);
    }

    public void setUseLowerCase(boolean value) {
        this.m_UseLowerCase = value;
    }

    public boolean getUseLowerCase() {
        return this.m_UseLowerCase;
    }

    public String useLowerCaseTipText() {
        return "If enabled, the tweet is converted to lower case before tokenized.";
    }

    protected TokenCleaner getDefaultCleaner() {
        return new PassThrough();
    }

    public void setCleaner(TokenCleaner value) {
        this.m_Cleaner = value;
    }

    public TokenCleaner getCleaner() {
        return this.m_Cleaner;
    }

    public String cleanerTipText() {
        return "The token cleaner to use (after optional lower case).";
    }

    public boolean hasMoreElements() {
        return this.m_TokenIterator.hasNext();
    }

    public String nextElement() {
        return this.m_TokenIterator.next();
    }

    public void tokenize(String s) {
        if (this.m_UseLowerCase) {
            s = s.toLowerCase();
        }
        ArrayList<String> words = Twokenize.tokenizeRawTweetText((String)s);
        if (!(this.m_Cleaner instanceof PassThrough)) {
            ArrayList<String> clean = new ArrayList<String>();
            for (String word : words) {
                if ((word = this.m_Cleaner.clean(word)) == null) continue;
                clean.add(word);
            }
            words = clean;
        }
        this.m_TokenIterator = words.iterator();
    }

    public String getRevision() {
        return RevisionUtils.extract((String)"$Revision: 10203 $");
    }

    public static void main(String[] args) {
        TwitterNLPTokenizer.runTokenizer((Tokenizer)new TwitterNLPTokenizer(), (String[])args);
    }
}

