/*
 * Decompiled with CFR 0.152.
 */
package adams.flow.transformer.tokenizer;

import adams.core.License;
import adams.core.annotation.MixedCopyright;
import adams.flow.transformer.tokenizer.AbstractTokenizer;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

@MixedCopyright(copyright="2013 StackExchange", author="Yaniv.H", license=License.CC_BY_SA_25, url="http://stackoverflow.com/a/19464001")
public class StanfordPTBTokenizer
extends AbstractTokenizer {
    private static final long serialVersionUID = 4043221889853222507L;
    protected String m_TokenizerOptions;
    protected transient TokenizerFactory m_TokenizerFactory;

    public String globalInfo() {
        return "Uses Stanford's PTBTokenizer.\n\nFor more details on the options see:\nhttp://nlp.stanford.edu/software/tokenizer.shtml";
    }

    public void defineOptions() {
        super.defineOptions();
        this.m_OptionManager.add("tokenizer-options", "tokenizerOptions", (Object)"americanize=false,unicodeQuotes=true,unicodeEllipsis=true");
    }

    protected void reset() {
        super.reset();
        this.m_TokenizerFactory = null;
    }

    public void setTokenizerOptions(String value) {
        this.m_TokenizerOptions = value;
        this.reset();
    }

    public String getTokenizerOptions() {
        return this.m_TokenizerOptions;
    }

    public String tokenizerOptionsTipText() {
        return "The tokenizer options to use.";
    }

    protected TokenizerFactory getTokenizerFactory() {
        if (this.m_TokenizerFactory == null) {
            this.m_TokenizerFactory = PTBTokenizer.factory((LexedTokenFactory)new CoreLabelTokenFactory(), (String)this.m_TokenizerOptions);
        }
        return this.m_TokenizerFactory;
    }

    @Override
    protected List<String> doTokenize(String str) {
        ArrayList<String> result = new ArrayList<String>();
        StringReader reader = new StringReader(str);
        Tokenizer tokenizer = this.getTokenizerFactory().getTokenizer((Reader)reader);
        while (tokenizer.hasNext()) {
            result.add(((CoreLabel)tokenizer.next()).value());
        }
        return result;
    }
}

