package adams.flow.transformer.tokenizer;

import adams.core.License;
import adams.core.annotation.MixedCopyright;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

/**
 * Tokenizer that delegates to Stanford's {@link PTBTokenizer}.
 * <p>
 * The tokenizer factory is built lazily from the configured option string
 * and cached until the options change or the object is reset.
 */
@MixedCopyright(copyright = "2013 StackExchange", author = "Yaniv.H", license = License.CC_BY_SA_25, url = "http://stackoverflow.com/a/19464001")
public class StanfordPTBTokenizer extends AbstractTokenizer {
    private static final long serialVersionUID = 4043221889853222507L;

    /** The option string handed to {@link PTBTokenizer#factory}. */
    protected String m_TokenizerOptions;

    /** Cached factory; transient, so it is rebuilt on demand by {@link #getTokenizerFactory()}. */
    protected transient TokenizerFactory<CoreLabel> m_TokenizerFactory;

    /**
     * Returns a short description of this tokenizer.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Uses Stanford's PTBTokenizer.\n\nFor more details on the options see:\nhttp://nlp.stanford.edu/software/tokenizer.shtml";
    }

    /**
     * Registers the {@code tokenizer-options} option (property
     * {@code tokenizerOptions}) in addition to the options of the superclass.
     */
    public void defineOptions() {
        super.defineOptions();
        this.m_OptionManager.add("tokenizer-options", "tokenizerOptions", "americanize=false,unicodeQuotes=true,unicodeEllipsis=true");
    }

    /**
     * Resets the superclass state and discards the cached tokenizer factory,
     * so the next tokenization builds a new one from the current options.
     */
    protected void reset() {
        super.reset();
        this.m_TokenizerFactory = null;
    }

    /**
     * Sets the option string for the PTB tokenizer and resets this object.
     *
     * @param str the tokenizer options
     */
    public void setTokenizerOptions(String str) {
        this.m_TokenizerOptions = str;
        reset();
    }

    /**
     * Returns the option string currently in use.
     *
     * @return the tokenizer options
     */
    public String getTokenizerOptions() {
        return this.m_TokenizerOptions;
    }

    /**
     * Returns the help text for the {@code tokenizerOptions} property.
     *
     * @return the tip text
     */
    public String tokenizerOptionsTipText() {
        return "The tokenizer options to use.";
    }

    /**
     * Returns the tokenizer factory, creating it from the current options
     * if no cached instance exists.
     *
     * @return the factory producing {@link CoreLabel} tokenizers
     */
    protected TokenizerFactory<CoreLabel> getTokenizerFactory() {
        if (this.m_TokenizerFactory == null) {
            this.m_TokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), this.m_TokenizerOptions);
        }
        return this.m_TokenizerFactory;
    }

    /**
     * Splits the string into tokens using the PTB tokenizer.
     *
     * @param str the string to tokenize
     * @return the {@link CoreLabel#value()} of each token, in the order produced
     */
    @Override // adams.flow.transformer.tokenizer.AbstractTokenizer
    protected List<String> doTokenize(String str) {
        List<String> tokens = new ArrayList<String>();
        Tokenizer<CoreLabel> tokenizer = getTokenizerFactory().getTokenizer(new StringReader(str));
        // The typed tokenizer yields CoreLabel directly, so no unchecked cast is needed.
        while (tokenizer.hasNext()) {
            tokens.add(tokenizer.next().value());
        }
        return tokens;
    }
}
