/*
 * Decompiled with CFR 0.152.
 */
package adams.flow.transformer.splitter;

import adams.core.License;
import adams.core.annotation.MixedCopyright;
import adams.flow.transformer.splitter.AbstractDocumentToSentences;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.util.StringUtils;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

/**
 * Document-to-sentences splitter backed by Stanford's {@link PTBTokenizer}.
 * <p>
 * The document is fed through a {@link DocumentPreprocessor} that uses a
 * tokenizer factory built from the user-supplied splitter options; every
 * sentence the preprocessor yields is turned back into a string via
 * {@link StringUtils#joinWithOriginalWhiteSpace}.
 */
@MixedCopyright(copyright="2013 StackExchange", author="Yaniv.H", license=License.CC_BY_SA_25, url="http://stackoverflow.com/a/19464001")
public class StanfordPTBTokenizer
extends AbstractDocumentToSentences {
    /** Serialization version identifier. */
    private static final long serialVersionUID = 4043221889853222507L;

    /** Option string handed to {@link PTBTokenizer#factory} when the factory is built. */
    protected String m_SplitterOptions;

    /** Cached tokenizer factory; created on demand and dropped again by {@link #reset()}. */
    protected transient TokenizerFactory m_TokenizerFactory;

    /**
     * Returns a short description of this splitter.
     *
     * @return the description text, including a link to the tokenizer documentation
     */
    public String globalInfo() {
        return "Uses Stanford's PTBTokenizer.\n\nFor more details on the options see:\nhttp://nlp.stanford.edu/software/tokenizer.shtml";
    }

    /**
     * Adds the "splitter-options" entry to the option manager, after the
     * options of the superclass have been defined.
     */
    public void defineOptions() {
        super.defineOptions();
        // NOTE(review): the third argument looks like the default value of the option - confirm against OptionManager.
        m_OptionManager.add(
            "splitter-options",
            "splitterOptions",
            (Object)"normalizeParentheses=false,normalizeOtherBrackets=false,invertible=true");
    }

    /**
     * Resets the superclass state and discards the cached tokenizer factory,
     * so that the next call to {@link #getTokenizerFactory()} builds a new one.
     */
    protected void reset() {
        super.reset();
        m_TokenizerFactory = null;
    }

    /**
     * Sets the options string for the tokenizer and resets this object, which
     * also clears the cached tokenizer factory.
     *
     * @param value the splitter options
     */
    public void setSplitterOptions(String value) {
        m_SplitterOptions = value;
        reset();
    }

    /**
     * Returns the options string currently in use for the tokenizer.
     *
     * @return the splitter options
     */
    public String getSplitterOptions() {
        return m_SplitterOptions;
    }

    /**
     * Returns the help text describing the splitter options property.
     *
     * @return the tip text
     */
    public String splitterOptionsTipText() {
        return "The splitter options to use.";
    }

    /**
     * Returns the tokenizer factory, building it from the current splitter
     * options with a {@link CoreLabelTokenFactory} if none is cached yet.
     *
     * @return the (possibly freshly created) tokenizer factory
     */
    protected TokenizerFactory getTokenizerFactory() {
        if (m_TokenizerFactory != null) {
            return m_TokenizerFactory;
        }
        m_TokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), m_SplitterOptions);
        return m_TokenizerFactory;
    }

    /**
     * Splits the document into sentences.
     *
     * @param doc the document text to split
     * @return one string per sentence, in the order the preprocessor yields them
     */
    @Override
    protected List<String> doSplit(String doc) {
        DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(doc));
        preprocessor.setTokenizerFactory(getTokenizerFactory());

        List<String> sentences = new ArrayList<String>();
        // NOTE(review): joinWithOriginalWhiteSpace presumably needs the tokens produced with
        // "invertible=true" (part of the default options) - confirm before changing the defaults.
        for (List tokens : preprocessor) {
            String sentence = StringUtils.joinWithOriginalWhiteSpace(tokens);
            sentences.add(sentence);
        }
        return sentences;
    }
}

