package adams.flow.transformer.splitter;

import adams.core.License;
import adams.core.annotation.MixedCopyright;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.util.StringUtils;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Document-to-sentences splitter that delegates to Stanford's PTBTokenizer.
 * <p>
 * The tokenizer is configured through a single option string (see
 * {@link #setSplitterOptions(String)}); the tokenizer factory built from that
 * string is cached and thrown away again whenever the object is reset.
 * <p>
 * For details on the supported options see
 * http://nlp.stanford.edu/software/tokenizer.shtml
 */
@MixedCopyright(copyright = "2013 StackExchange", author = "Yaniv.H", license = License.CC_BY_SA_25, url = "http://stackoverflow.com/a/19464001")
public class StanfordPTBTokenizer extends AbstractDocumentToSentences {

    /** for serialization. */
    private static final long serialVersionUID = 4043221889853222507L;

    /** the option string handed to the PTBTokenizer factory. */
    protected String m_SplitterOptions;

    /** the lazily created tokenizer factory; transient, so never serialized. */
    protected transient TokenizerFactory m_TokenizerFactory;

    /**
     * Returns a string describing the object.
     *
     * @return a short description including a link to the tokenizer options
     */
    public String globalInfo() {
        return "Uses Stanford's PTBTokenizer.\n\n"
            + "For more details on the options see:\n"
            + "http://nlp.stanford.edu/software/tokenizer.shtml";
    }

    /**
     * Adds the "splitter-options" option (bound to the "splitterOptions"
     * property) on top of the options defined by the superclass.
     */
    public void defineOptions() {
        super.defineOptions();

        // NOTE(review): the third argument is presumably the option's default value -- confirm against the option manager API.
        m_OptionManager.add(
            "splitter-options",
            "splitterOptions",
            "normalizeParentheses=false,normalizeOtherBrackets=false,invertible=true");
    }

    /**
     * Resets the object and discards the cached tokenizer factory, so that the
     * next call to {@link #getTokenizerFactory()} builds a new one.
     */
    protected void reset() {
        super.reset();

        m_TokenizerFactory = null;
    }

    /**
     * Sets the option string for the tokenizer and resets the object.
     *
     * @param str the options to use
     */
    public void setSplitterOptions(String str) {
        m_SplitterOptions = str;
        reset();
    }

    /**
     * Returns the option string currently in use for the tokenizer.
     *
     * @return the options
     */
    public String getSplitterOptions() {
        return m_SplitterOptions;
    }

    /**
     * Returns the tip text for the splitter options property.
     *
     * @return the tip text
     */
    public String splitterOptionsTipText() {
        return "The splitter options to use.";
    }

    /**
     * Returns the tokenizer factory, creating it from the current splitter
     * options on first use.
     *
     * @return the cached or newly created factory
     */
    protected TokenizerFactory getTokenizerFactory() {
        if (m_TokenizerFactory != null) {
            return m_TokenizerFactory;
        }

        m_TokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), m_SplitterOptions);
        return m_TokenizerFactory;
    }

    /**
     * Splits the document text into sentences, implementing the hook declared
     * in AbstractDocumentToSentences.
     * <p>
     * The text is fed through a DocumentPreprocessor that uses the factory from
     * {@link #getTokenizerFactory()}; every token list it yields is turned back
     * into a string via StringUtils.joinWithOriginalWhiteSpace.
     *
     * @param str the document text to split
     * @return the sentences, in the order the preprocessor produced them
     */
    @Override
    protected List<String> doSplit(String str) {
        List<String> sentences = new ArrayList<String>();

        DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(str));
        preprocessor.setTokenizerFactory(getTokenizerFactory());

        // NOTE(review): joinWithOriginalWhiteSpace presumably relies on the tokenizer
        // being invertible (the default options contain invertible=true) -- confirm.
        for (List tokens : preprocessor) {
            sentences.add(StringUtils.joinWithOriginalWhiteSpace(tokens));
        }

        return sentences;
    }
}
