/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.syntax.opennlp;

import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.util.Span;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.util.IOUtil;
import org.cleartk.util.ParamUtil;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.descriptor.TypeCapability;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.factory.initializable.InitializableFactory;
import org.uimafit.util.JCasUtil;

/**
 * UIMA annotator that splits text into sentences using an OpenNLP
 * {@link SentenceDetector} and adds one annotation per sentence to the CAS.
 *
 * <p>Sentence boundaries come from two sources that are merged: the start
 * offsets reported by the OpenNLP model, and the end of every run of blank
 * lines (see {@link #multipleNewlinesRegex}). Leading and trailing whitespace
 * is excluded from the created annotations, and segments that contain only
 * whitespace produce no annotation.
 *
 * <p>By default the whole document text is segmented. When
 * {@link #PARAM_WINDOW_CLASS_NAMES} is set, only the text covered by
 * annotations of the given types is segmented, one window at a time.
 */
@TypeCapability(outputs={"org.cleartk.token.type.Sentence"})
public class SentenceAnnotator
extends JCasAnnotator_ImplBase {
    /** Name of the mandatory parameter holding the path of the OpenNLP sentence model. */
    public static final String PARAM_SENTENCE_MODEL_PATH = ConfigurationParameterFactory.createConfigurationParameterName(SentenceAnnotator.class, "sentenceModelPath");
    @ConfigurationParameter(mandatory=true, description="provides the path of the OpenNLP sentence segmenter model file")
    private String sentenceModelPath;
    /** Name of the optional parameter selecting the annotation type created for each sentence. */
    public static final String PARAM_SENTENCE_TYPE_NAME = ConfigurationParameterFactory.createConfigurationParameterName(SentenceAnnotator.class, "sentenceTypeName");
    /** Name of the optional parameter listing the window annotation types to segment. */
    public static final String PARAM_WINDOW_CLASS_NAMES = ConfigurationParameterFactory.createConfigurationParameterName(SentenceAnnotator.class, "windowClassNames");
    @ConfigurationParameter(mandatory=false, description="provides an array of the annotation types that will be processed by this sentence annotator.  If the parameter is not filled, then SentenceAnnotator will process on the contents of jCas.getDocumentText().  It us up to the caller to ensure annotations do not overlap.")
    private String[] windowClassNames;
    // NOTE(review): the description text below names org.cleartk.type.Sentence, but the
    // declared defaultValue is org.cleartk.token.type.Sentence; the defaultValue is what applies.
    @ConfigurationParameter(description="class type of the sentences that are created by this annotator. If this parameter is not filled, then sentencesof type org.cleartk.type.Sentence will be created.", defaultValue={"org.cleartk.token.type.Sentence"})
    private String sentenceTypeName;
    /** Annotation class resolved from {@code sentenceTypeName} during initialization. */
    Class<? extends Annotation> sentenceClass;
    /** Window annotation classes; stays {@code null} when no window types are configured. */
    protected List<Class<? extends Annotation>> windowClasses;
    /** The {@code (JCas, int, int)} constructor of {@link #sentenceClass}. */
    Constructor<? extends Annotation> sentenceConstructor;
    /** Regex matching a run of two or more newlines together with any surrounding whitespace. */
    public static final String multipleNewlinesRegex = "\\s*\\n\\s*\\n\\s*";
    SentenceDetector sentenceDetector;
    Pattern multipleNewlinesPattern;
    Pattern leadingWhitespacePattern;
    Pattern trailingWhitespacePattern;

    /**
     * Builds a primitive analysis engine description for this annotator.
     *
     * <p>The model path and the window class names are obtained through
     * {@code ParamUtil.getParameterValue}, passing {@code "/models/en-sent.bin"}
     * and {@code null} respectively as the second argument.
     *
     * @return the description of this annotator
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(SentenceAnnotator.class, new Object[]{PARAM_SENTENCE_MODEL_PATH, ParamUtil.getParameterValue(PARAM_SENTENCE_MODEL_PATH, "/models/en-sent.bin"), PARAM_WINDOW_CLASS_NAMES, ParamUtil.getParameterValue(PARAM_WINDOW_CLASS_NAMES, null)});
    }

    /**
     * Resolves the configured annotation classes, loads the OpenNLP sentence
     * model and compiles the whitespace patterns used during processing.
     *
     * @param uimaContext the UIMA context supplying the configuration parameters
     * @throws ResourceInitializationException if a class cannot be resolved, the
     *         sentence type lacks a {@code (JCas, int, int)} constructor, or the
     *         model cannot be read
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        try {
            this.sentenceClass = InitializableFactory.getClass(this.sentenceTypeName, Annotation.class);
            this.sentenceConstructor = this.sentenceClass.getConstructor(JCas.class, Integer.TYPE, Integer.TYPE);
            if (this.windowClassNames != null && this.windowClassNames.length > 0) {
                this.windowClasses = new ArrayList<Class<? extends Annotation>>();
                for (String windowClassName : this.windowClassNames) {
                    this.windowClasses.add(InitializableFactory.getClass(windowClassName, Annotation.class));
                }
            }
            InputStream modelInputStream = IOUtil.getInputStream(SentenceAnnotator.class, this.sentenceModelPath);
            try {
                SentenceModel model = new SentenceModel(modelInputStream);
                this.sentenceDetector = new SentenceDetectorME(model);
            }
            finally {
                // Release the model stream whether or not loading succeeded.
                if (modelInputStream != null) {
                    modelInputStream.close();
                }
            }
            this.multipleNewlinesPattern = Pattern.compile(multipleNewlinesRegex, Pattern.MULTILINE | Pattern.DOTALL);
            this.leadingWhitespacePattern = Pattern.compile("^\\s+");
            this.trailingWhitespacePattern = Pattern.compile("\\s+$");
        }
        catch (Exception e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Segments either the whole document text or, when window classes are
     * configured, the covered text of every window annotation.
     *
     * @param jCas the CAS to read text from and add sentence annotations to
     * @throws AnalysisEngineProcessException if a sentence annotation cannot be created
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        if (this.windowClasses == null) {
            this.processText(jCas, jCas.getDocumentText(), 0);
            return;
        }
        for (Class<? extends Annotation> windowClass : this.windowClasses) {
            // Iterate over a snapshot: processText adds annotations to the indexes
            // while this loop runs.
            List<Annotation> windows = new ArrayList<Annotation>(JCasUtil.select(jCas, windowClass));
            for (Annotation window : windows) {
                this.processText(jCas, window.getCoveredText(), window.getBegin());
            }
        }
    }

    /**
     * Creates sentence annotations for one piece of text.
     *
     * @param jCas the CAS receiving the annotations
     * @param text the text to segment; {@code null} yields no annotations
     * @param textOffset offset of {@code text} within the document, added to
     *        every annotation's begin and end
     * @throws AnalysisEngineProcessException if an annotation cannot be created
     */
    protected void processText(JCas jCas, String text, int textOffset) throws AnalysisEngineProcessException {
        if (text == null) {
            // Nothing to segment (e.g. no document text has been set).
            return;
        }
        List<Integer> sentenceOffsets = this.getSentenceOffsets(text);
        int begin = 0;
        Matcher leading = this.leadingWhitespacePattern.matcher(text);
        if (leading.find()) {
            // Skip whitespace at the start of the text.
            begin += leading.group().length();
        }
        try {
            for (int offset : sentenceOffsets) {
                this.addSentence(jCas, text, textOffset, begin, offset);
                begin = offset;
            }
            // Whatever follows the last boundary forms the final sentence.
            if (begin < text.length()) {
                this.addSentence(jCas, text, textOffset, begin, text.length());
            }
        }
        catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Adds a sentence annotation for {@code text[begin, end)} with trailing
     * whitespace removed, unless the segment is blank.
     */
    private void addSentence(JCas jCas, String text, int textOffset, int begin, int end) throws Exception {
        String sentenceText = text.substring(begin, end);
        if (sentenceText.trim().length() == 0) {
            return;
        }
        Matcher trailing = this.trailingWhitespacePattern.matcher(sentenceText);
        if (trailing.find()) {
            end -= trailing.group().length();
        }
        this.sentenceConstructor.newInstance(jCas, textOffset + begin, textOffset + end).addToIndexes();
    }

    /**
     * Collects candidate sentence boundaries for {@code text}: the end of each
     * blank-line run plus the start of each span reported by the sentence
     * detector.
     *
     * @param text the text to analyse
     * @return the boundary offsets in ascending order; duplicates are possible
     */
    private List<Integer> getSentenceOffsets(String text) {
        List<Integer> offsets = new ArrayList<Integer>();
        Matcher blankLines = this.multipleNewlinesPattern.matcher(text);
        while (blankLines.find()) {
            offsets.add(blankLines.end());
        }
        for (Span span : this.sentenceDetector.sentPosDetect(text)) {
            offsets.add(span.getStart());
        }
        Collections.sort(offsets);
        return offsets;
    }
}

