/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.token.pos.genia;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.uima.UimaContext;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.jcas.JCas;
import org.apache.uima.pear.util.FileUtil;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.cleartk.token.pos.genia.util.GeniaPOSParser;
import org.cleartk.token.pos.genia.util.GeniaParse;
import org.cleartk.token.pos.genia.util.GeniaSentence;
import org.cleartk.token.pos.genia.util.GeniaTag;
import org.cleartk.token.pos.genia.util.Span;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.cleartk.util.ViewURIUtil;
import org.jdom2.JDOMException;
import org.uimafit.component.JCasCollectionReader_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.descriptor.SofaCapability;
import org.uimafit.factory.CollectionReaderFactory;
import org.uimafit.factory.ConfigurationParameterFactory;

/**
 * Collection reader that walks a GENIA part-of-speech corpus file and emits one
 * CAS per parsed article.
 *
 * <p>For every article the reader:
 * <ul>
 *   <li>sets the article text on the initial view,</li>
 *   <li>optionally adds {@link Token} annotations (optionally carrying the
 *       part-of-speech label) and {@link Sentence} annotations,</li>
 *   <li>records a URI built from the article's medline identifier, and</li>
 *   <li>creates a {@code "GeniaPOSView"} view holding the article's XML.</li>
 * </ul>
 *
 * <p>When an article-id list file is configured, only articles whose medline
 * identifier appears in that file are emitted.
 *
 * <p>This class is marked deprecated; this file does not name a replacement.
 */
@Deprecated
@SofaCapability(outputSofas={"UriView", "GeniaPOSView"})
public class GeniaPosGoldReader
extends JCasCollectionReader_ImplBase {
    /** Name of the configuration parameter bound to {@link #geniaCorpusFile}. */
    public static final String PARAM_GENIA_CORPUS_FILE = ConfigurationParameterFactory.createConfigurationParameterName(GeniaPosGoldReader.class, "geniaCorpusFile");
    @ConfigurationParameter(description="names the file that is the Genia corpus to be loaded. A good value is probably '.../GENIAcorpus3.02.pos.xml'.  Please see README in this directory for edits that you may need to make to this file manually.", mandatory=true)
    private File geniaCorpusFile;

    /** Name of the configuration parameter bound to {@link #loadSentences}. */
    public static final String PARAM_LOAD_SENTENCES = ConfigurationParameterFactory.createConfigurationParameterName(GeniaPosGoldReader.class, "loadSentences");
    @ConfigurationParameter(description="determines whether sentence annotations will be added from the Genia corpus.", defaultValue={"true"})
    private boolean loadSentences = true;

    /** Name of the configuration parameter bound to {@link #loadTokens}. */
    public static final String PARAM_LOAD_TOKENS = ConfigurationParameterFactory.createConfigurationParameterName(GeniaPosGoldReader.class, "loadTokens");
    @ConfigurationParameter(description="determines whether tokens annotations will be added from the Genia corpus. ", defaultValue={"true"})
    private boolean loadTokens = true;

    /** Name of the configuration parameter bound to {@link #loadPosTags}. */
    public static final String PARAM_LOAD_POS_TAGS = ConfigurationParameterFactory.createConfigurationParameterName(GeniaPosGoldReader.class, "loadPosTags");
    @ConfigurationParameter(description="determines whether the part of speech tags assigned to each token in the genia corpus will be loaded. The default value of 'true' is used if this parameter is unspecified. If 'loadTokens' is 'false', then 'loadPOSTags' will be treated as 'false' regardless of what is given in the descriptor file.", defaultValue={"true"})
    private boolean loadPosTags = true;

    /** Name of the configuration parameter bound to {@link #articleIdsListFile}. */
    public static final String PARAM_ARTICLE_IDS_LIST_FILE = ConfigurationParameterFactory.createConfigurationParameterName(GeniaPosGoldReader.class, "articleIdsListFile");
    @ConfigurationParameter(description="names the file used to specify the article ids that should be read in")
    File articleIdsListFile;

    /** True when {@link #articleIdsListFile} was supplied and articles must be filtered. */
    private boolean filterArticles;
    /** Article ids loaded from {@link #articleIdsListFile}; empty when no file was given. */
    private Set<String> articleIds;
    /** Source of parsed articles, created in {@link #initialize(UimaContext)}. */
    private GeniaPOSParser parser;
    /** The article staged by {@link #hasNext()} and consumed by {@link #getNext(JCas)}; null when none is staged. */
    private GeniaParse parse;
    /** Number of articles staged for emission so far. */
    private int progress = 0;

    /** Relative paths of the ten test-fold article-id list files. */
    public static String[] TEST_FOLDS = new String[]{"resources/genia/article_ids/fold-1-test.txt", "resources/genia/article_ids/fold-2-test.txt", "resources/genia/article_ids/fold-3-test.txt", "resources/genia/article_ids/fold-4-test.txt", "resources/genia/article_ids/fold-5-test.txt", "resources/genia/article_ids/fold-6-test.txt", "resources/genia/article_ids/fold-7-test.txt", "resources/genia/article_ids/fold-8-test.txt", "resources/genia/article_ids/fold-9-test.txt", "resources/genia/article_ids/fold-10-test.txt"};
    /** Relative paths of the ten train-fold article-id list files. */
    public static String[] TRAIN_FOLDS = new String[]{"resources/genia/article_ids/fold-1-train.txt", "resources/genia/article_ids/fold-2-train.txt", "resources/genia/article_ids/fold-3-train.txt", "resources/genia/article_ids/fold-4-train.txt", "resources/genia/article_ids/fold-5-train.txt", "resources/genia/article_ids/fold-6-train.txt", "resources/genia/article_ids/fold-7-train.txt", "resources/genia/article_ids/fold-8-train.txt", "resources/genia/article_ids/fold-9-train.txt", "resources/genia/article_ids/fold-10-train.txt"};

    /**
     * Loads the optional article-id filter and opens the corpus parser.
     *
     * @param context the UIMA context (not consulted directly by this method)
     * @throws ResourceInitializationException if the id list or the corpus file
     *         cannot be read or parsed
     */
    public void initialize(UimaContext context) throws ResourceInitializationException {
        this.articleIds = new HashSet<String>();
        try {
            if (this.articleIdsListFile == null) {
                this.filterArticles = false;
            } else {
                this.filterArticles = true;
                String[] ids = FileUtil.loadListOfStrings(this.articleIdsListFile);
                for (String id : ids) {
                    this.articleIds.add(id);
                }
            }
            this.parser = new GeniaPOSParser(this.geniaCorpusFile);
            // POS tags live on tokens, so they can only be loaded when tokens are.
            this.loadPosTags = this.loadTokens && this.loadPosTags;
        }
        catch (IOException ioe) {
            throw new ResourceInitializationException(ioe);
        }
        catch (JDOMException je) {
            throw new ResourceInitializationException(je);
        }
    }

    /**
     * Populates {@code jCas} from the article staged by {@link #hasNext()} and
     * then clears the staged article.
     *
     * @param jCas the CAS to fill
     * @throws IOException propagated from {@link #hasNext()}
     * @throws CollectionException if no article is available, or if a view
     *         cannot be obtained or created
     */
    public void getNext(JCas jCas) throws IOException, CollectionException {
        if (!this.hasNext()) {
            throw new CollectionException("Should not be calling getNext() because hasNext returns false", null);
        }
        try {
            JCas annotationsView = jCas.getView("_InitialView");
            annotationsView.setDocumentText(this.parse.getText());

            for (GeniaSentence geniaSentence : this.parse.getSentences()) {
                if (this.loadTokens) {
                    this.addTokens(annotationsView, geniaSentence);
                }
                if (this.loadSentences) {
                    Span sentenceSpan = geniaSentence.getSpan();
                    Sentence sentence = new Sentence(annotationsView, sentenceSpan.getBegin(), sentenceSpan.getEnd());
                    sentence.addToIndexes();
                }
            }

            ViewURIUtil.setURI(jCas, new File(this.parse.getMedline()).toURI());
            JCas geniaView = jCas.createView("GeniaPOSView");
            geniaView.setDocumentText(this.parse.getXml());
            // Mark the staged article as consumed so hasNext() advances the parser.
            this.parse = null;
        }
        catch (CASException ce) {
            throw new CollectionException(ce);
        }
    }

    /** Adds one Token per POS tag of the sentence, using the tag's first span as the token extent. */
    private void addTokens(JCas view, GeniaSentence geniaSentence) {
        for (GeniaTag posTag : geniaSentence.getPosTags()) {
            Span tokenSpan = posTag.getSpans().get(0);
            Token token = new Token(view, tokenSpan.getBegin(), tokenSpan.getEnd());
            if (this.loadPosTags) {
                token.setPos(posTag.getLabel());
            }
            token.addToIndexes();
        }
    }

    /** Does nothing; this reader releases no resources here. */
    public void close() throws IOException {
    }

    /**
     * Reports how many articles have been staged so far.
     *
     * @return a single-element array; the total is the number of configured
     *         article ids when filtering, otherwise the fixed value 2000
     *         (presumably the size of the GENIA corpus; confirm)
     */
    public Progress[] getProgress() {
        if (this.filterArticles) {
            return new Progress[]{new ProgressImpl(this.progress, this.articleIds.size(), "entities")};
        }
        return new Progress[]{new ProgressImpl(this.progress, 2000, "entities")};
    }

    /**
     * Stages the next article to emit, skipping articles rejected by the id
     * filter when one is configured.
     *
     * <p>Repeated calls without an intervening {@link #getNext(JCas)} return the
     * same answer: an article is staged only after it passes the filter, so a
     * rejected article at the end of the corpus is never left staged.
     *
     * @return true if an article is staged and ready for {@link #getNext(JCas)}
     */
    public boolean hasNext() throws IOException, CollectionException {
        if (this.parse != null) {
            return true;
        }
        while (this.parser.hasNext()) {
            // Keep the candidate local until it passes the filter; assigning it to
            // this.parse first would let a rejected trailing article leak out on a
            // later hasNext()/getNext() call.
            GeniaParse candidate = this.parser.next();
            if (this.filterArticles && !this.articleIds.contains(candidate.getMedline())) {
                continue;
            }
            this.parse = candidate;
            ++this.progress;
            return true;
        }
        return false;
    }

    /**
     * Creates a reader over the given corpus file with all other parameters left
     * at their defaults.
     *
     * @param geniaCorpusFile value for {@link #PARAM_GENIA_CORPUS_FILE}
     * @return the configured collection reader
     * @throws ResourceInitializationException if the reader cannot be created
     */
    public static CollectionReader getDescription(String geniaCorpusFile) throws ResourceInitializationException {
        return CollectionReaderFactory.createCollectionReader(GeniaPosGoldReader.class, new Object[]{PARAM_GENIA_CORPUS_FILE, geniaCorpusFile});
    }
}

