/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.token.pos.genia.util;

import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.cleartk.token.pos.genia.util.GeniaParse;
import org.cleartk.token.pos.genia.util.GeniaSentence;
import org.cleartk.token.pos.genia.util.GeniaTag;
import org.cleartk.token.pos.genia.util.Span;
import org.jdom2.Content;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.input.SAXBuilder;
import org.jdom2.output.XMLOutputter;

/**
 * Iterates over the {@code article} elements of a GENIA part-of-speech XML
 * file and converts each one into a {@link GeniaParse}.
 *
 * <p>For every article the parser records the serialized XML, the identifier
 * found in {@code articleinfo/bibliomisc}, the plain text reconstructed from
 * the title and abstract sentences, and the part-of-speech tags with their
 * character offsets into that reconstructed text.
 *
 * <p>An instance created with the no-argument constructor has no articles to
 * iterate over; it can still be used to call {@link #parse(Element)} directly.
 */
@Deprecated
public class GeniaPOSParser implements Iterator<GeniaParse> {
    /** Root element of the loaded document; {@code null} for the no-arg constructor. */
    Element root;
    /** Iterator over the root's {@code article} children; {@code null} for the no-arg constructor. */
    Iterator<?> articles;
    /** Not read or written anywhere in this class; kept for compatibility. */
    Set<String> posLabels = new HashSet<String>();
    /** Assigned by the file constructor; {@link #parse(Element)} does not depend on it. */
    XMLOutputter outputter;

    /**
     * Loads the given XML file and prepares iteration over its {@code article}
     * elements.
     *
     * @param xmlFile the GENIA XML file to read
     * @throws IOException if the file cannot be read
     * @throws JDOMException if the file cannot be parsed as XML
     */
    public GeniaPOSParser(File xmlFile) throws IOException, JDOMException {
        this();
        SAXBuilder builder = new SAXBuilder();
        builder.setDTDHandler(null);
        this.root = builder.build(xmlFile).getRootElement();
        this.articles = this.root.getChildren("article").iterator();
        this.outputter = new XMLOutputter();
    }

    /**
     * Creates a parser with no document loaded. {@link #hasNext()} returns
     * {@code false} on such an instance.
     */
    public GeniaPOSParser() {
    }

    /**
     * @return {@code true} if a document was loaded and it has another article
     */
    @Override
    public boolean hasNext() {
        // articles is null when the no-arg constructor was used.
        return this.articles != null && this.articles.hasNext();
    }

    /**
     * Parses and returns the next article.
     *
     * @return the parse of the next {@code article} element
     * @throws java.util.NoSuchElementException if there are no more articles
     */
    @Override
    public GeniaParse next() {
        if (!this.hasNext()) {
            throw new java.util.NoSuchElementException("no more articles");
        }
        return this.parse((Element) this.articles.next());
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("remove is not supported by GeniaPOSParser");
    }

    /**
     * Converts a single {@code article} element into a {@link GeniaParse}.
     *
     * <p>The reconstructed text is the title followed by the abstract. When
     * both are present they are separated by two newline characters, which
     * are counted in the character offsets.
     *
     * @param articleElement the {@code article} element to convert
     * @return the populated parse
     * @throws IllegalArgumentException if the article has no
     *         {@code articleinfo/bibliomisc} element or its text has no
     *         value after a colon
     * @throws RuntimeException if the element cannot be serialized, or a
     *         sentence contains an element other than {@code w}
     */
    public GeniaParse parse(Element articleElement) {
        GeniaParse parse = new GeniaParse();
        try {
            StringWriter stringWriter = new StringWriter();
            new XMLOutputter().output(articleElement, stringWriter);
            parse.setXml(stringWriter.toString());
        } catch (IOException ioe) {
            throw new RuntimeException(ioe);
        }
        parse.setMedline(extractMedline(articleElement));

        StringBuilder text = new StringBuilder();
        int offset = 0;
        Element titleElement = articleElement.getChild("title");
        Element abstractElement = articleElement.getChild("abstract");
        if (titleElement != null) {
            offset = this.parse(titleElement, parse, text, offset);
            if (abstractElement != null) {
                // Separator between title and abstract; keep offset in step with text.
                text.append("\n\n");
                offset += 2;
            }
        }
        if (abstractElement != null) {
            offset = this.parse(abstractElement, parse, text, offset);
        }
        parse.setText(text.toString());
        return parse;
    }

    /**
     * Returns the part of the {@code articleinfo/bibliomisc} text that follows
     * the first colon (and precedes any second colon).
     */
    private static String extractMedline(Element articleElement) {
        Element articleInfo = articleElement.getChild("articleinfo");
        Element biblioMisc = articleInfo == null ? null : articleInfo.getChild("bibliomisc");
        if (biblioMisc == null) {
            throw new IllegalArgumentException(
                    "article has no articleinfo/bibliomisc element: " + articleElement);
        }
        String raw = biblioMisc.getText();
        String[] parts = raw.split(":");
        if (parts.length < 2) {
            throw new IllegalArgumentException(
                    "bibliomisc text has no value after ':': \"" + raw + "\"");
        }
        return parts[1];
    }

    /**
     * Appends the text of every {@code sentence} child of the given section
     * (title or abstract) to {@code text} and records sentences and
     * part-of-speech tags on {@code parse}.
     *
     * <p>Two spaces are appended after every sentence and counted in the
     * offsets. When a POS attribute contains {@code '|'}, only the part before
     * it is used. Words tagged {@code "*"} produce no tag of their own; the
     * next non-wildcard word in the same sentence gets a tag whose span starts
     * at the first pending wildcard word. Wildcard words not followed by a
     * non-wildcard word in the same sentence contribute text but no tag.
     *
     * @param sectionElement element whose {@code sentence} children are read
     * @param parse receives the sentences and tags
     * @param text receives the reconstructed text
     * @param offset character offset at which this section starts
     * @return the offset just past the text appended by this call
     */
    private int parse(Element sectionElement, GeniaParse parse, StringBuilder text, int offset) {
        ArrayList<GeniaTag> posTags = new ArrayList<GeniaTag>();
        ArrayList<GeniaTag> wildcardTags = new ArrayList<GeniaTag>();
        for (Element sentence : sectionElement.getChildren("sentence")) {
            // A fresh list per sentence: the sentence must never end up sharing a
            // list that is cleared later, whether or not addPosTags copies it.
            ArrayList<GeniaTag> sentencePosTags = new ArrayList<GeniaTag>();
            wildcardTags.clear();
            int beginSentence = offset;
            for (Content content : sentence.getContent()) {
                if (content instanceof Text) {
                    String chunk = ((Text) content).getText();
                    text.append(chunk);
                    offset += chunk.length();
                    continue;
                }
                if (!(content instanceof Element)) {
                    continue;
                }
                Element wordElement = (Element) content;
                if (!wordElement.getName().equals("w")) {
                    throw new RuntimeException("non-word element in sentence: " + wordElement);
                }
                String wordText = wordElement.getText();
                text.append(wordText);
                int wordEnd = offset + wordText.length();

                String pos = wordElement.getAttributeValue("c");
                if (pos == null) {
                    throw new IllegalArgumentException(
                            "word element has no 'c' attribute: " + wordElement);
                }
                int bar = pos.indexOf('|');
                if (bar != -1) {
                    pos = pos.substring(0, bar);
                }

                if (pos.equals("*")) {
                    wildcardTags.add(new GeniaTag(pos, new Span(offset, wordEnd)));
                } else {
                    int tagBegin = offset;
                    if (!wildcardTags.isEmpty()) {
                        // Stretch this tag back to the first pending wildcard word.
                        tagBegin = wildcardTags.get(0).getSpans().get(0).getBegin();
                        wildcardTags.clear();
                    }
                    GeniaTag posTag = new GeniaTag(pos, new Span(tagBegin, wordEnd));
                    posTags.add(posTag);
                    sentencePosTags.add(posTag);
                }
                offset = wordEnd;
            }
            GeniaSentence geniaSentence = new GeniaSentence();
            geniaSentence.setSpan(new Span(beginSentence, offset));
            geniaSentence.addPosTags(sentencePosTags);
            parse.addSentence(geniaSentence);
            // Sentence separator; keep offset in step with text.
            text.append("  ");
            offset += 2;
        }
        parse.addPosTags(posTags);
        return offset;
    }

    /**
     * Loads the GENIA XML file named by the first argument and prints the
     * number of distinct part-of-speech labels followed by the labels in
     * sorted order.
     *
     * @param args {@code args[0]} is the path of the GENIA XML file
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("usage: GeniaPOSParser <genia-xml-file>");
            return;
        }
        try {
            System.out.print("loading GENIA...");
            String xmlFileName = args[0];
            GeniaPOSParser parser = new GeniaPOSParser(new File(xmlFileName));
            System.out.println("done.");
            HashSet<String> tags = new HashSet<String>();
            while (parser.hasNext()) {
                GeniaParse parse = parser.next();
                for (GeniaTag posTag : parse.getPosTags()) {
                    tags.add(posTag.getLabel());
                }
            }
            ArrayList<String> sortedTags = new ArrayList<String>(tags);
            Collections.sort(sortedTags);
            System.out.println("number of tags=" + sortedTags.size());
            for (String tag : sortedTags) {
                System.out.println(tag);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

