/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.attribute;

import cmu.arktweetnlp.Tagger;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.CapabilitiesHandler;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.WekaException;
import weka.core.WekaOptionUtils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.AbstractTweetContentFilter;

/**
 * Batch filter that appends one numeric attribute per part-of-speech tag known
 * to a TwitterNLP (ark-tweet-nlp) model. For every instance the text stored in
 * the content attribute (selected via {@code m_AttributeName}, which is
 * inherited from the superclass) is tokenized and tagged; each appended
 * attribute holds the relative frequency of its tag among the tokens of that
 * text.
 *
 * <p>The tagger and the tag vocabulary are transient. They are (re)loaded
 * lazily from the model file whenever they are needed and not available,
 * e.g. after the filter has been deserialized.
 */
public class TwitterNLPPos
extends AbstractTweetContentFilter {
    private static final long serialVersionUID = -6908047935900687249L;

    /** Name of the command-line option that selects the model file. */
    public static final String MODEL = "model";

    /** Prefix of the names of the generated POS attributes. */
    public static final String POS_PREFIX = "POS-";

    /** The model file to load the tagger from. */
    protected File m_Model = this.getDefaultModel();

    /** The tagger; not serialized, loaded on demand from {@link #m_Model}. */
    protected transient Tagger m_Tagger;

    /** The sorted tag names of the loaded model; not serialized. */
    protected transient List<String> m_Vocabulary;

    /**
     * Returns a description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "A simple batch filter that adds attributes for all the Twitter-oriented POS tags of the TwitterNLP library.\n\nFor more information see:\nhttp://www.ark.cs.cmu.edu/TweetNLP/\nOriginal code taken from here:\nhttps://github.com/felipebravom/TwitterSentLex/blob/master/src/weka/filters/unsupervised/attribute/TwitterNlpPos.java";
    }

    /**
     * Lists the options of this filter: the model option followed by the
     * options of the superclass.
     *
     * @return an enumeration of the available options
     */
    @Override
    public Enumeration listOptions() {
        // Raw types are kept on purpose: they match whatever element type the
        // WekaOptionUtils helpers declare.
        Vector result = new Vector();
        WekaOptionUtils.addOption(result, this.modelTipText(), "" + this.getDefaultModel(), MODEL);
        WekaOptionUtils.add(result, super.listOptions());
        return WekaOptionUtils.toEnumeration(result);
    }

    /**
     * Parses the model option (falling back to the default model) and hands
     * the options on to the superclass.
     *
     * @param options the options to parse
     * @throws Exception if parsing fails
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        this.setModel(WekaOptionUtils.parse(options, MODEL, this.getDefaultModel()));
        super.setOptions(options);
    }

    /**
     * Returns the current option settings: the model option followed by the
     * options of the superclass.
     *
     * @return the options as string array
     */
    @Override
    public String[] getOptions() {
        ArrayList result = new ArrayList();
        WekaOptionUtils.add(result, MODEL, this.getModel());
        WekaOptionUtils.add(result, super.getOptions());
        return WekaOptionUtils.toArray(result);
    }

    /**
     * Resets the filter and discards the loaded tagger and vocabulary, so
     * that they get reloaded from the model file on next use.
     */
    protected void reset() {
        super.reset();
        this.m_Tagger = null;
        this.m_Vocabulary = null;
    }

    /**
     * Returns the default model file.
     *
     * @return the current directory, i.e. a placeholder that is rejected when
     *         the tagger gets loaded
     */
    protected File getDefaultModel() {
        return new File(".");
    }

    /**
     * Sets the model file and resets the filter.
     *
     * @param value the model file to use
     */
    public void setModel(File value) {
        this.m_Model = value;
        this.reset();
    }

    /**
     * Returns the model file.
     *
     * @return the model file in use
     */
    public File getModel() {
        return this.m_Model;
    }

    /**
     * Returns the tip text for the model property.
     *
     * @return the tip text
     */
    public String modelTipText() {
        return "The model file to load and use.";
    }

    /**
     * Returns whether the filter gets access to the full input format.
     *
     * @return always true
     */
    @Override
    public boolean allowAccessToFullInputFormat() {
        return true;
    }

    /**
     * Returns the capabilities of this filter: everything enabled, including
     * data without class, except relational classes and attributes.
     *
     * @return the capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = new Capabilities(this);
        result.enableAll();
        result.enable(Capabilities.Capability.NO_CLASS);
        result.disable(Capabilities.Capability.RELATIONAL_CLASS);
        result.disable(Capabilities.Capability.RELATIONAL_ATTRIBUTES);
        result.setMinimumNumberInstances(0);
        return result;
    }

    /**
     * Ensures that the tagger and its sorted tag vocabulary are available,
     * loading them from the model file if necessary. The fields are only
     * assigned after the model has been loaded successfully, so a failed
     * load is retried on the next call instead of leaving a tagger without
     * model behind.
     *
     * @throws Exception if no model file is set, it does not exist, it is a
     *                   directory, or loading the model fails
     */
    private void initTagger() throws Exception {
        if (this.m_Tagger != null && this.m_Vocabulary != null) {
            return;
        }
        if (this.m_Model == null) {
            throw new WekaException("No model file set!");
        }
        if (!this.m_Model.exists()) {
            throw new WekaException("Model file does not exist: " + this.m_Model);
        }
        if (this.m_Model.isDirectory()) {
            throw new WekaException("Model file points to a directory: " + this.m_Model);
        }
        Tagger tagger = new Tagger();
        tagger.loadModel(this.m_Model.getAbsolutePath());
        List<String> vocabulary = new ArrayList<String>();
        for (int tag = 0; tag < tagger.model.labelVocab.size(); ++tag) {
            vocabulary.add(tagger.model.labelVocab.name(tag));
        }
        // Sorted, so that the attribute order does not depend on the order
        // in which the model enumerates its labels.
        Collections.sort(vocabulary);
        this.m_Tagger = tagger;
        this.m_Vocabulary = vocabulary;
    }

    /**
     * Determines the output format: copies of all input attributes followed
     * by one numeric attribute per POS tag of the model, named
     * {@link #POS_PREFIX} plus the tag name. The class index is taken over
     * from the input format.
     *
     * @param inputFormat the format of the incoming data
     * @return the header of the data this filter outputs
     * @throws Exception if the input format is rejected or the model cannot
     *                   be loaded
     */
    protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
        this.checkInputFormat(inputFormat);
        this.initTagger();
        ArrayList<Attribute> att = new ArrayList<Attribute>();
        for (int i = 0; i < inputFormat.numAttributes(); ++i) {
            att.add((Attribute)inputFormat.attribute(i).copy());
        }
        for (String tagName : this.m_Vocabulary) {
            att.add(new Attribute(POS_PREFIX + tagName));
        }
        Instances result = new Instances("Twitter Sentiment Analysis", att, 0);
        result.setClassIndex(inputFormat.classIndex());
        return result;
    }

    /**
     * Tags the content of every instance and appends the relative tag
     * frequencies (count of the tag divided by the number of tokens).
     * Instances without tokens, or with a missing content value, get a
     * frequency of 0 for every tag. Tags emitted by the tagger that are not
     * part of the vocabulary are ignored.
     *
     * @param instances the data to process
     * @return the data in the output format, stored as sparse instances
     * @throws Exception if the model cannot be loaded or the output format
     *                   does not contain the expected POS attributes
     */
    protected Instances process(Instances instances) throws Exception {
        // The tagger is transient: it is gone after deserialization even
        // though the output format has already been determined.
        this.initTagger();
        Instances result = new Instances(this.getOutputFormat(), instances.numInstances());
        Attribute attrCont = instances.attribute(this.m_AttributeName);

        // Resolve the output column of every tag once instead of per instance.
        HashMap<String, Integer> tagColumn = new HashMap<String, Integer>();
        for (String tagName : this.m_Vocabulary) {
            Attribute posAtt = result.attribute(POS_PREFIX + tagName);
            if (posAtt == null) {
                throw new WekaException("Output format lacks attribute for POS tag: " + tagName);
            }
            tagColumn.put(tagName, posAtt.index());
        }

        for (int inst = 0; inst < instances.numInstances(); ++inst) {
            Instance instance = instances.instance(inst);
            // Freshly allocated, hence all POS columns start out at 0.0.
            double[] values = new double[result.numAttributes()];
            for (int att = 0; att < instance.numAttributes(); ++att) {
                if (instance.isMissing(att)) {
                    values[att] = Utils.missingValue();
                } else if (instance.attribute(att).isString()) {
                    // String values have to be registered with the output header.
                    values[att] = result.attribute(att).addStringValue(instance.stringValue(att));
                } else {
                    values[att] = instance.value(att);
                }
            }
            // A missing content value has no text to tag.
            if (!instance.isMissing(attrCont)) {
                List<Tagger.TaggedToken> tokens = this.m_Tagger.tokenizeAndTag(instance.stringValue(attrCont));
                if (!tokens.isEmpty()) {
                    double share = 1.0 / (double)tokens.size();
                    for (Tagger.TaggedToken token : tokens) {
                        Integer column = tagColumn.get(token.tag);
                        if (column != null) {
                            values[column] += share;
                        }
                    }
                }
            }
            result.add(new SparseInstance(1.0, values));
        }
        return result;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 4521 $");
    }

    /**
     * Runs the filter from the command line.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        TwitterNLPPos.runFilter(new TwitterNLPPos(), args);
    }
}

