package weka.filters.unsupervised.attribute;

import cmu.arktweetnlp.Tagger;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.WekaException;
import weka.core.WekaOptionUtils;

/* loaded from: input_file:weka/filters/unsupervised/attribute/TwitterNLPPos.class */
/**
 * Batch filter that appends one numeric attribute per part-of-speech tag known to a
 * TwitterNLP (ark-tweet-nlp) tagger model.
 *
 * <p>For every input instance the text of the attribute named by {@code m_AttributeName}
 * is tokenized and tagged; each appended attribute ({@link #POS_PREFIX} + tag) receives the
 * relative frequency of that tag among the tokens of the text. All original attribute
 * values are copied through unchanged.
 *
 * <p>The tagger and its (sorted) tag vocabulary are transient: they are loaded lazily from
 * the configured model file and are rebuilt on demand, e.g. after deserialization.
 */
public class TwitterNLPPos extends AbstractTweetContentFilter {
    private static final long serialVersionUID = -6908047935900687249L;

    /** Name of the command-line option that specifies the model file. */
    public static final String MODEL = "model";

    /** Prefix of the names of the generated POS attributes. */
    public static final String POS_PREFIX = "POS-";

    /** The tagger model file to load. */
    protected File m_Model = getDefaultModel();

    /** The tagger; loaded lazily from {@link #m_Model}, not serialized. */
    protected transient Tagger m_Tagger;

    /** Sorted names of all tags in the tagger's label vocabulary; not serialized. */
    protected transient List<String> m_Vocabulary;

    /**
     * Returns a description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "A simple batch filter that adds attributes for all the Twitter-oriented POS tags "
            + "of the TwitterNLP library.\n\n"
            + "For more information see:\n"
            + "http://www.ark.cs.cmu.edu/TweetNLP/\n"
            + "Original code taken from here:\n"
            + "https://github.com/felipebravom/TwitterSentLex/blob/master/src/weka/filters/unsupervised/attribute/TwitterNlpPos.java";
    }

    /**
     * Lists the options of this filter: the model option followed by the options of the
     * superclass.
     *
     * @return an enumeration of the available options
     */
    @Override // weka.filters.unsupervised.attribute.AbstractTweetContentFilter
    public Enumeration listOptions() {
        // Kept as a raw Vector: the WekaOptionUtils helper signatures are not visible here.
        Vector result = new Vector();
        WekaOptionUtils.addOption(result, modelTipText(), "" + getDefaultModel(), MODEL);
        WekaOptionUtils.add(result, super.listOptions());
        return WekaOptionUtils.toEnumeration(result);
    }

    /**
     * Parses the model option (falling back to the default model) and then hands the
     * options to the superclass.
     *
     * @param options the option array to parse
     * @throws Exception if parsing fails
     */
    @Override // weka.filters.unsupervised.attribute.AbstractTweetContentFilter
    public void setOptions(String[] options) throws Exception {
        setModel(WekaOptionUtils.parse(options, MODEL, getDefaultModel()));
        super.setOptions(options);
    }

    /**
     * Returns the current option settings: the model option followed by the options of the
     * superclass.
     *
     * @return the options as string array
     */
    @Override // weka.filters.unsupervised.attribute.AbstractTweetContentFilter
    public String[] getOptions() {
        List<String> result = new ArrayList<String>();
        WekaOptionUtils.add(result, MODEL, getModel());
        WekaOptionUtils.add(result, super.getOptions());
        return WekaOptionUtils.toArray(result);
    }

    /**
     * Resets the filter and discards the loaded tagger and vocabulary so that they are
     * rebuilt from the current model file on next use.
     */
    @Override
    protected void reset() {
        super.reset();
        this.m_Tagger = null;
        this.m_Vocabulary = null;
    }

    /**
     * Returns the default model file.
     *
     * @return the current directory, i.e. a placeholder that is not a usable model
     */
    protected File getDefaultModel() {
        return new File(".");
    }

    /**
     * Sets the model file to use and resets the filter.
     *
     * @param file the model file
     */
    public void setModel(File file) {
        this.m_Model = file;
        reset();
    }

    /**
     * Returns the model file in use.
     *
     * @return the model file
     */
    public File getModel() {
        return this.m_Model;
    }

    /**
     * Returns the tip text for the model option.
     *
     * @return the tip text
     */
    public String modelTipText() {
        return "The model file to load and use.";
    }

    /**
     * Signals that the filter wants access to the full input format.
     *
     * @return always true
     */
    @Override // weka.filters.unsupervised.attribute.AbstractTweetContentFilter
    public boolean allowAccessToFullInputFormat() {
        return true;
    }

    /**
     * Returns the capabilities of this filter: everything is enabled, including data
     * without a class, except relational classes and relational attributes.
     *
     * @return the capabilities
     */
    @Override // weka.filters.unsupervised.attribute.AbstractTweetContentFilter
    public Capabilities getCapabilities() {
        Capabilities capabilities = new Capabilities(this);
        capabilities.enableAll();
        capabilities.enable(Capabilities.Capability.NO_CLASS);
        capabilities.disable(Capabilities.Capability.RELATIONAL_CLASS);
        capabilities.disable(Capabilities.Capability.RELATIONAL_ATTRIBUTES);
        capabilities.setMinimumNumberInstances(0);
        return capabilities;
    }

    /**
     * Makes sure the tagger and the sorted tag vocabulary are available, loading them from
     * the model file if necessary.
     *
     * <p>The fields are only assigned after the model was loaded successfully, so a failed
     * load never leaves a tagger without a vocabulary behind.
     *
     * @throws Exception if no model file is set, the file does not exist, points to a
     *         directory, or the tagger fails to load it
     */
    private void initTagger() throws Exception {
        if (this.m_Tagger != null && this.m_Vocabulary != null) {
            return;
        }
        if (this.m_Model == null) {
            throw new WekaException("No model file set!");
        }
        if (!this.m_Model.exists()) {
            throw new WekaException("Model file does not exist: " + this.m_Model);
        }
        if (this.m_Model.isDirectory()) {
            throw new WekaException("Model file points to a directory: " + this.m_Model);
        }

        Tagger tagger = new Tagger();
        tagger.loadModel(this.m_Model.getAbsolutePath());

        List<String> vocabulary = new ArrayList<String>();
        for (int i = 0; i < tagger.model.labelVocab.size(); i++) {
            vocabulary.add(tagger.model.labelVocab.name(i));
        }
        // Sorted so that the order of the generated attributes does not depend on the
        // order of the labels inside the model.
        Collections.sort(vocabulary);

        this.m_Tagger = tagger;
        this.m_Vocabulary = vocabulary;
    }

    /**
     * Builds the output format: copies of all input attributes followed by one numeric
     * attribute per tag of the vocabulary (in sorted order). The class index of the input
     * is carried over.
     *
     * @param instances the input format
     * @return the output format (an empty dataset)
     * @throws Exception if the input format check fails or the model cannot be loaded
     */
    protected Instances determineOutputFormat(Instances instances) throws Exception {
        checkInputFormat(instances);
        initTagger();

        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        for (int i = 0; i < instances.numAttributes(); i++) {
            attributes.add((Attribute) instances.attribute(i).copy());
        }
        for (String tag : this.m_Vocabulary) {
            attributes.add(new Attribute(POS_PREFIX + tag));
        }

        Instances output = new Instances("Twitter Sentiment Analysis", attributes, 0);
        output.setClassIndex(instances.classIndex());
        return output;
    }

    /**
     * Converts the instances: copies the original values and fills the POS attributes with
     * the relative tag frequencies of the text attribute. The weight of each input
     * instance is carried over to the generated instance.
     *
     * @param instances the data to convert
     * @return the converted data
     * @throws Exception if the model cannot be loaded or the output format does not match
     *         the tag vocabulary
     */
    protected Instances process(Instances instances) throws Exception {
        // The tagger is transient and may be missing here (e.g. after deserialization).
        initTagger();

        Instances output = new Instances(getOutputFormat(), instances.numInstances());
        // NOTE(review): presumably checkInputFormat guarantees this attribute exists - confirm.
        Attribute textAttribute = instances.attribute(this.m_AttributeName);

        // Resolve the output column of every tag once instead of once per instance.
        int numTags = this.m_Vocabulary.size();
        int[] posColumns = new int[numTags];
        HashMap<String, Integer> tagPositions = new HashMap<String, Integer>();
        for (int t = 0; t < numTags; t++) {
            String tag = this.m_Vocabulary.get(t);
            Attribute posAttribute = output.attribute(POS_PREFIX + tag);
            if (posAttribute == null) {
                throw new WekaException(
                    "Output format has no attribute for POS tag '" + tag + "' (model: " + this.m_Model + ")");
            }
            posColumns[t] = posAttribute.index();
            tagPositions.put(tag, Integer.valueOf(t));
        }

        for (int i = 0; i < instances.numInstances(); i++) {
            Instance instance = instances.instance(i);
            double[] values = new double[output.numAttributes()];

            // Copy the original values; string values must be registered with the
            // corresponding attribute of the output dataset.
            for (int a = 0; a < instance.numAttributes(); a++) {
                if (instance.isMissing(a)) {
                    values[a] = Utils.missingValue();
                } else if (instance.attribute(a).isString()) {
                    values[a] = output.attribute(a).addStringValue(instance.stringValue(a));
                } else {
                    values[a] = instance.value(a);
                }
            }

            // Relative frequency of each tag among the tokens of the text.
            List<Tagger.TaggedToken> tokens = this.m_Tagger.tokenizeAndTag(instance.stringValue(textAttribute));
            double[] frequencies = new double[numTags];
            if (!tokens.isEmpty()) {
                double share = 1.0d / tokens.size();
                for (Tagger.TaggedToken token : tokens) {
                    Integer position = tagPositions.get(token.tag);
                    if (position == null) {
                        throw new WekaException("Tagger produced a tag that is not in its vocabulary: " + token.tag);
                    }
                    frequencies[position.intValue()] += share;
                }
            }
            for (int t = 0; t < numTags; t++) {
                values[posColumns[t]] = frequencies[t];
            }

            output.add(new SparseInstance(instance.weight(), values));
        }
        return output;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 4521 $");
    }

    /**
     * Runs the filter from the command line.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        runFilter(new TwitterNLPPos(), args);
    }
}
