package weka.core.tokenizers.cleaners;

import adams.core.io.FileUtils;
import java.io.File;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Vector;
import weka.core.WekaOptionUtils;

/* loaded from: input_file:weka/core/tokenizers/cleaners/WordCluster.class */
/**
 * Token cleaner that replaces each word with the cluster it belongs to.
 *
 * <p>The word-to-cluster mapping is read lazily, on the first call to
 * {@link #clean(String)}, from a tab-separated model file (no header; the 1st
 * column is the cluster, the 2nd column is the word). Words that have no entry
 * in the model are replaced with {@link #UNKNOWN_WORD}.
 */
public class WordCluster extends AbstractTokenCleaner {
    private static final long serialVersionUID = 4367295660326278568L;

    /** The option flag used for the model file. */
    public static final String MODEL = "model";

    /** The replacement returned for words that are not listed in the model. */
    public static final String UNKNOWN_WORD = "???";

    /** The model file to load the word/cluster pairs from. */
    protected File m_Model = getDefaultModel();

    /** The word -> cluster lookup; null until loaded, discarded by {@link #reset()}. */
    protected transient Map<String, String> m_Clusters;

    /**
     * Returns a short description of this cleaner.
     *
     * @return the description
     */
    public String globalInfo() {
        return "Replaces words with clusters.";
    }

    /**
     * Lists the options of this cleaner: the model option followed by the
     * options of the superclass.
     *
     * @return the enumeration of available options
     */
    public Enumeration listOptions() {
        Vector vector = new Vector();
        WekaOptionUtils.addOption(vector, modelTipText(), "" + getDefaultModel(), MODEL);
        WekaOptionUtils.add(vector, super.listOptions());
        return WekaOptionUtils.toEnumeration(vector);
    }

    /**
     * Parses the model option (falling back to the default model when it is
     * not supplied) and hands the options on to the superclass.
     *
     * @param strArr the options to parse
     * @throws Exception if parsing the options fails
     */
    public void setOptions(String[] strArr) throws Exception {
        setModel(WekaOptionUtils.parse(strArr, MODEL, getDefaultModel()));
        super.setOptions(strArr);
    }

    /**
     * Returns the current option setup: the model option followed by the
     * options of the superclass.
     *
     * @return the options as array
     */
    public String[] getOptions() {
        ArrayList<String> options = new ArrayList<String>();
        WekaOptionUtils.add(options, MODEL, getModel());
        WekaOptionUtils.add(options, super.getOptions());
        return WekaOptionUtils.toArray(options);
    }

    /**
     * Resets the cleaner and discards the cached clusters, so that the model
     * file gets read again on the next call to {@link #clean(String)}.
     */
    protected void reset() {
        super.reset();
        this.m_Clusters = null;
    }

    /**
     * Returns the default model file.
     *
     * @return the default, which is the current directory
     */
    protected File getDefaultModel() {
        return new File(".");
    }

    /**
     * Sets the model file to use and discards any previously loaded clusters.
     *
     * @param file the model file
     */
    public void setModel(File file) {
        this.m_Model = file;
        reset();
    }

    /**
     * Returns the model file in use.
     *
     * @return the model file
     */
    public File getModel() {
        return this.m_Model;
    }

    /**
     * Returns the help text for the model option.
     *
     * @return the help text
     */
    public String modelTipText() {
        return "The tab-separated model file to load and use; no header; 1st column is cluster, 2nd column is word.";
    }

    /**
     * Replaces the given word with its cluster, loading the model file first
     * if that has not happened yet.
     *
     * @param str the word to look up
     * @return the cluster of the word, or {@link #UNKNOWN_WORD} if the word is
     *         not listed in the model
     * @throws IllegalStateException if no model file is set, the model file
     *         does not exist, points to a directory or cannot be loaded
     */
    public String clean(String str) {
        if (this.m_Clusters == null) {
            // Publish the map only after a successful load: a failed load must
            // not leave an empty map behind that would make every subsequent
            // call silently return UNKNOWN_WORD.
            this.m_Clusters = loadClusters();
        }
        // Values stored in the map are never null, so a null result means "no entry".
        String cluster = this.m_Clusters.get(str);
        return cluster != null ? cluster : UNKNOWN_WORD;
    }

    /**
     * Reads the model file into a fresh word -> cluster map.
     *
     * <p>Lines with fewer than two tab-separated columns are skipped. If a
     * word occurs more than once, the last occurrence wins.
     *
     * @return the loaded mapping
     * @throws IllegalStateException if no model file is set, the model file
     *         does not exist, points to a directory or cannot be loaded
     */
    private Map<String, String> loadClusters() {
        if (this.m_Model == null) {
            throw new IllegalStateException("No cluster model file set!");
        }
        if (!this.m_Model.exists()) {
            throw new IllegalStateException("Cluster model file does not exist: " + this.m_Model);
        }
        if (this.m_Model.isDirectory()) {
            throw new IllegalStateException("Cluster model file points to a directory: " + this.m_Model);
        }
        Iterable<String> lines = FileUtils.loadFromFile(this.m_Model);
        // NOTE(review): loadFromFile presumably signals a read failure by
        // returning null - guard against it rather than failing with an NPE.
        if (lines == null) {
            throw new IllegalStateException("Failed to load cluster model file: " + this.m_Model);
        }
        Map<String, String> clusters = new HashMap<String, String>();
        for (String line : lines) {
            String[] columns = line.split("\t");
            if (columns.length >= 2) {
                // 1st column is the cluster, 2nd column the word it applies to
                clusters.put(columns[1], columns[0]);
            }
        }
        return clusters;
    }
}
