package adams.flow.transformer;

import adams.core.QuickInfoHelper;
import adams.core.base.BaseCharset;
import adams.core.io.EncodingSupporter;
import adams.flow.control.StorageName;
import adams.flow.core.Token;
import com.kennycason.kumo.WordFrequency;
import com.kennycason.kumo.nlp.FrequencyAnalyzer;
import com.kennycason.kumo.nlp.normalize.Normalizer;
import java.io.ByteArrayInputStream;
import java.util.Arrays;

/* loaded from: input_file:adams/flow/transformer/WordFrequencyAnalyzer.class */
/**
 * Transformer that takes an incoming string, runs it through a Kumo
 * {@link FrequencyAnalyzer} and forwards the resulting word frequencies as a
 * {@code WordFrequency[]} array.
 * <p>
 * The analyzer is configured from this actor's options: character encoding,
 * normalizers, minimum/maximum word length, the number of frequencies to
 * return and an optional storage item holding a {@code String[]} of stopwords.
 */
public class WordFrequencyAnalyzer extends AbstractTransformer implements EncodingSupporter {
    private static final long serialVersionUID = 8328022277704245871L;

    /** The encoding used to turn the incoming text into bytes for the analyzer. */
    protected BaseCharset m_Encoding;

    /** The normalizers that get added to the analyzer (replacing its defaults). */
    protected Normalizer[] m_Normalizers;

    /** The minimum word length passed to the analyzer. */
    protected int m_MinWordLength;

    /** The maximum word length passed to the analyzer. */
    protected int m_MaxWordLength;

    /** The number of word frequencies the analyzer is asked to return. */
    protected int m_NumFrequencies;

    /** The name of the storage item holding the stopwords ({@code String[]}). */
    protected StorageName m_Stopwords;

    /**
     * Returns a short description of this actor.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Generates a word frequency analyzer from the incoming text.";
    }

    /**
     * Registers the options of this actor with the option manager, in addition
     * to the ones defined by the super class.
     */
    public void defineOptions() {
        super.defineOptions();
        this.m_OptionManager.add("encoding", "encoding", new BaseCharset());
        this.m_OptionManager.add("normalizer", "normalizers", new Normalizer[0]);
        // NOTE(review): the numeric arguments are presumably (default, lower bound, upper bound);
        // the setters below validate against the option manager - confirm.
        this.m_OptionManager.add("min-word-length", "minWordLength", 3, 1, (Number) null);
        this.m_OptionManager.add("max-word-length", "maxWordLength", 32, 1, (Number) null);
        this.m_OptionManager.add("num-frequencies", "numFrequencies", 50, 1, (Number) null);
        this.m_OptionManager.add("stopwords", "stopwords", new StorageName());
    }

    /**
     * Sets the encoding to use and resets the actor.
     *
     * @param baseCharset the encoding
     */
    public void setEncoding(BaseCharset baseCharset) {
        this.m_Encoding = baseCharset;
        reset();
    }

    /**
     * Returns the encoding in use.
     *
     * @return the encoding
     */
    public BaseCharset getEncoding() {
        return this.m_Encoding;
    }

    /**
     * Returns the help text for the encoding option.
     *
     * @return the tip text
     */
    public String encodingTipText() {
        // This actor never writes a file; the encoding is applied to the incoming text.
        return "The type of encoding to use for the incoming text, use empty string for default.";
    }

    /**
     * Sets the normalizers to use and resets the actor.
     *
     * @param normalizerArr the normalizers
     */
    public void setNormalizers(Normalizer[] normalizerArr) {
        this.m_Normalizers = normalizerArr;
        reset();
    }

    /**
     * Returns the normalizers in use.
     *
     * @return the normalizers
     */
    public Normalizer[] getNormalizers() {
        return this.m_Normalizers;
    }

    /**
     * Returns the help text for the normalizers option.
     *
     * @return the tip text
     */
    public String normalizersTipText() {
        return "The normalizers to use.";
    }

    /**
     * Sets the minimum word length; the value is only accepted (and the actor
     * reset) if the option manager considers it valid.
     *
     * @param i the minimum length
     */
    public void setMinWordLength(int i) {
        if (getOptionManager().isValid("minWordLength", Integer.valueOf(i))) {
            this.m_MinWordLength = i;
            reset();
        }
    }

    /**
     * Returns the minimum word length.
     *
     * @return the minimum length
     */
    public int getMinWordLength() {
        return this.m_MinWordLength;
    }

    /**
     * Returns the help text for the minimum word length option.
     *
     * @return the tip text
     */
    public String minWordLengthTipText() {
        return "The minimum length for words.";
    }

    /**
     * Sets the maximum word length; the value is only accepted (and the actor
     * reset) if the option manager considers it valid.
     *
     * @param i the maximum length
     */
    public void setMaxWordLength(int i) {
        if (getOptionManager().isValid("maxWordLength", Integer.valueOf(i))) {
            this.m_MaxWordLength = i;
            reset();
        }
    }

    /**
     * Returns the maximum word length.
     *
     * @return the maximum length
     */
    public int getMaxWordLength() {
        return this.m_MaxWordLength;
    }

    /**
     * Returns the help text for the maximum word length option.
     *
     * @return the tip text
     */
    public String maxWordLengthTipText() {
        return "The maximum length for words.";
    }

    /**
     * Sets the number of frequencies to return; the value is only accepted
     * (and the actor reset) if the option manager considers it valid.
     *
     * @param i the number of frequencies
     */
    public void setNumFrequencies(int i) {
        if (getOptionManager().isValid("numFrequencies", Integer.valueOf(i))) {
            this.m_NumFrequencies = i;
            reset();
        }
    }

    /**
     * Returns the number of frequencies to return.
     *
     * @return the number of frequencies
     */
    public int getNumFrequencies() {
        return this.m_NumFrequencies;
    }

    /**
     * Returns the help text for the number of frequencies option.
     *
     * @return the tip text
     */
    public String numFrequenciesTipText() {
        return "The number of frequencies to return.";
    }

    /**
     * Sets the name of the storage item holding the stopwords and resets the actor.
     *
     * @param storageName the storage name
     */
    public void setStopwords(StorageName storageName) {
        this.m_Stopwords = storageName;
        reset();
    }

    /**
     * Returns the name of the storage item holding the stopwords.
     *
     * @return the storage name
     */
    public StorageName getStopwords() {
        return this.m_Stopwords;
    }

    /**
     * Returns the help text for the stopwords option.
     *
     * @return the tip text
     */
    public String stopwordsTipText() {
        return "The storage item that holds the string array of stopwords to use.";
    }

    /**
     * Assembles a one-line summary of the word length limits, the number of
     * frequencies and the stopwords storage item.
     *
     * @return the summary
     */
    public String getQuickInfo() {
        String info = QuickInfoHelper.toString(this, "minWordLength", Integer.valueOf(this.m_MinWordLength), "min: ");
        info += QuickInfoHelper.toString(this, "maxWordLength", Integer.valueOf(this.m_MaxWordLength), ", max: ");
        info += QuickInfoHelper.toString(this, "numFrequencies", Integer.valueOf(this.m_NumFrequencies), ", #: ");
        info += QuickInfoHelper.toString(this, "stopwords", this.m_Stopwords.isEmpty() ? "-none-" : this.m_Stopwords.getValue(), ", stopwords: ");
        return info;
    }

    /**
     * Returns the classes of input this actor accepts.
     *
     * @return {@code String.class}
     */
    public Class[] accepts() {
        return new Class[]{String.class};
    }

    /**
     * Returns the classes of output this actor generates.
     *
     * @return {@code WordFrequency[].class}
     */
    public Class[] generates() {
        return new Class[]{WordFrequency[].class};
    }

    /**
     * Configures a {@link FrequencyAnalyzer} from the options, feeds it the
     * incoming text and stores the resulting {@code WordFrequency[]} in the
     * output token.
     * <p>
     * If the stopwords cannot be obtained from storage, the analysis is skipped
     * and no output token is generated, so that a failed execution never
     * forwards frequencies computed without the configured stopwords.
     *
     * @return {@code null} if successful, otherwise the error message
     */
    protected String doExecute() {
        String result = null;

        // Resolve the charset once so that encoding the text and the analyzer's
        // decoding of the resulting bytes are guaranteed to agree.
        java.nio.charset.Charset charset = this.m_Encoding.charsetValue();
        String text = (String) this.m_InputToken.getPayload(String.class);
        ByteArrayInputStream input = new ByteArrayInputStream(text.getBytes(charset));

        FrequencyAnalyzer analyzer = new FrequencyAnalyzer();
        // Drop whatever normalizers the analyzer starts with; only the configured ones apply.
        analyzer.clearNormalizers();
        for (Normalizer normalizer : this.m_Normalizers) {
            analyzer.addNormalizer(normalizer);
        }
        analyzer.setCharacterEncoding(charset.name());
        analyzer.setMinWordLength(this.m_MinWordLength);
        analyzer.setMaxWordLength(this.m_MaxWordLength);
        analyzer.setWordFrequenciesToReturn(this.m_NumFrequencies);

        // Stopwords are optional: they are only applied when a storage name is
        // configured and the item is actually present in storage.
        if (!this.m_Stopwords.isEmpty() && getStorageHandler().getStorage().has(this.m_Stopwords)) {
            try {
                String[] stopwords = (String[]) getStorageHandler().getStorage().get(this.m_Stopwords);
                analyzer.setStopWords(Arrays.asList(stopwords));
            } catch (Exception e) {
                // Also covers a storage item that is not a String[] (ClassCastException).
                result = handleException("Failed to retrieve stopwords string array from storage item: " + this.m_Stopwords, e);
            }
        }

        // Only analyze when the setup succeeded; otherwise the earlier error is reported as-is.
        if (result == null) {
            try {
                this.m_OutputToken = new Token(analyzer.load(input).toArray(new WordFrequency[0]));
            } catch (Exception e) {
                result = handleException("Failed to generate word frequencies!", e);
            }
        }
        return result;
    }
}
