package weka.core.tokenizers;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.WekaOptionUtils;
import weka.core.tokenizers.cleaners.PassThrough;
import weka.core.tokenizers.cleaners.TokenCleaner;

/* loaded from: input_file:weka/core/tokenizers/PreCleanedTokenizer.class */
/**
 * Tokenizer that cleans tokens before the actual tokenization takes place.
 *
 * <p>Processing performed by {@link #tokenize(String)}:
 * <ol>
 *   <li>the pre-tokenizer splits the input into raw tokens,</li>
 *   <li>the cleaner is applied to every raw token; tokens for which the
 *       cleaner returns {@code null} are dropped,</li>
 *   <li>the remaining tokens are joined into one string, separated by single
 *       blanks,</li>
 *   <li>the post-tokenizer tokenizes that string; its tokens are what
 *       {@link #hasMoreElements()} / {@link #nextElement()} expose.</li>
 * </ol>
 */
public class PreCleanedTokenizer extends Tokenizer {
    private static final long serialVersionUID = 6094968316580671771L;

    /** Option flag for the pre-tokenizer. */
    public static final String PRE_TOKENIZER = "pre-tokenizer";

    /** Option flag for the token cleaner. */
    public static final String CLEANER = "cleaner";

    /** Option flag for the post-tokenizer. */
    public static final String POST_TOKENIZER = "post-tokenizer";

    /** Tokenizer producing the raw tokens that get cleaned. */
    protected Tokenizer m_PreTokenizer = getDefaultPreTokenizer();

    /** Cleaner applied to each raw token. */
    protected TokenCleaner m_Cleaner = getDefaultCleaner();

    /** Tokenizer producing the final tokens from the cleaned text. */
    protected Tokenizer m_PostTokenizer = getDefaultPostTokenizer();

    /**
     * Returns a description of this tokenizer.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Allows the cleaning of tokens before actual tokenization.\nProcess:\n- apply pre-tokenizer to obtain tokens to clean\n- apply token cleaner to tokens\n- combine cleaned tokens back into a string (separated by blanks)\n- apply post-tokenizer to produce final set of tokens";
    }

    /**
     * Lists the options of this tokenizer followed by those of the superclass.
     *
     * @return an enumeration of the available options
     */
    public Enumeration listOptions() {
        // Raw Vector kept on purpose: the WekaOptionUtils signatures are not
        // visible from this file, so the element type cannot be confirmed.
        Vector options = new Vector();
        WekaOptionUtils.addOption(options, preTokenizerTipText(), getDefaultPreTokenizer().getClass().getName(), PRE_TOKENIZER);
        WekaOptionUtils.addOption(options, cleanerTipText(), getDefaultCleaner().getClass().getName(), CLEANER);
        WekaOptionUtils.addOption(options, postTokenizerTipText(), getDefaultPostTokenizer().getClass().getName(), POST_TOKENIZER);
        WekaOptionUtils.add(options, super.listOptions());
        return WekaOptionUtils.toEnumeration(options);
    }

    /**
     * Configures pre-tokenizer, cleaner and post-tokenizer from the given
     * options, then hands the array to the superclass.
     *
     * @param strArr the options to parse
     * @throws Exception if parsing the options fails
     */
    public void setOptions(String[] strArr) throws Exception {
        setPreTokenizer((Tokenizer) WekaOptionUtils.parse(strArr, PRE_TOKENIZER, (OptionHandler) getDefaultPreTokenizer()));
        setCleaner((TokenCleaner) WekaOptionUtils.parse(strArr, CLEANER, getDefaultCleaner()));
        setPostTokenizer((Tokenizer) WekaOptionUtils.parse(strArr, POST_TOKENIZER, (OptionHandler) getDefaultPostTokenizer()));
        super.setOptions(strArr);
    }

    /**
     * Returns the current configuration as an option array, including the
     * options of the superclass.
     *
     * @return the current options
     */
    public String[] getOptions() {
        List<String> options = new ArrayList<String>();
        WekaOptionUtils.add(options, PRE_TOKENIZER, (OptionHandler) getPreTokenizer());
        WekaOptionUtils.add(options, CLEANER, (OptionHandler) getCleaner());
        WekaOptionUtils.add(options, POST_TOKENIZER, (OptionHandler) getPostTokenizer());
        WekaOptionUtils.add(options, super.getOptions());
        return WekaOptionUtils.toArray(options);
    }

    /**
     * Returns the default pre-tokenizer.
     *
     * @return a new {@link WordTokenizer}
     */
    protected Tokenizer getDefaultPreTokenizer() {
        return new WordTokenizer();
    }

    /**
     * Sets the tokenizer used for the initial tokenization.
     *
     * @param tokenizer the pre-tokenizer to use
     */
    public void setPreTokenizer(Tokenizer tokenizer) {
        this.m_PreTokenizer = tokenizer;
    }

    /**
     * Returns the tokenizer used for the initial tokenization.
     *
     * @return the pre-tokenizer
     */
    public Tokenizer getPreTokenizer() {
        return this.m_PreTokenizer;
    }

    /**
     * Returns the tip text for the pre-tokenizer property.
     *
     * @return the tip text
     */
    public String preTokenizerTipText() {
        return "The tokenizer to use for the initial tokenization (before cleaning).";
    }

    /**
     * Returns the default token cleaner.
     *
     * @return a new {@link PassThrough}
     */
    protected TokenCleaner getDefaultCleaner() {
        return new PassThrough();
    }

    /**
     * Sets the cleaner applied to the tokens of the pre-tokenizer.
     *
     * @param tokenCleaner the cleaner to use
     */
    public void setCleaner(TokenCleaner tokenCleaner) {
        this.m_Cleaner = tokenCleaner;
    }

    /**
     * Returns the cleaner applied to the tokens of the pre-tokenizer.
     *
     * @return the cleaner
     */
    public TokenCleaner getCleaner() {
        return this.m_Cleaner;
    }

    /**
     * Returns the tip text for the cleaner property.
     *
     * @return the tip text
     */
    public String cleanerTipText() {
        return "The cleaner to use for cleaning the tokens from the initial tokenization.";
    }

    /**
     * Returns the default post-tokenizer.
     *
     * @return a new {@link WordTokenizer}
     */
    protected Tokenizer getDefaultPostTokenizer() {
        return new WordTokenizer();
    }

    /**
     * Sets the tokenizer used for the final tokenization.
     *
     * @param tokenizer the post-tokenizer to use
     */
    public void setPostTokenizer(Tokenizer tokenizer) {
        this.m_PostTokenizer = tokenizer;
    }

    /**
     * Returns the tokenizer used for the final tokenization.
     *
     * @return the post-tokenizer
     */
    public Tokenizer getPostTokenizer() {
        return this.m_PostTokenizer;
    }

    /**
     * Returns the tip text for the post-tokenizer property.
     *
     * @return the tip text
     */
    public String postTokenizerTipText() {
        return "The tokenizer to use for the final tokenization (after cleaning).";
    }

    /**
     * Reports whether the post-tokenizer has further tokens.
     *
     * @return {@code true} if the post-tokenizer has more tokens
     */
    public boolean hasMoreElements() {
        return this.m_PostTokenizer.hasMoreElements();
    }

    /**
     * Returns the next token of the post-tokenizer.
     *
     * <p>The decompiler had renamed this method to {@code m192nextElement},
     * which left the class without a {@code nextElement()} of its own; the
     * original name is restored here.
     *
     * @return the next token
     */
    public String nextElement() {
        return this.m_PostTokenizer.nextElement();
    }

    /**
     * Alias carrying the decompiler-generated name; kept only so that code
     * still referring to that name continues to compile.
     *
     * @return the next token
     * @deprecated use {@link #nextElement()} instead
     */
    @Deprecated
    public String m192nextElement() {
        return nextElement();
    }

    /**
     * Runs the pre-tokenize / clean / post-tokenize pipeline on the string.
     *
     * @param str the string to tokenize
     */
    public void tokenize(String str) {
        this.m_PreTokenizer.tokenize(str);
        StringBuilder cleanedText = new StringBuilder();
        while (this.m_PreTokenizer.hasMoreElements()) {
            String cleaned = this.m_Cleaner.clean(this.m_PreTokenizer.nextElement());
            // A null result means the cleaner rejected the token.
            if (cleaned != null) {
                if (cleanedText.length() > 0) {
                    cleanedText.append(" ");
                }
                cleanedText.append(cleaned);
            }
        }
        this.m_PostTokenizer.tokenize(cleanedText.toString());
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 10824 $");
    }
}
