/*
 * Decompiled with CFR 0.152.
 */
package edu.berkeley.nlp.PCFGLA;

import edu.berkeley.nlp.PCFGLA.Featurizer;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Featurizer that describes a word by its identity (when it has been seen often
 * enough) and by coarse word-shape and suffix indicators.
 *
 * <p>Every template is emitted twice per substate: once prefixed with the tag
 * only ({@code #tag:}) and once prefixed with the tag and the substate index
 * ({@code #tag:sub-k:}).
 */
public class SimpleFeaturizer implements Featurizer, Serializable {
    private final int uncommonThreshold;
    private final int rareThreshold;

    /**
     * @param uncommonThreshold words whose count exceeds this value (and
     *        {@code rareThreshold}) get no shape features
     * @param rareThreshold words whose count does not exceed this value never
     *        get the word-identity feature
     */
    public SimpleFeaturizer(int uncommonThreshold, int rareThreshold) {
        this.uncommonThreshold = uncommonThreshold;
        this.rareThreshold = rareThreshold;
    }

    /**
     * Builds the feature lists for every substate of a tag.
     *
     * @param word the word to featurize
     * @param tag numeric tag id, used verbatim in the feature prefix
     * @param numSubstates number of substates; one list is returned per substate
     * @param wordCount count of {@code word}, compared against the thresholds
     * @param tagWordCount count of {@code word} with this tag
     * @return an array of length {@code numSubstates}; entry {@code k} holds, for
     *         each template, the tag-level feature followed by the substate-level one
     */
    @Override
    public List<String>[] featurize(String word, int tag, int numSubstates, int wordCount, int tagWordCount) {
        List<String> templates = this.fillTemplates(word, wordCount, tagWordCount);
        // Java cannot create a generic array directly; the raw array is only
        // ever populated with List<String> instances below.
        @SuppressWarnings("unchecked")
        List<String>[] ret = (List<String>[]) new List[numSubstates];
        String coarsePrefix = "#" + tag + ":";
        for (int sub = 0; sub < ret.length; ++sub) {
            String finePrefix = coarsePrefix + "sub-" + sub + ":";
            List<String> features = new ArrayList<String>(2 * templates.size());
            for (String template : templates) {
                features.add(coarsePrefix + template);
                features.add(finePrefix + template);
            }
            ret[sub] = features;
        }
        return ret;
    }

    /**
     * Computes the tag-independent templates for a word.
     *
     * <p>If {@code wordCount > rareThreshold} and {@code tagWordCount > 0} the
     * word itself is a template. If {@code wordCount} additionally exceeds
     * {@code uncommonThreshold}, nothing else is added. Otherwise the
     * {@code shape:*} templates are appended. An empty word yields only
     * {@code shape:NOLETTER} as its shape.
     *
     * @param word the word to describe; must not be {@code null}
     * @param wordCount count of the word
     * @param tagWordCount count of the word with the tag under consideration
     * @return a new mutable list of templates, possibly empty
     */
    public List<String> fillTemplates(String word, int wordCount, int tagWordCount) {
        List<String> templates = new ArrayList<String>();
        if (wordCount > this.rareThreshold) {
            if (tagWordCount > 0) {
                templates.add(word);
            }
            if (wordCount > this.uncommonThreshold) {
                return templates;
            }
        }

        int wlen = word.length();
        int numCaps = 0;
        boolean hasDigit = false;
        boolean hasDash = false;
        boolean hasLower = false;
        boolean hasLetter = false;
        for (int i = 0; i < wlen; ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
            } else if (ch == '-') {
                hasDash = true;
            } else if (Character.isLetter(ch)) {
                hasLetter = true;
                if (Character.isLowerCase(ch)) {
                    hasLower = true;
                } else {
                    // Every letter that is not lowercase counts as a capital;
                    // titlecase letters additionally count as lowercase.
                    if (Character.isTitleCase(ch)) {
                        hasLower = true;
                    }
                    ++numCaps;
                }
            }
        }

        templates.add(hasLetter ? "shape:LETTER" : "shape:NOLETTER");

        if (looksCapitalized(word, numCaps)) {
            templates.add("shape:CAPS");
        } else if (hasLower) {
            templates.add("shape:LC");
        }
        if (hasDigit) {
            templates.add("shape:NUM");
        }
        if (hasDash) {
            templates.add("shape:DASH");
        }

        // Locale.ROOT keeps the suffix tests independent of the JVM's default
        // locale (e.g. Turkish lowercases 'I' to a dotless i).
        String lowered = word.toLowerCase(Locale.ROOT);
        if (lowered.endsWith("s") && wlen >= 3) {
            // A trailing "s" is only reported when not preceded by s, i or u;
            // such words get no suffix template at all.
            char ch2 = lowered.charAt(wlen - 2);
            if (ch2 != 's' && ch2 != 'i' && ch2 != 'u') {
                templates.add("shape:s");
            }
        } else if (wlen >= 5 && !hasDash && !(hasDigit && numCaps > 0)) {
            addDerivationalSuffix(lowered, templates);
        }
        return templates;
    }

    /**
     * Tells whether the word starts with an uppercase/titlecase character, or
     * starts with a non-letter and contains at least one capital.
     */
    private static boolean looksCapitalized(String word, int numCaps) {
        if (word.isEmpty()) {
            // Nothing to inspect; avoids charAt(0) on an empty string.
            return false;
        }
        char ch0 = word.charAt(0);
        if (Character.isUpperCase(ch0) || Character.isTitleCase(ch0)) {
            return true;
        }
        return !Character.isLetter(ch0) && numCaps > 0;
    }

    /** Appends the template for the first matching suffix of {@code lowered}, if any. */
    private static void addDerivationalSuffix(String lowered, List<String> templates) {
        // Order matters: the first match wins, so "ly" and "ity" must precede "y".
        // Kept as a local rather than a static field: the class declares no
        // serialVersionUID, and a static initializer would change the
        // default-computed one.
        String[] suffixes = {"ed", "ing", "ion", "er", "est", "ly", "ity", "y", "al"};
        for (String suffix : suffixes) {
            if (lowered.endsWith(suffix)) {
                templates.add("shape:" + suffix);
                return;
            }
        }
    }
}

