package weka.filters.unsupervised.instance.multirowprocessor.selection;

import adams.core.Index;
import adams.core.base.BaseObject;
import adams.core.base.BaseRegExp;
import adams.data.weka.WekaAttributeIndex;
import gnu.trove.list.TIntList;
import gnu.trove.list.array.TIntArrayList;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import java.util.regex.Pattern;
import weka.core.Instances;
import weka.core.WekaOptionUtils;
import weka.filters.unsupervised.attribute.EquiDistance;
import weka.filters.unsupervised.attribute.NominalToNumeric;

/* loaded from: input_file:weka/filters/unsupervised/instance/multirowprocessor/selection/GroupExpression.class */
public class GroupExpression extends AbstractRowSelection {
    private static final long serialVersionUID = -8519118208205929299L;
    public static final String DEFAULT_GROUP = "$0";
    protected static String INDEX = NominalToNumeric.INDEX;
    protected static String REGEXP = EquiDistance.REGEXP;
    protected static String GROUP = "group";
    protected WekaAttributeIndex m_Index = getDefaultIndex();
    protected BaseRegExp m_RegExp = getDefaultRegExp();
    protected String m_Group = DEFAULT_GROUP;

    @Override // weka.filters.unsupervised.instance.multirowprocessor.AbstractMultiRowProcessorPlugin
    public String globalInfo() {
        return "Identifies groups in strings using regular expressions.\nIf the group attribute is numeric, then the values get turned into strings first.";
    }

    @Override // weka.filters.unsupervised.instance.multirowprocessor.AbstractMultiRowProcessorPlugin
    public Enumeration listOptions() {
        Vector vector = new Vector();
        WekaOptionUtils.addOption(vector, indexTipText(), (Index) getDefaultIndex(), INDEX);
        WekaOptionUtils.addOption(vector, regExpTipText(), getDefaultRegExp().getValue(), REGEXP);
        WekaOptionUtils.addOption(vector, groupTipText(), DEFAULT_GROUP, GROUP);
        WekaOptionUtils.add(vector, super.listOptions());
        return WekaOptionUtils.toEnumeration(vector);
    }

    @Override // weka.filters.unsupervised.instance.multirowprocessor.AbstractMultiRowProcessorPlugin
    public void setOptions(String[] strArr) throws Exception {
        setIndex((WekaAttributeIndex) WekaOptionUtils.parse(strArr, INDEX, (Index) getDefaultIndex()));
        setRegExp((BaseRegExp) WekaOptionUtils.parse(strArr, REGEXP, (BaseObject) getDefaultRegExp()));
        setGroup(WekaOptionUtils.parse(strArr, GROUP, DEFAULT_GROUP));
        super.setOptions(strArr);
    }

    @Override // weka.filters.unsupervised.instance.multirowprocessor.AbstractMultiRowProcessorPlugin
    public String[] getOptions() {
        ArrayList arrayList = new ArrayList();
        WekaOptionUtils.add((List<String>) arrayList, INDEX, (Index) getIndex());
        WekaOptionUtils.add((List<String>) arrayList, REGEXP, (BaseObject) getRegExp());
        WekaOptionUtils.add((List<String>) arrayList, GROUP, getGroup());
        WekaOptionUtils.add(arrayList, super.getOptions());
        return WekaOptionUtils.toArray(arrayList);
    }

    protected WekaAttributeIndex getDefaultIndex() {
        return new WekaAttributeIndex("first");
    }

    public void setIndex(WekaAttributeIndex wekaAttributeIndex) {
        this.m_Index = wekaAttributeIndex;
        reset();
    }

    public WekaAttributeIndex getIndex() {
        return this.m_Index;
    }

    public String indexTipText() {
        return "The index of the attribute to determine the group from.";
    }

    protected BaseRegExp getDefaultRegExp() {
        return new BaseRegExp(".*");
    }

    public void setRegExp(BaseRegExp baseRegExp) {
        this.m_RegExp = baseRegExp;
        reset();
    }

    public BaseRegExp getRegExp() {
        return this.m_RegExp;
    }

    public String regExpTipText() {
        return "The regular expression for identifying the group (eg '^(.*)-([0-9]+)-(.*)$').";
    }

    public void setGroup(String str) {
        this.m_Group = str;
        reset();
    }

    public String getGroup() {
        return this.m_Group;
    }

    public String groupTipText() {
        return "The replacement string to use as group (eg '$2').";
    }

    @Override // weka.filters.unsupervised.instance.multirowprocessor.selection.AbstractRowSelection
    protected List<int[]> doSelectRows(Instances instances) throws Exception {
        this.m_Index.setData(instances);
        int intIndex = this.m_Index.getIntIndex();
        if (intIndex == -1) {
            throw new Exception("Group attribute not found: " + this.m_Index.getIndex());
        }
        boolean isNumeric = instances.attribute(intIndex).isNumeric();
        ArrayList arrayList = new ArrayList();
        HashMap hashMap = new HashMap();
        boolean equals = this.m_Group.equals(DEFAULT_GROUP);
        for (int i = 0; i < instances.numInstances(); i++) {
            String stringValue = isNumeric ? instances.instance(i).value(intIndex) : instances.instance(i).stringValue(intIndex);
            if (!equals) {
                stringValue = stringValue.replaceAll(this.m_RegExp.getValue(), this.m_Group);
            }
            if (!hashMap.containsKey(stringValue)) {
                hashMap.put(stringValue, new TIntArrayList());
            }
            ((TIntList) hashMap.get(stringValue)).add(i);
        }
        if (getDebug()) {
            debugMsg("Groups: " + hashMap);
        }
        Iterator it = hashMap.keySet().iterator();
        while (it.hasNext()) {
            arrayList.add(((TIntList) hashMap.get((String) it.next())).toArray());
        }
        Collections.sort(arrayList, Comparator.comparingInt(iArr -> {
            return iArr[0];
        }));
        return arrayList;
    }
}
