/*
 * Decompiled with CFR 0.152.
 */
package weka.core.tokenizers.cleaners;

import java.util.regex.Pattern;
import weka.core.tokenizers.cleaners.AbstractTokenCleaner;

/**
 * Token cleaner that collapses URL-like tokens into a single placeholder.
 *
 * <p>A token is treated as a URL when the <em>entire</em> token matches
 * {@link #PATTERN}: a {@code mailto:} prefix, or one of the schemes
 * {@code news}, {@code http}, {@code https}, {@code ftp}, {@code ftps}
 * followed by {@code ://}, and then at least one non-whitespace character.
 * Such tokens are replaced by {@link #URL}; every other token is returned
 * untouched.
 */
public class NormalizeURLs
extends AbstractTokenCleaner {
    /** Serialization version identifier. */
    private static final long serialVersionUID = -7758011723883830212L;

    /** Placeholder handed back for any token that matches {@link #PATTERN}. */
    public static final String URL = "http://url";

    /** Regular expression a whole token has to match to count as a URL. */
    public static final String PATTERN = "((mailto\\:|(news|(ht|f)tp(s?))\\://){1}\\S+)";

    /**
     * Compiled form of {@link #PATTERN}. Declared transient, built on first
     * use in {@link #clean(String)} and dropped again by {@link #reset()}.
     */
    protected transient Pattern m_Pattern = null;

    /**
     * Returns a short description of this cleaner.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Replaces all urls with the same dummy url: http://url";
    }

    /**
     * Delegates to the superclass reset and then discards the cached
     * pattern, so the next {@link #clean(String)} call compiles it afresh.
     */
    protected void reset() {
        super.reset();
        this.m_Pattern = null;
    }

    /**
     * Replaces the token with {@link #URL} if it is a URL.
     *
     * @param token the token to inspect
     * @return {@link #URL} when the complete token matches {@link #PATTERN},
     *         otherwise the token itself
     */
    public String clean(String token) {
        Pattern compiled = this.m_Pattern;
        if (compiled == null) {
            // Lazy initialization: nothing is compiled until the first token arrives.
            compiled = Pattern.compile(PATTERN);
            this.m_Pattern = compiled;
        }
        // matches() demands a full-token match, not merely a URL somewhere inside it.
        return compiled.matcher(token).matches() ? URL : token;
    }
}

