/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.minorthird.text;

import edu.cmu.minorthird.text.Document;
import edu.cmu.minorthird.text.FancyLoader;
import edu.cmu.minorthird.text.TextToken;
import edu.cmu.minorthird.text.Tokenizer;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;

/**
 * A {@link Tokenizer} that finds tokens by repeatedly applying a regular
 * expression to the text and taking capture group 1 of every match as a token.
 *
 * <p>The class-wide default expression is resolved once, when the class is
 * initialized, in this order of precedence:
 * <ol>
 *   <li>the {@value #TOKEN_REGEX_PROP} entry of a {@code token.properties}
 *       resource found on the classpath,</li>
 *   <li>the JVM system property of the same name,</li>
 *   <li>{@link #TOKEN_REGEX_DEFAULT_VALUE}.</li>
 * </ol>
 *
 * <p>Any pattern supplied to this class must define capture group 1, because
 * both {@code splitIntoTokens} overloads read that group.
 */
public class RegexTokenizer
implements Tokenizer {
    private static final Logger log = Logger.getLogger(RegexTokenizer.class);

    /** Name of the property (in token.properties or the system properties) holding the token regex. */
    public static final String TOKEN_REGEX_PROP = "edu.cmu.minorthird.tokenRegex";

    /**
     * Fallback token regex: group 1 is a run of digits, a run of ASCII letters,
     * or a single non-word character; whitespace around it is consumed by the match.
     */
    public static final String TOKEN_REGEX_DEFAULT_VALUE = "\\s*([0-9]+|[a-zA-Z]+|\\W)\\s*";

    /** Default regex for new tokenizers; assigned by the static initializer below. */
    public static String standardTokenRegexPattern;

    /** Regex used by this instance; starts out as {@link #standardTokenRegexPattern}. */
    public String regexPattern = standardTokenRegexPattern;

    // Resolve the class-wide default regex. Must stay textually after the
    // declaration of 'log', which it uses.
    static {
        Properties props = new Properties();
        InputStream in = null;
        try {
            in = FancyLoader.class.getClassLoader().getResourceAsStream("token.properties");
            if (in != null) {
                props.load(in);
                log.debug("loaded properties from stream " + in);
            } else {
                log.info("no token.properties found on classpath");
            }
        }
        catch (Exception ex) {
            // Best effort: a missing or unreadable file just means we fall back
            // to the system property / built-in default below.
            log.debug("can't open token.properties:" + ex, ex);
        }
        finally {
            // Properties.load() leaves the stream open, so release it here.
            if (in != null) {
                try {
                    in.close();
                }
                catch (Exception closeFailure) {
                    log.debug("can't close token.properties stream:" + closeFailure);
                }
            }
        }
        standardTokenRegexPattern = props.getProperty(TOKEN_REGEX_PROP, System.getProperty(TOKEN_REGEX_PROP, TOKEN_REGEX_DEFAULT_VALUE));
        log.info("tokenization regex: " + standardTokenRegexPattern);
    }

    /** Creates a tokenizer that uses {@link #standardTokenRegexPattern}. */
    public RegexTokenizer() {
    }

    /**
     * Creates a tokenizer that uses the given regular expression.
     *
     * @param pattern regex whose capture group 1 delimits each token
     */
    public RegexTokenizer(String pattern) {
        this.regexPattern = pattern;
    }

    /**
     * Splits a string into tokens.
     *
     * @param string2 the text to tokenize
     * @return the text of capture group 1 for each successive match, in order
     *         of appearance; empty if the regex never matches
     */
    public String[] splitIntoTokens(String string2) {
        ArrayList<String> tokens = new ArrayList<String>();
        Matcher matcher = this.matcherFor(string2);
        while (matcher.find()) {
            tokens.add(matcher.group(1));
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Splits the text of a document into tokens that refer back to the document.
     *
     * @param document the document whose text (from {@code getText()}) is tokenized
     * @return one {@link TextToken} per match, built from the document, the start
     *         offset of capture group 1, and the length of that group
     */
    public TextToken[] splitIntoTokens(Document document) {
        ArrayList<TextToken> tokens = new ArrayList<TextToken>();
        Matcher matcher = this.matcherFor(document.getText());
        while (matcher.find()) {
            int start = matcher.start(1);
            int length = matcher.end(1) - start;
            tokens.add(new TextToken(document, start, length));
        }
        return tokens.toArray(new TextToken[tokens.size()]);
    }

    /**
     * Compiles the current {@link #regexPattern} and binds it to the given text.
     * The pattern is compiled on each call because {@code regexPattern} is a
     * public field that callers may reassign at any time.
     */
    private Matcher matcherFor(String text) {
        return Pattern.compile(this.regexPattern).matcher(text);
    }
}

