package de.fu_berlin.ties.text;

import de.fu_berlin.ties.TiesConfiguration;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.builder.ToStringBuilder;

/* loaded from: input_file:de/fu_berlin/ties/text/TokenizerFactory.class */
/**
 * Creates {@link TextTokenizer} instances, either from a small set of
 * predefined pattern collections (the static {@code create...Tokenizer}
 * methods) or from token and whitespace patterns read from a
 * {@link TiesConfiguration} (the instance method {@link #createTokenizer}).
 *
 * <p>All patterns are regular expressions built on Unicode general
 * categories: {@code \p{L}} letters, {@code \p{M}} marks, {@code \p{N}}
 * numbers, {@code \p{S}} symbols, {@code \p{P}} punctuation,
 * {@code \p{Z}} separators and {@code \p{C}} control/other characters.
 */
public class TokenizerFactory {

    /** Configuration key under which the array of token patterns is stored. */
    public static final String CONFIG_TOKEN_PATTERNS = "tokenizer.pattern";

    /** Configuration key under which the whitespace pattern is stored. */
    public static final String CONFIG_WHITESPACE_PATTERN = "tokenizer.whitespace";

    /**
     * Whitespace pattern used by the predefined tokenizers: any (possibly
     * empty) run of separator ({@code \p{Z}}) and control/other
     * ({@code \p{C}}) characters.
     */
    public static final String WHITESPACE_CONTROL_OTHER = "[\\p{Z}\\p{C}]*";

    /** Token patterns read from the configuration. */
    private final String[] tokenPatterns;

    /** Whitespace pattern read from the configuration. */
    private final String whitespacePattern;

    /**
     * Creates a tokenizer whose token patterns are, in this order: a run of
     * letters, marks and numbers mixed together; a run of symbols; and a
     * single punctuation character followed by any number of repetitions of
     * that same character.
     *
     * @param charSequence the text handed to the tokenizer
     * @return the new tokenizer
     */
    public static TextTokenizer createAlnumTokenizer(CharSequence charSequence) {
        final String[] patterns = {
            "([\\p{L}\\p{M}\\p{N}]+)",
            "\\p{S}+",
            "(\\p{P})\\1*",
        };
        return new TextTokenizer(patterns, WHITESPACE_CONTROL_OTHER, charSequence);
    }

    /**
     * Creates a tokenizer whose token patterns are, in this order: a run of
     * letters and marks; a run of numbers; a run of symbols; and a run of
     * (possibly differing) punctuation characters.
     *
     * @param charSequence the text handed to the tokenizer
     * @return the new tokenizer
     */
    public static TextTokenizer createCategoryTokenizer(CharSequence charSequence) {
        final String[] patterns = {
            "([\\p{L}\\p{M}]+)",
            "(\\p{N}+)",
            "\\p{S}+",
            "\\p{P}+",
        };
        return new TextTokenizer(patterns, WHITESPACE_CONTROL_OTHER, charSequence);
    }

    /**
     * Creates a tokenizer whose token patterns are, in this order: a run of
     * letters and marks; a run of numbers; a run of math symbols
     * ({@code \p{Sm}}); a run of currency symbols ({@code \p{Sc}}); a run of
     * modifier and other symbols ({@code \p{Sk}}, {@code \p{So}}); and a
     * single punctuation character followed by any number of repetitions of
     * that same character.
     *
     * @param charSequence the text handed to the tokenizer
     * @return the new tokenizer
     */
    public static TextTokenizer createThoroughTokenizer(CharSequence charSequence) {
        final String[] patterns = {
            "[\\p{L}\\p{M}]+",
            "\\p{N}+",
            "\\p{Sm}+",
            "\\p{Sc}+",
            "[\\p{Sk}\\p{So}]+",
            "(\\p{P})\\1*",
        };
        return new TextTokenizer(patterns, WHITESPACE_CONTROL_OTHER, charSequence);
    }

    /**
     * Creates a factory from the given configuration, passing {@code null}
     * as the second argument of
     * {@link #TokenizerFactory(TiesConfiguration, String)}.
     *
     * @param tiesConfiguration the configuration to read the patterns from
     */
    public TokenizerFactory(TiesConfiguration tiesConfiguration) {
        this(tiesConfiguration, null);
    }

    /**
     * Creates a factory that reads its token patterns and whitespace pattern
     * from the given configuration. Both configuration keys are first passed
     * through {@code TiesConfiguration.adaptKey} together with {@code str}.
     *
     * @param tiesConfiguration the configuration to read the patterns from
     * @param str second argument handed to {@code adaptKey}; may be
     *        {@code null} (the one-argument constructor passes {@code null}).
     *        NOTE(review): presumably a suffix or qualifier selecting a
     *        specialised variant of the keys -- confirm against
     *        {@code TiesConfiguration.adaptKey}
     */
    public TokenizerFactory(TiesConfiguration tiesConfiguration, String str) {
        final String tokenKey = tiesConfiguration.adaptKey(CONFIG_TOKEN_PATTERNS, str);
        final String whitespaceKey = tiesConfiguration.adaptKey(CONFIG_WHITESPACE_PATTERN, str);
        this.tokenPatterns = tiesConfiguration.getStringArray(tokenKey);
        this.whitespacePattern = tiesConfiguration.getString(whitespaceKey);
    }

    /**
     * Creates a tokenizer that uses the token patterns and whitespace
     * pattern this factory read from its configuration.
     *
     * @param charSequence the text handed to the tokenizer
     * @return the new tokenizer
     */
    public TextTokenizer createTokenizer(CharSequence charSequence) {
        return new TextTokenizer(this.tokenPatterns, this.whitespacePattern, charSequence);
    }

    /**
     * Returns a string representation listing the configured token patterns
     * and the whitespace pattern.
     *
     * @return a textual representation of this factory
     */
    @Override
    public String toString() {
        return new ToStringBuilder(this)
            .append("token patterns", ArrayUtils.toString(this.tokenPatterns))
            .append("whitespace pattern", this.whitespacePattern)
            .toString();
    }
}
