package de.fu_berlin.ties.text;

import de.fu_berlin.ties.classify.feature.SBPHTransformer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.commons.lang.builder.ToStringBuilder;

/* loaded from: input_file:de/fu_berlin/ties/text/TextTokenizer.class */
/**
 * Splits a text into tokens using regular expressions. Everything matched by
 * one of the token patterns is a token; the text between two consecutive
 * tokens is treated as the "whitespace" preceding the later token and can
 * optionally be validated against a separate whitespace pattern.
 *
 * <p>Instances keep mutable matcher state and contain no synchronization, so
 * a single instance must not be used by several threads at once without
 * external locking.
 */
public class TextTokenizer {
    /** Message used when a query method is called before {@link #nextToken()}. */
    private static final String PRIOR_CALL_REQUIRED = "Prior call to nextToken() required";

    /** End offset of the match preceding the last one (0 if there is none). */
    private int afterPenultimateMatch;

    /** Lazily extracted text between the penultimate and the last match. */
    private String precedingWhitespace;

    /**
     * Start offset of the last match; the text length when the last search
     * failed; negative as long as {@link #nextToken()} has not been called.
     */
    private int startOfLastMatch;

    /** Whether the last call to {@link #nextToken()} actually found a token. */
    private boolean lastTokenFound;

    /** The text that is currently being tokenized. */
    private CharSequence textToTokenize;

    /** Matcher finding the tokens in {@link #textToTokenize}. */
    private final Matcher tokenMatcher;

    /** Matcher used to validate and measure whitespace in arbitrary strings. */
    private final Matcher whitespaceMatcher;

    /**
     * Matcher for the whitespace pattern anchored at the end of input; created
     * on first use by {@link #trailingWhitespaceCount(String)}.
     */
    private Matcher trailingWhitespaceMatcher;

    /** String prepended to tokens instead of the actual preceding whitespace. */
    private String normalizedWhitespace = SBPHTransformer.DEFAULT_SEPARATOR;

    /** Whether {@link #normalizedWhitespace} is prepended to returned tokens. */
    private boolean normalizedWhitespacePrepended = false;

    /** Whether text between tokens must match the whitespace pattern. */
    private boolean whitespacePatternEnsured = false;

    /**
     * Creates a new tokenizer. Both patterns are compiled in
     * {@link Pattern#DOTALL} mode.
     *
     * @param strArr the token patterns; several patterns are combined into a
     *        single alternation, earlier entries taking precedence
     * @param str the pattern that whitespace between tokens should match
     * @param charSequence the text to tokenize
     * @throws PatternSyntaxException if a pattern cannot be compiled
     */
    public TextTokenizer(String[] strArr, String str, CharSequence charSequence) throws PatternSyntaxException {
        final boolean severalPatterns = strArr.length > 1;
        final StringBuffer joined = new StringBuffer();

        // Several alternatives are wrapped in a single non-capturing group.
        if (severalPatterns) {
            joined.append("(?:");
        }
        for (int i = 0; i < strArr.length; i++) {
            if (i > 0) {
                joined.append("|");
            }
            joined.append(strArr[i]);
        }
        if (severalPatterns) {
            joined.append(")");
        }

        this.textToTokenize = charSequence;
        this.tokenMatcher = Pattern.compile(joined.toString(), Pattern.DOTALL).matcher(charSequence);
        this.whitespaceMatcher = Pattern.compile(str, Pattern.DOTALL).matcher("");
        reinit();
    }

    /**
     * Concatenates the contents of all capturing groups of the last token
     * match, skipping groups that did not participate in the match.
     *
     * @return the joined captured text; the empty string if the token pattern
     *         has no capturing groups
     * @throws IllegalStateException raised by the underlying matcher if the
     *         pattern has capturing groups but there is no current match
     */
    public final String capturedText() {
        final StringBuffer captured = new StringBuffer();
        final int groups = this.tokenMatcher.groupCount();
        for (int i = 1; i <= groups; i++) {
            final String group = this.tokenMatcher.group(i);
            if (group != null) {
                captured.append(group);
            }
        }
        return captured.toString();
    }

    /**
     * Fails unless {@link #nextToken()} has been called since construction or
     * the last reset.
     *
     * @throws IllegalStateException if no token has been requested yet
     */
    private void requirePriorToken() throws IllegalStateException {
        if (this.startOfLastMatch < 0) {
            throw new IllegalStateException(PRIOR_CALL_REQUIRED);
        }
    }

    /**
     * Extracts (and caches) the text between the penultimate and the last
     * match without validating it.
     *
     * @throws IllegalStateException if no token has been requested yet
     */
    private void doFindPrecedingWhitespace() throws IllegalStateException {
        requirePriorToken();
        if (this.precedingWhitespace == null) {
            this.precedingWhitespace = this.textToTokenize.subSequence(
                    this.afterPenultimateMatch, this.startOfLastMatch).toString();
        }
    }

    /**
     * Extracts the preceding whitespace and, if the whitespace pattern is
     * ensured, verifies that it matches that pattern.
     *
     * @throws IllegalStateException if no token has been requested yet
     * @throws IllegalArgumentException if validation is enabled and fails
     */
    private void findPrecedingWhitespace() throws IllegalStateException, IllegalArgumentException {
        doFindPrecedingWhitespace();
        if (isWhitespacePatternEnsured() && !precedingWhitespaceIsValid()) {
            throw new IllegalArgumentException("Supposed whitespace '" + this.precedingWhitespace
                    + "' between position " + this.afterPenultimateMatch
                    + " and " + this.startOfLastMatch
                    + " doesn't match specified pattern " + this.whitespaceMatcher.pattern().pattern());
        }
    }

    /**
     * Returns the string that can be prepended to tokens in place of the
     * actual preceding whitespace.
     *
     * @return the normalized whitespace string
     */
    public final String getNormalizedWhitespace() {
        return this.normalizedWhitespace;
    }

    /**
     * Checks whether there is any text between the penultimate and the last
     * match.
     *
     * @return {@code true} iff the preceding whitespace is non-empty
     * @throws IllegalStateException if no token has been requested yet
     * @throws IllegalArgumentException if the whitespace pattern is ensured
     *         and the preceding text does not match it
     */
    public final boolean hasPrecedingWhitespace() throws IllegalStateException, IllegalArgumentException {
        findPrecedingWhitespace();
        return this.precedingWhitespace.length() > 0;
    }

    /**
     * Counts the characters at the start of a string that match the
     * whitespace pattern.
     *
     * @param str the string to inspect
     * @return the length of the leading whitespace; 0 if there is none
     */
    public int initialWhitespaceCount(String str) {
        this.whitespaceMatcher.reset(str);
        return this.whitespaceMatcher.lookingAt() ? this.whitespaceMatcher.end() : 0;
    }

    /**
     * Tells whether the normalized whitespace string is prepended to tokens
     * that are preceded by whitespace.
     *
     * @return the current setting
     */
    public final boolean isNormalizedWhitespacePrepended() {
        return this.normalizedWhitespacePrepended;
    }

    /**
     * Checks whether a string is acceptable as whitespace: it must be empty
     * or match the whitespace pattern completely.
     *
     * @param str the string to check
     * @return {@code true} iff the string is empty or matches the pattern
     */
    public boolean isValidWhitespace(String str) {
        if (str.length() <= 0) {
            return true;
        }
        this.whitespaceMatcher.reset(str);
        return this.whitespaceMatcher.matches();
    }

    /**
     * Tells whether text between tokens is required to match the whitespace
     * pattern.
     *
     * @return the current setting
     */
    public final boolean isWhitespacePatternEnsured() {
        return this.whitespacePatternEnsured;
    }

    /**
     * Returns the text from the start of the input up to the start of the
     * last token; the whole text if the last search found no token.
     *
     * @return the text left of the last token
     * @throws IllegalStateException if no token has been requested yet
     */
    public CharSequence leftText() throws IllegalStateException {
        requirePriorToken();
        return this.textToTokenize.subSequence(0, this.startOfLastMatch);
    }

    /**
     * Advances to the next token.
     *
     * @return the next token, prefixed by the normalized whitespace if that
     *         option is enabled and the token is preceded by whitespace;
     *         {@code null} if no further token exists
     * @throws IllegalArgumentException if normalized whitespace is prepended,
     *         the whitespace pattern is ensured and the text preceding the
     *         token does not match it
     */
    public final String nextToken() throws IllegalArgumentException {
        this.precedingWhitespace = null;

        // The end of the previous match is only defined if that match succeeded.
        if (this.lastTokenFound) {
            this.afterPenultimateMatch = this.tokenMatcher.end();
        }

        this.lastTokenFound = this.tokenMatcher.find();
        if (!this.lastTokenFound) {
            // Everything after the final token counts as trailing text.
            this.startOfLastMatch = this.textToTokenize.length();
            return null;
        }

        final String token = this.tokenMatcher.group();
        this.startOfLastMatch = this.tokenMatcher.start();
        if (isNormalizedWhitespacePrepended() && hasPrecedingWhitespace()) {
            return getNormalizedWhitespace() + token;
        }
        return token;
    }

    /**
     * Returns the text between the penultimate and the last match (or between
     * the last match and the end of the text once no more tokens are found).
     *
     * @return the preceding whitespace, possibly empty
     * @throws IllegalStateException if no token has been requested yet
     * @throws IllegalArgumentException if the whitespace pattern is ensured
     *         and the preceding text does not match it
     */
    public final String precedingWhitespace() throws IllegalStateException, IllegalArgumentException {
        findPrecedingWhitespace();
        return this.precedingWhitespace;
    }

    /**
     * Checks whether the text preceding the last match is valid whitespace
     * according to {@link #isValidWhitespace(String)}.
     *
     * @return {@code true} iff the preceding text is empty or matches the
     *         whitespace pattern
     * @throws IllegalStateException if no token has been requested yet
     */
    public boolean precedingWhitespaceIsValid() throws IllegalStateException {
        doFindPrecedingWhitespace();
        return isValidWhitespace(this.precedingWhitespace);
    }

    /** Resets the bookkeeping to the state before the first token. */
    private void reinit() {
        this.startOfLastMatch = -1;
        this.afterPenultimateMatch = 0;
        this.precedingWhitespace = null;
        this.lastTokenFound = false;
    }

    /** Restarts tokenization of the current text from its beginning. */
    public final void reset() {
        this.tokenMatcher.reset();
        reinit();
    }

    /**
     * Switches to a new text and starts tokenizing it from its beginning.
     *
     * @param charSequence the new text to tokenize
     */
    public final void reset(CharSequence charSequence) {
        this.tokenMatcher.reset(charSequence);
        this.textToTokenize = charSequence;
        reinit();
    }

    /**
     * Returns the text following the last token; the empty sequence if the
     * last search found no token (the input is exhausted).
     *
     * @return the text right of the last token
     * @throws IllegalStateException if no token has been requested yet
     */
    public CharSequence rightText() throws IllegalStateException {
        requirePriorToken();
        final int length = this.textToTokenize.length();
        // Matcher.end() is only legal after a successful match.
        final int start = this.lastTokenFound ? this.tokenMatcher.end() : length;
        return this.textToTokenize.subSequence(start, length);
    }

    /**
     * Sets the string that can be prepended to tokens in place of the actual
     * preceding whitespace.
     *
     * @param str the new normalized whitespace string
     */
    public final void setNormalizedWhitespace(String str) {
        this.normalizedWhitespace = str;
    }

    /**
     * Enables or disables prepending the normalized whitespace string to
     * tokens that are preceded by whitespace.
     *
     * @param z the new setting
     */
    public final void setNormalizedWhitespacePrepended(boolean z) {
        this.normalizedWhitespacePrepended = z;
    }

    /**
     * Enables or disables validation of the text between tokens against the
     * whitespace pattern.
     *
     * @param z the new setting
     */
    public final void setWhitespacePatternEnsured(boolean z) {
        this.whitespacePatternEnsured = z;
    }

    /**
     * Describes the configuration of this tokenizer: both patterns, the
     * validation flag and, if it is prepended, the normalized whitespace.
     *
     * @return a textual representation of this object
     */
    public String toString() {
        final ToStringBuilder builder = new ToStringBuilder(this)
                .append("token pattern", this.tokenMatcher.pattern().pattern())
                .append("whitespace pattern", this.whitespaceMatcher.pattern().pattern())
                .append("whitespace pattern ensured", this.whitespacePatternEnsured);
        if (this.normalizedWhitespacePrepended) {
            builder.append("normalized whitespace (is prepended)", this.normalizedWhitespace);
        }
        return builder.toString();
    }

    /**
     * Counts the characters at the end of a string that match the whitespace
     * pattern.
     *
     * @param str the string to inspect
     * @return the length of the trailing whitespace; 0 if there is none
     */
    public int trailingWhitespaceCount(String str) {
        if (this.trailingWhitespaceMatcher == null) {
            final Pattern whitespacePattern = this.whitespaceMatcher.pattern();
            // Group the pattern so that the end anchor applies to all of its
            // alternatives, and keep the flags of the original pattern.
            this.trailingWhitespaceMatcher = Pattern.compile(
                    "(?:" + whitespacePattern.pattern() + ")\\z",
                    whitespacePattern.flags()).matcher(str);
        } else {
            this.trailingWhitespaceMatcher.reset(str);
        }
        return this.trailingWhitespaceMatcher.find()
                ? this.trailingWhitespaceMatcher.group().length()
                : 0;
    }
}
