package de.fu_berlin.ties.text;

import de.fu_berlin.ties.TiesConfiguration;
import de.fu_berlin.ties.classify.feature.DefaultFeatureVector;
import de.fu_berlin.ties.classify.feature.FeatureExtractor;
import de.fu_berlin.ties.classify.feature.FeatureVector;
import de.fu_berlin.ties.io.IOUtils;
import java.io.IOException;
import java.io.Reader;
import org.apache.commons.lang.builder.ToStringBuilder;

/* loaded from: input_file:de/fu_berlin/ties/text/TokenizingExtractor.class */
/**
 * A {@link FeatureExtractor} that turns the complete text supplied by a
 * reader into a feature vector by running it through a single
 * {@link TextTokenizer} that is created once, at construction time.
 */
public class TokenizingExtractor implements FeatureExtractor {
    /** The tokenizer handed to the feature vector when adding tokens. */
    private final TextTokenizer tokenizer;

    /**
     * Creates a new instance whose tokenizer is obtained from a
     * {@link TokenizerFactory} built with the two given arguments.
     *
     * @param tiesConfiguration configuration passed on to the tokenizer factory
     * @param str second argument passed on to the tokenizer factory
     *        (NOTE(review): presumably a configuration suffix; confirm
     *        against {@code TokenizerFactory})
     */
    public TokenizingExtractor(TiesConfiguration tiesConfiguration, String str) {
        final TokenizerFactory factory = new TokenizerFactory(tiesConfiguration, str);
        // The factory is always asked for a tokenizer with an empty string argument.
        this.tokenizer = factory.createTokenizer("");
    }

    /**
     * Reads the whole content of the reader into a string and adds all of
     * its tokens, as produced by this extractor's tokenizer, to a newly
     * created {@link DefaultFeatureVector}.
     *
     * @param reader source of the text to tokenize
     * @return the newly created feature vector
     * @throws IOException if the underlying read reports an I/O error
     */
    @Override
    public FeatureVector buildFeatures(Reader reader) throws IOException {
        // Create the vector before touching the reader, as the original did.
        final DefaultFeatureVector features = new DefaultFeatureVector();
        final String text = IOUtils.readToString(reader);
        features.addAllTokens(text, this.tokenizer);
        return features;
    }

    /**
     * Returns the tokenizer used by this extractor.
     *
     * @return the tokenizer created in the constructor
     */
    public TextTokenizer getTokenizer() {
        return this.tokenizer;
    }

    /**
     * Returns a string representation of this object that includes the
     * tokenizer under the field name {@code tokenizer}.
     *
     * @return the string built by {@link ToStringBuilder}
     */
    @Override
    public String toString() {
        final ToStringBuilder description = new ToStringBuilder(this);
        description.append("tokenizer", this.tokenizer);
        return description.toString();
    }
}
