package de.fu_berlin.ties.extract;

import de.fu_berlin.ties.ContextMap;
import de.fu_berlin.ties.ProcessingException;
import de.fu_berlin.ties.TiesConfiguration;
import de.fu_berlin.ties.classify.Classifier;
import de.fu_berlin.ties.classify.Prediction;
import de.fu_berlin.ties.combi.CombinationState;
import de.fu_berlin.ties.combi.CombinationStrategy;
import de.fu_berlin.ties.context.Representation;
import de.fu_berlin.ties.io.FieldContainer;
import de.fu_berlin.ties.text.TokenizerFactory;
import de.fu_berlin.ties.util.Util;
import de.fu_berlin.ties.xml.dom.TokenProcessor;
import de.fu_berlin.ties.xml.dom.TokenWalker;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import org.apache.commons.lang.builder.ToStringBuilder;
import org.dom4j.Document;
import org.dom4j.Element;

/* loaded from: input_file:de/fu_berlin/ties/extract/Extractor.class */
/**
 * Extractor that walks the tokens of a document, classifies each token and
 * assembles the per-token predictions into {@link Extraction}s.
 *
 * <p>The predicted extractions are kept in an instance field that is replaced
 * on every call to {@link #extract(Document)}; the same instance therefore
 * holds the state of a single extraction run at a time.
 */
public class Extractor extends ExtractorBase implements TokenProcessor {
    /** The string {@code "ext"}; presumably the file extension used for stored extractions (confirm against callers). */
    public static final String EXT_EXTRACTIONS = "ext";

    /** Extractions predicted during the current run; {@code null} until {@link #extract(Document)} is first called. */
    private ExtractionContainer predictedExtractions;

    /**
     * Creates an instance using the standard configuration
     * ({@link TiesConfiguration#CONF}).
     *
     * @param str passed through to the superclass constructor
     * @throws IllegalArgumentException declared by the delegated constructor
     * @throws ProcessingException declared by the delegated constructor
     */
    public Extractor(String str) throws IllegalArgumentException, ProcessingException {
        this(str, TiesConfiguration.CONF);
    }

    /**
     * Creates an instance; all arguments are forwarded to the superclass.
     *
     * @param str passed through to the superclass constructor
     * @param tiesConfiguration the configuration handed to the superclass
     * @throws IllegalArgumentException declared by the superclass constructor
     * @throws ProcessingException declared by the superclass constructor
     */
    public Extractor(String str, TiesConfiguration tiesConfiguration) throws IllegalArgumentException, ProcessingException {
        super(str, tiesConfiguration);
    }

    /**
     * Creates an instance; all arguments are forwarded to the superclass.
     *
     * @param str passed through to the superclass constructor
     * @param file passed through to the superclass constructor
     * @param tiesConfiguration the configuration handed to the superclass
     * @throws IllegalArgumentException declared by the superclass constructor
     * @throws ProcessingException declared by the superclass constructor
     */
    public Extractor(String str, File file, TiesConfiguration tiesConfiguration) throws IllegalArgumentException, ProcessingException {
        super(str, file, tiesConfiguration);
    }

    /**
     * Creates an instance that re-uses the target structure, classifier,
     * representation, combination strategy, tokenizer factory and
     * configuration of the given trainer.
     *
     * @param str passed through to the delegated constructor
     * @param trainer the trainer whose components are re-used
     */
    public Extractor(String str, Trainer trainer) {
        this(str, trainer.getTargetStructure(), trainer.getClassifier(), trainer.getRepresentation(), trainer.getStrategy(), trainer.getFactory(), trainer.getConfig());
    }

    /**
     * Creates an instance from explicitly supplied components; all arguments
     * are forwarded to the superclass.
     *
     * @param str passed through to the superclass constructor
     * @param targetStructure the target structure to use
     * @param classifier the classifier to use
     * @param representation the context representation to use
     * @param combinationStrategy the combination strategy to use
     * @param tokenizerFactory the tokenizer factory to use
     * @param tiesConfiguration the configuration to use
     */
    public Extractor(String str, TargetStructure targetStructure, Classifier classifier, Representation representation, CombinationStrategy combinationStrategy, TokenizerFactory tokenizerFactory, TiesConfiguration tiesConfiguration) {
        super(str, targetStructure, classifier, representation, combinationStrategy, tokenizerFactory, tiesConfiguration);
    }

    /**
     * Returns the container of extractions predicted in the current run.
     *
     * @return the container; {@code null} if {@link #extract(Document)} has
     *         not been called yet
     */
    protected ExtractionContainer getPredictedExtractions() {
        return this.predictedExtractions;
    }

    /**
     * Extracts items from a document: re-initializes the inherited fields,
     * starts a fresh extraction container and walks all tokens of the
     * document, which calls back into
     * {@link #processToken(Element, String, String, String, int, boolean, ContextMap)}.
     *
     * @param document the document to process
     * @return the container holding the predicted extractions
     * @throws IOException declared by the token walk
     * @throws ProcessingException declared by the token walk
     */
    public ExtractionContainer extract(Document document) throws IOException, ProcessingException {
        initFields();
        this.predictedExtractions = new ExtractionContainer(getTargetStructure());

        final TokenWalker walker = new TokenWalker(this, getFactory());
        walker.walk(document, null);

        Util.LOG.debug("Finished extracting based on " + getClassifier());
        return getPredictedExtractions();
    }

    /**
     * Extracts items from the document and serializes them: the extractions
     * are stored in a newly created {@link FieldContainer}, which is then
     * written to the given writer.
     *
     * @param document the document to process
     * @param writer the writer that receives the serialized extractions
     * @param contextMap not used by this implementation
     * @throws IOException declared by extraction or storing
     * @throws ProcessingException declared by extraction
     */
    @Override // de.fu_berlin.ties.DocumentReader
    public void process(Document document, Writer writer, ContextMap contextMap) throws IOException, ProcessingException {
        final ExtractionContainer extractions = extract(document);
        final FieldContainer fields = FieldContainer.createFieldContainer();
        extractions.storeEntries(fields);
        fields.store(writer);
    }

    /**
     * Classifies a single token and updates the predicted extractions
     * accordingly. Depending on the translated combination state the token
     * either lies outside of any extraction (an unsealed last extraction is
     * sealed), begins a new extraction, or continues the last one.
     *
     * @param element the element containing the token
     * @param str forwarded to {@code updateState}; presumably the text left
     *        of the token (confirm against {@code TokenProcessor})
     * @param str2 the token text; it becomes part of the extraction
     * @param str3 forwarded to {@code updateState}; presumably the text right
     *        of the token (confirm against {@code TokenProcessor})
     * @param i forwarded to the {@link Extraction} constructor when a new
     *        extraction begins; presumably the token's index or repetition
     *        count (confirm against {@code TokenProcessor})
     * @param z forwarded to {@code Extraction.append}; presumably whether
     *        whitespace precedes the token (confirm)
     * @param contextMap not used by this implementation
     * @throws ProcessingException declared by the classification steps
     * @throws IllegalStateException if the state asks to continue an
     *         extraction but there is none, or the last extraction has a
     *         different type
     */
    @Override // de.fu_berlin.ties.xml.dom.TokenProcessor
    public void processToken(Element element, String str, String str2, String str3, int i, boolean z, ContextMap contextMap) throws ProcessingException {
        updateState(element, str, str2, str3);

        final Prediction prediction = getClassifier().classify(getFeatures(), getActiveClasses()).best();
        final CombinationState state = getStrategy().translateResult(prediction.getType());
        Util.LOG.debug("Predicted type: '" + prediction.getType() + "'; translated state: " + state);

        if (state.getType() == null) {
            // Token is outside of any extraction: close a still open one.
            final Extraction open = getPredictedExtractions().last();
            if (open != null && !open.isSealed()) {
                open.seal();
            }
        } else if (state.isBegin()) {
            // Token starts a new extraction, which is also recorded as a prior recognition.
            final Extraction started = new Extraction(state.getType(), prediction.getProbability(), prediction.getPR(), str2, i);
            getPredictedExtractions().add(started);
            getPriorRecognitions().add(started);
        } else {
            // Token continues the most recent extraction.
            final Extraction current = getPredictedExtractions().last();
            if (current == null) {
                // last() may return null (see the guard in the first branch);
                // fail with a descriptive error instead of a bare NullPointerException.
                throw new IllegalStateException("No extraction to continue for state " + state);
            }
            if (!current.getType().equals(state.getType())) {
                throw new IllegalStateException("Type mismatch: " + state + " cannot continue extraction " + current);
            }
            current.append(str2, z, prediction.getProbability(), prediction.getPR());
        }

        getStrategy().updateState(state);
    }

    /**
     * Returns a string representation consisting of the superclass
     * representation plus the currently predicted extractions.
     *
     * @return a textual representation of this object
     */
    @Override // de.fu_berlin.ties.extract.ExtractorBase, de.fu_berlin.ties.TextProcessor
    public String toString() {
        return new ToStringBuilder(this).appendSuper(super.toString()).append("predicted extractions", this.predictedExtractions).toString();
    }
}
