package de.fu_berlin.ties.extract;

import de.fu_berlin.ties.ContextMap;
import de.fu_berlin.ties.ProcessingException;
import de.fu_berlin.ties.TiesConfiguration;
import de.fu_berlin.ties.text.TokenDetails;
import de.fu_berlin.ties.text.TokenizerFactory;
import de.fu_berlin.ties.util.Util;
import de.fu_berlin.ties.xml.dom.TokenProcessor;
import de.fu_berlin.ties.xml.dom.TokenWalker;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.commons.lang.StringUtils;
import org.dom4j.Document;
import org.dom4j.Element;

/* loaded from: input_file:de/fu_berlin/ties/extract/ExtractionMatcher.class */
/**
 * Locates a set of extractions in the token stream of a document and collects
 * the matches, in the order in which they are completed, in a result container.
 *
 * <p>The matcher registers itself as the {@link TokenProcessor} of a
 * {@link TokenWalker}; {@link #matchAndOrderExtractions} starts a walk and
 * {@link #processToken} handles each token. When a new match overlaps matches
 * already stored at the end of the result container, only the longer one is
 * kept (the earlier one on ties).
 *
 * <p>The state of a run is kept in instance fields that are replaced by every
 * call of {@link #matchAndOrderExtractions}, so an instance must not be used
 * for several runs at the same time.
 */
public class ExtractionMatcher implements TokenProcessor {

    /**
     * Property set on every matched extraction whose type and text are equal
     * to those of an extraction located earlier in the result container.
     */
    public static final Object PROP_DUPLICATE = new Object();

    /** Creates the tokenizers used by the walker and by the locators. */
    private final TokenizerFactory factory;

    /** Walks a document and reports its tokens to this instance. */
    private final TokenWalker walker;

    /** One locator per extraction to find; same indexing as {@link #extToLocate}. */
    private ExtractionLocator[] locators;

    /** The extractions passed to the current run, in iteration order. */
    private Extraction[] extToLocate;

    /** Number of matches completed so far for each entry of {@link #extToLocate}. */
    private int[] extractionsFound;

    /** Receives the matched extractions of the current run. */
    private ExtractionContainer resultContainer;

    /** Target structure of the container passed to the current run. */
    private TargetStructure targetStruct;

    /**
     * Creates a new matcher.
     *
     * @param tiesConfiguration configuration handed to the tokenizer factory
     */
    public ExtractionMatcher(TiesConfiguration tiesConfiguration) {
        this.factory = new TokenizerFactory(tiesConfiguration);
        this.walker = new TokenWalker(this, this.factory);
    }

    /**
     * Feeds a single token to a locator and updates the index range of the
     * extraction the locator is currently working on.
     *
     * @param extractionLocator the locator to advance
     * @param tokenDetails the token to process
     * @return the extraction that was completed by this token, or
     *         {@code null} if the token did not complete an extraction
     */
    private Extraction handleToken(ExtractionLocator extractionLocator, TokenDetails tokenDetails) {
        final String token = tokenDetails.getToken();
        final int rep = tokenDetails.getRep();
        final int index = tokenDetails.getIndex();

        final Extraction current;
        if (extractionLocator.startOfExtraction(token, rep)) {
            // The token starts the extraction: record where it begins.
            current = extractionLocator.getCurrentExtraction();
            if (current.getIndex() != index) {
                current.setIndex(index);
            }
        } else if (extractionLocator.inExtraction()) {
            current = extractionLocator.getCurrentExtraction();
        } else {
            current = null;
        }

        if (current == null) {
            return null;
        }

        extractionLocator.updateExtraction(token, rep);
        if (!extractionLocator.endOfExtraction()) {
            return null;
        }

        // The token ends the extraction: record where it ends and move on.
        if (current.getLastIndex() != index) {
            current.setLastIndex(index);
        }
        extractionLocator.switchToNextExtraction();
        return current;
    }

    /**
     * Creates a locator for a container that holds only the given extraction.
     *
     * @param extraction the extraction to locate
     * @return a new locator using a tokenizer created for the empty string
     */
    private ExtractionLocator initLocator(Extraction extraction) {
        final ExtractionContainer single = new ExtractionContainer(this.targetStruct);
        single.add(extraction);
        // NOTE(review): the flag is true when no first-token repetition is
        // stored (negative value); presumably this lets the locator accept any
        // repetition - confirm against ExtractionLocator.
        final boolean noFirstTokenRep = extraction.getFirstTokenRep() < 0;
        return new ExtractionLocator(single, this.factory.createTokenizer(StringUtils.EMPTY), noFirstTokenRep);
    }

    /**
     * Locates the extractions of a container in a document. Extractions that
     * are never matched are reported through the error log; matched
     * extractions that repeat the type and text of an earlier match are marked
     * with {@link #PROP_DUPLICATE}.
     *
     * @param extractionContainer the extractions to locate
     * @param document the document to walk
     * @return a new container holding the matched extractions
     * @throws IOException declared for the document walk
     * @throws ProcessingException declared for the document walk
     */
    public ExtractionContainer matchAndOrderExtractions(ExtractionContainer extractionContainer, Document document) throws IOException, ProcessingException {
        this.targetStruct = extractionContainer.getTargetStructure();
        this.resultContainer = new ExtractionContainer(this.targetStruct);

        final int size = extractionContainer.size();
        this.locators = new ExtractionLocator[size];
        this.extToLocate = new Extraction[size];
        // A new int[] is zero-filled, so every counter starts at 0.
        this.extractionsFound = new int[size];

        final Iterator<Extraction> source = extractionContainer.iterator();
        for (int i = 0; i < size; i++) {
            final Extraction extraction = source.next();
            this.locators[i] = initLocator(extraction);
            this.extToLocate[i] = extraction;
        }

        // Tokens are reported back to processToken during the walk.
        this.walker.walk(document, null);

        for (int i = 0; i < size; i++) {
            if (this.extractionsFound[i] < 1) {
                Util.LOG.error("Failed to locate extraction: " + this.extToLocate[i].toString());
            }
        }

        markDuplicates();
        return this.resultContainer;
    }

    /**
     * Marks every extraction of the result container whose "type: text" key
     * was already seen earlier in the container with {@link #PROP_DUPLICATE}.
     */
    private void markDuplicates() {
        final HashMap<String, Extraction> firstByKey =
                new HashMap<String, Extraction>(this.resultContainer.size());
        final Iterator<Extraction> results = this.resultContainer.iterator();
        while (results.hasNext()) {
            final Extraction candidate = results.next();
            final String key = candidate.getType() + ": " + candidate.getText();
            final Extraction first = firstByKey.get(key);
            if (first != null) {
                candidate.setProperty(PROP_DUPLICATE);
                Util.LOG.debug("Marked " + candidate + " as duplicate of " + first);
            } else {
                firstByKey.put(key, candidate);
            }
        }
    }

    /**
     * Offers a token to every locator and stores each extraction completed by
     * it, unless a longer or equally long overlapping match is already stored.
     *
     * @param element not used by this implementation
     * @param str not used by this implementation
     * @param tokenDetails the token to process
     * @param str2 not used by this implementation
     * @param contextMap not used by this implementation
     * @throws IOException declared by the interface; not thrown directly here
     * @throws ProcessingException declared by the interface; not thrown
     *         directly here
     */
    @Override // de.fu_berlin.ties.xml.dom.TokenProcessor
    public void processToken(Element element, String str, TokenDetails tokenDetails, String str2, ContextMap contextMap) throws IOException, ProcessingException {
        for (int i = 0; i < this.locators.length; i++) {
            final Extraction found = handleToken(this.locators[i], tokenDetails);
            if (found == null) {
                continue;
            }

            if (found.getFirstTokenRep() < 0) {
                // Keep looking for further occurrences with a fresh locator
                // built from a copy taken before the repetition is stored.
                // NOTE(review): m18clone looks like a decompiler-renamed
                // clone(); check the declaration in Extraction before renaming.
                this.locators[i] = initLocator(found.m18clone());
            }
            found.setFirstTokenRep(tokenDetails.getRep());
            this.extractionsFound[i]++;

            if (displacesOverlappingMatches(found)) {
                this.resultContainer.add(found);
            }
        }
    }

    /**
     * Resolves overlaps between a new match and the matches at the end of the
     * result container. Stored matches that overlap the candidate and are
     * shorter than it are removed; the first overlapping match that is at
     * least as long as the candidate wins instead.
     *
     * @param candidate the newly completed match
     * @return {@code true} if the candidate should be added to the result
     *         container, {@code false} if a stored match is kept instead
     */
    private boolean displacesOverlappingMatches(Extraction candidate) {
        Extraction last = this.resultContainer.last();
        while (last != null && last.getLastIndex() >= candidate.getIndex()) {
            // Lengths are measured in tokens, both ends inclusive.
            final int lastLength = (last.getLastIndex() - last.getIndex()) + 1;
            final int candidateLength = (candidate.getLastIndex() - candidate.getIndex()) + 1;

            if (candidateLength <= lastLength) {
                if (candidateLength == lastLength) {
                    Util.LOG.debug("Kept earlier overlapping match: " + last + " instead of " + candidate);
                } else {
                    Util.LOG.debug("Kept longer overlapping match: " + last + " instead of " + candidate);
                }
                return false;
            }

            if (this.resultContainer.removeLast() != last) {
                throw new IllegalStateException("Implementation error: container didn't remove last extraction " + last + " as expected");
            }
            Util.LOG.debug("Kept longer overlapping match: " + candidate + " instead of " + last);
            last = this.resultContainer.last();
        }
        return true;
    }
}
