package de.fu_berlin.ties.preprocess;

import de.fu_berlin.ties.ContextMap;
import de.fu_berlin.ties.ParsingException;
import de.fu_berlin.ties.TextProcessor;
import de.fu_berlin.ties.TiesConfiguration;
import de.fu_berlin.ties.io.IOUtils;
import de.fu_berlin.ties.text.TextUtils;
import de.fu_berlin.ties.util.ExternalCommand;
import de.fu_berlin.ties.util.Util;
import de.fu_berlin.ties.xml.TagIsolator;
import de.fu_berlin.ties.xml.XMLAdjuster;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.builder.ToStringBuilder;

/* loaded from: input_file:de/fu_berlin/ties/preprocess/TreeTagger.class */
public class TreeTagger extends TextProcessor {
    /** Configuration key (localized before lookup) whose value is the external tagger command run by {@link #doProcess}. */
    private static final String CONFIG_COMMAND = "treetagger.command";

    /** Configuration key (localized before lookup) whose value is inserted into the end-of-sentence regex by {@link #tagSentences}. */
    private static final String CONFIG_END_OF_SENTENCE = "treetagger.eos";

    /** Configuration key (localized before lookup) for the optional array of types that may still follow an end-of-sentence token. */
    private static final String CONFIG_AFTER_EOS = "treetagger.after-eos";

    /** Closing constituent tag; {@link #tagSentences} always allows it to follow an end-of-sentence token. */
    private static final String END_TAG_CONSTITUENT = "</const>";

    /**
     * Closing sentence tag. Note that {@link #tagSentences} currently spells this text out inside its replacement
     * string instead of referencing this constant.
     */
    private static final String END_TAG_SENTENCE = "</sent>";

    /** The apostrophe, used as a (regex) pattern in the replace table and as replacement text in the restore table. */
    private static final String APOSTROPHE_CHAR = "'";

    /** Placeholder that stands in for an apostrophe in {@link #REPLACE_WITHIN_TAGS} / {@link #RESTORE_WITHIN_TAGS}. */
    private static final String PSEUDO_ENTITY_APOSTROPHE = "&';";

    /** Placeholder that stands in for a run of whitespace; it is restored as a single space. */
    private static final String PSEUDO_ENTITY_WHITESPACE = "&;";

    /** Replacement applied with {@code XML_ENTITY}: keeps group 1 and swaps the terminating ';' for '&amp;'. */
    private static final String XML_ENTITY_REPLACEMENT = "$1&";

    /** Replacement applied with {@code REPLACED_XML_ENTITY}: keeps group 1 and puts the terminating ';' back. */
    private static final String XML_ENTITY_RESTORE = "$1;";

    /**
     * Unmodifiable substitution table filled by the static initializer: each key is a compiled pattern and each
     * value is the name of the property (from {@link #SUBSTITUTES_FILE}) the pattern was built from.
     */
    private static final Map<Pattern, String> SUBSTITUTES;

    /** Class-path location of the properties file the substitution table is read from. */
    private static final String SUBSTITUTES_FILE = "conf/substitutes.cfg";

    /** Receives the tagged text at the end of {@link #doProcess} and writes the final output. */
    private final XMLAdjuster xmlAdjuster;

    /** Applied to the input before it is handed to the external command. */
    private final TagIsolator tagIsolator;

    /** Pattern-to-placeholder table handed to {@code TagIsolator.isolateTags}; populated by the static initializer. */
    private static final Map<Pattern, String> REPLACE_WITHIN_TAGS = new HashMap<Pattern, String>();

    /** Inverse of {@link #REPLACE_WITHIN_TAGS}, applied to the external command's output; populated by the static initializer. */
    private static final Map<Pattern, String> RESTORE_WITHIN_TAGS = new HashMap<Pattern, String>();
    // Matches an entity-style reference: '&', one name-start character, any number of name characters, then ';'.
    // Group 1 captures everything except the terminating ';' (used by the "$1&" replacement).
    // NOTE(review): the character classes appear to follow the XML NameStartChar/NameChar productions, and the
    // U+FFFD replacement characters in the literal look like surrogate ranges that were lost to an encoding or
    // decompilation round-trip -- confirm against the original source before relying on non-BMP matching.
    private static final Pattern XML_ENTITY = Pattern.compile("(&(?:[:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�]|[�-�][�-�])(?:[-:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�.0-9·̀-ͯ‿-⁀]|[�-�][�-�])*);");
    // Same name pattern as XML_ENTITY, but terminated by '&' instead of ';': it matches the masked form produced by
    // the "$1&" replacement so that the "$1;" replacement can restore the original reference.
    private static final Pattern REPLACED_XML_ENTITY = Pattern.compile("(&(?:[:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�]|[�-�][�-�])(?:[-:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�.0-9·̀-ͯ‿-⁀]|[�-�][�-�])*)&");

    /**
     * Creates a new instance that uses the shared {@code TiesConfiguration.CONF} configuration.
     *
     * @param str passed on unchanged to the two-argument constructor (and from there to the
     *            {@code TextProcessor} constructor, which defines its meaning)
     */
    public TreeTagger(String str) {
        this(str, TiesConfiguration.CONF);
    }

    public TreeTagger(String str, TiesConfiguration tiesConfiguration) {
        super(str, tiesConfiguration);
        this.tagIsolator = new TagIsolator();
        this.xmlAdjuster = new XMLAdjuster(null, null, null, false, false, false, tiesConfiguration);
    }

    /**
     * Runs the complete tagging pipeline: reads all input, masks it, pipes it through the configured external
     * command, unmasks the command's output, marks sentence ends and finally lets the XML adjuster write the result.
     *
     * @param reader     source of the text to process; read completely into memory
     * @param writer     destination the XML adjuster writes the final result to
     * @param contextMap not used by this implementation
     * @throws IOException      propagated from the I/O and helper calls below
     * @throws ParsingException propagated from the helper calls below
     */
    @Override // de.fu_berlin.ties.TextProcessor
    protected void doProcess(Reader reader, Writer writer, ContextMap contextMap) throws IOException, ParsingException {
        // 1. Apply the substitution table, then mask entity references (terminating ';' becomes '&').
        final String substituted = TextUtils.multipleReplaceAll(IOUtils.readToString(reader), SUBSTITUTES);
        final String masked = TextUtils.replaceAll(substituted, XML_ENTITY, XML_ENTITY_REPLACEMENT);

        // 2. Let the tag isolator rewrite the masked text, applying the within-tag placeholder table.
        final StringWriter isolated = new StringWriter();
        this.tagIsolator.isolateTags(new StringReader(masked), isolated, REPLACE_WITHIN_TAGS);

        // 3. Feed the isolated text to the external command named in the (localized) configuration.
        final String command = getConfig().getString(getConfig().localizeKey(CONFIG_COMMAND));
        final ExternalCommand tagger = new ExternalCommand(new String[]{command});

        // 4. Undo both maskings on the command's output: entity references first, then the within-tag placeholders.
        final String restored = TextUtils.multipleReplaceAll(
                TextUtils.replaceAll(tagger.execute(null, isolated.toString()), REPLACED_XML_ENTITY, XML_ENTITY_RESTORE),
                RESTORE_WITHIN_TAGS);

        // 5. Append sentence end tags and hand the text to the XML adjuster, which writes to the caller's writer.
        final String withSentences = tagSentences(restored);
        this.xmlAdjuster.adjust(new StringReader(withSentences), writer);
    }

    /**
     * Appends {@code </sent>} after every end-of-sentence match in the given text.
     *
     * <p>A match is a {@code pos} element whose type starts with the configured end-of-sentence value, followed by
     * any number of whitespace-separated trailers. A trailer is either {@code </const>} or, if the "after-eos"
     * configuration array is non-empty, a {@code pos} element whose type starts with one of the configured
     * alternatives.
     *
     * @param str the tagged text to mark up
     * @return the text with {@code </sent>} inserted directly after each match
     */
    protected final String tagSentences(String str) {
        final String eosType = getConfig().getString(getConfig().localizeKey(CONFIG_END_OF_SENTENCE));
        final String[] afterEos = getConfig().getStringArray(getConfig().localizeKey(CONFIG_AFTER_EOS));

        // The sentence-ending token itself.
        final String head = typedPos(eosType);

        // What may still belong to the sentence after that token.
        final String trailer;
        if (afterEos != null && afterEos.length > 0) {
            final String typedAlternatives = typedPos(TextUtils.joinAlternatives(afterEos));
            trailer = TextUtils.joinAlternatives(new String[]{typedAlternatives, END_TAG_CONSTITUENT});
        } else {
            trailer = END_TAG_CONSTITUENT;
        }

        final Pattern sentenceEnd = Pattern.compile(head + "(?:\\s*" + trailer + ")*");
        // "$0" re-inserts the whole match; the closing sentence tag is placed right behind it.
        return TextUtils.replaceAll(str, sentenceEnd, "$0</sent>");
    }

    /**
     * Returns a textual description of this instance: the superclass description followed by the tag isolator
     * and the XML adjuster.
     *
     * @return the description assembled by a {@code ToStringBuilder}
     */
    @Override // de.fu_berlin.ties.TextProcessor
    public String toString() {
        final ToStringBuilder description = new ToStringBuilder(this);
        description.appendSuper(super.toString());
        description.append("tag isolator", this.tagIsolator);
        description.append("XML adjuster", this.xmlAdjuster);
        return description.toString();
    }

    /**
     * Builds a regex fragment matching a complete {@code pos} element whose {@code type} attribute value starts
     * with the given text.
     *
     * <p>The argument is inserted verbatim (it is not quoted, so it may itself be a regex) and the attribute's
     * closing quote is not part of the fragment: the lazy {@code .+?} consumes the rest of the start tag and the
     * element content up to the first {@code </pos>}.
     *
     * @param str the text (or regex) the type attribute value has to start with
     * @return the regex fragment
     */
    private String typedPos(String str) {
        final StringBuilder fragment = new StringBuilder("<pos\\s+type=\"");
        fragment.append(str);
        fragment.append(".+?</pos>");
        return fragment.toString();
    }

    /*
     * Class initialization: loads the substitution table from the class path and fills the two tables that
     * mask/unmask apostrophes and whitespace runs.
     */
    static {
        SUBSTITUTES = Collections.unmodifiableMap(loadSubstitutes());

        // Each placeholder put into the replace table has its inverse in the restore table.
        REPLACE_WITHIN_TAGS.put(Pattern.compile(APOSTROPHE_CHAR), PSEUDO_ENTITY_APOSTROPHE);
        RESTORE_WITHIN_TAGS.put(Pattern.compile(PSEUDO_ENTITY_APOSTROPHE), APOSTROPHE_CHAR);
        // A whole run of whitespace collapses into one placeholder, which is restored as a single space.
        REPLACE_WITHIN_TAGS.put(Pattern.compile("\\s+"), PSEUDO_ENTITY_WHITESPACE);
        RESTORE_WITHIN_TAGS.put(Pattern.compile(PSEUDO_ENTITY_WHITESPACE), " ");
    }

    /**
     * Reads {@link #SUBSTITUTES_FILE} from the class path and converts every property into one table entry:
     * the key is a pattern matching any of the forms listed in the property value, the value is the property name.
     *
     * <p>Failures are logged rather than thrown, so class initialization never fails because of this file; the
     * table then contains whatever entries were built before the failure (none if the resource is missing).
     *
     * @return a mutable map with the entries that could be built; never {@code null}
     */
    private static Map<Pattern, String> loadSubstitutes() {
        final Map<Pattern, String> table = new HashMap<Pattern, String>();
        final InputStream in = ClassLoader.getSystemResourceAsStream(SUBSTITUTES_FILE);
        if (in == null) {
            // Stop here instead of letting Properties.load(null) fail with a NullPointerException that would
            // only produce a second, misleading log entry.
            Util.LOG.error("Class loader returned null for conf/substitutes.cfg");
            return table;
        }

        try {
            final Properties properties = new Properties();
            properties.load(in);

            // Matchers are created once and re-targeted for every property value.
            final Matcher names = Pattern.compile("\\p{Alpha}+").matcher("");
            final Matcher codes = Pattern.compile("\\p{Digit}+").matcher("");
            final Matcher separators = Pattern.compile("[\\s|]+").matcher("");

            final Enumeration<?> propertyNames = properties.propertyNames();
            while (propertyNames.hasMoreElements()) {
                final String replacement = (String) propertyNames.nextElement();
                final Pattern forms = compileSubstitutePattern(
                        properties.getProperty(replacement), separators, names, codes);
                table.put(forms, replacement);
            }
        } catch (IOException e) {
            Util.LOG.error("Could not initialize substitution table from conf/substitutes.cfg: ", e);
        } catch (RuntimeException e2) {
            Util.LOG.error("Could not initialize substitution table from conf/substitutes.cfg = ", e2);
        } finally {
            // Properties.load leaves the stream open, so it has to be closed here.
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done: the table is already built or the failure is already logged.
            }
        }
        return table;
    }

    /**
     * Turns one property value into a pattern of alternatives.
     *
     * <p>Runs of whitespace and '|' become a single '|', every alphabetic run {@code name} becomes
     * {@code &name;}, and every digit run {@code N} becomes {@code &#N;} plus, as a further alternative, the
     * literal character with code point {@code N}.
     *
     * <p>NOTE(review): digits inside a name (e.g. a name such as "frac12") are treated as a separate numeric
     * code by this scheme -- confirm that the file format never contains such names.
     *
     * @param rawValue   the property value as read from the file
     * @param separators matcher for separator runs; re-targeted by the helper call
     * @param names      matcher for alphabetic runs; re-targeted by the helper call
     * @param codes      matcher for digit runs; reset to the intermediate text here
     * @return the compiled non-capturing group of alternatives
     * @throws NumberFormatException if a digit run does not fit into an {@code int}
     */
    private static Pattern compileSubstitutePattern(String rawValue, Matcher separators, Matcher names,
            Matcher codes) {
        final String normalized = TextUtils.replaceAll(rawValue.trim(), separators, "|");
        codes.reset(TextUtils.replaceAll(normalized, names, "&$0;"));

        // Matcher.appendReplacement only accepts a StringBuffer on older Java versions.
        final StringBuffer alternatives = new StringBuffer();
        while (codes.find()) {
            codes.appendReplacement(alternatives, "&#$0;");
            final int codePoint = Integer.parseInt(codes.group());
            if (Character.isValidCodePoint(codePoint)) {
                // Quote the literal so that regex metacharacters ('.', '|', '(', ...) match only themselves,
                // and use toChars so that supplementary code points are not truncated to a single char.
                alternatives.append('|').append(Pattern.quote(new String(Character.toChars(codePoint))));
            }
        }
        codes.appendTail(alternatives);
        return Pattern.compile("(?:" + alternatives + ")");
    }
}
