package de.fu_berlin.ties.xml;

import de.fu_berlin.ties.ContextMap;
import de.fu_berlin.ties.ParsingException;
import de.fu_berlin.ties.TextProcessor;
import de.fu_berlin.ties.TiesConfiguration;
import de.fu_berlin.ties.io.IOUtils;
import de.fu_berlin.ties.text.TextTokenizer;
import de.fu_berlin.ties.text.TextUtils;
import de.fu_berlin.ties.util.CollectionUtils;
import de.fu_berlin.ties.util.Util;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.builder.ToStringBuilder;

/* loaded from: input_file:de/fu_berlin/ties/xml/XMLAdjuster.class */
public class XMLAdjuster extends TextProcessor {
    /**
     * Configuration key: name of the root element that is inserted when the
     * input has no single root. If unset, a missing root is reported as a
     * {@link ParsingException} instead of being repaired.
     */
    public static final String CONFIG_MISSING_ROOT = "adjust.missing-root";

    /**
     * Configuration key: names of the tags that may be converted into empty
     * tags when their end tag is missing.
     */
    public static final String CONFIG_EMPTIABLE_TAGS = "adjust.emptiable-tags";

    /** Configuration key: whether control characters are deleted. */
    public static final String CONFIG_DELETE_CONTROL_CHARS = "adjust.delete.control-chars";

    /** Configuration key: whether pseudo-tags that cannot be fixed are deleted. */
    public static final String CONFIG_DELETE_PSEUDO_TAGS = "adjust.delete.pseudo-tags";

    /**
     * Configuration key: whether every ampersand that does not start one of
     * the predefined entities or a character reference is escaped (see
     * {@link #PSEUDO_AMP}); otherwise only ampersands that do not start any
     * syntactically valid reference are escaped (see {@link #SPURIOUS_AMP}).
     */
    public static final String CONFIG_ESCAPE_PSEUDO_ENTITIES = "adjust.escape.pseudo-entities";

    /**
     * Regex fragment for the characters of an unquoted attribute value: a
     * reluctant run of one or more characters other than '&lt;', '&gt;',
     * '=', space, tab, line feed, carriage return, U+0085 and U+2028.
     */
    public static final String UNQUOTED_ATTRIB_CHARS = "[^<>= \\t\\n\\r\u0085\u2028]+?";
    public static final String UNQUOTED_ATTRIBUTE = "(?:[:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�]|[�-�][�-�])(?:[-:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�.0-9·̀-ͯ‿-⁀]|[�-�][�-�])*[ \\t\\n\\r\u0085\u2028]*(=)[ \\t\\n\\r\u0085\u2028]*((?:[^<>= \\t\\n\\r\u0085\u2028]+?[ \\t\\n\\r\u0085\u2028]+)*?[^<>= \\t\\n\\r\u0085\u2028]+?)";
    /** Replacement text used when escaping a stray ampersand. */
    public static final String ESCAPED_AMP = "&amp;";

    // Event names handed to logEvent(...) (and thus to checkEvent(...))
    // whenever the adjuster modifies the document.
    protected static final String EVENT_CONVERTED_TO_EMPTY_TAG = "Converted to empty tag";
    protected static final String EVENT_INSERTED_MISSING_END_TAG = "Inserted missing end tag";
    protected static final String EVENT_INSERTED_MISSING_ROOT_ELEMENT = "Inserted missing root element";
    protected static final String EVENT_INSERTED_MISSING_START_TAG = "Inserted missing start tag";
    protected static final String EVENT_MOVED_END_TAG_UP = "Moved end tag up";
    protected static final String EVENT_MOVED_START_TAG_DOWN = "Moved start tag down";
    protected static final String EVENT_SPLIT_TAG = "Split tag";
    protected static final String EVENT_DELETED_CONTROL_CHARS = "Deleted control characters";
    protected static final String EVENT_DELETED_PSEUDO_TAG = "Deleted pseudo-tag";
    protected static final String EVENT_ESCAPED_CHARS = "Escaped characters";
    protected static final String EVENT_QUOTED_ATTRIBUTE_VALUES = "Quoted attribute values";

    /** Name of the root element to insert if none exists; null disables the repair. */
    private final String missingRootName;

    /** Names of tags that may be turned into empty tags; may be null (none). */
    private final Set<String> emptiableTags;

    /** Whether control characters are removed from constituents. */
    private final boolean deletingControlChars;

    /** Whether unfixable pseudo-tags are deleted instead of being escaped. */
    private final boolean deletingPseudoTags;

    /** Whether PSEUDO_AMP (true) or SPURIOUS_AMP (false) selects the ampersands to escape. */
    private final boolean escapingPseudoEntities;
    public static final Pattern LAX_START_OR_EMPTY_TAG = Pattern.compile("<((?:[:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�]|[�-�][�-�])(?:[-:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�.0-9·̀-ͯ‿-⁀]|[�-�][�-�])*)(?:[ \\t\\n\\r\u0085\u2028]+(?:(?:[:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�]|[�-�][�-�])(?:[-:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�.0-9·̀-ͯ‿-⁀]|[�-�][�-�])*[ \\t\\n\\r\u0085\u2028]*=[ \\t\\n\\r\u0085\u2028]*(?:\"[^\"]*\"|'[^']*')|(?:[:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�]|[�-�][�-�])(?:[-:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�.0-9·̀-ͯ‿-⁀]|[�-�][�-�])*[ \\t\\n\\r\u0085\u2028]*(=)[ \\t\\n\\r\u0085\u2028]*((?:[^<>= \\t\\n\\r\u0085\u2028]+?[ \\t\\n\\r\u0085\u2028]+)*?[^<>= \\t\\n\\r\u0085\u2028]+?)))*[ \\t\\n\\r\u0085\u2028]*(/)?>");
    /**
     * Matches an ampersand that does not start one of the five predefined
     * entities (amp, lt, gt, apos, quot) or a decimal/hexadecimal character
     * reference terminated by a semicolon.
     */
    public static final Pattern PSEUDO_AMP = Pattern.compile("&(?!(?:amp|lt|gt|apos|quot|#[0-9]+|#x[0-9a-fA-F]+);)");
    public static final Pattern SPURIOUS_AMP = Pattern.compile("&(?!(?:(?:[:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�]|[�-�][�-�])(?:[-:A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-\u1fff\u200c-\u200d⁰-\u218fⰀ-\u2fef、-\ud7ff豈-﷏ﷰ-�.0-9·̀-ͯ‿-⁀]|[�-�][�-�])*|#[0-9]+|#x[0-9a-fA-F]+);)");
    /**
     * Matches a run of control characters in the ranges U+0001 to U+0008,
     * U+000B to U+000C and U+000E to U+001F (i.e. tab, line feed and carriage
     * return are not matched).
     */
    public static final Pattern CONTROL_CHARS = Pattern.compile("[\u0001-\b\u000b-\f\u000e-\u001f]+");

    /**
     * Creates an adjuster that passes "xml" as first argument to
     * {@link #XMLAdjuster(String)}.
     */
    public XMLAdjuster() {
        this("xml");
    }

    /**
     * Creates an adjuster configured from {@link TiesConfiguration#CONF}.
     *
     * @param str forwarded unchanged to the superclass constructor
     *            (presumably the output extension -- confirm against TextProcessor)
     */
    public XMLAdjuster(String str) {
        this(str, TiesConfiguration.CONF);
    }

    /**
     * Creates an adjuster whose settings are read from the given configuration
     * using the CONFIG_* keys of this class. The set of emptiable tags is
     * null when its key is not present in the configuration.
     *
     * @param str forwarded unchanged to the superclass constructor
     * @param tiesConfiguration the configuration to read the settings from
     */
    public XMLAdjuster(String str, TiesConfiguration tiesConfiguration) {
        this(str, tiesConfiguration.getString(CONFIG_MISSING_ROOT, null), tiesConfiguration.containsKey(CONFIG_EMPTIABLE_TAGS) ? CollectionUtils.arrayAsSet(tiesConfiguration.getStringArray(CONFIG_EMPTIABLE_TAGS)) : null, tiesConfiguration.getBoolean(CONFIG_DELETE_CONTROL_CHARS), tiesConfiguration.getBoolean(CONFIG_DELETE_PSEUDO_TAGS), tiesConfiguration.getBoolean(CONFIG_ESCAPE_PSEUDO_ENTITIES), tiesConfiguration);
    }

    /**
     * Creates an adjuster with explicitly given settings.
     *
     * @param str forwarded unchanged to the superclass constructor
     * @param str2 name of the root element to insert when it is missing;
     *             null makes a missing root an error
     * @param set names of tags that may be converted to empty tags; may be null
     * @param z whether control characters are deleted
     * @param z2 whether unfixable pseudo-tags are deleted
     * @param z3 whether pseudo-entities are escaped
     * @param tiesConfiguration forwarded unchanged to the superclass constructor
     */
    public XMLAdjuster(String str, String str2, Set<String> set, boolean z, boolean z2, boolean z3, TiesConfiguration tiesConfiguration) {
        super(str, tiesConfiguration);
        this.missingRootName = str2;
        this.emptiableTags = set;
        this.deletingControlChars = z;
        this.deletingPseudoTags = z2;
        this.escapingPseudoEntities = z3;
    }

    /**
     * Repairs the given XML-like text and writes the result to the writer.
     * The repaired constituents are written in document order; the writer is
     * flushed but not closed.
     *
     * @param charSequence the text to repair
     * @param writer receives the representation of every repaired constituent
     * @throws IOException if writing fails
     * @throws ParsingException if the input cannot be repaired
     */
    public final void adjust(CharSequence charSequence, Writer writer) throws IOException, ParsingException {
        for (XMLConstituent current = fixedConstituents(charSequence);
                current != null;
                current = current.nextConstituent()) {
            writer.write(current.getRepresentantion());
        }
        writer.flush();
    }

    /**
     * Reads the complete content of the reader and repairs it, see
     * {@link #adjust(CharSequence, Writer)}.
     *
     * @param reader source of the text to repair
     * @param writer receives the repaired text
     * @throws IOException if reading or writing fails
     * @throws ParsingException if the input cannot be repaired
     */
    public final void adjust(Reader reader, Writer writer) throws IOException, ParsingException {
        final CharSequence completeInput = IOUtils.readToString(reader);
        adjust(completeInput, writer);
    }

    /**
     * Hook invoked by both {@code logEvent} overloads before a modification
     * of the document is logged. This implementation does nothing; a subclass
     * may override it, e.g. to veto an event by throwing an exception.
     *
     * @param str the name of the event (one of the EVENT_* constants)
     * @throws ParsingException never thrown by this implementation
     */
    protected void checkEvent(String str) throws ParsingException {
    }

    /**
     * Inserts a tentative start tag if another end tag of the same name is
     * still waiting among the unprocessed tags. The new start tag is named
     * after {@code tagConstituent}, gets the markup series number of
     * {@code tagConstituent2} and is placed after {@code tagConstituent2}, or
     * after the constituent following it if that one has type 9 (the type
     * used for whitespace constituents). It is then pushed onto the
     * unprocessed tags and the insertion is logged.
     *
     * @param tagConstituent the tag whose name is looked up
     * @param tagConstituent2 the tag after which the start tag is inserted
     * @param unprocessedTags the tags not yet handled
     * @throws ParsingException if raised while logging the event
     */
    private void checkNextAppearance(TagConstituent tagConstituent, TagConstituent tagConstituent2, UnprocessedTags unprocessedTags) throws ParsingException {
        if (correspondingEndTag(tagConstituent.getName(), -1, unprocessedTags, false) == null) {
            // no further end tag of this name: nothing to prepare
            return;
        }

        final TagConstituent tentativeStart = new TagConstituent((short) 0, tagConstituent.getName(), tagConstituent2.getMarkupSeriesNo());
        tentativeStart.setVariety(TagVariety.TENTATIVE);

        final XMLConstituent follower = tagConstituent2.nextConstituent();
        if (follower.getType() != 9) {
            tagConstituent2.insertAfter(tentativeStart);
        } else {
            follower.insertAfter(tentativeStart);
        }

        unprocessedTags.push(tentativeStart, false);
        logEvent(EVENT_INSERTED_MISSING_START_TAG, tentativeStart);
    }

    /**
     * Builds the constituent for a piece of markup, choosing its type from
     * the first and last character of the captured text.
     * <ul>
     * <li>leading '/': end tag (type 1), pushed onto the unprocessed tags</li>
     * <li>trailing '/': empty tag (type 2)</li>
     * <li>leading '!': other constituent of type 3</li>
     * <li>leading '?': type 4 if the rest is exactly "xml", otherwise type 5</li>
     * <li>leading '&lt;': other constituent of type 6</li>
     * <li>anything else: start tag (type 0), pushed onto the unprocessed tags</li>
     * </ul>
     *
     * @param str the complete representation of the markup
     * @param str2 the captured text identifying the markup; must not be empty
     * @param i the markup series number for tag constituents
     * @param unprocessedTags receives start and end tags; may be null
     * @return the created constituent
     */
    private XMLConstituent createMarkupConstituent(String str, String str2, int i, UnprocessedTags unprocessedTags) {
        final char first = str2.charAt(0);
        final char last = str2.charAt(str2.length() - 1);

        if (first == '/') {
            final TagConstituent endTag = new TagConstituent((short) 1, str2.substring(1), str, i);
            if (unprocessedTags != null) {
                unprocessedTags.push(endTag);
            }
            return endTag;
        }
        if (last == '/') {
            return new TagConstituent((short) 2, str2.substring(0, str2.length() - 1), str, i);
        }

        switch (first) {
            case '!':
                return new OtherConstituent((short) 3, str);
            case '?':
                if (str2.substring(1).equals("xml")) {
                    return new OtherConstituent((short) 4, str);
                }
                return new OtherConstituent((short) 5, str);
            case '<':
                return new OtherConstituent((short) 6, str);
            default:
                final TagConstituent startTag = new TagConstituent((short) 0, str2, str, i);
                if (unprocessedTags != null) {
                    unprocessedTags.push(startTag);
                }
                return startTag;
        }
    }

    /**
     * Looks up an unprocessed end tag with the given name.
     *
     * @param str the tag name to look for
     * @param i the markup series to search; a negative value searches for the
     *          first unprocessed tag of that name instead
     * @param unprocessedTags the tags not yet handled
     * @param z whether a found end tag is removed from the unprocessed tags
     * @return the end tag (type 1), or null if the lookup found nothing or
     *         found a tag of another type
     */
    private TagConstituent correspondingEndTag(String str, int i, UnprocessedTags unprocessedTags, boolean z) {
        final TagConstituent candidate;
        if (i < 0) {
            candidate = unprocessedTags.findFirst(str);
        } else {
            candidate = unprocessedTags.findInSeries(str, i, true);
        }

        if (candidate == null || candidate.getType() != 1) {
            return null;
        }
        if (z) {
            unprocessedTags.forceRemove(candidate);
        }
        return candidate;
    }

    /**
     * Looks up an open tag with the given name, first within the given markup
     * series and then among the tentative tags. A tag that is found is
     * removed from the open tags, unless it is the root, in which case it is
     * left alone and not returned.
     *
     * @param str the tag name to look for
     * @param i the markup series to search first
     * @param openTags the currently open tags
     * @return the removed open tag, or null if none was found or the match is
     *         the root
     */
    private TagConstituent correspondingOpenTag(String str, int i, OpenTags openTags) {
        TagConstituent match = openTags.findInSeries(str, i, false);
        if (match == null) {
            match = openTags.findTentativeTag(str);
        }
        if (match == null) {
            return null;
        }
        if (openTags.isRoot(match)) {
            return null;
        }
        openTags.forceRemove(match);
        return match;
    }

    /**
     * Repairs the text delivered by the reader and writes the result to the
     * writer; the context map is not used.
     *
     * @param reader source of the text to repair
     * @param writer receives the repaired text
     * @param contextMap ignored
     * @throws IOException if reading or writing fails
     * @throws ParsingException if the input cannot be repaired
     */
    @Override // de.fu_berlin.ties.TextProcessor
    protected void doProcess(Reader reader, Writer writer, ContextMap contextMap) throws IOException, ParsingException {
        final CharSequence completeInput = IOUtils.readToString(reader);
        adjust(completeInput, writer);
    }

    /**
     * Decides whether the end tag for the given name has to be considered
     * missing: no end tag of that name is first among the unprocessed tags
     * and there are at least as many unprocessed start tags as end tags of
     * that name.
     *
     * @param str the tag name
     * @param unprocessedTags the tags not yet handled
     * @return true if the end tag is considered missing
     */
    private boolean endTagMissing(String str, UnprocessedTags unprocessedTags) {
        if (correspondingEndTag(str, -1, unprocessedTags, false) != null) {
            return false;
        }
        return unprocessedTags.startTagCount(str) >= unprocessedTags.endTagCount(str);
    }

    /**
     * Replaces all matches of a pattern in the representation of a
     * constituent. If the replacement changed the text, the event is logged
     * and the new representation is stored; otherwise the constituent is left
     * untouched.
     *
     * @param xMLConstituent the constituent to modify
     * @param pattern the pattern to replace
     * @param str the replacement text
     * @param str2 the name of the event to log when something was replaced
     * @throws ParsingException if raised while logging the event
     */
    private void replaceInRep(XMLConstituent xMLConstituent, Pattern pattern, String str, String str2) throws ParsingException {
        final String before = xMLConstituent.getRepresentantion();
        final String after = TextUtils.replaceAll(before, pattern, str);

        // Compare by value, not by reference: an equal but distinct String
        // returned by replaceAll must not be reported as a modification.
        final boolean changed = (before == null) ? (after != null) : !before.equals(after);
        if (changed) {
            logEvent(str2, "old: '" + before + "', new: '" + after + "'");
            xMLConstituent.setRepresentantion(after);
        }
    }

    /**
     * Splits the given text into constituents and repairs the tag structure:
     * a missing root element is inserted (or reported, if no root name is
     * configured), every end tag is reconciled with the open tags, and end
     * tags for elements still open at the end of the input are appended.
     *
     * @param charSequence the text to repair
     * @return the first constituent of the repaired sequence, or null if the
     *         input yields no constituents at all (e.g. empty input)
     * @throws ParsingException if the input cannot be repaired
     */
    public final XMLConstituent fixedConstituents(CharSequence charSequence) throws ParsingException {
        final UnprocessedTags unprocessedTags = new UnprocessedTags();
        XMLConstituent head = rawConstituents(charSequence, true, unprocessedTags);
        if (head == null) {
            // Nothing was tokenized: there is nothing to fix, and the code
            // below would dereference null. Callers such as adjust() already
            // treat a null head as "nothing to write".
            return null;
        }

        final OpenTags openTags = new OpenTags();
        XMLConstituent current = head;
        XMLConstituent last = head;
        XMLConstituent firstRootContent = null;
        boolean rootInserted = false;

        while (current != null) {
            if (firstRootContent == null && isRootContent(current)) {
                firstRootContent = current;
                if (current.getType() != 0) {
                    // document does not begin with a start tag
                    insertMissingRoot(firstRootContent, openTags);
                    rootInserted = true;
                }
            } else if (openTags.isEmpty() && isRootContent(current)) {
                // content after the root has been closed
                insertMissingRoot(firstRootContent, openTags);
                rootInserted = true;
            }

            if (current instanceof TagConstituent) {
                final TagConstituent tag = (TagConstituent) current;
                if (tag.getType() == 0) {
                    unprocessedTags.forceRemove(tag);
                    openTags.push(tag);
                } else if (tag.getType() == 1) {
                    unprocessedTags.forceRemove(tag);
                    handleEndTag(tag, openTags, unprocessedTags);
                }
            }

            last = current;
            current = current.nextConstituent();
        }

        handleEOF(last, openTags, unprocessedTags, rootInserted);

        // Constituents may have been inserted before the original head.
        while (head.hasPrevious()) {
            head = head.previousConstituent();
        }
        return head;
    }

    /**
     * Cleans up the representation of a single constituent: deletes control
     * characters if configured, and escapes stray ampersands in constituents
     * for which {@link #needsAmpEscape(XMLConstituent)} holds. Which
     * ampersands count as stray depends on the pseudo-entity setting.
     *
     * @param xMLConstituent the constituent to clean up
     * @throws ParsingException if raised while logging an event
     */
    private void fixRepresentation(XMLConstituent xMLConstituent) throws ParsingException {
        if (this.deletingControlChars) {
            replaceInRep(xMLConstituent, CONTROL_CHARS, "", EVENT_DELETED_CONTROL_CHARS);
        }
        if (!needsAmpEscape(xMLConstituent)) {
            return;
        }
        final Pattern strayAmp = this.escapingPseudoEntities ? PSEUDO_AMP : SPURIOUS_AMP;
        replaceInRep(xMLConstituent, strayAmp, ESCAPED_AMP, EVENT_ESCAPED_CHARS);
    }

    /**
     * Reconciles an end tag with the stack of open tags, repairing the
     * document until the end tag matches the innermost open tag. Depending on
     * the situation, start tags are inserted or moved down, end tags are
     * inserted or moved up, open tags are split around the end tag or
     * converted into empty tags. Each repair is reported via logEvent.
     *
     * @param tagConstituent the end tag to handle
     * @param openTags the currently open tags; its top is inspected on every
     *                 round -- NOTE(review): the code assumes it is not empty
     * @param unprocessedTags the tags that follow and are not yet handled
     * @throws ParsingException if raised while logging an event
     */
    protected void handleEndTag(TagConstituent tagConstituent, OpenTags openTags, UnprocessedTags unprocessedTags) throws ParsingException {
        TagConstituent correspondingEndTag;
        // z becomes true once the end tag has found its start tag
        boolean z = false;
        while (!z) {
            TagConstituent peek = openTags.peek();
            TagVariety variety = peek.getVariety();
            String name = peek.getName();
            if (name.equals(tagConstituent.getName())) {
                // innermost open tag matches the end tag: close it
                if (openTags.popAndRegularize() && variety == TagVariety.TENTATIVE) {
                    checkNextAppearance(tagConstituent, tagConstituent, unprocessedTags);
                }
                z = true;
            } else if (variety == TagVariety.TENTATIVE) {
                // a tentative start tag is in the way: move it behind the end tag
                moveTentativeTag(peek, tagConstituent, openTags, unprocessedTags);
            } else if (!openTags.containsNonTentative(tagConstituent.getName()) || (correspondingEndTag = correspondingEndTag(name, tagConstituent.getMarkupSeriesNo(), unprocessedTags, true)) == null) {
                // try to find the start tag for this end tag among the open tags
                TagConstituent correspondingOpenTag = correspondingOpenTag(tagConstituent.getName(), peek.getMarkupSeriesNo(), openTags);
                if (correspondingOpenTag != null) {
                    // move the start tag down so that it follows the innermost open tag
                    correspondingOpenTag.remove();
                    peek.insertAfter(correspondingOpenTag);
                    if (correspondingOpenTag.getVariety() == TagVariety.REGULAR) {
                        logEvent(EVENT_MOVED_START_TAG_DOWN, correspondingOpenTag);
                    } else {
                        if (correspondingOpenTag.getVariety() == TagVariety.TENTATIVE) {
                            checkNextAppearance(tagConstituent, tagConstituent, unprocessedTags);
                        }
                        correspondingOpenTag.setVariety(TagVariety.REGULAR);
                    }
                    z = true;
                } else if (!openTags.contains(tagConstituent.getName())) {
                    // no open tag of this name at all: insert the missing start tag
                    TagConstituent tagConstituent2 = new TagConstituent((short) 0, tagConstituent.getName(), peek.getMarkupSeriesNo());
                    peek.insertAfter(tagConstituent2);
                    logEvent(EVENT_INSERTED_MISSING_START_TAG, tagConstituent2);
                    checkNextAppearance(tagConstituent, tagConstituent, unprocessedTags);
                    z = true;
                } else if (peek.getMarkupSeriesNo() != tagConstituent.getMarkupSeriesNo() || endTagMissing(name, unprocessedTags)) {
                    // the innermost open tag has to be closed before the end tag
                    boolean endTagMissing = endTagMissing(name, unprocessedTags);
                    if (endTagMissing && isAnEmptiableTag(name)) {
                        // NOTE(review): the converted tag is not popped from openTags
                        // in this branch -- confirm that it is removed elsewhere,
                        // otherwise the next round inspects the same tag again.
                        logEvent(EVENT_CONVERTED_TO_EMPTY_TAG, replaceWithEmptyCopy(peek));
                    } else {
                        // insert an end tag for the innermost open tag before this end tag
                        TagConstituent tagConstituent3 = new TagConstituent((short) 1, name, tagConstituent.getMarkupSeriesNo());
                        tagConstituent.insertBefore(tagConstituent3);
                        if (!endTagMissing) {
                            // its real end tag comes later: reopen the element after
                            // this end tag (split it into two parts)
                            TagConstituent tagConstituent4 = new TagConstituent((short) 0, name, peek.getRepresentantion(), tagConstituent.getMarkupSeriesNo());
                            tagConstituent4.setVariety(TagVariety.CONTINUATION);
                            tagConstituent.insertAfter(tagConstituent4);
                            unprocessedTags.push(tagConstituent4, false);
                            logEvent(EVENT_SPLIT_TAG, peek);
                        } else if (variety == TagVariety.REGULAR) {
                            logEvent(EVENT_INSERTED_MISSING_END_TAG, tagConstituent3);
                        }
                        openTags.popAndRegularize();
                    }
                } else {
                    // same markup series and the end tag is not missing:
                    // move the innermost start tag behind this end tag
                    openTags.forceRemove(peek);
                    peek.remove();
                    tagConstituent.insertAfter(peek);
                    unprocessedTags.push(peek, false);
                    if (variety == TagVariety.REGULAR) {
                        logEvent(EVENT_MOVED_START_TAG_DOWN, peek);
                    }
                }
            } else {
                // the end tag of the innermost open tag follows in the same
                // markup series: move it up in front of this end tag
                openTags.popAndRegularize();
                correspondingEndTag.remove();
                tagConstituent.insertBefore(correspondingEndTag);
                if (variety == TagVariety.TENTATIVE) {
                    checkNextAppearance(correspondingEndTag, tagConstituent, unprocessedTags);
                }
                logEvent(EVENT_MOVED_END_TAG_UP, correspondingEndTag);
            }
        }
    }

    /**
     * Finishes the repair at the end of the input: reports inconsistent
     * internal state and appends end tags for all elements that are still
     * open. The end tags are inserted after the last constituent that is
     * root content, innermost element first.
     *
     * @param xMLConstituent the last constituent of the document
     * @param openTags the tags still open
     * @param unprocessedTags the unprocessed tags; expected to be empty
     * @param z whether a missing root element was inserted; in that case the
     *          insertion of the end tag that empties the open tags is not logged
     * @throws ParsingException if raised while logging an event
     */
    protected void handleEOF(XMLConstituent xMLConstituent, OpenTags openTags, UnprocessedTags unprocessedTags, boolean z) throws ParsingException {
        if (xMLConstituent.nextConstituent() != null) {
            Util.LOG.error("Implementation error: constituents after last constituent: " + xMLConstituent);
        }
        if (!unprocessedTags.isEmpty()) {
            Util.LOG.error("Implementation error: still unprocessed tags at end-of-file -- last constituent: " + xMLConstituent + " unprocessed tags: " + unprocessedTags);
        }

        TagConstituent stillOpen = openTags.peek();
        if (stillOpen == null) {
            return;
        }

        // step back over trailing constituents that are not root content
        XMLConstituent anchor = xMLConstituent;
        while (!isRootContent(anchor)) {
            anchor = anchor.previousConstituent();
        }

        do {
            final TagConstituent closingTag = new TagConstituent((short) 1, stillOpen.getName());
            anchor.insertAfter(closingTag);
            anchor = closingTag;
            openTags.pop();
            stillOpen = openTags.peek();
            if (!(z && openTags.isEmpty())) {
                logEvent(EVENT_INSERTED_MISSING_END_TAG, closingTag);
            }
        } while (stillOpen != null);
    }

    /**
     * Reports a modification of the document: runs {@link #checkEvent(String)}
     * and then writes a debug log entry.
     *
     * @param str the name of the event
     * @param str2 details about the modification; must not be null
     * @throws ParsingException if raised by checkEvent
     */
    protected void logEvent(String str, String str2) throws ParsingException {
        checkEvent(str);
        final String details = str2.toString();
        Util.LOG.debug("Modified document: " + str + " (" + details + ')');
    }

    /**
     * Reports a modification of the document that concerns a tag: runs
     * {@link #checkEvent(String)} and then writes a debug log entry.
     *
     * @param str the name of the event
     * @param tagConstituent the tag concerned; must not be null
     * @throws ParsingException if raised by checkEvent
     */
    protected void logEvent(String str, TagConstituent tagConstituent) throws ParsingException {
        checkEvent(str);
        final String details = tagConstituent.toString();
        Util.LOG.debug("Modified document: " + str + " (" + details + ')');
    }

    /**
     * Inserts a start tag for the configured root element in front of the
     * given constituent, pushes it onto the open tags and logs the insertion.
     *
     * @param xMLConstituent the constituent that becomes the successor of the
     *                       new root start tag
     * @param openTags the currently open tags
     * @throws ParsingException if no root element name is configured, or if
     *                          raised while logging the event
     */
    private void insertMissingRoot(XMLConstituent xMLConstituent, OpenTags openTags) throws ParsingException {
        final String rootName = this.missingRootName;
        if (rootName == null) {
            throw new ParsingException("Root tag missing (resp. tags or textual content outside root element)");
        }

        final TagConstituent rootStart = new TagConstituent((short) 0, rootName, 0);
        xMLConstituent.insertBefore(rootStart);
        openTags.push(rootStart);
        logEvent(EVENT_INSERTED_MISSING_ROOT_ELEMENT, rootStart);
    }

    /**
     * Tells whether a tag of the given name may be converted into an empty
     * tag.
     *
     * @param str the tag name
     * @return true if a set of emptiable tags is configured and contains the name
     */
    protected boolean isAnEmptiableTag(String str) {
        if (this.emptiableTags == null) {
            return false;
        }
        return this.emptiableTags.contains(str);
    }

    /**
     * @return whether control characters are deleted from constituents
     */
    public boolean isDeletingControlChars() {
        return this.deletingControlChars;
    }

    /**
     * @return whether pseudo-tags that cannot be fixed are deleted
     */
    public boolean isDeletingPseudoTags() {
        return this.deletingPseudoTags;
    }

    /**
     * @return whether ampersands are escaped according to {@link #PSEUDO_AMP}
     *         (true) or {@link #SPURIOUS_AMP} (false)
     */
    public boolean isEscapingPseudoEntities() {
        return this.escapingPseudoEntities;
    }

    /**
     * Tells whether a constituent is content that has to live inside the root
     * element: any tag, or a constituent of type 7 or 8 (the types this class
     * creates for CDATA sections and text).
     *
     * @param xMLConstituent the constituent to check
     * @return true if the constituent is root content
     */
    private boolean isRootContent(XMLConstituent xMLConstituent) {
        if (xMLConstituent instanceof TagConstituent) {
            return true;
        }
        final short type = xMLConstituent.getType();
        return type == 7 || type == 8;
    }

    /**
     * Moves a start tag behind another tag: the tag is taken off the open
     * tags and out of the constituent sequence, adopts the markup series
     * number of the other tag, is re-inserted directly after it and finally
     * pushed onto the unprocessed tags so that it is handled again later.
     *
     * @param tagConstituent the tag to move (a tentative start tag at the only call site)
     * @param tagConstituent2 the tag after which the moved tag is inserted
     * @param openTags the currently open tags
     * @param unprocessedTags the tags not yet handled
     */
    private void moveTentativeTag(TagConstituent tagConstituent, TagConstituent tagConstituent2, OpenTags openTags, UnprocessedTags unprocessedTags) {
        openTags.forceRemove(tagConstituent);
        tagConstituent.remove();
        tagConstituent.setMarkupSeriesNo(tagConstituent2.getMarkupSeriesNo());
        tagConstituent2.insertAfter(tagConstituent);
        unprocessedTags.push(tagConstituent, false);
    }

    /**
     * Tells whether stray ampersands have to be escaped in a constituent.
     * This is the case for types 0, 2 and 8 (the types this class uses for
     * start tags, empty tags and text).
     *
     * @param xMLConstituent the constituent to check
     * @return true if ampersands in the constituent must be checked
     */
    private boolean needsAmpEscape(XMLConstituent xMLConstituent) {
        switch (xMLConstituent.getType()) {
            case 0:
            case 2:
            case 8:
                return true;
            default:
                return false;
        }
    }

    /**
     * Splits the given text into a linked sequence of constituents without
     * collecting the tags for later structural repair (no unprocessed-tags
     * container is passed on).
     *
     * @param charSequence the text to split
     * @param z whether syntactic repairs are applied while splitting
     * @return the first constituent, or null if there is none
     * @throws ParsingException if the input contains an uncorrectable error
     */
    public final XMLConstituent rawConstituents(CharSequence charSequence, boolean z) throws ParsingException {
        return rawConstituents(charSequence, z, null);
    }

    /**
     * Splits the given text into a linked sequence of constituents (tags,
     * text, CDATA sections, whitespace and other markup). Consecutive markup
     * constituents share a markup series number; the number is increased
     * whenever markup follows text or CDATA.
     *
     * If {@code z} is true, the text the tokenizer reports as invalid
     * preceding whitespace is repaired: a tag with unquoted attribute values
     * is fixed, an unfixable pseudo-tag is deleted (if configured), anything
     * else is XML-escaped. In addition the representation of each created
     * constituent is cleaned up via fixRepresentation.
     *
     * @param charSequence the text to split
     * @param z whether repairs are applied; its negation is handed to the
     *          tokenizer factory (presumably "strict" mode -- TODO confirm)
     * @param unprocessedTags collects the created start and end tags; may be null
     * @return the first constituent, or null if none was created
     * @throws ParsingException if the tokenizer reports an uncorrectable error
     *                          (IllegalArgumentException) or logging an event fails
     */
    protected final XMLConstituent rawConstituents(CharSequence charSequence, boolean z, UnprocessedTags unprocessedTags) throws ParsingException {
        XMLConstituent otherConstituent;
        TextTokenizer createXMLTokenizer = XMLTokenizerFactory.createXMLTokenizer(charSequence, !z);
        // captured text of the most recent token; empty for text, "[CDATA" for CDATA
        String str = null;
        // set once the tokenizer has no more tokens
        boolean z2 = false;
        // head of the constituent list (the return value)
        XMLConstituent xMLConstituent = null;
        // tail of the constituent list
        XMLConstituent xMLConstituent2 = null;
        // true while the last root content seen was markup (same markup series)
        boolean z3 = true;
        // current markup series number
        int i = 0;
        // constituent recovered from invalid "whitespace", still to be appended
        XMLConstituent xMLConstituent3 = null;
        // whitespace to append after the recovered constituent
        String str2 = null;
        while (!z2) {
            try {
                String nextToken = createXMLTokenizer.nextToken();
                if (nextToken == null) {
                    z2 = true;
                } else {
                    str = createXMLTokenizer.capturedText();
                }
                if (createXMLTokenizer.hasPrecedingWhitespace()) {
                    String precedingWhitespace = createXMLTokenizer.precedingWhitespace();
                    if (z && !createXMLTokenizer.precedingWhitespaceIsValid()) {
                        // z4: the invalid part was handled as a (pseudo-)tag
                        boolean z4 = false;
                        // NOTE(review): str and nextToken are dereferenced here without a
                        // null check -- confirm the tokenizer never reports invalid
                        // preceding whitespace ending in '<' together with a null token.
                        if (precedingWhitespace.endsWith("<") && str.length() == 0) {
                            String substring = precedingWhitespace.substring(0, precedingWhitespace.length() - 1);
                            // 62 is '>'
                            int indexOf = nextToken.indexOf(62);
                            if (createXMLTokenizer.isValidWhitespace(substring) && indexOf > 0) {
                                // the stray '<' plus the following text might be a tag
                                // with unquoted attribute values
                                Object[] tryToFixTag = tryToFixTag('<' + nextToken);
                                if (tryToFixTag != null) {
                                    xMLConstituent3 = (TagConstituent) tryToFixTag[0];
                                    nextToken = (String) tryToFixTag[1];
                                    z4 = true;
                                    precedingWhitespace = substring;
                                } else if (this.deletingPseudoTags) {
                                    // drop everything up to and including the '>'
                                    logEvent(EVENT_DELETED_PSEUDO_TAG, '<' + nextToken.substring(0, indexOf + 1));
                                    nextToken = nextToken.substring(indexOf + 1);
                                    z4 = true;
                                    precedingWhitespace = substring;
                                }
                            }
                        }
                        if (z4) {
                            // whitespace at the start of the remaining text becomes its own
                            // constituent: after the fixed tag, or merged into the
                            // preceding whitespace if a pseudo-tag was deleted
                            int initialWhitespaceCount = createXMLTokenizer.initialWhitespaceCount(nextToken);
                            if (initialWhitespaceCount > 0) {
                                if (xMLConstituent3 != null) {
                                    str2 = nextToken.substring(0, initialWhitespaceCount);
                                } else {
                                    precedingWhitespace = precedingWhitespace + nextToken.substring(0, initialWhitespaceCount);
                                }
                                nextToken = nextToken.substring(initialWhitespaceCount);
                            }
                        } else {
                            // not treatable as a tag: escape the invalid text and keep
                            // only its leading whitespace as whitespace
                            String escapeXml = StringEscapeUtils.escapeXml(precedingWhitespace);
                            int initialWhitespaceCount2 = createXMLTokenizer.initialWhitespaceCount(escapeXml);
                            if (initialWhitespaceCount2 > 0) {
                                precedingWhitespace = escapeXml.substring(0, initialWhitespaceCount2);
                                escapeXml = escapeXml.substring(initialWhitespaceCount2);
                            } else {
                                precedingWhitespace = "";
                            }
                            logEvent(EVENT_ESCAPED_CHARS, escapeXml);
                            if (str.length() == 0) {
                                // the current token is text: prepend the escaped text to it
                                nextToken = escapeXml + nextToken;
                            } else {
                                // otherwise the escaped text becomes a text constituent of
                                // its own, followed by its trailing whitespace
                                int trailingWhitespaceCount = createXMLTokenizer.trailingWhitespaceCount(escapeXml);
                                if (trailingWhitespaceCount > 0) {
                                    str2 = escapeXml.substring(escapeXml.length() - trailingWhitespaceCount);
                                    escapeXml = escapeXml.substring(0, escapeXml.length() - trailingWhitespaceCount);
                                }
                                xMLConstituent3 = new OtherConstituent((short) 8, escapeXml);
                            }
                        }
                    }
                    if (precedingWhitespace.length() > 0) {
                        // append the whitespace constituent (type 9)
                        OtherConstituent otherConstituent2 = new OtherConstituent((short) 9, precedingWhitespace);
                        if (xMLConstituent2 == null) {
                            xMLConstituent = otherConstituent2;
                        } else {
                            xMLConstituent2.insertAfter(otherConstituent2);
                        }
                        xMLConstituent2 = otherConstituent2;
                    }
                    if (xMLConstituent3 != null) {
                        // append the constituent recovered from the invalid whitespace
                        if (xMLConstituent3.getType() == 8 || xMLConstituent3.getType() == 7) {
                            z3 = false;
                        } else {
                            if (!z3) {
                                // markup after text/CDATA starts a new markup series
                                z3 = true;
                                i++;
                            }
                            if (xMLConstituent3 instanceof TagConstituent) {
                                ((TagConstituent) xMLConstituent3).setMarkupSeriesNo(i);
                                // empty tags (type 2) need no later processing
                                if (unprocessedTags != null && xMLConstituent3.getType() != 2) {
                                    unprocessedTags.push((TagConstituent) xMLConstituent3);
                                }
                            }
                        }
                        if (z) {
                            fixRepresentation(xMLConstituent3);
                        }
                        if (xMLConstituent2 == null) {
                            xMLConstituent = xMLConstituent3;
                        } else {
                            xMLConstituent2.insertAfter(xMLConstituent3);
                        }
                        xMLConstituent2 = xMLConstituent3;
                        xMLConstituent3 = null;
                        if (str2 != null) {
                            OtherConstituent otherConstituent3 = new OtherConstituent((short) 9, str2);
                            xMLConstituent2.insertAfter(otherConstituent3);
                            xMLConstituent2 = otherConstituent3;
                            str2 = null;
                        }
                    }
                }
                if (!z2 && nextToken.length() > 0) {
                    // append the constituent for the token itself
                    if (str.length() == 0 || str.equals("[CDATA")) {
                        if (z3) {
                            z3 = false;
                        }
                        // type 8: text, type 7: CDATA section
                        otherConstituent = str.length() == 0 ? new OtherConstituent((short) 8, nextToken) : new OtherConstituent((short) 7, nextToken);
                    } else {
                        if (!z3) {
                            // markup after text/CDATA starts a new markup series
                            z3 = true;
                            i++;
                        }
                        otherConstituent = createMarkupConstituent(nextToken, str, i, unprocessedTags);
                    }
                    if (z) {
                        fixRepresentation(otherConstituent);
                    }
                    if (xMLConstituent2 == null) {
                        xMLConstituent = otherConstituent;
                    } else {
                        xMLConstituent2.insertAfter(otherConstituent);
                    }
                    xMLConstituent2 = otherConstituent;
                }
            } catch (IllegalArgumentException e) {
                throw new ParsingException("Uncorrectable error in XML input: " + e.getMessage());
            }
        }
        return xMLConstituent;
    }

    /**
     * Replaces a start tag in the constituent sequence by an empty tag with
     * the same name and markup series number. The representation of the
     * empty tag is that of the start tag with a '/' inserted before the last
     * '&gt;'.
     *
     * @param tagConstituent the start tag to replace
     * @return the empty tag that took its place
     * @throws IllegalArgumentException if the given tag is not a start tag or
     *                                  its representation contains no '&gt;'
     */
    private TagConstituent replaceWithEmptyCopy(TagConstituent tagConstituent) throws IllegalArgumentException {
        final short type = tagConstituent.getType();
        if (type != 0) {
            throw new IllegalArgumentException("Tag to replace must be a start tag (actual type: " + ((int) type) + ')');
        }

        final String startRep = tagConstituent.getRepresentantion();
        final int closePos = startRep.lastIndexOf(">");
        if (closePos < 0) {
            throw new IllegalArgumentException("Start tag representation is invalid: '>' missing!");
        }
        final String emptyRep = startRep.substring(0, closePos) + '/' + startRep.substring(closePos);
        final TagConstituent emptyTag = new TagConstituent((short) 2, tagConstituent.getName(), emptyRep, tagConstituent.getMarkupSeriesNo());

        // remember the neighbours before unlinking the start tag
        final XMLConstituent before = tagConstituent.previousConstituent();
        final XMLConstituent after = tagConstituent.nextConstituent();
        tagConstituent.remove();

        if (before != null) {
            before.insertAfter(emptyTag);
        } else if (after != null) {
            emptyTag.insertAfter(after);
        }
        return emptyTag;
    }

    /**
     * Returns a description of this adjuster listing its five settings.
     *
     * @return a textual representation of the configuration
     */
    @Override // de.fu_berlin.ties.TextProcessor
    public String toString() {
        final ToStringBuilder description = new ToStringBuilder(this);
        description.append("missing root name", this.missingRootName);
        description.append("emptiable tags", this.emptiableTags);
        description.append("delete control characters", this.deletingControlChars);
        description.append("delete pseudo-tags", this.deletingPseudoTags);
        description.append("escape pseudo-entities", this.escapingPseudoEntities);
        return description.toString();
    }

    /**
     * Tries to read a start or empty tag with unquoted attribute values from
     * the beginning of the given text and to repair it by quoting the values.
     * Double quotes inside a value are replaced by "&amp;quot;"; a single
     * leading and a single trailing quote character of a value are dropped
     * before quoting.
     *
     * @param str the text that might begin with a lax tag
     * @return null if the text does not begin with a tag in lax syntax;
     *         otherwise an array of two elements: the repaired tag (a
     *         TagConstituent of type 0 for a start tag or 2 for an empty tag)
     *         and the remaining text after the tag (a String)
     * @throws ParsingException if raised while logging the event
     */
    private Object[] tryToFixTag(String str) throws ParsingException {
        final Matcher matcher = LAX_START_OR_EMPTY_TAG.matcher(str);
        if (!matcher.lookingAt()) {
            return null;
        }

        final String tagName = matcher.group(1);
        final Object[] result = new Object[2];
        result[1] = str.substring(matcher.end());

        // lookingAt() anchors at the start, so indices of the matcher are
        // also indices into the matched tag text
        String tagText = matcher.group();

        // the last group captures the '/' of an empty tag
        final String emptyMarker = matcher.group(matcher.groupCount());
        final short type;
        if (emptyMarker == null) {
            type = 0;
        } else if ("/".equals(emptyMarker)) {
            type = 2;
        } else {
            throw new RuntimeException("Implementation error: last group of lax tag '" + matcher.group() + "' is '" + emptyMarker + "' instead of '/' or null");
        }

        // Quote one unquoted value (group 3) per round and re-match until
        // none is left. The tag text is tracked as a String so that a tag
        // without any unquoted value no longer ends in a NullPointerException.
        boolean quotedAny = false;
        while (matcher.groupCount() > 2 && matcher.group(3) != null) {
            String value = matcher.group(3);
            if (value.startsWith("\"") || value.startsWith("'")) {
                value = value.substring(1);
            }
            if (value.endsWith("\"") || value.endsWith("'")) {
                value = value.substring(0, value.length() - 1);
            }

            final StringBuilder fixed = new StringBuilder(tagText.substring(0, matcher.start(3)));
            fixed.append('\"');
            fixed.append(value.replace("\"", "&quot;"));
            fixed.append('\"');
            fixed.append(tagText.substring(matcher.end(3), matcher.end()));

            final String candidate = fixed.toString();
            matcher.reset(candidate);
            if (!matcher.matches()) {
                throw new RuntimeException("Implementation error while trying to fix unquoted attribute values: '" + candidate + "' is no longer parsable as a tag");
            }
            tagText = candidate;
            quotedAny = true;
        }

        result[0] = new TagConstituent(type, tagName, tagText);
        if (quotedAny) {
            logEvent(EVENT_QUOTED_ATTRIBUTE_VALUES, tagText);
        }
        return result;
    }
}
