package org.apache.tika.parser.epub;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.config.Field;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.xml.DcXMLParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.utils.ParserUtils;
import org.apache.tika.utils.XMLReaderUtils;
import org.apache.tika.zip.utils.ZipSalvager;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* JADX WARN: Classes with same name are omitted:
  input_file:WEB-INF/lib/tika-parser-miscoffice-module-2.4.1.jar:org/apache/tika/parser/epub/EpubParser.class
  input_file:WEB-INF/lib/tika-parsers-standard-package-2.4.1.jar:org/apache/tika/parser/epub/EpubParser.class
 */
/* loaded from: input_file:org/apache/tika/parser/epub/EpubParser.class */
public class EpubParser extends AbstractParser {
    private static final long serialVersionUID = 215176772484050550L;

    /** Unmodifiable set of the media types this parser reports: the {@code epub+zip} and {@code x-ibooks+zip} application subtypes. */
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(MediaType.application("epub+zip"), MediaType.application("x-ibooks+zip"))));

    /**
     * When {@code true}, {@code parse} walks the archive as a forward-only zip stream;
     * when {@code false} (the default) it goes through a random-access {@code ZipFile}.
     * NOTE(review): {@code @Field} presumably makes this settable from Tika configuration - confirm.
     */
    @Field
    boolean streaming = false;

    /** Parser applied to {@code metadata.xml} and {@code .opf} entries; only its metadata output is used. */
    private Parser meta = new DcXMLParser();

    /** Parser applied to the (X)HTML content entries of the book. */
    private Parser content = new EpubContentParser();

    /* JADX INFO: Access modifiers changed from: private */
    /* JADX WARN: Classes with same name are omitted:
      input_file:WEB-INF/lib/tika-parser-miscoffice-module-2.4.1.jar:org/apache/tika/parser/epub/EpubParser$ContentOrderScraper.class
      input_file:WEB-INF/lib/tika-parsers-standard-package-2.4.1.jar:org/apache/tika/parser/epub/EpubParser$ContentOrderScraper.class
     */
    /* loaded from: input_file:org/apache/tika/parser/epub/EpubParser$ContentOrderScraper.class */
    /**
     * SAX handler that scrapes an OPF package document for two things:
     * the {@code <manifest>} items (id -> href/media-type) and the
     * {@code <spine>} reading order (list of {@code idref} values).
     * Element names are matched case-insensitively on the local name.
     */
    public static class ContentOrderScraper extends DefaultHandler {
        /** Manifest item id mapped to its (URL-decoded) href and declared media type. */
        Map<String, HRefMediaPair> locationMap = new HashMap<>();
        /** Spine {@code idref} values in document order. */
        List<String> contentItems = new ArrayList<>();
        /** True while the parser is between the start and end of a {@code manifest} element. */
        boolean inManifest = false;
        /** True while the parser is between the start and end of a {@code spine} element. */
        boolean inSpine = false;

        private ContentOrderScraper() {
        }

        /**
         * Tracks entry into {@code manifest}/{@code spine} and records
         * {@code item} and {@code itemRef} children found inside them.
         *
         * @param str        namespace URI (unused)
         * @param str2       local name, the only name that is inspected
         * @param str3       qualified name (unused)
         * @param attributes attributes of the element being opened
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
            if ("manifest".equalsIgnoreCase(str2)) {
                this.inManifest = true;
            } else if ("spine".equalsIgnoreCase(str2)) {
                this.inSpine = true;
            }
            if (this.inManifest && "item".equalsIgnoreCase(str2)) {
                String id = XMLReaderUtils.getAttrValue("id", attributes);
                String href = XMLReaderUtils.getAttrValue("href", attributes);
                String mediaType = XMLReaderUtils.getAttrValue("media-type", attributes);
                // Items without both an id and an href cannot be resolved later, so skip them.
                if (id != null && href != null) {
                    this.locationMap.put(id, new HRefMediaPair(decodeHref(href), mediaType));
                }
            }
            if (this.inSpine && "itemRef".equalsIgnoreCase(str2)) {
                String idRef = XMLReaderUtils.getAttrValue("idref", attributes);
                if (idRef != null) {
                    this.contentItems.add(idRef);
                }
            }
        }

        /** Clears the manifest/spine flag when the corresponding element closes. */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void endElement(String str, String str2, String str3) throws SAXException {
            if ("manifest".equalsIgnoreCase(str2)) {
                this.inManifest = false;
            } else if ("spine".equalsIgnoreCase(str2)) {
                this.inSpine = false;
            }
        }

        /**
         * Percent-decodes a manifest href as UTF-8, falling back to the raw
         * value when it cannot be decoded.
         * NOTE(review): URLDecoder performs form decoding, so a literal '+'
         * becomes a space - confirm that is wanted for zip entry lookups.
         */
        private static String decodeHref(String href) {
            try {
                return URLDecoder.decode(href, StandardCharsets.UTF_8.name());
            } catch (UnsupportedEncodingException | IllegalArgumentException ignored) {
                // UTF-8 is always supported; a malformed escape such as "100%.html"
                // raises IllegalArgumentException. Either way the undecoded href is
                // the best remaining guess and must not abort the whole parse.
                return href;
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* JADX WARN: Classes with same name are omitted:
      input_file:WEB-INF/lib/tika-parser-miscoffice-module-2.4.1.jar:org/apache/tika/parser/epub/EpubParser$HRefMediaPair.class
      input_file:WEB-INF/lib/tika-parsers-standard-package-2.4.1.jar:org/apache/tika/parser/epub/EpubParser$HRefMediaPair.class
     */
    /* loaded from: input_file:org/apache/tika/parser/epub/EpubParser$HRefMediaPair.class */
    /** Immutable pairing of a manifest item's href with its declared media type; either may be null. */
    public static class HRefMediaPair {
        private final String href;
        private final String media;

        /**
         * @param str  the item's href
         * @param str2 the item's media type
         */
        HRefMediaPair(String str, String str2) {
            href = str;
            media = str2;
        }

        /** Renders both components for diagnostics; null components print as {@code null}. */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("HRefMediaPair{href='");
            text.append(href);
            text.append("', media='");
            text.append(media);
            text.append("'}");
            return text.toString();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* JADX WARN: Classes with same name are omitted:
      input_file:WEB-INF/lib/tika-parser-miscoffice-module-2.4.1.jar:org/apache/tika/parser/epub/EpubParser$RootFinder.class
      input_file:WEB-INF/lib/tika-parsers-standard-package-2.4.1.jar:org/apache/tika/parser/epub/EpubParser$RootFinder.class
     */
    /* loaded from: input_file:org/apache/tika/parser/epub/EpubParser$RootFinder.class */
    /**
     * SAX handler that captures the {@code full-path} attribute of
     * {@code rootfile} elements. If several are present, the value from the
     * last one seen is kept.
     */
    public static class RootFinder extends DefaultHandler {
        /** Value of the most recently seen {@code full-path}; null until a rootfile element is met. */
        String root = null;

        private RootFinder() {
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
            // Only the local name is examined, ignoring case.
            if (!"rootfile".equalsIgnoreCase(str2)) {
                return;
            }
            root = XMLReaderUtils.getAttrValue("full-path", attributes);
        }
    }

    /**
     * @return the parser currently applied to {@code metadata.xml} and {@code .opf} entries
     */
    public Parser getMetaParser() {
        return meta;
    }

    /**
     * Replaces the parser applied to {@code metadata.xml} and {@code .opf} entries.
     *
     * @param parser the new metadata parser; stored as given, without validation
     */
    public void setMetaParser(Parser parser) {
        meta = parser;
    }

    /**
     * @return the parser currently applied to the book's (X)HTML content entries
     */
    public Parser getContentParser() {
        return content;
    }

    /**
     * Replaces the parser applied to the book's (X)HTML content entries.
     *
     * @param parser the new content parser; stored as given, without validation
     */
    public void setContentParser(Parser parser) {
        content = parser;
    }

    /**
     * Reports the media types handled by this parser.
     *
     * @param parseContext not consulted by this implementation
     * @return the fixed, unmodifiable {@code SUPPORTED_TYPES} set
     */
    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses an EPUB container, emitting XHTML SAX events to {@code contentHandler}
     * and populating {@code metadata}.
     *
     * <p>Depending on the {@code streaming} flag the archive is read either as a
     * zip stream or through a {@code ZipFile}. An {@link IOException} raised by
     * either path is held back until the XHTML document has been ended, then
     * rethrown; {@link SAXException} and {@link TikaException} propagate
     * immediately, without {@code endDocument()} being called.
     *
     * @param inputStream    the EPUB bytes
     * @param contentHandler receiver of the XHTML output
     * @param metadata       metadata to populate
     * @param parseContext   parse-time configuration
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        xhtml.startDocument();
        ContentHandler bodyHandler = new EmbeddedContentHandler(new BodyContentHandler(xhtml));

        IOException deferred = null;
        try {
            if (streaming) {
                streamingParse(inputStream, bodyHandler, metadata, parseContext);
            } else {
                bufferedParse(inputStream, bodyHandler, xhtml, metadata, parseContext);
            }
        } catch (IOException e) {
            // Keep the failure, but close the document first so the output stays well-formed.
            deferred = e;
        }

        xhtml.endDocument();
        if (deferred != null) {
            throw deferred;
        }
    }

    /**
     * Reads the archive front-to-back as a zip stream and dispatches each entry by name:
     * {@code mimetype} updates the content type, {@code metadata.xml} and {@code *.opf}
     * go to the metadata parser, and {@code .htm/.html/.xhtml/.xml} entries go to the
     * content parser. Other entries are skipped. Entries are handled in archive order,
     * not in spine order.
     *
     * <p>The zip wrapper is not closed here, which leaves {@code inputStream} open
     * for the caller.
     *
     * @param inputStream    raw zip bytes
     * @param contentHandler receiver for parsed content
     * @param metadata       metadata to populate
     * @param parseContext   parse-time configuration
     */
    private void streamingParse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, TikaException, SAXException {
        // Must be typed as ZipArchiveInputStream: getNextZipEntry() is not declared on InputStream.
        ZipArchiveInputStream zip = new ZipArchiveInputStream(inputStream);
        for (ZipArchiveEntry entry = zip.getNextZipEntry(); entry != null; entry = zip.getNextZipEntry()) {
            String name = entry.getName();
            if (name.equals("mimetype")) {
                updateMimeType(zip, metadata);
            } else if (name.equals("metadata.xml") || name.endsWith(".opf")) {
                this.meta.parse(zip, new DefaultHandler(), metadata, parseContext);
            } else if (name.endsWith(".htm") || name.endsWith(".html") || name.endsWith(".xhtml") || name.endsWith(".xml")) {
                this.content.parse(zip, contentHandler, metadata, parseContext);
            }
        }
    }

    /**
     * Reads the whole stream as UTF-8 text and stores it, trimmed, as the
     * {@code Content-Type} metadata value.
     *
     * @param inputStream content of the archive's {@code mimetype} entry
     * @param metadata    metadata whose content type is overwritten
     */
    private void updateMimeType(InputStream inputStream, Metadata metadata) throws IOException {
        String declaredType = IOUtils.toString(inputStream, StandardCharsets.UTF_8);
        String normalized = declaredType == null ? null : declaredType.trim();
        metadata.set("Content-Type", normalized);
    }

    /**
     * Parses the EPUB through a random-access {@code ZipFile}.
     *
     * <ul>
     *   <li>If {@code inputStream} is a {@code TikaInputStream} whose open container is
     *       already a {@code ZipFile}, that instance is used directly and is not closed here.</li>
     *   <li>Otherwise the file behind {@code getPath()} is opened as a {@code ZipFile}. A
     *       non-Tika stream is first wrapped (close-shielded) together with a
     *       {@code TemporaryResources}; that wrapper is closed as soon as the open attempt
     *       has finished.</li>
     *   <li>If the zip cannot be opened, the failure is recorded on {@code metadata} and
     *       {@link #trySalvage} is attempted instead.</li>
     * </ul>
     *
     * <p>Only a failure to <em>open</em> the zip triggers salvage. Exceptions thrown while
     * parsing an opened zip propagate to the caller, so content is never emitted twice and
     * the already-closed wrapper stream is never touched again.
     *
     * @param inputStream         the EPUB bytes
     * @param contentHandler      receiver for spine content
     * @param xHTMLContentHandler XHTML handler used for embedded resources
     * @param metadata            metadata to populate
     * @param parseContext        parse-time configuration
     */
    private void bufferedParse(InputStream inputStream, ContentHandler contentHandler, XHTMLContentHandler xHTMLContentHandler, Metadata metadata, ParseContext parseContext) throws IOException, TikaException, SAXException {
        TikaInputStream tikaInputStream;
        TemporaryResources temporaryResources = null;
        if (TikaInputStream.isTikaInputStream(inputStream)) {
            tikaInputStream = TikaInputStream.cast(inputStream);
            if (tikaInputStream.getOpenContainer() instanceof ZipFile) {
                bufferedParseZipFile((ZipFile) tikaInputStream.getOpenContainer(), contentHandler, xHTMLContentHandler, metadata, parseContext, true);
                return;
            }
        } else {
            // We created this wrapper, so we are responsible for closing it (see finally below).
            temporaryResources = new TemporaryResources();
            tikaInputStream = TikaInputStream.get(new CloseShieldInputStream(inputStream), temporaryResources);
        }

        ZipFile zipFile;
        try {
            zipFile = new ZipFile(tikaInputStream.getPath().toFile());
        } catch (IOException e) {
            ParserUtils.recordParserFailure(this, e, metadata);
            trySalvage(tikaInputStream.getPath(), contentHandler, xHTMLContentHandler, metadata, parseContext);
            return;
        } finally {
            // Runs after the open attempt (and after any salvage); closes only a wrapper we own.
            if (temporaryResources != null) {
                tikaInputStream.close();
            }
        }

        try {
            bufferedParseZipFile(zipFile, contentHandler, xHTMLContentHandler, metadata, parseContext, true);
        } finally {
            zipFile.close();
        }
    }

    /**
     * Last-resort path for an archive that could not be opened as a {@code ZipFile}.
     *
     * <p>The broken file is copied through {@code ZipSalvager.salvageCopy} into a temp
     * file, which is then parsed non-strictly via {@link #bufferedParseZipFile}. If that
     * reports failure ({@code false}), the salvaged copy is parsed again with
     * {@link #streamingParse}, writing straight to {@code xHTMLContentHandler}.
     *
     * <p>All three resources (temp files, zip file, stream) are closed by
     * try-with-resources, so a failure while closing is attached as a suppressed
     * exception instead of replacing the primary one.
     *
     * @param path                location of the unreadable archive
     * @param contentHandler      receiver for spine content in the buffered attempt
     * @param xHTMLContentHandler XHTML handler for embedded resources and the streaming fallback
     * @param metadata            metadata to populate
     * @param parseContext        parse-time configuration
     */
    private void trySalvage(Path path, ContentHandler contentHandler, XHTMLContentHandler xHTMLContentHandler, Metadata metadata, ParseContext parseContext) throws IOException, TikaException, SAXException {
        try (TemporaryResources temporaryResources = new TemporaryResources()) {
            Path salvaged = temporaryResources.createTempFile();
            ZipSalvager.salvageCopy(path.toFile(), salvaged.toFile());

            boolean parsed;
            try (ZipFile zipFile = new ZipFile(salvaged.toFile())) {
                // Non-strict: missing spine entries do not abort the attempt.
                parsed = bufferedParseZipFile(zipFile, contentHandler, xHTMLContentHandler, metadata, parseContext, false);
            }

            if (!parsed) {
                try (TikaInputStream salvagedStream = TikaInputStream.get(salvaged)) {
                    streamingParse(salvagedStream, xHTMLContentHandler, metadata, parseContext);
                }
            }
        }
    }

    /**
     * Parses an opened EPUB archive in spine (reading) order.
     *
     * <ol>
     *   <li>Locates the root OPF via {@link #getRoot}; gives up if it is absent or unreadable.</li>
     *   <li>Feeds the OPF to the metadata parser, then scrapes its manifest and spine.</li>
     *   <li>In strict mode ({@code z == true}) every spine item must resolve to a readable
     *       zip entry, otherwise nothing further is parsed.</li>
     *   <li>Extracts {@code mimetype}/{@code metadata.xml} metadata.</li>
     *   <li>Parses each HTML-like spine item with the content parser.</li>
     *   <li>Hands every manifest item that was not parsed as spine content to the embedded
     *       document extractor, subject to {@link #shouldHandleEmbedded}.</li>
     * </ol>
     *
     * <p>Every entry stream opened here is closed via try-with-resources, including on
     * failure and including the one given to the metadata parser.
     *
     * @param zipFile             the opened archive; not closed by this method
     * @param contentHandler      receiver for spine content
     * @param xHTMLContentHandler XHTML handler used for embedded resources
     * @param metadata            metadata to populate
     * @param parseContext        parse-time configuration
     * @param z                   strict mode flag
     * @return {@code true} if the archive was processed; {@code false} if the root OPF
     *         was missing/unreadable, the spine was empty, or the strict check failed
     */
    private boolean bufferedParseZipFile(ZipFile zipFile, ContentHandler contentHandler, XHTMLContentHandler xHTMLContentHandler, Metadata metadata, ParseContext parseContext, boolean z) throws IOException, TikaException, SAXException {
        String root = getRoot(zipFile, parseContext);
        if (root == null) {
            return false;
        }
        ZipArchiveEntry rootEntry = zipFile.getEntry(root);
        if (rootEntry == null || !zipFile.canReadEntryData(rootEntry)) {
            return false;
        }

        // The OPF is read twice: once for metadata, once for manifest/spine structure.
        try (InputStream opfStream = zipFile.getInputStream(rootEntry)) {
            this.meta.parse(opfStream, new DefaultHandler(), metadata, parseContext);
        }
        ContentOrderScraper scraper = new ContentOrderScraper();
        try (InputStream opfStream = zipFile.getInputStream(rootEntry)) {
            XMLReaderUtils.parseSAX(opfStream, scraper, parseContext);
        }
        if (scraper.contentItems.isEmpty()) {
            return false;
        }

        // Manifest hrefs are resolved against the directory that holds the OPF.
        int lastSlash = root.lastIndexOf('/');
        String basePath = lastSlash > -1 ? root.substring(0, lastSlash + 1) : "";

        if (z) {
            for (String idRef : scraper.contentItems) {
                HRefMediaPair pair = scraper.locationMap.get(idRef);
                if (pair == null || pair.href == null) {
                    return false;
                }
                ZipArchiveEntry spineEntry = zipFile.getEntry(basePath + pair.href);
                if (spineEntry == null || !zipFile.canReadEntryData(spineEntry)) {
                    return false;
                }
            }
        }

        extractMetadata(zipFile, metadata, parseContext);

        Set<String> parsedIds = new HashSet<>();
        for (String idRef : scraper.contentItems) {
            HRefMediaPair pair = scraper.locationMap.get(idRef);
            if (pair == null || pair.href == null || !isMarkupItem(pair)) {
                continue;
            }
            ZipArchiveEntry contentEntry = zipFile.getEntry(basePath + pair.href);
            if (contentEntry == null) {
                continue;
            }
            try (InputStream contentStream = zipFile.getInputStream(contentEntry)) {
                this.content.parse(contentStream, contentHandler, metadata, parseContext);
                parsedIds.add(idRef);
            }
        }

        EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(parseContext);
        for (Map.Entry<String, HRefMediaPair> item : scraper.locationMap.entrySet()) {
            if (parsedIds.contains(item.getKey())) {
                continue;
            }
            HRefMediaPair pair = item.getValue();
            if (shouldHandleEmbedded(pair.media)) {
                handleEmbedded(zipFile, basePath, pair, extractor, xHTMLContentHandler, metadata);
            }
        }
        return true;
    }

    /**
     * Decides whether a spine item should go to the content parser: by media type when
     * one is declared (it must contain "html"), otherwise by the href's suffix.
     */
    private static boolean isMarkupItem(HRefMediaPair pair) {
        if (pair.media != null) {
            return pair.media.toLowerCase(Locale.US).contains("html");
        }
        String href = pair.href.toLowerCase(Locale.US);
        return href.endsWith("htm") || href.endsWith("html") || href.endsWith(".xml");
    }

    /**
     * Decides from a manifest item's media type whether it should be passed to the
     * embedded document extractor.
     *
     * @param str the declared media type, possibly null
     * @return {@code true} for a null media type and for anything that is not
     *         CSS, SVG, a {@code .../xml} type, an {@code x-ibooks} type, or the NCX type
     */
    private boolean shouldHandleEmbedded(String str) {
        if (str == null) {
            return true;
        }
        String mediaType = str.toLowerCase(Locale.US);
        if (mediaType.contains("css") || mediaType.contains("svg")) {
            return false;
        }
        if (mediaType.endsWith("/xml") || mediaType.contains("x-ibooks")) {
            return false;
        }
        return !mediaType.equals("application/x-dtbncx+xml");
    }

    /**
     * Passes one manifest resource to the embedded document extractor, wrapped in
     * {@code <div class="embedded">}.
     *
     * <p>Nothing happens when the item has no href, the entry is missing or unreadable,
     * or the extractor declines the item. An {@link IOException} from opening or parsing
     * the entry is recorded on the parent {@code metadata} rather than thrown; in the
     * parsing case the wrapping {@code div} is still closed so the XHTML output stays
     * balanced. A {@link SAXException} propagates.
     *
     * @param zipFile                   the opened archive
     * @param str                       directory prefix that hrefs are resolved against
     * @param hRefMediaPair             the manifest item
     * @param embeddedDocumentExtractor extractor that decides on and performs the embedded parse
     * @param xHTMLContentHandler       XHTML handler receiving the wrapper and embedded output
     * @param metadata                  parent metadata, used only to record stream exceptions
     */
    private void handleEmbedded(ZipFile zipFile, String str, HRefMediaPair hRefMediaPair, EmbeddedDocumentExtractor embeddedDocumentExtractor, XHTMLContentHandler xHTMLContentHandler, Metadata metadata) throws IOException, SAXException {
        if (hRefMediaPair.href == null) {
            return;
        }
        ZipArchiveEntry entry = zipFile.getEntry(str + hRefMediaPair.href);
        if (entry == null || !zipFile.canReadEntryData(entry)) {
            return;
        }
        Metadata embeddedMetadata = new Metadata();
        if (!StringUtils.isBlank(hRefMediaPair.media)) {
            embeddedMetadata.set("Content-Type", hRefMediaPair.media);
        }
        if (!embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
            return;
        }

        TikaInputStream embeddedStream;
        try {
            embeddedStream = TikaInputStream.get(zipFile.getInputStream(entry));
        } catch (IOException e) {
            // Could not even open the entry: record it and emit no wrapper element.
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
            return;
        }

        xHTMLContentHandler.startElement("div", "class", "embedded");
        try {
            embeddedDocumentExtractor.parseEmbedded(embeddedStream, new EmbeddedContentHandler(xHTMLContentHandler), embeddedMetadata, true);
        } catch (IOException e) {
            // Best effort: one bad resource must not fail the book, but the div opened
            // above still has to be closed below.
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
        } finally {
            IOUtils.closeQuietly((InputStream) embeddedStream);
        }
        xHTMLContentHandler.endElement("div");
    }

    /**
     * Pulls container-level metadata out of the archive: the {@code mimetype} entry
     * sets the content type, and a {@code metadata.xml} entry is run through the
     * metadata parser. Each entry is skipped when absent or unreadable.
     *
     * <p>Entry streams are closed via try-with-resources, so they are released even when
     * reading or parsing fails.
     *
     * @param zipFile      the opened archive
     * @param metadata     metadata to populate
     * @param parseContext parse-time configuration
     */
    private void extractMetadata(ZipFile zipFile, Metadata metadata, ParseContext parseContext) throws IOException, TikaException, SAXException {
        ZipArchiveEntry mimeEntry = zipFile.getEntry("mimetype");
        if (mimeEntry != null && zipFile.canReadEntryData(mimeEntry)) {
            try (InputStream mimeStream = zipFile.getInputStream(mimeEntry)) {
                updateMimeType(mimeStream, metadata);
            }
        }

        ZipArchiveEntry metadataEntry = zipFile.getEntry("metadata.xml");
        if (metadataEntry != null && zipFile.canReadEntryData(metadataEntry)) {
            try (InputStream metadataStream = zipFile.getInputStream(metadataEntry)) {
                this.meta.parse(metadataStream, new DefaultHandler(), metadata, parseContext);
            }
        }
    }

    /**
     * Finds the path of the root OPF package document inside the archive.
     *
     * <p>When {@code META-INF/container.xml} exists, it is parsed and the
     * {@code full-path} captured by {@link RootFinder} is returned (null if no
     * {@code rootfile} element was found). When it does not exist, the name of the
     * first readable entry ending in {@code .opf} (case-insensitive) is returned.
     *
     * @param zipFile      the opened archive
     * @param parseContext parse-time configuration, passed to the SAX parse
     * @return the root OPF entry name, or {@code null} if none could be determined
     */
    private String getRoot(ZipFile zipFile, ParseContext parseContext) throws IOException, TikaException, SAXException {
        ZipArchiveEntry container = zipFile.getEntry("META-INF/container.xml");
        if (container == null) {
            // No container descriptor: fall back to scanning for any readable .opf entry.
            Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
            while (entries.hasMoreElements()) {
                ZipArchiveEntry candidate = entries.nextElement();
                if (candidate.getName().toLowerCase(Locale.US).endsWith(".opf") && zipFile.canReadEntryData(candidate)) {
                    return candidate.getName();
                }
            }
            return null;
        }

        RootFinder rootFinder = new RootFinder();
        // try-with-resources closes exactly once and keeps a close failure as a
        // suppressed exception instead of masking a parse failure.
        try (InputStream containerStream = zipFile.getInputStream(container)) {
            XMLReaderUtils.parseSAX(containerStream, rootFinder, parseContext);
        }
        return rootFinder.root;
    }
}
