/*
 * Decompiled with CFR 0.152.
 */
package com.huawei.support.icscbb.commonsearch.index.parser.pdf;

import com.huawei.support.icscbb.commonsearch.index.parser.DocumentParser;
import com.huawei.support.icscbb.commonsearch.index.parser.util.DocumentParserUtils;
import com.huawei.support.icscbb.commonsearch.lucene.index.util.IndexUtils;
import com.huawei.support.icscbb.log.common.service.CodeCCUtils;
import com.huawei.support.icscbb.log.lite.adapter.CommonLogger;
import com.huawei.support.icscbb.log.lite.adapter.CommonLoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.text.PDFTextStripper;

public class PdfDocumentParser
implements DocumentParser {
    private static final CommonLogger LOGGER = CommonLoggerFactory.getLogger(PdfDocumentParser.class);

    /*
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    @Override
    public List<Field> extract(String topicUrl, String path, String title) {
        try (InputStream inputStream = DocumentParserUtils.getTopicInputStream(path, topicUrl);){
            List<Field> list = this.getField(inputStream, topicUrl, title);
            return list;
        }
        catch (IOException e) {
            CodeCCUtils.INSTANCE.errorLog(LOGGER, "[index]Pdf:{0} create pdfParser occurred io exception.", (Throwable)e, new Object[]{topicUrl});
            return Collections.emptyList();
        }
    }

    /*
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    private List<Field> getField(InputStream inputStream, String topicUrl, String title) {
        if (inputStream == null) {
            CodeCCUtils.INSTANCE.errorLog(LOGGER, "[index]topic not exist:{0}.", new Object[]{topicUrl});
            return Collections.emptyList();
        }
        try (RandomAccessBuffer source = new RandomAccessBuffer(inputStream);){
            PDFParser pdfParser = new PDFParser((RandomAccessRead)source);
            pdfParser.parse();
            List<Field> list = this.getPdfFieldList(pdfParser, topicUrl, title);
            return list;
        }
        catch (IOException e) {
            CodeCCUtils.INSTANCE.errorLog(LOGGER, "[index]Pdf:{0} create pdfParser occurred io exception.", (Throwable)e, new Object[]{topicUrl});
            return Collections.emptyList();
        }
    }

    /*
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    private List<Field> getPdfFieldList(PDFParser pdfParser, String topicUrl, String title) {
        ArrayList<Field> pdfFields = new ArrayList<Field>();
        try (PDDocument pdDocument = pdfParser.getPDDocument();){
            if (pdDocument == null) {
                CodeCCUtils.INSTANCE.infoLog(LOGGER, "[index]Pdf:{0} parse fail.", new Object[]{topicUrl});
                ArrayList<Field> arrayList = pdfFields;
                return arrayList;
            }
            PDFTextStripper textStripper = new PDFTextStripper();
            String content = textStripper.getText(pdDocument);
            String pdfTitle = (String)StringUtils.defaultIfBlank((CharSequence)title, (CharSequence)IndexUtils.getFileNameNotSuffix(topicUrl));
            pdfFields.add(IndexUtils.getField("t_title", pdfTitle, Field.Store.YES, TextField.class.getSimpleName()));
            pdfFields.add(IndexUtils.getField("t_content", content, Field.Store.YES, TextField.class.getSimpleName()));
            PDDocumentInformation pdDocumentInformation = pdDocument.getDocumentInformation();
            if (pdDocumentInformation == null) return pdfFields;
            pdfFields.add(IndexUtils.getField("summary", pdDocumentInformation.getSubject(), Field.Store.YES, TextField.class.getSimpleName()));
            pdfFields.add(IndexUtils.getField("tp_keywords", pdDocumentInformation.getKeywords(), Field.Store.YES, TextField.class.getSimpleName()));
            return pdfFields;
        }
        catch (IOException e) {
            CodeCCUtils.INSTANCE.errorLog(LOGGER, "[index]Error occurred while parsing pdf.", (Throwable)e);
        }
        return pdfFields;
    }
}

