/*
 * Decompiled with CFR 0.152.
 */
package com.huawei.support.icscbb.commonsearch.index.parser.html;

import com.huawei.support.icscbb.commonsearch.index.parser.hdxmeta.factory.HdxMetaContext;
import com.huawei.support.icscbb.commonsearch.index.parser.util.DocumentParserUtils;
import com.huawei.support.icscbb.commonsearch.lucene.index.util.IndexUtils;
import com.huawei.support.icscbb.log.common.service.CodeCCUtils;
import com.huawei.support.icscbb.log.lite.adapter.CommonLogger;
import com.huawei.support.icscbb.log.lite.adapter.CommonLoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

public abstract class AbstractHtmlDocumentParser {
    private static final CommonLogger LOGGER = CommonLoggerFactory.getLogger(AbstractHtmlDocumentParser.class);
    private static final List<String> REDUNDANT_ELEMENT_ATTR_LIST = Collections.unmodifiableList(Arrays.asList("ulchildlink", "reltasks", "relref", "relconcepts", "relinfo", "hwcopyright", "footerNavBar clearfix"));

    protected List<Field> extractField(String topicUrl, String path, String title) {
        ArrayList<Field> textFields = new ArrayList<Field>();
        try (InputStream hdxIs = DocumentParserUtils.getTopicInputStream(path, topicUrl);){
            Document hdxDocument = Jsoup.parse((InputStream)hdxIs, null, (String)topicUrl);
            textFields.add(this.getTitleField(title, topicUrl, hdxDocument));
            textFields.add(this.getContentField(hdxDocument));
            this.addTopicMeta(hdxDocument, textFields);
        }
        catch (IOException e) {
            CodeCCUtils.INSTANCE.errorLog(LOGGER, "[index]Html parse occurred io error.", (Throwable)e);
        }
        return textFields;
    }

    protected void addTopicMeta(Document document, List<Field> textFields) {
        this.addKeywordsField((Element)document, textFields);
        this.addDefaultDocMeta(document, textFields);
    }

    private Field getContentField(Document document) {
        Element body = document.body();
        this.cleanTag(body);
        String elementText = IndexUtils.getElementText(body);
        return IndexUtils.getField("t_content", (String)StringUtils.defaultIfBlank((CharSequence)StringUtils.trim((String)elementText), (CharSequence)""), Field.Store.YES, TextField.class.getSimpleName());
    }

    private void addKeywordsField(Element element, List<Field> htmlFields) {
        Elements elements = element.getElementsByAttributeValue("name", "keywords");
        if (CollectionUtils.isEmpty((Collection)elements)) {
            return;
        }
        HashSet keywords = new HashSet();
        for (Element node : elements) {
            String value = StringUtils.trim((String)node.attr("content"));
            if (StringUtils.isBlank((CharSequence)value)) continue;
            String[] arr = StringUtils.replace((String)value, (String)"\uff0c", (String)",").split(",");
            keywords.addAll(Arrays.stream(arr).collect(Collectors.toSet()));
        }
        String mergeKeywords = String.join((CharSequence)", ", keywords);
        htmlFields.add(IndexUtils.getField("tp_keywords", mergeKeywords, Field.Store.YES, TextField.class.getSimpleName()));
    }

    private void addDefaultDocMeta(Document document, List<Field> textFields) {
        Elements metas = document.getElementsByTag("meta");
        if (CollectionUtils.isEmpty((Collection)metas)) {
            return;
        }
        HdxMetaContext hdxMetaContext = new HdxMetaContext();
        metas.forEach(element -> hdxMetaContext.process((Element)element, textFields));
    }

    private void cleanTag(Element tagNode) {
        if (tagNode == null) {
            return;
        }
        REDUNDANT_ELEMENT_ATTR_LIST.forEach(attrValue -> this.removeNodeByAttr(tagNode, (String)attrValue));
    }

    private void removeNodeByAttr(Element element, String attrValue) {
        Elements elements = element.getElementsByAttributeValue("class", attrValue);
        elements.forEach(Node::remove);
    }

    protected abstract Field getTitleField(String var1, String var2, Document var3);
}

