/*
 * Decompiled with CFR 0.152.
 */
package com.huawei.support.icscbb.commonsearch.lucene.analyzer;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.cn.smart.Utility;
import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
import org.apache.lucene.analysis.cn.smart.hhmm.SegTokenFilter;

/**
 * Splits a sentence into Lucene smartcn {@link SegToken}s using a HanLP {@link Segment}.
 *
 * <p>The segmenter is configured with offsets enabled and with Japanese, Chinese and
 * translated name recognition disabled; index mode is selectable per instance.
 *
 * <p>NOTE(review): the numeric codes used in {@link #getWordType(int)} and
 * {@link #convertSegToken(SegToken, String, int)} are compiler-inlined constants. The names
 * given in the trailing comments are taken from Lucene smartcn's {@code CharType} and
 * {@code WordType} classes; confirm them against the Lucene version on the classpath.
 */
public class HanlpWordSegmenter {
    /**
     * Punctuation blanked out before segmentation. Every match is a single char that is
     * replaced by a single space, so the sentence length, and therefore every term
     * offset, is the same before and after the replacement.
     */
    private static final Pattern SEPARATOR_PUNCTUATION = Pattern.compile("[\uff0c,\uff0e.\u3001\u3002]");

    private final Segment segment;
    private final SegTokenFilter tokenFilter = new SegTokenFilter();

    /** Creates a segmenter with HanLP index mode disabled. */
    public HanlpWordSegmenter() {
        this(false);
    }

    /**
     * Creates a segmenter.
     *
     * @param enableIndexMode value passed to the HanLP segment's {@code enableIndexMode}
     */
    public HanlpWordSegmenter(boolean enableIndexMode) {
        this.segment = HanLP.newSegment()
                .enableOffset(true)
                .enableJapaneseNameRecognize(false)
                .enableNameRecognize(false)
                .enableTranslatedNameRecognize(false)
                .enableIndexMode(enableIndexMode);
    }

    /**
     * Segments {@code sentence} and returns one token per HanLP term.
     *
     * @param sentence    text to segment; {@code null} or empty yields an empty list
     * @param startOffset offset of {@code sentence} within the enclosing text; it is added
     *                    to the start and end offset of every returned token
     * @return a new, mutable list of tokens in the order HanLP produced the terms
     */
    public List<SegToken> segSentence(String sentence, int startOffset) {
        if (sentence == null || sentence.isEmpty()) {
            return new ArrayList<SegToken>(0);
        }
        String replacedSentence = SEPARATOR_PUNCTUATION.matcher(sentence).replaceAll(" ");
        List<Term> terms = this.segment.seg(replacedSentence);
        List<SegToken> segTokenList = new ArrayList<SegToken>(terms.size());
        for (Term term : terms) {
            // convertTerm classifies a term by its first char, so a term without text
            // cannot be classified; skip it instead of failing the whole sentence.
            if (term.word == null || term.word.isEmpty()) {
                continue;
            }
            // Keep the token returned by the conversion so the list always holds the
            // instance that went through filtering and offset adjustment.
            segTokenList.add(this.convertSegToken(this.convertTerm(term), sentence, startOffset));
        }
        return segTokenList;
    }

    /**
     * Builds a {@link SegToken} from a HanLP term. Offsets are relative to the segmented
     * sentence, the word type is derived from the first char of the term, and the weight
     * is {@code term.getFrequency()}.
     *
     * @param term a term whose {@code word} is non-empty
     */
    private SegToken convertTerm(Term term) {
        char[] charArray = term.word.toCharArray();
        int startOffset = term.offset;
        int endOffset = term.offset + term.length();
        int wordType = getWordType(Utility.getCharType(charArray[0]));
        int weight = term.getFrequency();
        return new SegToken(charArray, startOffset, endOffset, wordType, weight);
    }

    /**
     * Maps the char type of a term's first char to a token word type.
     *
     * @param firstCharType value returned by {@code Utility.getCharType}
     * @return the word type code; 3 for any char type not listed below
     */
    private static int getWordType(int firstCharType) {
        switch (firstCharType) {
            case 0: // CharType.DELIMITER
            case 4: // CharType.SPACE_LIKE
                return 5; // WordType.DELIMITER
            case 3: // CharType.HANZI
            case 8: // CharType.SURROGATE
                return 2; // WordType.CHINESE_WORD
            case 1: // CharType.LETTER
            case 5: // CharType.FULLWIDTH_LETTER
                return 6; // WordType.FULLWIDTH_STRING
            case 2: // CharType.DIGIT
            case 6: // CharType.FULLWIDTH_DIGIT
                return 7; // WordType.FULLWIDTH_NUMBER
            default:
                return 3; // WordType.STRING
        }
    }

    /**
     * Finalises a token: for word types 3, 6 and 7 the token text is replaced by the
     * matching slice of the original {@code sentence}, the token is then passed through
     * the {@link SegTokenFilter}, and finally its offsets are shifted by
     * {@code sentenceStartOffset}.
     *
     * @param segToken            token whose offsets are relative to {@code sentence}
     * @param sentence            the original (un-replaced) sentence
     * @param sentenceStartOffset amount added to the token's start and end offsets
     * @return the token returned by the filter, with shifted offsets
     */
    public SegToken convertSegToken(SegToken segToken, String sentence, int sentenceStartOffset) {
        switch (segToken.wordType) {
            case 3: // WordType.STRING
            case 6: // WordType.FULLWIDTH_STRING
            case 7: // WordType.FULLWIDTH_NUMBER
                // Take the text from the caller's sentence rather than from the HanLP term.
                segToken.charArray = sentence.substring(segToken.startOffset, segToken.endOffset).toCharArray();
                break;
            default:
                break;
        }
        segToken = this.tokenFilter.filter(segToken);
        segToken.startOffset += sentenceStartOffset;
        segToken.endOffset += sentenceStartOffset;
        return segToken;
    }
}

