package com.xy.nlp.tokenizer.seg;

import com.xy.louds.louds.TailLOUDSTrie;
import com.xy.louds.util.Pair;
import com.xy.nlp.tokenizer.XyNLP;
import com.xy.nlp.tokenizer.corpus.tag.Nature;
import com.xy.nlp.tokenizer.dictionary.Attribute;
import com.xy.nlp.tokenizer.dictionary.CoreBiGramTableDictionary;
import com.xy.nlp.tokenizer.dictionary.CoreDictionary;
import com.xy.nlp.tokenizer.dictionary.CustomDictionary;
import com.xy.nlp.tokenizer.dictionary.other.CharTable;
import com.xy.nlp.tokenizer.dictionary.other.CharType;
import com.xy.nlp.tokenizer.seg.NShort.Path.AtomNode;
import com.xy.nlp.tokenizer.seg.common.Term;
import com.xy.nlp.tokenizer.seg.common.Vertex;
import com.xy.nlp.tokenizer.seg.common.WordNet;
import com.xy.nlp.tokenizer.utility.LogManager;
import com.xy.nlp.tokenizer.utility.Predefine;
import com.xy.nlp.tokenizer.utility.SentencesUtil;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;

/* loaded from: classes4.dex */
public abstract class Segment {
    // Flag marked synthetic by the decompiler; nothing visible in this class reads it.
    public static final /* synthetic */ boolean $assertionsDisabled = false;
    // ASCII digits 0-9 followed by their full-width forms. quickAtomSegment uses this to tell
    // these digits apart from other characters that CharType reports as type 9.
    public static String ARAB_NUMS = "0123456789０１２３４５６７８９";
    // Bi-gram table supplied through the constructor; not read by any method visible in this class.
    public CoreBiGramTableDictionary biGramTableDictionary;
    // Mutable segmentation options, changed through the enable*() fluent setters.
    public Config config = new Config();
    // Core dictionary supplied through the constructor; mergeNumberQuantifier reads its M_WORD_ID.
    public CoreDictionary coreDictionary;
    // Optional user dictionary; combineByCustomDictionary is a no-op when it (or its trie) is null.
    public CustomDictionary customDictionary;
    // Set of strings filled by addAnw/addNawArr/addNawSet, created lazily on first add.
    // NOTE(review): the meaning of "naw" is not visible in this file — confirm with callers.
    public Set<String> naw;

    /* loaded from: classes4.dex */
    /**
     * Worker thread that segments a contiguous slice of a sentence array.
     *
     * <p>Every sentence whose index lies in {@code [from, f27051to)} is converted to a char array,
     * passed to {@link Segment#segSentence(char[])} of the enclosing instance, and the result is
     * stored at the same index of {@link #termListArray}.
     */
    public class WorkThread extends Thread {
        /** Index of the first sentence handled by this thread (inclusive). */
        public int from;
        /** Sentences shared by all workers; this thread only reads its own slice. */
        public String[] sentenceArray;
        /** Output array, parallel to {@link #sentenceArray}. */
        public List<Term>[] termListArray;

        /** Exclusive end index of the slice (named {@code to} before decompilation renamed it). */
        public int f27051to;

        /**
         * @param strArr  sentences to segment
         * @param listArr output array receiving one term list per sentence
         * @param i10     first index to process (inclusive)
         * @param i11     last index to process (exclusive)
         */
        public WorkThread(String[] strArr, List<Term>[] listArr, int i10, int i11) {
            this.from = i10;
            this.f27051to = i11;
            this.sentenceArray = strArr;
            this.termListArray = listArr;
        }

        /** Segments each sentence of the assigned slice in index order. */
        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            int index = this.from;
            while (index < this.f27051to) {
                char[] sentence = this.sentenceArray[index].toCharArray();
                this.termListArray[index] = Segment.this.segSentence(sentence);
                index++;
            }
        }
    }

    /**
     * Creates a segmenter bound to the given dictionaries. The references are stored as-is.
     *
     * @param coreDictionary            core dictionary
     * @param coreBiGramTableDictionary bi-gram table dictionary
     * @param customDictionary          user dictionary; may be {@code null}, in which case
     *                                  {@link #combineByCustomDictionary(List)} leaves its input untouched
     */
    public Segment(CoreDictionary coreDictionary, CoreBiGramTableDictionary coreBiGramTableDictionary, CustomDictionary customDictionary) {
        this.customDictionary = customDictionary;
        this.biGramTableDictionary = coreBiGramTableDictionary;
        this.coreDictionary = coreDictionary;
    }

    /**
     * Splits {@code cArr[i10, i11)} into atom nodes based on per-character types from
     * {@link CharType#get}.
     *
     * <p>The numeric type codes below are used exactly as they appear in the compiled code
     * (presumably inlined {@code CharType} constants — TODO confirm their names):
     * <ul>
     *   <li>types 7, 10, 6 and 17 always yield one single-character atom;</li>
     *   <li>consecutive characters of type 5 or of type 9 are merged into one atom;</li>
     *   <li>anything else yields one single-character atom.</li>
     * </ul>
     * Before splitting, a {@code '.'} directly followed by a type-9 character is itself treated as
     * type 9, a {@code '.'} followed by an ASCII digit becomes type 5, and type 8 is folded into type 5.
     *
     * @param cArr source characters
     * @param i10  start offset (inclusive)
     * @param i11  end offset (exclusive)
     * @return atoms in source order; every character of the range belongs to exactly one atom
     */
    public static List<AtomNode> atomSegment(char[] cArr, int i10, int i11) {
        List<AtomNode> atoms = new ArrayList<>();
        StringBuilder run = new StringBuilder();
        int length = i11 - i10;
        int[] types = new int[length];

        // Pass 1: compute the (adjusted) type of every character in the range.
        for (int k = 0; k < length; k++) {
            int pos = k + i10;
            char c = cArr[pos];
            types[k] = CharType.get(c);
            // The look-ahead is bounded by the whole array, not by i11.
            boolean dotWithSuccessor = c == '.' && pos < cArr.length - 1;
            if (dotWithSuccessor && CharType.get(cArr[pos + 1]) == 9) {
                types[k] = 9;
            } else {
                if (dotWithSuccessor && cArr[pos + 1] >= '0' && cArr[pos + 1] <= '9') {
                    types[k] = 5;
                }
                if (types[k] == 8) {
                    types[k] = 5;
                }
            }
        }

        // Pass 2: cut the range into atoms.
        int lastIndex = i11 - 1;
        int pos = i10;
        while (pos < i11) {
            int type = types[pos - i10];
            if (type == 7 || type == 10 || type == 6 || type == 17) {
                atoms.add(new AtomNode(String.valueOf(cArr[pos]), type));
                pos++;
            } else if (pos < lastIndex && (type == 5 || type == 9)) {
                run.setLength(0);
                run.append(cArr[pos]);
                boolean reachedEnd = true;
                while (pos < lastIndex) {
                    pos++;
                    if (types[pos - i10] != type) {
                        reachedEnd = false;
                        break;
                    }
                    run.append(cArr[pos]);
                }
                atoms.add(new AtomNode(run.toString(), type));
                // When the run stopped on a character of a different type, the cursor already
                // points at that character and it must be processed by the next iteration.
                // Advancing unconditionally here would silently drop it from the output.
                if (reachedEnd) {
                    pos++;
                }
            } else {
                atoms.add(new AtomNode(cArr[pos], type));
                pos++;
            }
        }
        return atoms;
    }

    /**
     * Collapses {@code vertexArr[i10, i11)} into a single vertex carrying {@code attribute}.
     *
     * <p>If the range covers exactly one slot, only that vertex's attribute is replaced. Otherwise
     * the {@code realWord}s of all non-null vertices in the range are concatenated, every slot of the
     * range is set to {@code null}, and a new vertex built from the concatenation is stored at
     * {@code i10}.
     *
     * @param vertexArr vertex array, modified in place
     * @param i10       first slot of the range (inclusive)
     * @param i11       end of the range (exclusive)
     * @param attribute attribute for the resulting vertex
     */
    private static void combineWords(Vertex[] vertexArr, int i10, int i11, Attribute attribute) {
        if (i11 != i10 + 1) {
            StringBuilder merged = new StringBuilder();
            int slot = i10;
            while (slot < i11) {
                Vertex current = vertexArr[slot];
                if (current != null) {
                    merged.append(current.realWord);
                    vertexArr[slot] = null;
                }
                slot++;
            }
            vertexArr[i10] = new Vertex(merged.toString(), attribute);
        } else {
            // Single word: nothing to merge, just apply the new attribute.
            vertexArr[i10].attribute = attribute;
        }
    }

    /**
     * Cuts {@code cArr[i10, i11)} into atoms by grouping neighbouring characters of the same type.
     *
     * <p>Types come from {@link CharType#get}. A type-9 character that is not contained in
     * {@link #ARAB_NUMS} is re-labelled as type 29, so it never merges with Arabic digits.
     * Characters of type 6 or 17 only merge with an identical preceding character.
     * A {@code '.'} that follows a type-9 atom and is itself followed (inside the range) by a
     * type-9 Arabic digit does not end the atom, so a decimal such as {@code 3.14} stays in one
     * piece.
     *
     * @param cArr source characters
     * @param i10  start offset (inclusive); must be a valid index of {@code cArr}
     * @param i11  end offset (exclusive)
     * @return atoms in source order
     */
    public static List<AtomNode> quickAtomSegment(char[] cArr, int i10, int i11) {
        List<AtomNode> atoms = new LinkedList<>();
        char prevChar = cArr[i10];
        byte prevType = CharType.get(prevChar);
        if (prevType == 9 && ARAB_NUMS.indexOf(prevChar) == -1) {
            prevType = 29;
        }
        int atomStart = i10;
        int pos = i10;
        while (++pos < i11) {
            char cur = cArr[pos];
            byte curType = CharType.get(cur);
            if (curType == 9 && ARAB_NUMS.indexOf(cur) == -1) {
                curType = 29;
            }
            boolean boundary = curType != prevType
                    || ((curType == 6 || curType == 17) && cur != prevChar);
            if (boundary) {
                if (cur == '.' && prevType == 9 && pos + 1 < i11) {
                    byte nextType = CharType.get(cArr[pos + 1]);
                    if (nextType == 9 && ARAB_NUMS.indexOf(cArr[pos + 1]) == -1) {
                        nextType = 29;
                    }
                    if (nextType == 9) {
                        // Decimal point inside a number: keep the atom open. prevChar/prevType are
                        // deliberately left untouched so the following digit joins the same atom.
                        // Without this jump the look-ahead above has no effect at all.
                        continue;
                    }
                }
                atoms.add(new AtomNode(new String(cArr, atomStart, pos - atomStart), prevType));
                atomStart = pos;
            }
            prevChar = cur;
            prevType = curType;
        }
        // Flush the trailing atom (skipped only when the range was empty to begin with).
        if (pos == i11) {
            atoms.add(new AtomNode(new String(cArr, atomStart, pos - atomStart), prevType));
        }
        return atoms;
    }

    /**
     * Detaches {@code vertex} from the word net.
     *
     * <p>Every vertex in row {@code i10 + i11} whose {@code from} points at {@code vertex} gets
     * {@code from} reset to {@code null}; then {@code vertex} itself is removed (by identity) from
     * row {@code i10 + i11 - vertex.realWord.length()}.
     *
     * @param vertex  vertex to remove
     * @param wordNet word net to modify
     * @param i10     row offset
     * @param i11     length added to {@code i10} to locate the row following the vertex
     */
    private static void removeFromWordNet(Vertex vertex, WordNet wordNet, int i10, int i11) {
        LinkedList<Vertex>[] rows = wordNet.getVertexes();
        int followingRow = i10 + i11;
        for (Vertex successor : rows[followingRow]) {
            if (successor.from == vertex) {
                successor.from = null;
            }
        }
        int ownRow = followingRow - vertex.realWord.length();
        Iterator<Vertex> candidates = rows[ownRow].iterator();
        while (candidates.hasNext()) {
            Vertex candidate = candidates.next();
            if (candidate == vertex) {
                candidates.remove();
            }
        }
    }

    /**
     * Wraps the whole range {@code cArr[i10, i11)} in a single atom of type 8.
     *
     * @param cArr source characters
     * @param i10  start offset (inclusive)
     * @param i11  end offset (exclusive)
     * @return a one-element list holding that atom
     */
    public static List<AtomNode> simpleAtomSegment(char[] cArr, int i10, int i11) {
        List<AtomNode> atoms = new LinkedList<>();
        String text = new String(cArr, i10, i11 - i10);
        atoms.add(new AtomNode(text, 8));
        return atoms;
    }

    /**
     * Adds one string to the {@code naw} set, creating the set on first use.
     *
     * @param str string to add
     */
    public void addAnw(String str) {
        Set<String> words = this.naw;
        if (words == null) {
            words = new HashSet<>();
            this.naw = words;
        }
        words.add(str);
    }

    /**
     * Adds every given string to the {@code naw} set via {@link #addAnw(String)}.
     *
     * @param strArr strings to add
     */
    public void addNawArr(String... strArr) {
        int count = strArr.length;
        for (int index = 0; index < count; index++) {
            addAnw(strArr[index]);
        }
    }

    /**
     * Adds every element of {@code set} to the {@code naw} set via {@link #addAnw(String)}.
     *
     * @param set strings to add
     */
    public void addNawSet(Set<String> set) {
        for (String word : set) {
            addAnw(word);
        }
    }

    /**
     * Merges runs of consecutive vertices whose concatenated {@code realWord}s match an entry of
     * the custom dictionary trie.
     *
     * <p>For each start position the trie is walked vertex by vertex; the longest prefix for which
     * {@code findParams} returned a non-null attribute wins and is collapsed through
     * {@code combineWords}. Scanning resumes right after the merged run. When no custom dictionary
     * (or no trie) is configured the list is returned unchanged. Any {@link Throwable} raised while
     * merging is logged and the list is returned in whatever state it had at that moment.
     *
     * @param list vertices of one segmentation path; rewritten in place
     * @return the same {@code list} instance
     */
    public List<Vertex> combineByCustomDictionary(List<Vertex> list) {
        CustomDictionary dictionary = this.customDictionary;
        if (dictionary == null || dictionary.trie == null) {
            return list;
        }
        try {
            int count = list.size();
            Vertex[] vertices = new Vertex[count];
            list.toArray(vertices);
            TailLOUDSTrie<Attribute> trie = this.customDictionary.trie;
            for (int start = 0; start < count; start++) {
                // Fresh trie state (0, -1) for every start position.
                Pair<Integer, Integer> state = trie.transition(new Pair<>(0, -1), vertices[start].realWord);
                if (state.getFirst().intValue() <= 0) {
                    continue;
                }
                Attribute matched = null;
                if (state.getSecond().intValue() == -1) {
                    matched = trie.findParams(state.getFirst().intValue());
                }
                int matchEnd = start + 1;
                for (int next = start + 1; next < count; next++) {
                    state = trie.transition(state, vertices[next].realWord);
                    if (state.getFirst().intValue() < 0) {
                        break;
                    }
                    Attribute candidate = trie.findParams(state.getFirst().intValue());
                    if (candidate != null) {
                        // Remember the longest match seen so far.
                        matchEnd = next + 1;
                        matched = candidate;
                    }
                }
                if (matched != null) {
                    combineWords(vertices, start, matchEnd, matched);
                    // The loop increment moves on to the first vertex after the merged run.
                    start = matchEnd - 1;
                }
            }
            // Write back the surviving (non-null) vertices in order.
            list.clear();
            for (Vertex survivor : vertices) {
                if (survivor != null) {
                    list.add(survivor);
                }
            }
        } catch (Throwable th2) {
            LogManager.logExp(null, "用户自定义词典分词异常！", th2);
        }
        return list;
    }

    /**
     * Switches all five named-entity flags (name, Japanese name, translated name, place,
     * organization) to {@code z10} and then calls {@code config.updateNerConfig()}.
     *
     * @param z10 new value for every flag
     * @return this segmenter, for chaining
     */
    public Segment enableAllNamedEntityRecognize(boolean z10) {
        Config cfg = this.config;
        cfg.nameRecognize = z10;
        cfg.japaneseNameRecognize = z10;
        cfg.translatedNameRecognize = z10;
        cfg.placeRecognize = z10;
        cfg.organizationRecognize = z10;
        cfg.updateNerConfig();
        return this;
    }

    /**
     * Sets {@code config.useCustomDictionary}.
     *
     * @param z10 new flag value
     * @return this segmenter, for chaining
     */
    public Segment enableCustomDictionary(boolean z10) {
        Config cfg = this.config;
        cfg.useCustomDictionary = z10;
        return this;
    }

    /**
     * Sets {@code config.indexMode}.
     *
     * @param z10 new flag value
     * @return this segmenter, for chaining
     */
    public Segment enableIndexMode(boolean z10) {
        Config cfg = this.config;
        cfg.indexMode = z10;
        return this;
    }

    /**
     * Sets {@code config.japaneseNameRecognize} and then calls {@code config.updateNerConfig()}.
     *
     * @param z10 new flag value
     * @return this segmenter, for chaining
     */
    public Segment enableJapaneseNameRecognize(boolean z10) {
        this.config.japaneseNameRecognize = z10;
        this.config.updateNerConfig();
        return this;
    }

    /**
     * Sets {@code config.threadNumber} to the given value.
     *
     * @param i10 number of worker threads
     * @return this segmenter, for chaining
     */
    public Segment enableMultithreading(int i10) {
        Config cfg = this.config;
        cfg.threadNumber = i10;
        return this;
    }

    /**
     * Turns multithreading on or off by setting {@code config.threadNumber} to 4 or 1.
     *
     * @param z10 {@code true} for four threads, {@code false} for one
     * @return this segmenter, for chaining
     */
    public Segment enableMultithreading(boolean z10) {
        this.config.threadNumber = z10 ? 4 : 1;
        return this;
    }

    /**
     * Sets {@code config.nameRecognize} and then calls {@code config.updateNerConfig()}.
     *
     * @param z10 new flag value
     * @return this segmenter, for chaining
     */
    public Segment enableNameRecognize(boolean z10) {
        this.config.nameRecognize = z10;
        this.config.updateNerConfig();
        return this;
    }

    /**
     * Sets {@code config.numberQuantifierRecognize}.
     *
     * @param z10 new flag value
     * @return this segmenter, for chaining
     */
    public Segment enableNumberQuantifierRecognize(boolean z10) {
        Config cfg = this.config;
        cfg.numberQuantifierRecognize = z10;
        return this;
    }

    /**
     * Sets {@code config.offset}.
     *
     * @param z10 new flag value
     * @return this segmenter, for chaining
     */
    public Segment enableOffset(boolean z10) {
        Config cfg = this.config;
        cfg.offset = z10;
        return this;
    }

    /**
     * Sets {@code config.organizationRecognize} and then calls {@code config.updateNerConfig()}.
     *
     * @param z10 new flag value
     * @return this segmenter, for chaining
     */
    public Segment enableOrganizationRecognize(boolean z10) {
        this.config.organizationRecognize = z10;
        this.config.updateNerConfig();
        return this;
    }

    /**
     * Sets {@code config.speechTagging}.
     *
     * @param z10 new flag value
     * @return this segmenter, for chaining
     */
    public Segment enablePartOfSpeechTagging(boolean z10) {
        Config cfg = this.config;
        cfg.speechTagging = z10;
        return this;
    }

    /**
     * Sets {@code config.placeRecognize} and then calls {@code config.updateNerConfig()}.
     *
     * @param z10 new flag value
     * @return this segmenter, for chaining
     */
    public Segment enablePlaceRecognize(boolean z10) {
        this.config.placeRecognize = z10;
        this.config.updateNerConfig();
        return this;
    }

    /**
     * Sets {@code config.translatedNameRecognize} and then calls {@code config.updateNerConfig()}.
     *
     * @param z10 new flag value
     * @return this segmenter, for chaining
     */
    public Segment enableTranslatedNameRecognize(boolean z10) {
        this.config.translatedNameRecognize = z10;
        this.config.updateNerConfig();
        return this;
    }

    /**
     * Returns the live {@code naw} set (not a copy).
     *
     * @return the set, or {@code null} if nothing has been added or set yet
     */
    public Set<String> getNaw() {
        Set<String> current = this.naw;
        return current;
    }

    /**
     * Merges a run of numeral vertices (nature {@code m}), optionally followed by a quantifier
     * (nature {@code q}, {@code qv} or {@code qt}), into the first vertex of the run.
     *
     * <p>Absorbed vertices are removed from {@code list} and from {@code wordNet}. The surviving
     * vertex receives the concatenated text as {@code realWord}, {@code Predefine.TAG_NUMBER} as
     * {@code word}, nature {@code mq}, and the core dictionary's {@code M_WORD_ID}. In index mode
     * the pure-number part is additionally added to the word net before the quantifier is appended.
     * Lists with fewer than four vertices are left untouched, and the first vertex is always skipped.
     *
     * @param list    vertices of the chosen path; modified in place
     * @param wordNet word net kept in sync with {@code list}
     * @param config  options; only {@code indexMode} is read here
     */
    public void mergeNumberQuantifier(List<Vertex> list, WordNet wordNet, Config config) {
        if (list.size() < 4) {
            return;
        }
        StringBuilder merged = new StringBuilder();
        ListIterator<Vertex> cursor = list.listIterator();
        cursor.next();
        // Running offset passed to the word net; starts at 1 after the skipped first vertex.
        int line = 1;
        while (cursor.hasNext()) {
            Vertex head = cursor.next();
            if (head.hasNature(Nature.m)) {
                merged.append(head.realWord);
                Vertex follower = null;
                // Swallow every directly following numeral.
                while (cursor.hasNext() && (follower = cursor.next()).hasNature(Nature.m)) {
                    merged.append(follower.realWord);
                    cursor.remove();
                    removeFromWordNet(follower, wordNet, line, merged.length());
                }
                if (follower != null) {
                    boolean quantifier = follower.hasNature(Nature.q)
                            || follower.hasNature(Nature.qv)
                            || follower.hasNature(Nature.qt);
                    if (!quantifier) {
                        // The look-ahead vertex stays in the list; account for its length here
                        // because the outer loop will not visit it again.
                        line += follower.realWord.length();
                    } else {
                        if (config.indexMode) {
                            wordNet.add(line, new Vertex(merged.toString(), follower.coreDictionary, new Attribute(Nature.m)));
                        }
                        merged.append(follower.realWord);
                        cursor.remove();
                        removeFromWordNet(follower, wordNet, line, merged.length());
                    }
                }
                // Something was absorbed: rewrite the head vertex as a number-quantifier term.
                if (merged.length() != head.realWord.length()) {
                    head.realWord = merged.toString();
                    head.word = Predefine.TAG_NUMBER;
                    head.attribute = new Attribute(Nature.mq);
                    head.wordID = this.coreDictionary.M_WORD_ID;
                }
            }
            merged.setLength(0);
            line += head.realWord.length();
        }
    }

    /**
     * Segments a string, using several worker threads for long inputs.
     *
     * <p>The text is normalized first when {@code XyNLP.Config.Normalization} is set. If
     * {@code config.threadNumber} is at most 1 or the text has at most 10000 characters, it is
     * segmented in one {@link #segSentence(char[])} call. Otherwise it is split into sentences, the
     * sentences are divided evenly among the worker threads (the last worker also takes the
     * remainder), and the per-sentence results are concatenated in order. When
     * {@code config.offset} or {@code config.indexMode} is set, each term's {@code offset} is
     * shifted by the total length of the preceding sentences.
     *
     * <p>{@code config.threadNumber} is read once per call, so a concurrent change cannot leave
     * workers unstarted or index past the worker array.
     *
     * @param str text to segment
     * @return the terms in text order; an empty list if waiting for a worker failed (for example
     *         because the calling thread was interrupted — its interrupt status is then restored)
     */
    public List<Term> seg(String str) {
        char[] chars = str.toCharArray();
        if (XyNLP.Config.Normalization) {
            CharTable.normalization(chars);
        }
        final int threadCount = this.config.threadNumber;
        if (threadCount <= 1 || chars.length <= 10000) {
            return segSentence(chars);
        }

        List<String> sentenceList = SentencesUtil.toSentenceList(chars);
        int size = sentenceList.size();
        String[] sentences = new String[size];
        sentenceList.toArray(sentences);
        @SuppressWarnings("unchecked") // generic array creation; only List<Term> values are stored
        List<Term>[] results = new List[size];

        // Even split; the last worker additionally handles the remainder.
        int chunk = size / threadCount;
        WorkThread[] workers = new WorkThread[threadCount];
        for (int t = 0; t < threadCount; t++) {
            int from = t * chunk;
            int to = (t == threadCount - 1) ? size : from + chunk;
            workers[t] = new WorkThread(sentences, results, from, to);
            workers[t].start();
        }

        for (WorkThread worker : workers) {
            try {
                worker.join();
            } catch (Throwable th2) {
                if (th2 instanceof InterruptedException) {
                    // Keep the interruption visible to the caller instead of swallowing it.
                    Thread.currentThread().interrupt();
                }
                LogManager.logExp("", "线程同步异常", th2);
                return Collections.emptyList();
            }
        }

        List<Term> merged = new LinkedList<>();
        boolean shiftOffsets = this.config.offset || this.config.indexMode;
        int base = 0;
        for (int i = 0; i < size; i++) {
            List<Term> terms = results[i];
            if (shiftOffsets) {
                for (Term term : terms) {
                    term.offset += base;
                }
                base += sentences[i].length();
            }
            merged.addAll(terms);
        }
        return merged;
    }

    /**
     * Segments a character array in a single {@link #segSentence(char[])} call.
     *
     * <p>When {@code XyNLP.Config.Normalization} is set, {@code CharTable.normalization} is
     * applied to {@code cArr} itself before segmentation.
     *
     * @param cArr characters to segment
     * @return the resulting terms
     */
    public List<Term> seg(char[] cArr) {
        boolean normalize = XyNLP.Config.Normalization;
        if (normalize) {
            CharTable.normalization(cArr);
        }
        List<Term> terms = segSentence(cArr);
        return terms;
    }

    /**
     * Splits the text into sentences and segments each one separately.
     *
     * <p>No normalization is applied by this method.
     *
     * @param str text to split and segment
     * @return one term list per sentence, in sentence order
     */
    public List<List<Term>> seg2sentence(String str) {
        List<List<Term>> perSentence = new LinkedList<>();
        for (String sentence : SentencesUtil.toSentenceList(str)) {
            char[] chars = sentence.toCharArray();
            perSentence.add(segSentence(chars));
        }
        return perSentence;
    }

    /**
     * Segments one piece of text into terms; supplied by concrete segmenters.
     *
     * <p>Within this class it is invoked by both {@code seg} overloads, by {@code seg2sentence},
     * and by {@code WorkThread.run()} — the latter from worker threads.
     *
     * @param cArr characters to segment
     * @return the terms produced for {@code cArr}
     */
    public abstract List<Term> segSentence(char[] cArr);

    /**
     * Replaces the {@code naw} set with the given instance (stored by reference, not copied).
     *
     * @param set new set; may be {@code null}, in which case the next add creates a fresh set
     */
    public void setNaw(Set<String> set) {
        Set<String> replacement = set;
        this.naw = replacement;
    }
}
