package com.hankcs.hanlp.model.perceptron;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.model.perceptron.common.TaskType;
import com.hankcs.hanlp.model.perceptron.feature.FeatureMap;
import com.hankcs.hanlp.model.perceptron.instance.CWSInstance;
import com.hankcs.hanlp.model.perceptron.instance.Instance;
import com.hankcs.hanlp.model.perceptron.model.LinearModel;
import com.hankcs.hanlp.model.perceptron.tagset.CWSTagSet;
import com.hankcs.hanlp.model.perceptron.tagset.TagSet;
import com.hankcs.hanlp.model.perceptron.utility.Utility;
import com.hankcs.hanlp.tokenizer.lexical.Segmenter;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

/* loaded from: classes2.dex */
/**
 * Chinese word segmenter (CWS) built on top of {@link PerceptronTagger}.
 *
 * <p>Each character of the input receives a tag from the model's {@link CWSTagSet}; a new
 * word is started at every position whose tag equals the tag set's {@code B} or {@code S}
 * value.
 */
public class PerceptronSegmenter extends PerceptronTagger implements Segmenter {
    /** Tag set of the loaded model; its {@code B} and {@code S} ids mark word starts. */
    private final CWSTagSet cwsTagSet;

    /**
     * Creates a segmenter from the model at {@code HanLP.Config.PerceptronCWSModelPath}.
     *
     * @throws IOException propagated from {@link #PerceptronSegmenter(String)}
     */
    public PerceptronSegmenter() throws IOException {
        this(HanLP.Config.PerceptronCWSModelPath);
    }

    /**
     * Creates a segmenter around an already constructed model.
     *
     * @param linearModel model whose feature map carries a tag set of type {@link TaskType#CWS}
     * @throws IllegalArgumentException if the model's tag set is of any other task type
     */
    public PerceptronSegmenter(LinearModel linearModel) {
        super(linearModel);
        TagSet tagSet = linearModel.featureMap.tagSet;
        if (tagSet.type != TaskType.CWS) {
            throw new IllegalArgumentException(String.format("错误的模型类型: 传入的不是分词模型，而是 %s 模型", tagSet.type));
        }
        this.cwsTagSet = (CWSTagSet) tagSet;
    }

    /**
     * Creates a segmenter from a model built with {@code new LinearModel(str)}.
     *
     * @param str argument handed to the {@link LinearModel} constructor (presumably the model
     *            file path, given that the no-arg constructor passes a configured path here)
     * @throws IOException declared because constructing the {@link LinearModel} may throw it
     */
    public PerceptronSegmenter(String str) throws IOException {
        this(new LinearModel(str));
    }

    /** Builds the training/evaluation instance for a sentence via {@link CWSInstance#create}. */
    @Override
    protected Instance createInstance(Sentence sentence, FeatureMap featureMap) {
        return CWSInstance.create(sentence, featureMap);
    }

    /**
     * Evaluates this segmenter by passing {@code str} and this instance to
     * {@code Utility.evaluateCWS} and converting the result with {@code Utility.prf}.
     *
     * @param str forwarded unchanged to {@code Utility.evaluateCWS} (presumably a corpus path)
     * @return the array produced by {@code Utility.prf} (presumably precision/recall/F1;
     *         verify against {@code Utility})
     * @throws IOException propagated from the evaluation helper
     */
    @Override
    public double[] evaluate(String str) throws IOException {
        return Utility.prf(Utility.evaluateCWS(str, this));
    }

    /**
     * Learns from one segmented sentence given as a single whitespace-separated string.
     *
     * @param str sentence whose words are separated by one or more whitespace characters
     * @return the result of {@link #learn(String...)} on the split words
     */
    public boolean learn(String str) {
        return learn(str.split("\\s+"));
    }

    /**
     * Learns from one segmented sentence given as an array of words.
     *
     * @param strArr the words of the sentence, in order
     * @return whatever the inherited {@code learn(Instance)} reports for the new instance
     */
    public boolean learn(String... strArr) {
        return learn(new CWSInstance(strArr, this.model.featureMap));
    }

    /**
     * Segments {@code str} and returns the words in a fresh list.
     *
     * @param str text to segment
     * @return the words in order; empty when {@code str} is empty
     */
    @Override
    public List<String> segment(String str) {
        List<String> words = new LinkedList<String>();
        segment(str, words);
        return words;
    }

    /**
     * Decodes {@code instance} and appends the resulting words of {@code str} to {@code list}.
     *
     * <p>The tags are decoded into {@code instance.tagArray} in place. Characters are always
     * taken from {@code str}, so the instance is expected to have one tag per character of
     * {@code str}.
     *
     * @param str      text to cut; an empty string appends nothing
     * @param instance instance built for {@code str} with this model's feature map
     * @param list     receives the words in order
     */
    public void segment(String str, Instance instance, List<String> list) {
        // Without this guard charAt(0) below throws for an empty string when this public
        // overload is called directly; the (String, String, List) overload already guards.
        if (str.isEmpty()) {
            return;
        }
        int[] tags = instance.tagArray;
        this.model.viterbiDecode(instance, tags);
        StringBuilder word = new StringBuilder();
        // The first character always opens the first word, so its tag is not inspected.
        word.append(str.charAt(0));
        for (int i = 1; i < tags.length; i++) {
            int tag = tags[i];
            if (tag == this.cwsTagSet.B || tag == this.cwsTagSet.S) {
                // A B or S tag starts a new word: flush the one collected so far.
                list.add(word.toString());
                word.setLength(0);
            }
            word.append(str.charAt(i));
        }
        if (word.length() != 0) {
            list.add(word.toString());
        }
    }

    /**
     * Segments {@code str}, building the instance from {@code str2}.
     *
     * @param str  text whose characters make up the output words; empty text is a no-op
     * @param str2 text passed to the {@link CWSInstance} constructor (the two-argument
     *             overload passes {@code normalize(str)} here)
     * @param list receives the words in order
     */
    @Override
    public void segment(String str, String str2, List<String> list) {
        if (str.isEmpty()) {
            return;
        }
        segment(str, new CWSInstance(str2, this.model.featureMap), list);
    }

    /**
     * Segments {@code str}, using {@code normalize(str)} as the instance text, and appends
     * the words to {@code list}.
     *
     * @param str  text to segment
     * @param list receives the words in order
     */
    public void segment(String str, List<String> list) {
        segment(str, normalize(str), list);
    }
}
