package org.xm.tokenizer;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: classes8.dex */
/**
 * Static helpers that run HanLP word segmentation over a single string or over
 * every line of a text file.
 */
public class Tokenizer {
    private static final Logger logger = LoggerFactory.getLogger(Tokenizer.class);

    /**
     * Segments a text file line by line and writes the result to another file.
     *
     * <p>{@code WordFreqStatistics.statistics(segment, str)} is invoked on the input first
     * (its behaviour is defined outside this class). Then, for each input line, the terms
     * returned by {@code segment.seg(line)} are written as their {@code toString()} forms
     * joined by tabs, with surrounding whitespace trimmed, followed by a newline. When
     * the whole file has been processed, the output path and the character/word totals
     * (terms whose word is blank are not counted) plus the elapsed time are printed to
     * standard output.
     *
     * <p>An {@link IOException} is logged and swallowed; it is not propagated to the caller.
     *
     * @param segment the HanLP segmenter applied to every line
     * @param str     path of the input file
     * @param str2    path of the output file; when {@code null} or blank, the input path
     *                with every {@code ".txt"} occurrence removed and
     *                {@code "-Segment-Result.txt"} appended is used instead
     */
    public static void fileSegment(Segment segment, String str, String str2) {
        try {
            WordFreqStatistics.statistics(segment, str);

            final String outputPath = (str2 == null || str2.trim().isEmpty())
                    ? str.replace(".txt", "") + "-Segment-Result.txt"
                    : str2;

            long charCount = 0;
            long wordCount = 0;
            final long startMillis = System.currentTimeMillis();

            // try-with-resources closes both streams even when reading, segmenting or
            // writing fails part-way through the file.
            try (BufferedReader reader = IOUtil.newBufferedReader(str);
                 FileOutputStream out = new FileOutputStream(new File(outputPath))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    StringBuilder sb = new StringBuilder();
                    for (Term term : segment.seg(line)) {
                        sb.append(term).append('\t');
                        // Whitespace-only terms are written out but excluded from the totals.
                        if (!term.word.trim().isEmpty()) {
                            charCount += term.length();
                            wordCount++;
                        }
                    }
                    // Encode explicitly rather than with the platform default charset so the
                    // output file does not vary by machine.
                    // NOTE(review): HanLP's IOUtil reader is expected to decode UTF-8 — confirm.
                    out.write(sb.toString().trim().getBytes(StandardCharsets.UTF_8));
                    out.write('\n');
                }
            }

            // Reported only after both files have been closed successfully.
            final long elapsedMillis = System.currentTimeMillis() - startMillis;
            System.out.println("segment result save：" + outputPath);
            System.out.println("total " + charCount + " chars, " + wordCount + " words, spend" + elapsedMillis + "ms ");
        } catch (IOException e) {
            // Passing the exception as the last argument keeps the stack trace in the log.
            logger.error("IO error: {}", e.getLocalizedMessage(), e);
        }
    }

    /**
     * Segments a text file with a fresh {@code HanLP.newSegment()} segmenter.
     *
     * @param str  path of the input file
     * @param str2 path of the output file, or {@code null}/blank to derive it from {@code str}
     * @see #fileSegment(Segment, String, String)
     */
    public static void fileSegment(String str, String str2) {
        fileSegment(HanLP.newSegment(), str, str2);
    }

    /**
     * Converts a HanLP {@link Term} into a {@code Word} built from the term's word text
     * and the name of its nature.
     *
     * <p>The compiler-generated name and public visibility are retained so that existing
     * references to this method keep resolving.
     *
     * @param term the term to convert
     * @return a new {@code Word} for {@code term}
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public static /* synthetic */ Word lambda$segment$0(Term term) {
        return new Word(term.word, term.nature.name());
    }

    /**
     * Segments {@code str} with {@code HanLP.segment} and converts every resulting term
     * into a {@code Word}.
     *
     * @param str the text to segment
     * @return a new, mutable list holding one {@code Word} per term, in segmentation order
     */
    public static List<Word> segment(String str) {
        return HanLP.segment(str).stream()
                .map(Tokenizer::lambda$segment$0)
                .collect(Collectors.toCollection(ArrayList::new));
    }
}
