package com.wangc.bookkeeping.test.hanlp.classification.utilities;

import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.NotionalTokenizer;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;

/* loaded from: classes3.dex */
/**
 * Static helpers for preparing text for classification: tokenising a text into
 * keywords, counting keywords, normalising raw text and loading a corpus that is
 * laid out on disk as one sub-folder per category.
 */
public class TextProcessUtility {
    /** Matches a single Unicode punctuation character. */
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{P}");

    /** Matches a run of one or more whitespace characters. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** Size of the chunks used when streaming a file into memory. */
    private static final int READ_CHUNK_SIZE = 8192;

    /**
     * Segments a text with {@link NotionalTokenizer#segment} and returns the
     * {@code word} of every resulting term, in segmentation order.
     *
     * @param str the text to segment
     * @return one array element per term produced by the tokenizer
     */
    public static String[] extractKeywords(String str) {
        List<Term> terms = NotionalTokenizer.segment(str);
        String[] words = new String[terms.size()];
        int index = 0;
        for (Term term : terms) {
            words[index++] = term.word;
        }
        return words;
    }

    /**
     * Counts how many times each keyword occurs in the given array.
     *
     * @param strArr the keywords to count; must not be {@code null}
     * @return a new mutable map from keyword to its number of occurrences
     */
    public static Map<String, Integer> getKeywordCounts(String[] strArr) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (String keyword : strArr) {
            Integer seen = counts.get(keyword);
            counts.put(keyword, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
        }
        return counts;
    }

    /**
     * Loads a corpus from a root folder that contains one sub-folder per category.
     * Every entry of the root that is not a regular file becomes a key (its name);
     * the value holds the result of {@link IOUtil#readTxt(String)} for each entry
     * inside that sub-folder.
     *
     * <p>NOTE(review): {@code IOUtil.readTxt} presumably reports read failures by
     * returning {@code null} rather than throwing, so array elements may be
     * {@code null} - confirm against the HanLP version in use.
     *
     * @param str path of the corpus root folder
     * @return a map sorted by category name, or {@code null} when the root cannot
     *         be listed (for example because it is not a directory)
     */
    public static Map<String, String[]> loadCorpus(String str) {
        File[] categoryFolders = new File(str).listFiles();
        if (categoryFolders == null) {
            return null;
        }
        Map<String, String[]> corpus = new TreeMap<String, String[]>();
        for (File folder : categoryFolders) {
            if (folder.isFile()) {
                continue;
            }
            File[] documents = folder.listFiles();
            if (documents == null) {
                // Not listable (not a directory or an I/O error): skip this entry.
                continue;
            }
            String[] texts = new String[documents.length];
            for (int i = 0; i < documents.length; i++) {
                texts[i] = IOUtil.readTxt(documents[i].getAbsolutePath());
            }
            corpus.put(folder.getName(), texts);
        }
        return corpus;
    }

    /**
     * Same as {@link #loadCorpusWithException(String, String)} with the charset
     * fixed to {@code "UTF-8"}.
     *
     * @param str path of the corpus root folder
     * @return a map sorted by category name, or {@code null} when the root cannot
     *         be listed
     * @throws IOException if a document cannot be read
     * @throws IllegalArgumentException if the path is {@code null}, does not exist
     *         or is not a directory
     */
    public static Map<String, String[]> loadCorpusWithException(String str) throws IOException {
        return loadCorpusWithException(str, "UTF-8");
    }

    /**
     * Loads a corpus from a root folder that contains one sub-folder per category,
     * validating the root first and propagating read failures to the caller.
     * Every entry of the root that is not a regular file becomes a key (its name);
     * the value holds the decoded content of each entry inside that sub-folder.
     *
     * @param str  path of the corpus root folder
     * @param str2 name of the charset used to decode every document
     * @return a map sorted by category name, or {@code null} when listing the root
     *         fails even though it exists and is a directory
     * @throws IOException if a document cannot be read or the charset name is not
     *         supported
     * @throws IllegalArgumentException if the path is {@code null}, does not exist
     *         or is not a directory
     */
    public static Map<String, String[]> loadCorpusWithException(String str, String str2) throws IOException {
        if (str == null) {
            throw new IllegalArgumentException("参数 folderPath == null");
        }
        File root = new File(str);
        if (!root.exists()) {
            throw new IllegalArgumentException(String.format("目录 %s 不存在", root.getAbsolutePath()));
        }
        if (!root.isDirectory()) {
            throw new IllegalArgumentException(String.format("目录 %s 不是一个目录", root.getAbsolutePath()));
        }
        File[] categoryFolders = root.listFiles();
        if (categoryFolders == null) {
            return null;
        }
        Map<String, String[]> corpus = new TreeMap<String, String[]>();
        for (File folder : categoryFolders) {
            if (folder.isFile()) {
                continue;
            }
            File[] documents = folder.listFiles();
            if (documents == null) {
                // Not listable (not a directory or an I/O error): skip this entry.
                continue;
            }
            String[] texts = new String[documents.length];
            for (int i = 0; i < documents.length; i++) {
                texts[i] = readTxt(documents[i], str2);
            }
            corpus.put(folder.getName(), texts);
        }
        return corpus;
    }

    /**
     * Normalises raw text: every punctuation character becomes a space, runs of
     * whitespace collapse to a single space, and the result is lower-cased.
     *
     * @param str the text to normalise; must not be {@code null}
     * @return the normalised text
     */
    public static String preprocess(String str) {
        String withoutPunctuation = PUNCTUATION.matcher(str).replaceAll(" ");
        String collapsed = WHITESPACE_RUN.matcher(withoutPunctuation).replaceAll(" ");
        // Locale.ROOT keeps the output identical on every device; the default
        // locale would, for example, map 'I' to a dotless i under a Turkish locale.
        return collapsed.toLowerCase(Locale.ROOT);
    }

    /**
     * Reads a whole file into a string using the given charset.
     *
     * @param file the file to read
     * @param str  name of the charset used to decode the file content
     * @return the decoded content of the file
     * @throws IOException if the file cannot be opened or read, or the charset
     *         name is not supported
     */
    public static String readTxt(File file, String str) throws IOException {
        FileInputStream input = new FileInputStream(file);
        try {
            // available() is only an estimate, so it is used as a capacity hint
            // and the stream is read until end-of-file instead of trusting it.
            ByteArrayOutputStream content = new ByteArrayOutputStream(Math.max(input.available(), 32));
            byte[] chunk = new byte[READ_CHUNK_SIZE];
            int read;
            while ((read = input.read(chunk)) != -1) {
                content.write(chunk, 0, read);
            }
            return content.toString(str);
        } finally {
            // Always release the file handle, even when reading or decoding fails.
            input.close();
        }
    }
}
