package com.hankcs.hanlp.mining.word2vec;

import com.hankcs.hanlp.utility.Predefine;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Comparator;
import java.util.Locale;
import okhttp3.internal.ws.g;

/* loaded from: classes2.dex */
class Word2VecTraining {
    /** Number of entries the static initialiser writes into {@link #expTable}. */
    static final int EXP_TABLE_SIZE = 1000;
    /** Half-width of the input range tabulated in {@link #expTable}: the table covers [-6, 6). */
    static final int MAX_EXP = 6;
    /** Not referenced by any code visible in this file; presumably used by the training loop — TODO confirm. */
    static final int MAX_SENTENCE_LENGTH = 1000;
    /** Length of the {@link #table} array allocated by {@code initUnigramTable}. */
    static final int TABLE_SIZE = 100000000;
    /**
     * Flat weight array, one row of {@code layer1Size} values per vocabulary entry. Allocated and
     * randomly initialised by {@code initNet}; written to the output file by {@code trainModel}.
     * Static, so it is shared by every instance of this class.
     */
    static double[] syn0;
    /** Allocated by {@code initNet} only when the config enables hierarchical softmax; set to null by {@code trainModel} after training. */
    static double[] syn1;
    /** Allocated by {@code initNet} only when the config's negative value is greater than zero. */
    static double[] syn1neg;
    /** Training configuration supplied at construction. */
    private final Config config;
    /** Vocabulary-index lookup table built by {@code initUnigramTable}; null until then and again after training. */
    int[] table;
    /**
     * Set to the configured thread count by {@code trainModel}, which then waits on this object
     * until the value is no longer positive. The code that decrements it is not visible here.
     */
    int threadCount;
    /** Wall-clock time (milliseconds) recorded by {@code trainModel} just before the worker threads start. */
    long timeStart;
    /** Character set used when writing the word-vector output file. */
    static final Charset ENCODING = Charset.forName("UTF-8");
    /**
     * Precomputed logistic-function values. Holds 1001 slots, of which the static initialiser
     * fills indices 0..999; the last slot keeps its default 0.0.
     */
    static final double[] expTable = new double[1001];

    /* loaded from: classes2.dex */
    /**
     * Worker thread for a training run. {@code Word2VecTraining.trainModel()} creates and starts
     * one instance per configured thread, handing each its own corpus object and its index.
     *
     * <p>NOTE(review): the decompiler could not reconstruct {@link #run()}; as it stands the
     * method only throws, so this class cannot perform any training until the body is restored.
     */
    static class TrainModelThread extends Thread {
        // Static, hence shared by all worker instances. Not read or written by any code visible
        // here; presumably a progress counter updated inside run() — TODO confirm.
        static int wordCountActual;
        // Starts at config.getAlpha(); non-final, so presumably adjusted during training — TODO confirm.
        float alpha;
        final Config config;
        final Corpus corpus;
        // Index passed in by the creator (trainModel passes 0 .. numThreads-1).
        final int id;
        // The initial value of alpha, kept unchanged.
        final float startingAlpha;
        // Reference to the parent's lookup table as it was at construction time; this is null
        // when the parent never built it.
        final int[] table;
        // Copy of the parent's start timestamp taken at construction time.
        final long timeStart;
        // corpus.getTrainWords(), stored as a float.
        final float trainWords;
        // The owning trainer.
        final Word2VecTraining vec;
        final VocabWord[] vocab;
        final int vocabSize;

        /**
         * Captures everything the worker needs from the trainer, corpus and configuration.
         *
         * @param word2VecTraining owning trainer; its {@code table} and {@code timeStart} are copied
         * @param corpus corpus this worker reads; supplies the train-word count and vocabulary
         * @param config training configuration; supplies the starting alpha
         * @param i8 index of this worker
         */
        public TrainModelThread(Word2VecTraining word2VecTraining, Corpus corpus, Config config, int i8) {
            this.vec = word2VecTraining;
            this.corpus = corpus;
            this.config = config;
            float alpha = config.getAlpha();
            this.alpha = alpha;
            this.startingAlpha = alpha;
            this.id = i8;
            this.table = word2VecTraining.table;
            this.trainWords = corpus.getTrainWords();
            this.timeStart = word2VecTraining.timeStart;
            this.vocabSize = corpus.getVocabSize();
            this.vocab = corpus.getVocab();
        }

        /*
         * JADX could not decompile this method (1478 instructions; the instruction dump was
         * skipped). It reported a lost block containing the test
         *     r10 < ((okhttp3.internal.ws.g.f58635s & r13) / 65536.0d)
         * and several removed duplicate loop regions inside a try block that catches IOException.
         * Re-run JADX with '--show-bad-code' or '--comments-level debug' to see the partial output.
         */
        /**
         * Placeholder for the training loop. Always throws because the real body was not recovered.
         *
         * @throws UnsupportedOperationException always
         */
        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            throw new UnsupportedOperationException("Method not decompiled: com.hankcs.hanlp.mining.word2vec.Word2VecTraining.TrainModelThread.run():void");
        }
    }

    /* loaded from: classes2.dex */
    /**
     * Orders vocabulary entries by their {@code f34193cn} field in descending order, so the entry
     * with the largest value sorts first.
     */
    static class VocabWordComparator implements Comparator<VocabWord> {
        VocabWordComparator() {
        }

        /**
         * Compares two entries by {@code f34193cn}, larger value first.
         *
         * @param vocabWord first entry
         * @param vocabWord2 second entry
         * @return a negative number if {@code vocabWord} has the larger value, zero if both are
         *     equal, a positive number otherwise
         */
        @Override // java.util.Comparator
        public int compare(VocabWord vocabWord, VocabWord vocabWord2) {
            // Integer.compare rather than subtraction: a difference of two ints can overflow and
            // flip sign, which would break the Comparator contract.
            return Integer.compare(vocabWord2.f34193cn, vocabWord.f34193cn);
        }
    }

    // Tabulate the logistic function exp(x) / (1 + exp(x)) at EXP_TABLE_SIZE evenly spaced
    // inputs x in [-MAX_EXP, MAX_EXP). The final slot of expTable is left at 0.0.
    static {
        for (int slot = 0; slot < EXP_TABLE_SIZE; slot++) {
            final double x = (((slot / (double) EXP_TABLE_SIZE) * 2.0d) - 1.0d) * MAX_EXP;
            final double e = Math.exp(x);
            expTable[slot] = e / (1.0d + e);
        }
    }

    /**
     * Creates a trainer that reads all of its settings from the given configuration.
     *
     * @param config training configuration; stored as-is and returned by {@code getConfig()}
     */
    public Word2VecTraining(Config config) {
        this.config = config;
    }

    /**
     * Advances a linear congruential generator by one step.
     *
     * @param j8 current generator state
     * @return the next state, {@code j8 * 25214903917 + 11}, with normal 64-bit wrap-around
     */
    static long nextRandom(long j8) {
        final long multiplier = 0x5DEECE66DL; // 25214903917
        final long increment = 0xBL; // 11
        return increment + multiplier * j8;
    }

    /**
     * Allocates a zero-filled {@code double} array whose length is the requested size rounded up
     * to the next multiple of 128 (sizes that are already a multiple of 128 are used unchanged).
     *
     * @param i8 minimum number of elements required
     * @return a new array of at least {@code i8} elements
     */
    static double[] posixMemAlign128(int i8) {
        int length = i8;
        if (i8 % 128 > 0) {
            // Not on a 128-element boundary: round up to the next one.
            length = ((i8 / 128) + 1) * 128;
        }
        return new double[length];
    }

    /**
     * Returns the configuration this trainer was constructed with.
     *
     * @return the same {@code Config} instance passed to the constructor
     */
    public Config getConfig() {
        return config;
    }

    /**
     * Allocates the network weight arrays for the given corpus, fills {@code syn0} with small
     * pseudo-random values, and then asks the corpus to build its binary tree.
     *
     * <p>{@code syn0} is always allocated. {@code syn1} is allocated only when the configuration
     * enables hierarchical softmax, and {@code syn1neg} only when its negative value is greater
     * than zero; both start out all zeros. Every array holds at least
     * {@code vocabSize * layer1Size} elements.
     *
     * @param corpus corpus supplying the vocabulary size
     * @throws IllegalStateException if {@code vocabSize * layer1Size} does not fit in an
     *     {@code int}-indexed array
     */
    void initNet(Corpus corpus) {
        final int layer1Size = this.config.getLayer1Size();
        final int vocabSize = corpus.getVocabSize();

        // The product is computed in long so that an oversized model is reported clearly instead
        // of silently wrapping around to a negative or too-small array length. The 128 margin
        // leaves room for posixMemAlign128 to round the length up.
        final long requested = (long) vocabSize * layer1Size;
        if (requested > Integer.MAX_VALUE - 128) {
            throw new IllegalStateException("weight matrix too large: vocabSize=" + vocabSize
                    + " * layer1Size=" + layer1Size + " exceeds the maximum array length");
        }
        final int weightCount = (int) requested;

        syn0 = posixMemAlign128(weightCount);
        // Newly allocated double arrays are already all 0.0, so syn1 and syn1neg need no
        // explicit clearing.
        if (this.config.useHierarchicalSoftmax()) {
            syn1 = posixMemAlign128(weightCount);
        }
        if (this.config.getNegative() > 0) {
            syn1neg = posixMemAlign128(weightCount);
        }

        // Fixed seed, so the initial weights are the same on every run.
        long state = 1;
        for (int word = 0; word < vocabSize; word++) {
            final int rowStart = word * layer1Size;
            for (int column = 0; column < layer1Size; column++) {
                state = nextRandom(state);
                // Keep the low 16 bits so the quotient lies in [0, 1), then centre on zero and
                // scale by the layer size. The mask is written as a literal: the decompiled code
                // referenced the obfuscated okhttp3.internal.ws.g.f58635s here, which tied weight
                // initialisation to an unrelated library's internals.
                syn0[rowStart + column] = (((state & 0xFFFFL) / 65536.0d) - 0.5d) / layer1Size;
            }
        }
        corpus.createBinaryTree();
    }

    /**
     * Builds {@link #table}, an array of {@code TABLE_SIZE} vocabulary indices in which each
     * index occupies a share of slots proportional to its entry's {@code f34193cn} value raised
     * to the power 0.75. Indices appear in ascending order.
     *
     * @param corpus corpus supplying the vocabulary
     * @throws IllegalStateException if the vocabulary is empty
     */
    void initUnigramTable(Corpus corpus) {
        final int vocabSize = corpus.getVocabSize();
        final VocabWord[] vocab = corpus.getVocab();
        if (vocabSize <= 0) {
            // Previously this surfaced as an index or null-pointer failure on vocab[0].
            throw new IllegalStateException(
                    "cannot build the unigram table for an empty vocabulary (size " + vocabSize + ")");
        }
        this.table = new int[TABLE_SIZE];

        // The running total is truncated to a long after every addition. This is kept exactly
        // as before so that the generated table does not change.
        long totalPow = 0;
        for (int word = 0; word < vocabSize; word++) {
            totalPow = (long) (totalPow + Math.pow(vocab[word].f34193cn, 0.75d));
        }
        final double normalizer = totalPow;
        final int lastWord = vocabSize - 1;

        // Cumulative share of the table claimed by entries 0..word.
        double cumulative = Math.pow(vocab[0].f34193cn, 0.75d) / normalizer;
        int word = 0;
        for (int slot = 0; slot < TABLE_SIZE; slot++) {
            this.table[slot] = word;
            // Move on to the next entry once this slot lies past the current cumulative share.
            // The bound is checked before advancing: the old code incremented first and read
            // vocab[word] before clamping, which could index one past the last vocabulary entry.
            if (word < lastWord && slot / (double) TABLE_SIZE > cumulative) {
                word++;
                cumulative += Math.pow(vocab[word].f34193cn, 0.75d) / normalizer;
            }
        }
    }

    /**
     * Runs a complete training session: learns and sorts the vocabulary, initialises the network,
     * starts one {@code TrainModelThread} per configured thread, waits for them to finish, and
     * writes the resulting vectors to the configured output file.
     *
     * <p>The output is text encoded with {@link #ENCODING}: a header line
     * {@code "<vocabSize> <layer1Size>"}, followed by one line per vocabulary entry containing
     * the word and its {@code layer1Size} weights, each preceded by a space.
     *
     * <p>If no output file is configured the method returns after logging the vocabulary
     * statistics, without training.
     *
     * @throws IOException if the corpus cannot be read or the output file cannot be opened
     */
    public void trainModel() throws IOException {
        final int layer1Size = this.config.getLayer1Size();
        final TextFileCorpus textFileCorpus = new TextFileCorpus(this.config);
        Predefine.logger.info("learning vocabulary");
        textFileCorpus.learnVocab();
        Predefine.logger.info("sorting vocabulary");
        textFileCorpus.sortVocab();
        final int vocabSize = textFileCorpus.getVocabSize();
        final VocabWord[] vocab = textFileCorpus.getVocab();
        Predefine.logger.info("Vocab size: " + vocabSize);
        Predefine.logger.info("Words in train file: " + textFileCorpus.getTrainWords());
        if (this.config.getOutputFile() == null) {
            // NOTE(review): the corpus is not closed on this path; confirm whether it holds
            // resources at this point.
            return;
        }

        initNet(textFileCorpus);
        if (this.config.getNegative() > 0) {
            initUnigramTable(textFileCorpus);
        }

        this.timeStart = System.currentTimeMillis();
        // Read the thread count once so the counter and the number of started threads always
        // agree. The field itself must not be used as the loop bound because it is the value
        // this method waits on below.
        final int numThreads = this.config.getNumThreads();
        this.threadCount = numThreads;
        for (int worker = 0; worker < numThreads; worker++) {
            new TrainModelThread(this, new CacheCorpus(textFileCorpus), this.config, worker).start();
        }
        textFileCorpus.shutdown();

        // Wait until threadCount is no longer positive. An interrupt must not abort the wait,
        // but it must not be lost either: remember it and restore the flag afterwards.
        boolean interrupted = false;
        synchronized (this) {
            while (this.threadCount > 0) {
                try {
                    wait();
                } catch (InterruptedException e) {
                    interrupted = true;
                }
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }

        System.err.println();
        Predefine.logger.info(String.format("finished training in %s", Utility.humanTime(System.currentTimeMillis() - this.timeStart)));

        // These are not needed for saving; drop the references so they can be reclaimed.
        syn1 = null;
        this.table = null;

        FileOutputStream fileOutputStream = null;
        OutputStreamWriter outputStreamWriter = null;
        PrintWriter printWriter = null;
        try {
            fileOutputStream = new FileOutputStream(this.config.getOutputFile());
            outputStreamWriter = new OutputStreamWriter(fileOutputStream, ENCODING);
            printWriter = new PrintWriter(outputStreamWriter);
            Predefine.logger.info("now saving the word vectors to the file " + this.config.getOutputFile());
            // Locale.ROOT keeps the numbers machine-readable ('.' as decimal separator, ASCII
            // digits) regardless of the platform's default locale.
            printWriter.printf(Locale.ROOT, "%d %d\n", vocabSize, layer1Size);
            for (int word = 0; word < vocabSize; word++) {
                printWriter.print(vocab[word].word);
                final int rowStart = word * layer1Size;
                for (int column = 0; column < layer1Size; column++) {
                    printWriter.printf(Locale.ROOT, " %f", syn0[rowStart + column]);
                }
                printWriter.println();
            }
        } finally {
            // Runs on success and on failure alike, including a failure to open the output file.
            try {
                textFileCorpus.close();
            } finally {
                if (printWriter != null) {
                    Utility.closeQuietly((Writer) printWriter);
                }
                if (outputStreamWriter != null) {
                    Utility.closeQuietly((Writer) outputStreamWriter);
                }
                if (fileOutputStream != null) {
                    Utility.closeQuietly((OutputStream) fileOutputStream);
                }
            }
        }
    }
}
