package org.xm.similarity.text;

import com.huawei.hms.framework.common.ContainerUtils;
import java.math.BigInteger;
import java.util.List;
import java.util.function.Consumer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xm.tokenizer.Word;

/* loaded from: classes8.dex */
/**
 * Text similarity based on SimHash fingerprints compared by Hamming distance.
 *
 * <p>Each text (a list of {@link Word}s) is reduced to a bit string of
 * {@code hashBitCount} characters ('0'/'1'). The similarity score is
 * {@code 1 - hammingDistance / hashBitCount}, so identical fingerprints score
 * 1.0 and fully opposite fingerprints score 0.0.
 */
public class SimHashPlusHammingDistanceTextSimilarity extends TextSimilarity {
    private static final Logger LOGGER = LoggerFactory.getLogger(SimHashPlusHammingDistanceTextSimilarity.class);

    /** Fingerprint width used by the no-argument constructor. */
    private static final int DEFAULT_HASH_BIT_COUNT = 128;

    /** Multiplier used by {@link #hash(String)}. */
    private static final BigInteger HASH_MULTIPLIER = BigInteger.valueOf(1000003L);

    /** Number of bits in every fingerprint produced by this instance; always positive. */
    private int hashBitCount;

    /** Creates an instance that produces 128-bit fingerprints. */
    public SimHashPlusHammingDistanceTextSimilarity() {
        this.hashBitCount = DEFAULT_HASH_BIT_COUNT;
    }

    /**
     * Creates an instance with a custom fingerprint width.
     *
     * @param i number of bits per fingerprint; must be positive
     * @throws IllegalArgumentException if {@code i} is zero or negative
     */
    public SimHashPlusHammingDistanceTextSimilarity(int i) {
        this.hashBitCount = requirePositive(i);
    }

    /**
     * Rejects fingerprint widths that cannot work: a width of zero would make
     * the score a division by zero, and a negative width cannot size an array.
     */
    private static int requirePositive(int bitCount) {
        if (bitCount <= 0) {
            throw new IllegalArgumentException("hashBitCount must be positive, got " + bitCount);
        }
        return bitCount;
    }

    /**
     * Counts the positions at which two equal-length strings differ.
     *
     * @return the number of differing positions, or {@code -1} when the
     *         lengths differ and no distance can be computed
     */
    private static int hammingDistance(String str, String str2) {
        int length = str.length();
        if (length != str2.length()) {
            return -1;
        }
        int distance = 0;
        for (int pos = 0; pos < length; pos++) {
            if (str.charAt(pos) != str2.charAt(pos)) {
                distance++;
            }
        }
        return distance;
    }

    /**
     * Hashes a word into a non-negative integer.
     *
     * <p>The first character (shifted left by 7) is multiplied by
     * {@code 1000003}, XORed with the sum of all character codes, masked to
     * {@code hashBitCount} bits and finally XORed with the word length.
     *
     * @param str the word to hash; {@code null} or empty yields zero
     * @return the hash value, never negative
     */
    private BigInteger hash(String str) {
        if (str == null || str.isEmpty()) {
            return BigInteger.ZERO;
        }
        char[] chars = str.toCharArray();
        long charSum = 0;
        for (char c : chars) {
            charSum += c;
        }
        // 2^hashBitCount - 1: keeps only the low hashBitCount bits.
        BigInteger mask = BigInteger.ONE.shiftLeft(this.hashBitCount).subtract(BigInteger.ONE);
        // Every operand below is non-negative, so the result can never be -1;
        // no special-casing of that value is needed.
        return BigInteger.valueOf(chars[0] << 7)
                .multiply(HASH_MULTIPLIER)
                .xor(BigInteger.valueOf(charSum))
                .and(mask)
                .xor(BigInteger.valueOf(str.length()));
    }

    /** Prints the similarity scores of a few sample sentence pairs. */
    public static void main(String[] strArr) throws Exception {
        SimHashPlusHammingDistanceTextSimilarity textSimilarity = new SimHashPlusHammingDistanceTextSimilarity();
        String[][] pairs = {
            {"我爱购物", "我爱购物"},
            {"我爱购物", "我爱读书"},
            {"我爱购物", "他是黑客"},
            {"我爱读书", "我爱读书"},
            {"我爱读书", "他是黑客"},
            {"他是黑客", "他是黑客"},
        };
        // Compute every score before printing so the report lines stay contiguous.
        double[] scores = new double[pairs.length];
        for (int k = 0; k < pairs.length; k++) {
            scores[k] = textSimilarity.getSimilarity(pairs[k][0], pairs[k][1]);
        }
        for (int k = 0; k < pairs.length; k++) {
            System.out.println(pairs[k][0] + " 和 " + pairs[k][1] + " 的相似度分值：" + scores[k]);
        }
    }

    /**
     * Builds the SimHash fingerprint of a word list.
     *
     * @param list the words of one text
     * @return a string of exactly {@code hashBitCount} characters; position
     *         {@code i} is '1' when the accumulated vote for bit {@code i} is
     *         non-negative, otherwise '0'
     */
    private String simHash(List<Word> list) {
        float[] votes = new float[this.hashBitCount];
        for (Word word : list) {
            accumulateVotes(votes, word);
        }
        StringBuilder fingerprint = new StringBuilder(votes.length);
        for (float vote : votes) {
            fingerprint.append(vote >= 0.0f ? '1' : '0');
        }
        return fingerprint.toString();
    }

    /**
     * Adds one word's weighted vote to every bit position: the weight is added
     * where the word's hash has a set bit and subtracted where it does not.
     * A word without a weight counts as weight 1.
     */
    private void accumulateVotes(float[] votes, Word word) {
        float weight = word.getWeight() == null ? 1.0f : word.getWeight().floatValue();
        BigInteger wordHash = hash(word.getName());
        for (int bit = 0; bit < votes.length; bit++) {
            if (wordHash.testBit(bit)) {
                votes[bit] += weight;
            } else {
                votes[bit] -= weight;
            }
        }
    }

    /** Returns the number of bits in each fingerprint. */
    public int getHashBitCount() {
        return this.hashBitCount;
    }

    /**
     * Scores two word lists by comparing their SimHash fingerprints.
     *
     * @return {@code 1 - hammingDistance / fingerprintLength}, or {@code 0}
     *         when the fingerprints have different lengths
     */
    @Override
    protected double getSimilarityImpl(List<Word> list, List<Word> list2) {
        taggingWeightByFrequency(list, list2);
        String simHash = simHash(list);
        String simHash2 = simHash(list2);
        int hammingDistance = hammingDistance(simHash, simHash2);
        if (hammingDistance == -1) {
            LOGGER.error("文本1：{}", list);
            LOGGER.error("文本2：{}", list2);
            LOGGER.error("文本1SimHash值：{}", simHash);
            LOGGER.error("文本2SimHash值：{}", simHash2);
            LOGGER.error("文本1和文本2的SimHash值长度不相等，不能计算汉明距离");
            return 0.0d;
        }
        int length = simHash.length();
        // Divide as double: int / int would truncate the ratio to 0 or 1 and
        // collapse every score to exactly 1.0 or 0.0.
        double score = 1.0d - (double) hammingDistance / length;
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("文本1：{}", list);
            LOGGER.debug("文本2：{}", list2);
            LOGGER.debug("文本1SimHash值：{}", simHash);
            LOGGER.debug("文本2SimHash值：{}", simHash2);
            LOGGER.debug("hashBitCount：{}", this.hashBitCount);
            LOGGER.debug("SimHash值之间的汉明距离：{}", hammingDistance);
            // NOTE(review): ContainerUtils.KEY_VALUE_DELIMITER looks like a decompiler
            // substitution for a plain string literal - confirm its value, then inline it.
            LOGGER.debug("文本1和文本2的相似度分值：1 - {} / (double){}{}{}",
                    hammingDistance, length, ContainerUtils.KEY_VALUE_DELIMITER, score);
        }
        return score;
    }

    /**
     * Decompiler-assigned name of the lambda body originally attached to
     * {@code simHash} (JADX: "renamed from lambda$simHash$0"). Kept public and
     * unchanged in signature so existing references still resolve.
     *
     * @param fArr per-bit vote accumulator, updated in place
     * @param word the word whose weighted hash bits are added to {@code fArr}
     */
    public /* synthetic */ void m6763xd862ea64(float[] fArr, Word word) {
        accumulateVotes(fArr, word);
    }

    /**
     * Changes the fingerprint width used by subsequent similarity computations.
     *
     * @param i number of bits per fingerprint; must be positive
     * @throws IllegalArgumentException if {@code i} is zero or negative
     */
    public void setHashBitCount(int i) {
        this.hashBitCount = requirePositive(i);
    }
}
