/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.text.vsm;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.sorting.IndirectComparator;
import com.carrotsearch.hppc.sorting.IndirectSort;
import org.apache.mahout.math.matrix.DoubleMatrix2D;
import org.apache.mahout.math.matrix.impl.DenseDoubleMatrix2D;
import org.apache.mahout.math.matrix.impl.SparseDoubleMatrix2D;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.matrix.MatrixUtils;
import org.carrot2.text.analysis.TokenTypeUtils;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.vsm.ITermWeighting;
import org.carrot2.text.vsm.LinearTfIdfTermWeighting;
import org.carrot2.text.vsm.LogTfIdfTermWeighting;
import org.carrot2.text.vsm.TfTermWeighting;
import org.carrot2.text.vsm.VectorSpaceModelContext;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.DoubleRange;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * Builds the term-document and term-phrase matrices stored in a
 * {@link VectorSpaceModelContext}, based on the stems, words, phrases and labels
 * found in the associated {@link PreprocessingContext}.
 *
 * <p>Rows of both matrices correspond to stems. The mapping between a stem index and
 * its row is published in {@code VectorSpaceModelContext.stemToRowIndex} by
 * {@link #buildTermDocumentMatrix(VectorSpaceModelContext)}.
 */
@Bindable(prefix="TermDocumentMatrixBuilder")
public class TermDocumentMatrixBuilder {
    /** Name of the attribute group used by the matrix-related attributes of this class. */
    public static final String MATRIX_MODEL = "Matrix model";

    /** Name of the document field whose stems receive {@link #titleWordsBoost}. */
    private static final String TITLE_FIELD_NAME = "title";

    /**
     * Multiplier applied to the weight of every stem that occurs in the
     * {@code "title"} field. A value of 1.0 means no boost.
     */
    @Input
    @Processing
    @Attribute
    @DoubleRange(min=0.0, max=10.0)
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Labels")
    public double titleWordsBoost = 2.0;

    /**
     * Upper bound on the number of cells of the term-document matrix. The number of
     * rows (stems) is limited to {@code maximumMatrixSize / documentCount}.
     */
    @Input
    @Processing
    @Attribute
    @IntRange(min=5000)
    @Internal(configuration=true)
    @Level(value=AttributeLevel.ADVANCED)
    @Group(value=MATRIX_MODEL)
    public int maximumMatrixSize = 37500;

    /**
     * Maximum fraction of documents a stem may occur in. Stems for which
     * {@code documentFrequency / documentCount} exceeds this value are left out of
     * the matrix.
     */
    @Input
    @Processing
    @Attribute
    @DoubleRange(min=0.0, max=1.0)
    @Level(value=AttributeLevel.ADVANCED)
    @Group(value=MATRIX_MODEL)
    public double maxWordDf = 0.9;

    /** Strategy used to compute the weight of a term in the matrices. */
    @Input
    @Processing
    @Attribute
    @Required
    @ImplementingClasses(classes={LogTfIdfTermWeighting.class, LinearTfIdfTermWeighting.class, TfTermWeighting.class}, strict=false)
    @Level(value=AttributeLevel.ADVANCED)
    @Group(value=MATRIX_MODEL)
    public ITermWeighting termWeighting = new LogTfIdfTermWeighting();

    /**
     * Builds the term-document matrix and the stem-to-row mapping and stores both in
     * the provided context ({@code termDocumentMatrix} and {@code stemToRowIndex}).
     *
     * <p>Candidate stems are ranked by their boosted global weight; only the
     * top {@code maximumMatrixSize / documentCount} of them get a row. When there are
     * no documents, an empty 0x0 matrix and an empty mapping are stored.
     *
     * @param vectorSpaceModelContext context providing the preprocessing data and
     *        receiving the results
     */
    public void buildTermDocumentMatrix(VectorSpaceModelContext vectorSpaceModelContext) {
        final PreprocessingContext preprocessingContext = vectorSpaceModelContext.preprocessingContext;
        final int documentCount = preprocessingContext.documents.size();
        final int[] stemsTf = preprocessingContext.allStems.tf;
        final int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
        final byte[] stemsFieldIndices = preprocessingContext.allStems.fieldIndices;

        if (documentCount == 0) {
            vectorSpaceModelContext.termDocumentMatrix = new DenseDoubleMatrix2D(0, 0);
            vectorSpaceModelContext.stemToRowIndex = new IntIntHashMap();
            return;
        }

        final int titleFieldIndex = findTitleFieldIndex(preprocessingContext.allFields.name);

        // Global (collection-wide) weight of every candidate stem, used only for ranking.
        final int[] stemsToInclude = computeRequiredStemIndices(preprocessingContext);
        final double[] stemsWeight = new double[stemsToInclude.length];
        for (int i = 0; i < stemsToInclude.length; i++) {
            final int stemIndex = stemsToInclude[i];
            // tfByDocument holds (document, tf) pairs, hence length / 2 documents.
            final int documentFrequency = stemsTfByDocument[stemIndex].length / 2;
            stemsWeight[i] = termWeighting.calculateTermWeight(stemsTf[stemIndex], documentFrequency, documentCount)
                * getWeightBoost(titleFieldIndex, stemsFieldIndices[stemIndex]);
        }

        // Positions in stemsToInclude, ordered by decreasing weight.
        final int[] stemWeightOrder = IndirectSort.mergesort(
            0, stemsWeight.length, new IndirectComparator.DescendingDoubleComparator(stemsWeight));

        final int maxRows = maximumMatrixSize / documentCount;
        final int rowCount = Math.min(maxRows, stemsToInclude.length);
        final DenseDoubleMatrix2D tdMatrix = new DenseDoubleMatrix2D(rowCount, documentCount);
        final IntIntHashMap stemToRowIndex = new IntIntHashMap();

        for (int row = 0; row < rowCount; row++) {
            final int stemIndex = stemsToInclude[stemWeightOrder[row]];
            final int[] tfByDocument = stemsTfByDocument[stemIndex];
            final int documentFrequency = tfByDocument.length / 2;
            // The boost depends on the stem only, so compute it once per row.
            final double boost = getWeightBoost(titleFieldIndex, stemsFieldIndices[stemIndex]);

            for (int i = 0; i < documentFrequency; i++) {
                final int documentIndex = tfByDocument[i * 2];
                final int tfInDocument = tfByDocument[i * 2 + 1];
                final double weight = termWeighting.calculateTermWeight(tfInDocument, documentFrequency, documentCount);
                tdMatrix.set(row, documentIndex, weight * boost);
            }

            stemToRowIndex.put(stemIndex, row);
        }

        vectorSpaceModelContext.termDocumentMatrix = tdMatrix;
        vectorSpaceModelContext.stemToRowIndex = stemToRowIndex;
    }

    /**
     * Builds the term-phrase matrix for all phrase labels and stores its transposed
     * view in {@code termPhraseMatrix} of the provided context. Columns of the
     * underlying matrix are L2-normalized before the view is taken.
     *
     * <p>Nothing is stored when there are no phrase labels
     * ({@code firstPhraseIndex < 0}) or when the stem-to-row mapping is empty.
     * Reads {@code stemToRowIndex} from the context, so
     * {@link #buildTermDocumentMatrix(VectorSpaceModelContext)} is expected to have
     * been called first.
     *
     * @param vectorSpaceModelContext context providing the inputs and receiving the result
     */
    public void buildTermPhraseMatrix(VectorSpaceModelContext vectorSpaceModelContext) {
        final PreprocessingContext preprocessingContext = vectorSpaceModelContext.preprocessingContext;
        final IntIntHashMap stemToRowIndex = vectorSpaceModelContext.stemToRowIndex;
        final int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
        final int firstPhraseIndex = preprocessingContext.allLabels.firstPhraseIndex;

        if (firstPhraseIndex < 0 || stemToRowIndex.size() <= 0) {
            return;
        }

        // Phrase labels occupy the tail of the label array, starting at firstPhraseIndex.
        final int[] phraseFeatureIndices = new int[labelsFeatureIndex.length - firstPhraseIndex];
        System.arraycopy(labelsFeatureIndex, firstPhraseIndex, phraseFeatureIndices, 0, phraseFeatureIndices.length);

        final DoubleMatrix2D phraseMatrix = buildAlignedMatrix(vectorSpaceModelContext, phraseFeatureIndices, termWeighting);
        MatrixUtils.normalizeColumnL2(phraseMatrix, null);
        vectorSpaceModelContext.termPhraseMatrix = phraseMatrix.viewDice();
    }

    /**
     * Returns the index of the first field called {@code "title"}, or -1 if there is
     * no such field.
     */
    private static int findTitleFieldIndex(String[] fieldNames) {
        for (int i = 0; i < fieldNames.length; i++) {
            if (TITLE_FIELD_NAME.equals(fieldNames[i])) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns the weight multiplier for a stem: {@link #titleWordsBoost} if the bit
     * of the title field is set in the stem's field mask, 1.0 otherwise.
     *
     * @param titleFieldIndex index of the title field, or a negative value if there is none
     * @param fieldIndices bit mask of the fields the stem occurs in, one bit per field
     */
    private double getWeightBoost(int titleFieldIndex, byte fieldIndices) {
        // Only bits 0..7 of the byte mask are meaningful. Without this guard a missing
        // title field (-1) would test bit 31 (shift distances are taken modulo 32),
        // which is set by sign extension whenever the mask has bit 7 set, so such
        // stems would be boosted by mistake.
        if (titleFieldIndex < 0 || titleFieldIndex >= Byte.SIZE) {
            return 1.0;
        }
        if ((fieldIndices & (1 << titleFieldIndex)) != 0) {
            return titleWordsBoost;
        }
        return 1.0;
    }

    /**
     * Collects the indices of stems that should get a row in the term-document
     * matrix: stems of single-word labels and stems of the non-common words of phrase
     * labels, in both cases subject to the {@link #maxWordDf} limit.
     *
     * @return the selected stem indices
     */
    private int[] computeRequiredStemIndices(PreprocessingContext preprocessingContext) {
        final int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
        final int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;
        final short[] wordsType = preprocessingContext.allWords.type;
        final int[][] phrasesWordIndices = preprocessingContext.allPhrases.wordIndices;
        final int wordCount = wordsStemIndex.length;
        final int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
        final int documentCount = preprocessingContext.documents.size();

        final BitSet requiredStemIndices = new BitSet(labelsFeatureIndex.length);
        for (int i = 0; i < labelsFeatureIndex.length; i++) {
            final int featureIndex = labelsFeatureIndex[i];
            if (featureIndex < wordCount) {
                // Feature indices below the word count refer to single words.
                addStemIndex(wordsStemIndex, documentCount, stemsTfByDocument, requiredStemIndices, featureIndex);
            } else {
                // Remaining feature indices refer to phrases, offset by the word count.
                final int[] phraseWordIndices = phrasesWordIndices[featureIndex - wordCount];
                for (int j = 0; j < phraseWordIndices.length; j++) {
                    final int wordIndex = phraseWordIndices[j];
                    if (!TokenTypeUtils.isCommon(wordsType[wordIndex])) {
                        addStemIndex(wordsStemIndex, documentCount, stemsTfByDocument, requiredStemIndices, wordIndex);
                    }
                }
            }
        }

        return requiredStemIndices.asIntLookupContainer().toArray();
    }

    /**
     * Marks the stem of the given word as required, unless the fraction of documents
     * the stem occurs in exceeds {@link #maxWordDf}.
     */
    private void addStemIndex(int[] wordsStemIndex, int documentCount, int[][] stemsTfByDocument,
        BitSet requiredStemIndices, int wordIndex) {
        final int stemIndex = wordsStemIndex[wordIndex];
        final int documentFrequency = stemsTfByDocument[stemIndex].length / 2;
        if ((double) documentFrequency / (double) documentCount <= maxWordDf) {
            requiredStemIndices.set(stemIndex);
        }
    }

    /**
     * Builds a matrix whose rows are aligned with the rows of the term-document
     * matrix (through {@code stemToRowIndex}) and whose columns correspond to the
     * provided features. A cell is set to the global weight of a stem when one of the
     * feature's words has that stem and the stem has a row; all other cells stay empty.
     *
     * @param vectorSpaceModelContext context providing {@code stemToRowIndex} and the
     *        preprocessing data
     * @param nArray feature indices (words or phrases), one per matrix column
     * @param iTermWeighting weighting used to compute cell values
     * @return a matrix with {@code stemToRowIndex.size()} rows and
     *         {@code nArray.length} columns
     */
    static DoubleMatrix2D buildAlignedMatrix(VectorSpaceModelContext vectorSpaceModelContext, int[] nArray, ITermWeighting iTermWeighting) {
        final IntIntHashMap stemToRowIndex = vectorSpaceModelContext.stemToRowIndex;
        if (nArray.length == 0) {
            return new DenseDoubleMatrix2D(stemToRowIndex.size(), 0);
        }

        final SparseDoubleMatrix2D alignedMatrix = new SparseDoubleMatrix2D(stemToRowIndex.size(), nArray.length);
        final PreprocessingContext preprocessingContext = vectorSpaceModelContext.preprocessingContext;
        final int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;
        final int[] stemsTf = preprocessingContext.allStems.tf;
        final int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
        final int[][] phrasesWordIndices = preprocessingContext.allPhrases.wordIndices;
        final int documentCount = preprocessingContext.documents.size();
        final int wordCount = wordsStemIndex.length;

        for (int column = 0; column < nArray.length; column++) {
            final int featureIndex = nArray[column];
            // A single word is treated as a one-word phrase.
            final int[] wordIndices;
            if (featureIndex < wordCount) {
                wordIndices = new int[] {featureIndex};
            } else {
                wordIndices = phrasesWordIndices[featureIndex - wordCount];
            }

            for (int i = 0; i < wordIndices.length; i++) {
                final int stemIndex = wordsStemIndex[wordIndices[i]];
                final int slot = stemToRowIndex.indexOf(stemIndex);
                if (stemToRowIndex.indexExists(slot)) {
                    final int row = stemToRowIndex.indexGet(slot);
                    final double weight = iTermWeighting.calculateTermWeight(
                        stemsTf[stemIndex], stemsTfByDocument[stemIndex].length / 2, documentCount);
                    alignedMatrix.setQuick(row, column, weight);
                }
            }
        }

        return alignedMatrix;
    }
}

