/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntAssociativeContainer;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.Substring;
import org.carrot2.text.preprocessing.SubstringComparator;
import org.carrot2.text.preprocessing.SuffixSorter;
import org.carrot2.util.IntMapUtils;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * Extracts frequently occurring phrases from the token sequence held by a
 * {@link PreprocessingContext}.
 *
 * <p>{@link #extractPhrases(PreprocessingContext)} suffix-sorts the tokens, discovers
 * repeated substrings of {@value #MIN_PHRASE_LENGTH} to {@value #MAX_PHRASE_LENGTH}
 * tokens, merges substrings that {@code Substring.isEquivalentTo} reports as equivalent
 * and stores the result in {@code preprocessingContext.allPhrases}.
 */
@Bindable(prefix="PhraseExtractor")
public class PhraseExtractor {
    /** Shortest phrase, in tokens, that is ever pushed onto the discovery stack. */
    private static final int MIN_PHRASE_LENGTH = 2;

    /** Cap applied to every LCP value, which bounds the length of discovered phrases. */
    static final int MAX_PHRASE_LENGTH = 8;

    /**
     * Phrase document frequency threshold: a discovered substring is kept only when its
     * per-document frequency map has at least this many entries.
     */
    @Processing
    @Input
    @Attribute
    @IntRange(min=1, max=100)
    @Label(value="Phrase document frequency threshold")
    @Level(value=AttributeLevel.ADVANCED)
    @Group(value="Phrase extraction")
    public int dfThreshold = 1;

    private SuffixSorter suffixSorter = new SuffixSorter();

    /**
     * Discovers phrases in the given context and publishes them.
     *
     * <p>Reads {@code allTokens.suffixOrder}, {@code allTokens.lcp},
     * {@code allTokens.wordIndex}, {@code allTokens.documentIndex} and
     * {@code allWords.stemIndex} (after running the suffix sorter on the context) and
     * writes {@code allPhrases.wordIndices}, {@code allPhrases.tf} and
     * {@code allPhrases.tfByDocument}. All three output arrays have one element per
     * phrase and are empty when no substring passes the {@link #dfThreshold} filter.
     *
     * @param preprocessingContext context to read token data from and write phrases to
     */
    public void extractPhrases(PreprocessingContext preprocessingContext) {
        this.suffixSorter.suffixSort(preprocessingContext);

        final int[] suffixOrder = preprocessingContext.allTokens.suffixOrder;
        final int[] lcp = preprocessingContext.allTokens.lcp;
        final int[] tokenWordIndex = preprocessingContext.allTokens.wordIndex;
        final int[] documentIndex = preprocessingContext.allTokens.documentIndex;
        final int[] wordStemIndex = preprocessingContext.allWords.stemIndex;

        final List<Substring> substrings = this.discoverRcs(suffixOrder, lcp, documentIndex);

        final List<int[]> phraseWordIndices = new ArrayList<int[]>();
        final IntArrayList phraseTf = new IntArrayList();
        final List<int[]> phraseTfByDocument = new ArrayList<int[]>();

        if (substrings.size() > 0) {
            // Sorting places equivalent substrings next to each other, so that each run of
            // equivalent neighbours can be collapsed into a single phrase below.
            Collections.sort(substrings, new SubstringComparator(tokenWordIndex, wordStemIndex));

            // Running state of the group currently being merged.
            Substring mostFrequent = substrings.get(0);
            int groupTf = mostFrequent.frequency;
            final IntIntHashMap groupTfByDocument = new IntIntHashMap();
            groupTfByDocument.putAll((IntIntAssociativeContainer) mostFrequent.tfByDocument);

            for (int i = 0; i < substrings.size() - 1; i++) {
                final Substring current = substrings.get(i);
                final Substring next = substrings.get(i + 1);

                if (current.isEquivalentTo(next, tokenWordIndex, wordStemIndex)) {
                    // Same group: accumulate counts and remember the most frequent variant.
                    groupTf += next.frequency;
                    // -1 as the offset key: presumably no document has index -1, so no
                    // entry is decremented here -- TODO confirm.
                    PhraseExtractor.addAllWithOffset(groupTfByDocument, next.tfByDocument, -1);
                    if (mostFrequent.frequency < next.frequency) {
                        mostFrequent = next;
                    }
                } else {
                    // Group boundary: emit the finished group and start a new one at 'next'.
                    phraseWordIndices.add(PhraseExtractor.copyWordIndices(
                        tokenWordIndex, mostFrequent.from, mostFrequent.to - mostFrequent.from));
                    phraseTf.add(groupTf);
                    phraseTfByDocument.add(IntMapUtils.flatten(groupTfByDocument));

                    groupTf = next.frequency;
                    mostFrequent = next;
                    groupTfByDocument.clear();
                    groupTfByDocument.putAll((IntIntAssociativeContainer) next.tfByDocument);
                }
            }

            // Emit the group that was still open when the loop ended. As in the original
            // code, the length is taken from the last substring and the start position
            // from the most frequent variant of the group.
            final Substring last = substrings.get(substrings.size() - 1);
            phraseWordIndices.add(PhraseExtractor.copyWordIndices(
                tokenWordIndex, mostFrequent.from, last.to - last.from));
            phraseTf.add(groupTf);
            phraseTfByDocument.add(IntMapUtils.flatten(groupTfByDocument));
        }

        preprocessingContext.allPhrases.wordIndices =
            phraseWordIndices.toArray(new int[phraseWordIndices.size()][]);
        preprocessingContext.allPhrases.tf = phraseTf.toArray();
        preprocessingContext.allPhrases.tfByDocument =
            phraseTfByDocument.toArray(new int[phraseTfByDocument.size()][]);
    }

    /**
     * Returns a copy of {@code length} consecutive elements of {@code tokenWordIndex},
     * starting at position {@code from}.
     */
    private static int[] copyWordIndices(int[] tokenWordIndex, int from, int length) {
        final int[] wordIndices = new int[length];
        System.arraycopy(tokenWordIndex, from, wordIndices, 0, length);
        return wordIndices;
    }

    /**
     * Scans the suffix order together with its LCP values and collects repeated
     * substrings of at least {@link #MIN_PHRASE_LENGTH} tokens (LCP values are capped at
     * {@link #MAX_PHRASE_LENGTH}).
     *
     * <p>Candidates live on a stack ordered by increasing length. Only the top entry has
     * its counts updated directly; when an entry is popped, its counts are folded into
     * the entry below it. A popped entry is added to the result only if its
     * per-document map has at least {@link #dfThreshold} entries.
     *
     * @param suffixOrder token start positions in suffix order
     * @param lcp LCP value for each position of {@code suffixOrder}
     * @param documentIndex document index for each token position
     * @return the substrings that passed the document frequency threshold
     */
    private List<Substring> discoverRcs(int[] suffixOrder, int[] lcp, int[] documentIndex) {
        final Substring[] stack = new Substring[lcp.length];
        int top = -1;
        final List<Substring> result = new ArrayList<Substring>();

        int i = 1;
        while (i < lcp.length - 1) {
            final int suffixStart = suffixOrder[i];
            final int currentDocument = documentIndex[suffixStart];
            final int currentLcp = Math.min(MAX_PHRASE_LENGTH, lcp[i]);

            if (top < 0) {
                // Empty stack: push prefixes of length MIN_PHRASE_LENGTH..currentLcp,
                // shortest first, so the longest one ends up on top.
                if (currentLcp >= MIN_PHRASE_LENGTH) {
                    final int previousDocument = documentIndex[suffixOrder[i - 1]];
                    for (int shorterBy = currentLcp - MIN_PHRASE_LENGTH; shorterBy >= 0; shorterBy--) {
                        stack[++top] = PhraseExtractor.newStackEntry(
                            i, suffixStart, suffixStart + currentLcp - shorterBy,
                            shorterBy == 0, previousDocument, currentDocument);
                    }
                }
                i++;
                continue;
            }

            final int topLength = stack[top].to - stack[top].from;
            if (topLength < currentLcp) {
                // Longer common prefix than the current top: the old top stops being the
                // topmost entry, so record the document whose count it will share with
                // the entries pushed above it, then push the longer prefixes.
                final int previousDocument = documentIndex[suffixOrder[i - 1]];
                stack[top].documentIndexToOffset = previousDocument;
                for (int shorterBy = currentLcp - topLength - 1; shorterBy >= 0; shorterBy--) {
                    if (currentLcp - shorterBy >= MIN_PHRASE_LENGTH) {
                        stack[++top] = PhraseExtractor.newStackEntry(
                            i, suffixStart, suffixStart + currentLcp - shorterBy,
                            shorterBy == 0, previousDocument, currentDocument);
                    }
                }
                i++;
            } else if (topLength == currentLcp) {
                // Another occurrence of the substring on top of the stack.
                stack[top].frequency++;
                stack[top].tfByDocument.putOrAdd(currentDocument, 1, 1);
                i++;
            } else {
                // Shorter common prefix: pop every entry longer than currentLcp. The
                // position 'i' is deliberately not advanced; it is examined again against
                // the new stack top on the next iteration.
                do {
                    final Substring popped = stack[top];
                    if (popped.tfByDocument.size() >= this.dfThreshold) {
                        result.add(popped);
                    }
                    top--;
                    if (top >= 0) {
                        // The entry below was created with frequency 1 for an occurrence
                        // the popped entry also counted, hence the "- 1" here and the
                        // offset document passed to addAllWithOffset.
                        stack[top].frequency += popped.frequency - 1;
                        PhraseExtractor.addAllWithOffset(
                            stack[top].tfByDocument, popped.tfByDocument,
                            stack[top].documentIndexToOffset);
                    }
                } while (top >= 0 && stack[top].to - stack[top].from > currentLcp);
            }
        }
        return result;
    }

    /**
     * Creates a stack entry for the token range {@code [from, to)}.
     *
     * <p>A topmost entry starts with frequency 2 and one count for each of the previous
     * and the current suffix's document. A non-topmost entry starts with frequency 1, a
     * single count for the previous suffix's document, and remembers that document in
     * {@code documentIndexToOffset} so the count is not added twice when the entry
     * above it is popped and merged in.
     *
     * @param id first {@code Substring} constructor argument; the caller passes the
     *        current position in the suffix order
     * @param from first token position of the substring
     * @param to token position just past the end of the substring
     * @param topmost whether this is the longest substring pushed for the current position
     * @param previousDocument document index of the previous suffix in suffix order
     * @param currentDocument document index of the current suffix
     */
    private static Substring newStackEntry(int id, int from, int to, boolean topmost,
                                           int previousDocument, int currentDocument) {
        final Substring entry = new Substring(id, from, to, topmost ? 2 : 1);
        entry.tfByDocument = new IntIntHashMap();
        entry.tfByDocument.put(previousDocument, 1);
        if (topmost) {
            entry.tfByDocument.putOrAdd(currentDocument, 1, 1);
        } else {
            entry.documentIndexToOffset = previousDocument;
        }
        return entry;
    }

    /**
     * Adds every entry of {@code intIntHashMap2} to {@code intIntHashMap} through
     * {@code putOrAdd}, using the entry's value both as the value to put and as the
     * increment. The value of the entry whose key equals {@code n} is reduced by one
     * before it is added.
     *
     * @param intIntHashMap map that receives the counts
     * @param intIntHashMap2 map whose counts are added
     * @param n key whose count is reduced by one
     */
    private static void addAllWithOffset(IntIntHashMap intIntHashMap, IntIntHashMap intIntHashMap2, int n) {
        for (IntIntCursor cursor : intIntHashMap2) {
            final int key = cursor.key;
            final int value = cursor.value + (key != n ? 0 : -1);
            intIntHashMap.putOrAdd(key, value, value);
        }
    }
}

