/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.clustering.lingo;

import com.carrotsearch.hppc.BitSet;
import com.google.common.collect.Lists;
import java.util.Collections;
import java.util.List;
import org.carrot2.clustering.lingo.ClusterBuilder;
import org.carrot2.clustering.lingo.LingoProcessingContext;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.CommonAttributes;
import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.clustering.IMonolingualClusteringAlgorithm;
import org.carrot2.text.clustering.MultilingualClustering;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.pipeline.CompletePreprocessingPipeline;
import org.carrot2.text.preprocessing.pipeline.IPreprocessingPipeline;
import org.carrot2.text.vsm.ReducedVectorSpaceModelContext;
import org.carrot2.text.vsm.TermDocumentMatrixBuilder;
import org.carrot2.text.vsm.TermDocumentMatrixReducer;
import org.carrot2.text.vsm.VectorSpaceModelContext;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.Output;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.DoubleRange;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * Clustering component that runs the Lingo pipeline: preprocessing, term-document
 * matrix construction and reduction, label building, document assignment and
 * cluster merging.
 *
 * <p>The input documents are handed to {@link MultilingualClustering}, which calls
 * back into this component for each group of documents it decides to cluster
 * together. During such a callback the {@link #documents} and {@link #clusters}
 * fields are temporarily overwritten, so a single instance must not be used from
 * several threads at once.
 */
@Bindable(prefix="LingoClusteringAlgorithm", inherit={CommonAttributes.class})
@Label(value="Lingo Clustering")
public class LingoClusteringAlgorithm
extends ProcessingComponentBase
implements IClusteringAlgorithm {
    /** Query passed to the preprocessing pipeline; may be {@code null}. */
    @Processing
    @Input
    @Internal
    @Attribute(key="query", inherit=true)
    public String query = null;

    /** Documents to cluster (required input). */
    @Processing
    @Input
    @Required
    @Internal
    @Attribute(key="documents", inherit=true)
    public List<Document> documents;

    /** Clusters produced by the most recent {@link #process()} call (output). */
    @Processing
    @Output
    @Internal
    @Attribute(key="clusters", inherit=true)
    public List<Cluster> clusters = null;

    /**
     * Size-score sorting ratio in the range 0.0 to 1.0. The value is passed to
     * {@code Cluster.byReversedWeightedScoreAndSizeComparator} when the resulting
     * clusters are ordered.
     */
    @Input
    @Processing
    @Attribute
    @DoubleRange(min=0.0, max=1.0)
    @Label(value="Size-Score sorting ratio")
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Clusters")
    public double scoreWeight = 0.0;

    /**
     * Base value (2 to 100) from which the number of clusters requested from the
     * matrix reducer is derived, see {@link #computeClusterCount(int, int)}.
     */
    @Input
    @Processing
    @Attribute
    @IntRange(min=2, max=100)
    @Label(value="Cluster count base")
    @Level(value=AttributeLevel.BASIC)
    @Group(value="Clusters")
    public int desiredClusterCountBase = 30;

    /** Pipeline used to preprocess the documents before clustering. */
    @Init
    @Input
    @Attribute
    @Internal
    @ImplementingClasses(classes={}, strict=false)
    @Level(value=AttributeLevel.ADVANCED)
    public IPreprocessingPipeline preprocessingPipeline = new CompletePreprocessingPipeline();

    /** Builds the term-document and term-phrase matrices. */
    public final TermDocumentMatrixBuilder matrixBuilder = new TermDocumentMatrixBuilder();

    /** Reduces the term-document matrix to the requested number of clusters. */
    public final TermDocumentMatrixReducer matrixReducer = new TermDocumentMatrixReducer();

    /** Builds cluster labels, assigns documents to them and merges clusters. */
    public final ClusterBuilder clusterBuilder = new ClusterBuilder();

    /** Formats label features into the phrases attached to clusters. */
    public final LabelFormatter labelFormatter = new LabelFormatter();

    /** Splits the input and drives the per-language callbacks. */
    public final MultilingualClustering multilingualClustering = new MultilingualClustering();

    /**
     * Clusters {@link #documents} and stores the result in {@link #clusters}.
     *
     * <p>While the multilingual helper is running, {@link #documents} is pointed at
     * the sublist currently being clustered. The original list is restored in a
     * {@code finally} block, so the field is left intact even when clustering fails
     * with an exception.
     *
     * @throws ProcessingException declared by the component contract
     */
    public void process() throws ProcessingException {
        final List<Document> originalDocuments = this.documents;
        try {
            this.clusters = this.multilingualClustering.process(originalDocuments, new IMonolingualClusteringAlgorithm(){

                public List<Cluster> process(List<Document> languageDocuments, LanguageCode languageCode) {
                    // cluster() reads its input from, and writes its output to, the
                    // enclosing instance's fields.
                    LingoClusteringAlgorithm.this.documents = languageDocuments;
                    LingoClusteringAlgorithm.this.cluster(languageCode);
                    return LingoClusteringAlgorithm.this.clusters;
                }
            });
        } finally {
            // Always undo the temporary swap performed by the callback above.
            this.documents = originalDocuments;
        }
    }

    /**
     * Clusters the current contents of {@link #documents} using the given language
     * and replaces {@link #clusters} with the outcome.
     *
     * @param languageCode language passed to the preprocessing pipeline
     */
    private void cluster(LanguageCode languageCode) {
        PreprocessingContext preprocessingContext = this.preprocessingPipeline.preprocess(this.documents, this.query, languageCode);
        this.clusters = Lists.newArrayList();

        if (preprocessingContext.hasLabels()) {
            VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(preprocessingContext);
            ReducedVectorSpaceModelContext reducedVsmContext = new ReducedVectorSpaceModelContext(vsmContext);
            LingoProcessingContext lingoContext = new LingoProcessingContext(reducedVsmContext);

            this.matrixBuilder.buildTermDocumentMatrix(vsmContext);
            this.matrixBuilder.buildTermPhraseMatrix(vsmContext);

            int clusterCount = LingoClusteringAlgorithm.computeClusterCount(this.desiredClusterCountBase, this.documents.size());
            this.matrixReducer.reduce(reducedVsmContext, clusterCount);

            this.clusterBuilder.buildLabels(lingoContext, this.matrixBuilder.termWeighting);
            this.clusterBuilder.assignDocuments(lingoContext);
            this.clusterBuilder.merge(lingoContext);

            int[] labelFeatureIndex = lingoContext.clusterLabelFeatureIndex;
            BitSet[] clusterDocuments = lingoContext.clusterDocuments;
            double[] clusterLabelScore = lingoContext.clusterLabelScore;

            for (int i = 0; i < labelFeatureIndex.length; i++) {
                int labelFeature = labelFeatureIndex[i];
                if (labelFeature < 0) {
                    // Negative index: this slot yields no cluster.
                    // NOTE(review): presumably marks clusters dropped by merge() -- confirm in ClusterBuilder.
                    continue;
                }

                Cluster cluster = new Cluster();
                cluster.addPhrases(new String[]{this.labelFormatter.format(preprocessingContext, labelFeature)});
                cluster.setAttribute("score", clusterLabelScore[i]);

                // Each set bit is an index into this.documents.
                BitSet members = clusterDocuments[i];
                for (int doc = members.nextSetBit(0); doc >= 0; doc = members.nextSetBit(doc + 1)) {
                    cluster.addDocuments(new Document[]{this.documents.get(doc)});
                }
                this.clusters.add(cluster);
            }

            Collections.sort(this.clusters, Cluster.byReversedWeightedScoreAndSizeComparator(this.scoreWeight));
        }

        Cluster.appendOtherTopics(this.documents, this.clusters);
    }

    /**
     * Computes the number of clusters to request from the matrix reducer:
     * {@code (int) (n / 10.0 * sqrt(n2))}, capped at {@code n2}.
     *
     * @param n the cluster count base
     * @param n2 the number of documents being clustered
     * @return the cluster count, never greater than {@code n2}
     */
    static int computeClusterCount(int n, int n2) {
        int scaled = (int)((double)n / 10.0 * Math.sqrt(n2));
        return Math.min(scaled, n2);
    }
}

