/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.clustering.synthetic;

import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.commons.lang.ArrayUtils;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.CommonAttributes;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Output;

/**
 * Groups documents hierarchically by the dot-separated parts of the host name found in
 * each document's {@code "url"} field.
 *
 * <p>Host names are split on {@code '.'} and reversed, so grouping starts at the
 * right-most part (for example {@code com}) and descends towards the left-most part.
 * Parts listed in {@link #STOP_URL_PARTS} are never used as a grouping key.
 */
@Bindable(inherit={CommonAttributes.class})
@Label(value="By URL Clustering")
public class ByUrlClusteringAlgorithm
extends ProcessingComponentBase
implements IClusteringAlgorithm {
    /** Host name parts that are ignored when they occur at the level being grouped. */
    private static final Set<String> STOP_URL_PARTS = new HashSet<String>();

    /** Label handed to {@code Cluster.appendOtherTopics} at every level of the hierarchy. */
    private static final String OTHER_SITES_LABEL = "Other Sites";

    /** Name of the document field the URL is read from. */
    private static final String URL_FIELD = "url";

    /** Separator between the URL scheme and the host. */
    private static final String SCHEME_SEPARATOR = "://";

    static {
        STOP_URL_PARTS.add("www");
    }

    /** Documents to cluster; read by {@link #process()}. */
    @Processing
    @Input
    @Internal
    @Attribute(key="documents", inherit=true)
    public List<Document> documents;

    /** Clusters produced by {@link #process()}; {@code null} until it has run. */
    @Processing
    @Output
    @Internal
    @Attribute(key="clusters", inherit=true)
    public List<Cluster> clusters = null;

    /**
     * Builds the cluster hierarchy for {@link #documents} and stores it in
     * {@link #clusters}.
     *
     * <p>When no URL-based cluster could be created, {@code Cluster.appendOtherTopics} is
     * invoked on the (empty) result with all input documents.
     *
     * @throws ProcessingException declared by the component contract; this implementation
     *         does not throw it explicitly
     */
    public void process() throws ProcessingException {
        // Copy into an array so documents can be addressed by index regardless of the
        // List implementation supplied by the caller.
        final Document[] documentArray = this.documents.toArray(new Document[this.documents.size()]);
        final String[][] urlParts = this.buildUrlParts(documentArray);

        final List<Integer> allIndexes = new ArrayList<Integer>(documentArray.length);
        for (int i = 0; i < documentArray.length; i++) {
            allIndexes.add(i);
        }

        this.clusters = this.createClusters(documentArray, allIndexes, urlParts, 0, "");
        if (this.clusters.isEmpty()) {
            Cluster.appendOtherTopics(this.documents, this.clusters, OTHER_SITES_LABEL);
        }
    }

    /**
     * Recursively groups the given documents by their URL part at {@code n}.
     *
     * @param documentArray all documents being clustered
     * @param collection indexes into {@code documentArray} to consider at this level
     * @param stringArray reversed host parts per document, as built by
     *        {@link #buildUrlParts(Document[])}; an entry may be {@code null}
     * @param n zero-based index of the host part to group by
     * @param string label accumulated from the previous levels; empty at the top level
     * @return clusters for this level, sorted with
     *         {@code Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR} and then passed through
     *         {@code Cluster.appendOtherTopics}; an empty list when no group had more than
     *         one document
     */
    private List<Cluster> createClusters(Document[] documentArray, Collection<Integer> collection, String[][] stringArray, int n, String string) {
        // Insertion-ordered grouping: URL part at this level -> document indexes.
        final LinkedHashMultimap<String, Integer> indexesByPart = LinkedHashMultimap.create();
        for (final Integer documentIndex : collection) {
            final String[] parts = stringArray[documentIndex];
            if (parts == null || parts.length <= n || STOP_URL_PARTS.contains(parts[n])) {
                continue;
            }
            indexesByPart.put(parts[n], documentIndex);
        }

        final Set<Integer> clusteredIndexes = new LinkedHashSet<Integer>();
        final List<Cluster> result = new ArrayList<Cluster>();
        for (final String part : indexesByPart.keySet()) {
            final Collection<Integer> indexes = indexesByPart.get(part);
            if (indexes.size() <= 1) {
                // A single document does not form a cluster.
                continue;
            }

            final Cluster cluster = new Cluster();
            String label = part + (string.length() > 0 ? "." + string : "");

            final List<Cluster> subclusters = this.createClusters(documentArray, indexes, stringArray, n + 1, label);
            if (subclusters.size() > 1) {
                cluster.addSubclusters(subclusters);
            } else if (subclusters.size() == 1) {
                // Collapse a lone subcluster into this cluster, adopting its label.
                final Cluster only = subclusters.get(0);
                label = only.getPhrases().get(0);
                cluster.addDocuments(only.getDocuments());
                cluster.addSubclusters(only.getSubclusters());
            } else {
                // No deeper structure: this cluster holds the documents directly.
                for (final Integer documentIndex : indexes) {
                    cluster.addDocuments(new Document[]{documentArray[documentIndex]});
                }
            }
            cluster.addPhrases(new String[]{label});
            result.add(cluster);
            clusteredIndexes.addAll(indexes);
        }

        if (clusteredIndexes.isEmpty()) {
            return Lists.newArrayList();
        }

        Collections.sort(result, Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR);

        final List<Document> levelDocuments = Lists.newArrayListWithExpectedSize(collection.size());
        for (final Integer documentIndex : collection) {
            levelDocuments.add(documentArray[documentIndex]);
        }
        // NOTE(review): presumably adds a cluster for the documents of this level that are
        // in none of the clusters above - confirm against Cluster.appendOtherTopics.
        Cluster.appendOtherTopics(levelDocuments, result, OTHER_SITES_LABEL);
        return result;
    }

    /**
     * Extracts the reversed, lower-cased host name parts of every document's URL.
     *
     * <p>The host is the text between an optional {@code "://"} and the first following
     * {@code '/'} (or the end of the URL). It is lower-cased with {@link Locale#ROOT} so the
     * result does not depend on the default locale, split on {@code '.'} and reversed.
     *
     * @param documentArray documents whose {@code "url"} field is read
     * @return one entry per document, in the same order; an entry is {@code null} when the
     *         document has no URL, nothing follows {@code "://"}, or the host is empty
     */
    final String[][] buildUrlParts(Document[] documentArray) {
        final String[][] urlParts = new String[documentArray.length][];
        for (int i = 0; i < documentArray.length; i++) {
            final String url = (String)documentArray[i].getField(URL_FIELD);
            if (url == null) {
                continue;
            }

            int hostStart = 0;
            final int separatorIndex = url.indexOf(SCHEME_SEPARATOR);
            if (separatorIndex >= 0) {
                hostStart = separatorIndex + SCHEME_SEPARATOR.length();
                if (hostStart >= url.length()) {
                    // Nothing after the scheme separator.
                    continue;
                }
            }

            // Search from the first host character; starting further right would let the
            // path leak into hosts shorter than the skipped distance.
            int hostEnd = url.indexOf('/', hostStart);
            if (hostEnd < 0) {
                hostEnd = url.length();
            }

            final String host = url.substring(hostStart, hostEnd).toLowerCase(Locale.ROOT);
            if (host.length() == 0) {
                // Splitting "" would yield a single empty part and group such documents
                // under an empty label; treat them as having no usable URL instead.
                continue;
            }

            final String[] parts = host.split("\\.");
            ArrayUtils.reverse(parts);
            urlParts[i] = parts;
        }
        return urlParts;
    }
}

