package jml.topics;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jml.utils.Utility;
import org.apache.commons.math.linear.OpenMapRealMatrix;
import org.apache.commons.math.linear.RealMatrix;

/* loaded from: input_file:jml/topics/Corpus.class */
public class Corpus {
    public static int IdxStart = 0;
    public ArrayList<TreeMap<Integer, Integer>> docTermCountArray = new ArrayList<>();
    private Vector<Vector<Integer>> corpus = new Vector<>();
    public int[][] documents = null;
    public int nTerm = 0;
    public int nDoc = 0;

    public void clearCorpus() {
        for (int i = 0; i < this.corpus.size(); i++) {
            this.corpus.get(i).clear();
        }
        this.corpus.clear();
        this.nTerm = 0;
        this.nDoc = 0;
    }

    public void clearDocTermCountArray() {
        if (this.docTermCountArray.size() == 0) {
            return;
        }
        Iterator<TreeMap<Integer, Integer>> it = this.docTermCountArray.iterator();
        while (it.hasNext()) {
            it.next().clear();
        }
        this.docTermCountArray.clear();
    }

    public int[][] getDocuments() {
        return this.documents;
    }

    public void readCorpusFromLDAInputFile(String str) {
        clearCorpus();
        BufferedReader bufferedReader = null;
        try {
            bufferedReader = new BufferedReader(new FileReader(str));
        } catch (FileNotFoundException e) {
            System.out.println("Cannot open file: " + str);
            e.printStackTrace();
        }
        int i = 0;
        while (true) {
            try {
                String readLine = bufferedReader.readLine();
                if (readLine == null) {
                    break;
                }
                i++;
                this.nDoc++;
                Vector<Integer> vector = new Vector<>();
                StringTokenizer stringTokenizer = new StringTokenizer(readLine);
                System.out.println("DocID: " + i + ", nUniqueTerms: " + Integer.parseInt(stringTokenizer.nextToken(" :\t")));
                while (stringTokenizer.hasMoreTokens()) {
                    int parseInt = Integer.parseInt(stringTokenizer.nextToken(" :\t")) + (1 - IdxStart);
                    int parseInt2 = Integer.parseInt(stringTokenizer.nextToken(" :\t"));
                    for (int i2 = 0; i2 < parseInt2; i2++) {
                        vector.add(Integer.valueOf(parseInt));
                    }
                    if (parseInt > this.nTerm) {
                        this.nTerm = parseInt;
                    }
                }
                this.corpus.add(vector);
            } catch (IOException e2) {
                e2.printStackTrace();
            } catch (NumberFormatException e3) {
                e3.printStackTrace();
            }
        }
        bufferedReader.close();
        this.documents = corpus2Documents(this.corpus);
    }

    public void readCorpusFromDocTermCountFile(String str) {
        clearDocTermCountArray();
        clearCorpus();
        BufferedReader bufferedReader = null;
        TreeMap<Integer, Integer> treeMap = null;
        Vector<Integer> vector = null;
        Pattern compile = Pattern.compile("[(]([\\d]+), ([\\d]+)[)]: ([\\d]+)");
        try {
            bufferedReader = new BufferedReader(new FileReader(str));
        } catch (FileNotFoundException e) {
            System.out.println("Cannot open file: " + str);
            e.printStackTrace();
        }
        while (true) {
            try {
                String readLine = bufferedReader.readLine();
                if (readLine == null) {
                    break;
                }
                Matcher matcher = compile.matcher(readLine);
                if (!matcher.find()) {
                    System.out.println("Data format for the docTermCountFile should be: (docID, termID): count");
                    System.exit(0);
                }
                int parseInt = Integer.parseInt(matcher.group(1));
                if (parseInt != this.nDoc) {
                    if (this.nDoc > 0) {
                        this.docTermCountArray.add(treeMap);
                        this.corpus.add(vector);
                        if (this.nTerm < treeMap.lastKey().intValue()) {
                            this.nTerm = treeMap.lastKey().intValue();
                        }
                        System.out.println("DocID: " + this.nDoc + ", nUniqueTerms: " + treeMap.size());
                    }
                    for (int i = this.nDoc + 1; i < parseInt; i++) {
                        this.docTermCountArray.add(new TreeMap<>(new Utility.keyAscendComparator()));
                        this.corpus.add(new Vector<>());
                        PrintStream printStream = System.out;
                        StringBuilder sb = new StringBuilder("DocID: ");
                        int i2 = this.nDoc + 1;
                        this.nDoc = i2;
                        printStream.println(sb.append(i2).append(", Empty").toString());
                    }
                    treeMap = new TreeMap<>(new Utility.keyAscendComparator());
                    vector = new Vector<>();
                    this.nDoc++;
                }
                int parseInt2 = Integer.parseInt(matcher.group(2));
                int parseInt3 = Integer.parseInt(matcher.group(3));
                treeMap.put(Integer.valueOf(parseInt2), Integer.valueOf(parseInt3));
                for (int i3 = 0; i3 < parseInt3; i3++) {
                    vector.add(Integer.valueOf(parseInt2));
                }
            } catch (IOException e2) {
                e2.printStackTrace();
            } catch (NumberFormatException e3) {
                e3.printStackTrace();
            }
        }
        if (treeMap != null) {
            this.docTermCountArray.add(treeMap);
            this.corpus.add(vector);
            if (this.nTerm < treeMap.lastKey().intValue()) {
                this.nTerm = treeMap.lastKey().intValue();
            }
            System.out.println("DocID: " + this.nDoc + ", nUniqueTerms: " + treeMap.size());
        }
        bufferedReader.close();
        this.documents = corpus2Documents(this.corpus);
    }

    public void readCorpusFromDocTermCountArray(ArrayList<TreeMap<Integer, Integer>> arrayList) {
        clearCorpus();
        Iterator<TreeMap<Integer, Integer>> it = arrayList.iterator();
        while (it.hasNext()) {
            TreeMap<Integer, Integer> next = it.next();
            Vector<Integer> vector = new Vector<>();
            this.nDoc++;
            Iterator<Integer> it2 = next.keySet().iterator();
            while (it2.hasNext()) {
                int intValue = it2.next().intValue();
                int intValue2 = next.get(Integer.valueOf(intValue)).intValue();
                for (int i = 0; i < intValue2; i++) {
                    vector.add(Integer.valueOf(intValue));
                }
            }
            if (this.nTerm < next.lastKey().intValue()) {
                this.nTerm = next.lastKey().intValue();
            }
            this.corpus.add(vector);
        }
        this.documents = corpus2Documents(this.corpus);
    }

    public void readCorpusFromMatrix(RealMatrix realMatrix) {
        clearCorpus();
        int columnDimension = realMatrix.getColumnDimension();
        this.nTerm = realMatrix.getRowDimension();
        for (int i = 0; i < columnDimension; i++) {
            Vector<Integer> vector = new Vector<>();
            for (int i2 = 0; i2 < this.nTerm; i2++) {
                int entry = (int) realMatrix.getEntry(i2, i);
                if (entry != 0) {
                    int i3 = i2 + 1;
                    for (int i4 = 0; i4 < entry; i4++) {
                        vector.add(Integer.valueOf(i3));
                    }
                }
            }
            this.corpus.add(vector);
        }
        this.documents = corpus2Documents(this.corpus);
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v2, types: [int[], int[][]] */
    public static int[][] corpus2Documents(Vector<Vector<Integer>> vector) {
        ?? r0 = new int[vector.size()];
        for (int i = 0; i < vector.size(); i++) {
            r0[i] = new int[vector.get(i).size()];
            for (int i2 = 0; i2 < vector.get(i).size(); i2++) {
                r0[i][i2] = vector.get(i).get(i2).intValue() - 1;
            }
        }
        return r0;
    }

    public static RealMatrix documents2Matrix(int[][] iArr) {
        if (iArr == null || iArr.length == 0) {
            System.err.println("Empty documents!");
            System.exit(1);
        }
        OpenMapRealMatrix openMapRealMatrix = new OpenMapRealMatrix(getVocabularySize(iArr), iArr.length);
        for (int i = 0; i < iArr.length; i++) {
            for (int i2 : iArr[i]) {
                openMapRealMatrix.setEntry(i2, i, openMapRealMatrix.getEntry(i2, i) + 1.0d);
            }
        }
        return openMapRealMatrix;
    }

    public static int getVocabularySize(int[][] iArr) {
        int i = 0;
        for (int i2 = 0; i2 < iArr.length; i2++) {
            for (int i3 = 0; i3 < iArr[i2].length; i3++) {
                if (i < iArr[i2][i3]) {
                    i = iArr[i2][i3];
                }
            }
        }
        return i + 1;
    }

    public static void setLDATermIndexStart(int i) {
        IdxStart = i;
    }
}
