/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.ByteArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.ShortArrayList;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.carrot2.core.Document;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.Init;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.util.MutableCharArray;
import org.carrot2.util.CharArrayUtils;
import org.carrot2.util.ExceptionUtils;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;

/**
 * Splits the configured fields of every document in a {@link PreprocessingContext}
 * into tokens and publishes the result as parallel arrays in
 * {@code PreprocessingContext.allTokens} (document index, field index, image, type)
 * and {@code PreprocessingContext.allFields.name}.
 *
 * <p>The token lists are accumulated in instance fields for the duration of a
 * {@link #tokenize(PreprocessingContext)} call, so a single instance must not be
 * used by several threads at the same time.
 */
@Bindable(prefix="Tokenizer")
public final class Tokenizer {
    /** Largest number of entries {@link #documentFields} may contain. */
    private static final int MAX_FIELDS = 8;

    /** Value returned by {@code ITokenizer.nextToken()} when the input is exhausted. */
    private static final short END_OF_INPUT = -1;

    // NOTE(review): the three type codes below look like inlined ITokenizer flag
    // constants -- confirm against ITokenizer and reference them directly if so.
    /** Type code stored for the marker emitted between two documents. */
    private static final short TYPE_DOCUMENT_SEPARATOR = 512;

    /** Type code stored for the marker emitted between two fields of one document. */
    private static final short TYPE_FIELD_SEPARATOR = 1024;

    /** Type code stored for the marker that ends the whole token sequence. */
    private static final short TYPE_TERMINATOR = 2048;

    /** Index value stored for markers that do not belong to a document or a field. */
    private static final int NO_INDEX = -1;

    /**
     * Names of the document fields to tokenize, in the order in which they are
     * processed. At most {@value #MAX_FIELDS} fields are accepted.
     */
    @Init
    @Input
    @Attribute
    @Label(value="Document fields")
    @Level(value=AttributeLevel.ADVANCED)
    @Group(value="Preprocessing")
    public Collection<String> documentFields = Arrays.asList("title", "snippet");

    /** Token images collected so far; {@code null} entries belong to separator markers. */
    private List<char[]> images;

    /** Type code of each collected token. */
    private ShortArrayList tokenTypes;

    /** Index of the document each collected token came from. */
    private IntArrayList documentIndices;

    /** Index (into the field name array) of the field each collected token came from. */
    private ByteArrayList fieldIndices;

    /**
     * Tokenizes all documents of the given context and stores the resulting token
     * arrays and the field name array in the context.
     *
     * <p>For every document the configured fields are read in order; fields that are
     * empty or produce no tokens are skipped. A field separator is inserted before the
     * tokens of a field when an earlier field of the same document already produced
     * tokens, a document separator is inserted after every document except the last
     * one, and a terminator is appended at the very end.
     *
     * @param preprocessingContext context providing the documents and the language
     *        tokenizer, and receiving the results
     * @throws ProcessingException if more than {@value #MAX_FIELDS} fields are configured
     * @throws RuntimeException wrapping any {@link IOException} raised by the tokenizer
     */
    public void tokenize(PreprocessingContext preprocessingContext) {
        final List<Document> documents = preprocessingContext.documents;
        final String[] fieldNames = this.documentFields.toArray(new String[this.documentFields.size()]);
        if (fieldNames.length > MAX_FIELDS) {
            throw new ProcessingException("Maximum number of tokenized fields is " + MAX_FIELDS + ".");
        }

        this.images = Lists.newArrayList();
        this.tokenTypes = new ShortArrayList();
        this.documentIndices = new IntArrayList();
        this.fieldIndices = new ByteArrayList();
        try {
            final Iterator<Document> documentIterator = documents.iterator();
            int documentIndex = 0;
            final ITokenizer tokenizer = preprocessingContext.language.getTokenizer();
            // Reused for every token; the tokenizer points it at the current token's characters.
            final MutableCharArray termBuffer = new MutableCharArray(CharArrayUtils.EMPTY_ARRAY);

            while (documentIterator.hasNext()) {
                final Document document = documentIterator.next();
                // True once any field of the current document has contributed a token.
                boolean documentHasTokens = false;

                for (int i = 0; i < fieldNames.length; i++) {
                    final byte fieldIndex = (byte)i;
                    final String fieldValue = (String)document.getField(fieldNames[i]);
                    if (StringUtils.isEmpty(fieldValue)) {
                        continue;
                    }
                    try {
                        tokenizer.reset(new StringReader(fieldValue));
                        short tokenType = tokenizer.nextToken();
                        if (tokenType == END_OF_INPUT) {
                            // Non-empty text that yields no tokens: nothing to record.
                            continue;
                        }
                        // Separate from the previous field only when both sides have tokens.
                        if (documentHasTokens) {
                            this.addFieldSeparator(documentIndex);
                        }
                        do {
                            tokenizer.setTermBuffer(termBuffer);
                            this.add(documentIndex, fieldIndex, preprocessingContext.intern(termBuffer), tokenType);
                            tokenType = tokenizer.nextToken();
                        } while (tokenType != END_OF_INPUT);
                        documentHasTokens = true;
                    }
                    catch (IOException e) {
                        throw ExceptionUtils.wrapAsRuntimeException((Throwable)e);
                    }
                }

                // No separator after the last document; the terminator follows it instead.
                if (documentIterator.hasNext()) {
                    this.addDocumentSeparator();
                }
                ++documentIndex;
            }
            this.addTerminator();

            preprocessingContext.allTokens.documentIndex = this.documentIndices.toArray();
            preprocessingContext.allTokens.fieldIndex = this.fieldIndices.toArray();
            preprocessingContext.allTokens.image = this.images.toArray(new char[this.images.size()][]);
            preprocessingContext.allTokens.type = this.tokenTypes.toArray();
            preprocessingContext.allFields.name = fieldNames;
        }
        finally {
            // Drop the scratch buffers even when tokenization fails, so a failed run
            // does not keep its partial token lists reachable from this instance.
            this.images = null;
            this.fieldIndices = null;
            this.tokenTypes = null;
            this.documentIndices = null;
        }
    }

    /**
     * Appends the marker that ends the whole token sequence. Like all {@code add*}
     * methods, this may only be called while {@link #tokenize(PreprocessingContext)}
     * is running, because the buffers are {@code null} otherwise.
     */
    void addTerminator() {
        this.add(NO_INDEX, (byte)NO_INDEX, null, TYPE_TERMINATOR);
    }

    /**
     * Appends the marker that separates two documents.
     */
    void addDocumentSeparator() {
        this.add(NO_INDEX, (byte)NO_INDEX, null, TYPE_DOCUMENT_SEPARATOR);
    }

    /**
     * Appends the marker that separates two fields of the same document.
     *
     * @param n index of the document the fields belong to
     */
    void addFieldSeparator(int n) {
        this.add(n, (byte)NO_INDEX, null, TYPE_FIELD_SEPARATOR);
    }

    /**
     * Appends a sentence separator marker attributed to the given document and field.
     *
     * <p>NOTE(review): this records the same type code as
     * {@link #addFieldSeparator(int)}; confirm whether a dedicated sentence separator
     * code was intended. The method is not called from within this class.
     *
     * @param n index of the document the sentence belongs to
     * @param by index of the field the sentence belongs to
     */
    void addSentenceSeparator(int n, byte by) {
        this.add(n, by, null, TYPE_FIELD_SEPARATOR);
    }

    /**
     * Appends one entry to each of the four parallel token lists.
     *
     * @param n document index of the token, or {@code -1} for markers without a document
     * @param by field index of the token, or {@code -1} for markers without a field
     * @param cArray token image, or {@code null} for separator and terminator markers
     * @param s token type code
     */
    void add(int n, byte by, char[] cArray, short s) {
        this.documentIndices.add(n);
        this.fieldIndices.add(by);
        this.images.add(cArray);
        this.tokenTypes.add(s);
    }
}

