package edu.mit.broad.genome.parsers;

import edu.mit.broad.genome.NotImplementedException;
import edu.mit.broad.genome.Version;
import edu.mit.broad.genome.objects.PersistentObject;
import edu.mit.broad.vdb.meg.Unigene;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.log4j.Priority;

/* compiled from: EIKM */
/* loaded from: input_file:edu/mit/broad/genome/parsers/UnigeneParser.class */
/**
 * Parser that reads a line-oriented text file into a {@link Unigene} object.
 *
 * <p>The format handled here, as implemented below: every record starts with a
 * line whose field name is {@code ID} and ends with a line that is exactly
 * {@code //}. Within a record each line starts with a field name followed by
 * its value. Only the {@code ID}, {@code GENE}, {@code TITLE},
 * {@code CYTOBAND}, {@code LOCUSLINK} and {@code SEQUENCE} fields are read;
 * lines carrying any other field name are ignored.
 */
public class UnigeneParser extends AbstractParser {
    private static final String ID = "ID";
    private static final String SYMBOL = "GENE";
    private static final String TITLE = "TITLE";
    private static final String CYTOBAND = "CYTOBAND";
    private static final String LOCUSLINK = "LOCUSLINK";
    private static final String SEQUENCE = "SEQUENCE";
    private static final String SEQTYPEMRNA = "SEQTYPE=mRNA";

    /** Marker line that terminates a record. */
    private static final String RECORD_END = "//";

    /** A progress message is printed each time this many records have been parsed. */
    private static final int PROGRESS_INTERVAL = 10000;

    public UnigeneParser() {
        super(Unigene.class);
    }

    /**
     * Export is not supported by this parser.
     *
     * @throws NotImplementedException always
     */
    @Override // edu.mit.broad.genome.parsers.Parser
    public final void export(PersistentObject persistentObject, File file) {
        throw new NotImplementedException();
    }

    /**
     * Parses the file located at {@code str}, keeping only accessions from
     * SEQUENCE lines that contain {@code SEQTYPE=mRNA}.
     *
     * <p>NOTE(review): {@code inputStream} is not read; the data is always
     * loaded from {@code new File(str)}. Confirm that callers never pass a
     * stream that is not backed by that path.
     *
     * @param str         path of the file to parse
     * @param inputStream unused
     * @return the result of passing the parsed {@link Unigene} to {@code unmodlist}
     */
    @Override // edu.mit.broad.genome.parsers.Parser
    public final List parse(String str, InputStream inputStream) {
        return unmodlist(parseUnigene(new File(str), true));
    }

    /**
     * Reads every record in {@code file} and wraps them in a {@link Unigene}.
     *
     * @param file the file to read
     * @param z    when {@code true}, SEQUENCE lines that do not contain
     *             {@code SEQTYPE=mRNA} contribute no accessions
     * @return a {@link Unigene} holding one record per {@code //}-terminated block
     * @throws IllegalArgumentException if a record does not begin with an ID line,
     *                                  or a recognised field line is malformed
     */
    public final Unigene parseUnigene(File file, boolean z) {
        BufferedReader reader = ParseUtils.buf(file);
        List<Unigene.Record> records = new ArrayList<Unigene.Record>();
        try {
            List<String> recordLines = null;
            boolean expectId = true;
            int lineNum = 0;
            for (String line = ParseUtils.nextLine(reader); line != null; line = ParseUtils.nextLine(reader)) {
                lineNum++;
                boolean startsRecord = hasFieldName(line, ID);
                if (expectId && !startsRecord) {
                    throw new IllegalArgumentException("Bad format pn line, expected : ID but got >" + line + "< > linenum: " + lineNum);
                }
                if (startsRecord) {
                    recordLines = new ArrayList<String>();
                    expectId = false;
                }
                if (line.equals(RECORD_END)) {
                    records.add(parseOneRecord(recordLines, z));
                    if (records.size() % PROGRESS_INTERVAL == 0) {
                        System.out.println("Done unigene record #: " + records.size());
                    }
                    // The line after a terminator must open a new record.
                    expectId = true;
                }
                if (recordLines != null) {
                    recordLines.add(line);
                }
            }
        } finally {
            // Release the file handle on both the normal and the failure path.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Read-only stream: a failed close cannot lose data, and rethrowing
                    // here would mask a parse error that is already propagating.
                }
            }
        }
        this.log.info("Total # of unigene records: " + records.size());
        return new Unigene(file, new Version("Oct2005"), records.toArray(new Unigene.Record[records.size()]));
    }

    /**
     * Tells whether {@code line} begins with the field name {@code name} as a
     * whole word, i.e. the name is not merely a prefix of a longer identifier
     * (so {@code GENE_ID ...} is not treated as a {@code GENE} line).
     */
    private static boolean hasFieldName(String line, String name) {
        if (!line.startsWith(name)) {
            return false;
        }
        if (line.length() == name.length()) {
            return true;
        }
        char next = line.charAt(name.length());
        return !Character.isLetterOrDigit(next) && next != '_';
    }

    /**
     * Builds one record from the lines collected for it.
     *
     * @param list lines of a single record, starting with its ID line
     * @param z    passed through to {@link #_parseAccessions}
     * @throws IllegalArgumentException if {@code list} is null or empty, or a
     *                                  single-valued field occurs more than once
     */
    private static Unigene.Record parseOneRecord(List<String> list, boolean z) {
        if (list == null || list.isEmpty()) {
            throw new IllegalArgumentException("Parameter lines cannot be null nor empty");
        }
        String id = null;
        String symbol = null;
        String cytoband = null;
        String title = null;
        String locusLink = null;
        Set<String> accessions = new HashSet<String>();
        for (String line : list) {
            // No field name is a prefix of another, so at most one branch applies.
            if (hasFieldName(line, ID)) {
                id = _parse(line, ID, id);
            } else if (hasFieldName(line, SYMBOL)) {
                symbol = _parse(line, SYMBOL, symbol);
            } else if (hasFieldName(line, CYTOBAND)) {
                cytoband = _parse(line, CYTOBAND, cytoband);
            } else if (hasFieldName(line, TITLE)) {
                title = _parse(line, TITLE, title);
            } else if (hasFieldName(line, LOCUSLINK)) {
                locusLink = _parse(line, LOCUSLINK, locusLink);
            } else if (hasFieldName(line, SEQUENCE)) {
                _parseAccessions(line, accessions, z);
            }
        }
        return new Unigene.Record(id, symbol, title, cytoband, locusLink, removeAccessionExtensions(accessions));
    }

    /**
     * Extracts the value of a single-valued field line.
     *
     * @param str  the full line
     * @param str2 the expected field name (first space-delimited token)
     * @param str3 the value already parsed for this field in the current record;
     *             must be {@code null}
     * @return the remaining space-delimited tokens joined by single spaces
     * @throws IllegalArgumentException if the line does not carry the expected
     *                                  field name or the field was already set
     */
    private static String _parse(String str, String str2, String str3) {
        if (!str.startsWith(str2)) {
            throw new IllegalArgumentException("Does not start with startsWith: " + str2);
        }
        if (str3 != null) {
            throw new IllegalArgumentException("Current value is not null for field " + str2);
        }
        StringTokenizer tokens = new StringTokenizer(str, " ");
        if (!tokens.nextToken().trim().equals(str2)) {
            throw new IllegalArgumentException("Does not start with startsWith: " + str2);
        }
        StringBuilder value = new StringBuilder();
        while (tokens.hasMoreTokens()) {
            value.append(tokens.nextToken());
            if (tokens.hasMoreTokens()) {
                value.append(" ");
            }
        }
        return value.toString().trim();
    }

    /**
     * Returns a new set holding each accession cut at its first {@code '.'}.
     *
     * @throws ParserException if an accession does not split into one or two
     *                         {@code '.'}-delimited tokens
     */
    private static Set<String> removeAccessionExtensions(Set<String> set) {
        Set<String> stripped = new HashSet<String>();
        Iterator<String> it = set.iterator();
        while (it.hasNext()) {
            String accession = it.next();
            StringTokenizer parts = new StringTokenizer(accession, ".");
            int numParts = parts.countTokens();
            if (numParts != 1 && numParts != 2) {
                throw new ParserException("Bad accession format: " + accession + " invalid num tokens: " + numParts);
            }
            stripped.add(parts.nextToken().trim());
        }
        return stripped;
    }

    /**
     * Adds the value of every {@code ACC=} token on a SEQUENCE line to {@code set}.
     *
     * @param str the full SEQUENCE line
     * @param set receives the accession values
     * @param z   when {@code true}, the line is skipped unless it contains
     *            {@code SEQTYPE=mRNA}
     * @throws IllegalArgumentException if the line is not a SEQUENCE line
     * @throws ParserException          if an ACC token is not of the form {@code ACC=value}
     */
    private static void _parseAccessions(String str, Set<String> set, boolean z) {
        if (!str.startsWith(SEQUENCE)) {
            throw new IllegalArgumentException("Does not start with startsWith: SEQUENCE");
        }
        if (z && str.indexOf(SEQTYPEMRNA) == -1) {
            return;
        }
        StringTokenizer tokens = new StringTokenizer(str, " ;");
        if (!tokens.nextToken().trim().equals(SEQUENCE)) {
            throw new IllegalArgumentException("Does not start with startsWith: SEQUENCE");
        }
        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();
            if (token.startsWith("ACC")) {
                StringTokenizer keyValue = new StringTokenizer(token, "=");
                if (keyValue.countTokens() != 2) {
                    throw new ParserException("Bad format on acc line: " + token);
                }
                keyValue.nextToken();
                set.add(keyValue.nextToken().trim());
            }
        }
    }
}
