package edu.mit.broad.genome.parsers;

import com.jidesoft.filter.Filter;
import edu.mit.broad.genome.Constants;
import edu.mit.broad.genome.Errors;
import edu.mit.broad.genome.NamingConventions;
import edu.mit.broad.genome.NotImplementedException;
import edu.mit.broad.genome.Version;
import edu.mit.broad.genome.objects.PersistentObject;
import edu.mit.broad.vdb.Organism;
import edu.mit.broad.vdb.meg.EntrezGene;
import edu.mit.broad.vdb.meg.EntrezGeneDb;
import edu.mit.broad.vdb.meg.EntrezGeneImpl;
import edu.mit.broad.vdb.meg.GeneDb;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

/* compiled from: EIKM */
/* loaded from: input_file:edu/mit/broad/genome/parsers/EntrezGeneDbParser.class */
public class EntrezGeneDbParser extends AbstractParser {
    private static Set kValid_gene_types;

    /* compiled from: EIKM */
    /* loaded from: input_file:edu/mit/broad/genome/parsers/EntrezGeneDbParser$EntrezFileSet.class */
    /**
     * Locates the four Entrez Gene dump files inside one base directory and remembers
     * the organisms supplied for that directory.
     *
     * <p>The files looked up are {@code gene_info}, {@code gene2accession},
     * {@code gene2refseq} and {@code gene2unigene}, all resolved directly under the
     * base directory.
     */
    public class EntrezFileSet {
        private final File fBaseDir;
        private final File geneInfoFile;
        private final File gene2accessionFile;
        private final File gene2refseqFile;
        private final File gene2unigeneFile;
        private final Organism[] fOrgs;

        /**
         * Convenience constructor for a single organism.
         *
         * @param file     base directory holding the Entrez files
         * @param organism the only organism of interest
         */
        public EntrezFileSet(File file, Organism organism) {
            this(file, new Organism[]{organism});
        }

        /**
         * Resolves the four expected files under {@code file} and reports every one that
         * is missing.
         *
         * @param file        base directory holding the Entrez files
         * @param organismArr organisms of interest; stored as given (not copied)
         * @throws IllegalArgumentException if {@code file} is null or does not exist
         */
        public EntrezFileSet(File file, Organism[] organismArr) {
            if (file == null || !file.exists()) {
                throw new IllegalArgumentException("Does not exist or null: " + file);
            }
            this.fBaseDir = file;
            this.fOrgs = organismArr;

            Errors missing = new Errors();
            this.geneInfoFile = new File(this.fBaseDir, "gene_info");
            this.gene2accessionFile = new File(this.fBaseDir, "gene2accession");
            this.gene2refseqFile = new File(this.fBaseDir, "gene2refseq");
            this.gene2unigeneFile = new File(this.fBaseDir, "gene2unigene");

            // Checked in a fixed order so the collected paths are reported in that order.
            File[] required = {
                this.geneInfoFile,
                this.gene2accessionFile,
                this.gene2refseqFile,
                this.gene2unigeneFile
            };
            for (File candidate : required) {
                if (!candidate.exists()) {
                    missing.add(candidate.getPath());
                }
            }
            // NOTE(review): presumably throws when at least one path was collected — confirm in Errors.
            missing.barfIfNotEmpty();
        }

        /** @return the base directory passed to the constructor */
        public final File getBaseDir() {
            return this.fBaseDir;
        }

        /** @return the organism array passed to the constructor (the same instance, not a copy) */
        public final Organism[] getOrganisms() {
            return this.fOrgs;
        }
    }

    /**
     * Creates a parser, passing {@code GeneDb.class} to the {@code AbstractParser}
     * constructor.
     */
    public EntrezGeneDbParser() {
        super(GeneDb.class);
    }

    /**
     * Not supported by this parser.
     *
     * @param persistentObject ignored
     * @param file             ignored
     * @throws NotImplementedException always
     */
    @Override // edu.mit.broad.genome.parsers.Parser
    public final void export(PersistentObject persistentObject, File file) {
        throw new NotImplementedException();
    }

    /**
     * Not supported by this parser; use the {@code parseEntrezDb} family of methods
     * in this class instead.
     *
     * @param str         ignored
     * @param inputStream ignored
     * @return never returns normally
     * @throws NotImplementedException always
     */
    @Override // edu.mit.broad.genome.parsers.Parser
    public final List parse(String str, InputStream inputStream) {
        throw new NotImplementedException();
    }

    /**
     * Returns the gene-type strings accepted when filtering gene_info rows.
     *
     * <p>The set is created on first use and cached in a static field. It is built in a
     * local variable and only assigned to the field once it is fully populated and
     * wrapped, so the field never refers to a mutable or partially filled set.
     *
     * @return an unmodifiable set containing {@code protein-coding}, {@code pseudo},
     *         {@code other}, {@code miscRNA}, {@code unknown}, {@code snoRNA} and
     *         {@code snRNA}
     */
    public static final Set getValidGeneTypes() {
        // Read the field once so the null check and the return use the same value.
        Set cached = kValid_gene_types;
        if (cached == null) {
            Set<String> types = new HashSet<String>(Arrays.asList(
                    "protein-coding",
                    "pseudo",
                    "other",
                    "miscRNA",
                    "unknown",
                    "snoRNA",
                    "snRNA"));
            cached = Collections.unmodifiableSet(types);
            kValid_gene_types = cached;
        }
        return cached;
    }

    /**
     * Parses the Entrez files found under {@code file} for human only.
     *
     * @param file base directory containing the Entrez dump files
     * @param z    not read by this method
     * @return the parsed gene database
     */
    public final EntrezGeneDb parseEntrezDb(File file, boolean z) {
        Organism[] humanOnly = {Organism.HUMAN};
        EntrezFileSet fileSet = new EntrezFileSet(file, humanOnly);
        return parseEntrezDb(fileSet, true);
    }

    /**
     * Delegates to {@code parseEntrezDb(EntrezFileSet, Organism[])} with a
     * single-element array holding {@code Organism.HUMAN}.
     *
     * @param entrezFileSet the files to parse
     * @param z             not read by this method
     * @return the parsed gene database
     */
    public final EntrezGeneDb parseEntrezDb(EntrezFileSet entrezFileSet, boolean z) {
        Organism[] humanOnly = {Organism.HUMAN};
        return parseEntrezDb(entrezFileSet, humanOnly);
    }

    /**
     * Parses all four files of an {@link EntrezFileSet}: the gene_info file into a gene
     * database, and the accession, RefSeq and UniGene files into gene-id/accession sets
     * that are concatenated (in that order) and handed to the database.
     *
     * <p>The organisms used for filtering come from {@code entrezFileSet.getOrganisms()}.
     *
     * @param entrezFileSet the files to parse
     * @param organismArr   not read by this method
     * @return the database produced from the gene_info file
     */
    public final EntrezGeneDb parseEntrezDb(EntrezFileSet entrezFileSet, Organism[] organismArr) {
        EntrezGeneDbParser geneInfoParser = new EntrezGeneDbParser();
        EntrezGeneDb geneDb = geneInfoParser.parseEntrezDb_only_gene_info(
                entrezFileSet.geneInfoFile, entrezFileSet.getOrganisms());

        EntrezGene.GeneIdAccessionSet[] accessionSets =
                parseEntrezGeneAccessions(entrezFileSet.gene2accessionFile, entrezFileSet.getOrganisms());
        EntrezGene.GeneIdAccessionSet[] refSeqSets =
                parseEntrezGeneRefSeq(entrezFileSet.gene2refseqFile, entrezFileSet.getOrganisms());
        EntrezGene.GeneIdAccessionSet[] unigeneSets =
                parseEntrezGeneUnigene(entrezFileSet.gene2unigeneFile, entrezFileSet.getOrganisms());

        // Concatenate the three arrays: accessions, then RefSeq, then UniGene.
        int total = accessionSets.length + refSeqSets.length + unigeneSets.length;
        EntrezGene.GeneIdAccessionSet[] allSets = new EntrezGene.GeneIdAccessionSet[total];
        System.arraycopy(accessionSets, 0, allSets, 0, accessionSets.length);
        System.arraycopy(refSeqSets, 0, allSets, accessionSets.length, refSeqSets.length);
        System.arraycopy(unigeneSets, 0, allSets, accessionSets.length + refSeqSets.length, unigeneSets.length);

        this.log.info("sets_acc: " + accessionSets.length + " sets_refseq: " + refSeqSets.length + " sets_unigene: " + unigeneSets.length + " all: " + allSets.length);

        // NOTE(review): any value returned by cloneShallow is discarded here — confirm
        // that it updates geneDb in place rather than returning a new database.
        geneDb.cloneShallow(EntrezGene.Helper.toMap(allSets));
        return geneDb;
    }

    /**
     * Parses a gene_info file for human only.
     *
     * @param file the gene_info file
     * @param z    not read by this method
     * @return the parsed gene database
     */
    public final EntrezGeneDb parseEntrezDb_only_gene_info(File file, boolean z) {
        Organism[] humanOnly = {Organism.HUMAN};
        return parseEntrezDb_only_gene_info(file, humanOnly);
    }

    /**
     * Parses a gene_info file into an {@code EntrezGeneDb}, keeping only rows whose tax
     * id (field 0) belongs to one of the given organisms and whose gene type (field 9)
     * is in {@link #getValidGeneTypes()}.
     *
     * <p>Each line is split on {@code Filter.SEPARATOR}. The reader is closed even when
     * a line fails to parse.
     *
     * @param file        the gene_info file
     * @param organismArr organisms whose tax ids are accepted
     * @return a database (version 1) holding one gene per accepted row
     * @throws NumberFormatException          if the gene id or tax id of an accepted row
     *                                        is not an integer
     * @throws ArrayIndexOutOfBoundsException if a line has fewer fields than are read
     */
    public final EntrezGeneDb parseEntrezDb_only_gene_info(File file, Organism[] organismArr) {
        ArrayList genes = new ArrayList();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            // The first line returned is discarded.
            // NOTE(review): presumably a header row — confirm against nextLineTrimless,
            // since the sibling parsers do not skip a line.
            nextLineTrimless(reader);
            String line = nextLineTrimless(reader);
            Set taxIds = Organism.getTaxIds(organismArr);
            this.log.debug("Allowed tax ids: " + taxIds);
            Set validGeneTypes = getValidGeneTypes();
            int lineCount = 0;
            while (line != null) {
                String[] fields = ParseUtils.string2strings(line, Filter.SEPARATOR, false);
                String taxId = fields[0];
                String geneType = fields[9];
                if (taxIds.contains(taxId) && validGeneTypes.contains(geneType)) {
                    // Local names follow the NCBI gene_info column layout — assumed, TODO confirm
                    // against the EntrezGeneImpl constructor.
                    String geneId = fields[1];
                    String symbol = fields[2];
                    String description = fields[8];
                    String mapLocation = fields[7];
                    String synonyms = fields[4];
                    String nomenclatureStatus = fields[12];
                    Set aliases = ParseUtils.string2stringsSet(synonyms, "|", false);
                    // A hyphen is a placeholder, not a real alias.
                    aliases.remove(Constants.HYPHEN);
                    genes.add(new EntrezGeneImpl(Integer.parseInt(geneId), Integer.parseInt(taxId), symbol, description, aliases, mapLocation, null, nomenclatureStatus));
                }
                line = nextLineTrimless(reader);
                lineCount++;
                if (lineCount % 100000 == 0) {
                    System.out.println("Done entrez line: " + lineCount + " #=" + genes.size());
                }
            }
        } finally {
            reader.close();
        }
        return new EntrezGeneDb(file, new Version(1), (EntrezGene[]) genes.toArray(new EntrezGene[genes.size()]));
    }

    /**
     * Parses a gene2accession file and groups accessions by gene id.
     *
     * <p>Each line is split on {@code Filter.SEPARATOR}; field 0 is read as the tax id,
     * field 1 as the gene id and field 3 as the accession. A row is kept when its tax id
     * belongs to one of the given organisms and its accession is neither empty nor a
     * hyphen. The accession is stored after {@code NamingConventions.removeExtension}.
     * The reader is closed even when a line fails to parse.
     *
     * @param file        the gene2accession file
     * @param organismArr organisms whose tax ids are accepted
     * @return one accession set per gene id that had at least one accepted row
     * @throws NumberFormatException if field 0 or field 1 of any line is not a long
     */
    public final EntrezGene.GeneIdAccessionSet[] parseEntrezGeneAccessions(File file, Organism[] organismArr) {
        HashMap accessionsByGeneId = new HashMap();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            String line = nextLineTrimless(reader);
            Set taxIds = Organism.getTaxIds(organismArr);
            this.log.debug("Allowed tax ids: " + taxIds);
            int lineCount = 0;
            while (line != null) {
                String[] fields = ParseUtils.string2strings(line, Filter.SEPARATOR, false);
                long taxId = Long.parseLong(fields[0]);
                // Round-trip through long so the map key is the canonical decimal form.
                String geneId = Long.parseLong(fields[1]) + "";
                String accession = fields[3];
                boolean accepted = taxIds.contains("" + taxId)
                        && accession != null
                        && accession.length() != 0
                        && !accession.equals(Constants.HYPHEN);
                if (accepted) {
                    String bareAccession = NamingConventions.removeExtension(accession);
                    Set accessions = (Set) accessionsByGeneId.get(geneId);
                    if (accessions == null) {
                        accessions = new HashSet();
                        accessionsByGeneId.put(geneId, accessions);
                    }
                    accessions.add(bareAccession);
                }
                line = nextLineTrimless(reader);
                lineCount++;
                if (lineCount % 100000 == 0) {
                    System.out.println("Done entrez gene accessions line: " + lineCount + " #=" + accessionsByGeneId.size());
                }
            }
        } finally {
            reader.close();
        }
        EntrezGene.GeneIdAccessionSet[] result = new EntrezGene.GeneIdAccessionSet[accessionsByGeneId.size()];
        int next = 0;
        for (Iterator it = accessionsByGeneId.keySet().iterator(); it.hasNext(); ) {
            String geneId = it.next().toString();
            result[next++] = new EntrezGene.GeneIdAccessionSet(Long.parseLong(geneId), (Set) accessionsByGeneId.get(geneId));
        }
        this.log.info("Made SymbolAccessions: " + result.length);
        return result;
    }

    /**
     * Parses a gene2refseq file and groups RefSeq accessions by gene id.
     *
     * <p>Each line is split on {@code Filter.SEPARATOR}; field 0 is read as the tax id,
     * field 1 as the gene id and field 3 as the accession. A row is kept when its tax id
     * belongs to one of the given organisms and its accession is neither empty nor a
     * hyphen. The accession is stored after {@code NamingConventions.removeExtension}.
     * The reader is closed even when a line fails to parse.
     *
     * @param file        the gene2refseq file
     * @param organismArr organisms whose tax ids are accepted
     * @return one accession set per gene id that had at least one accepted row
     * @throws NumberFormatException if field 0 or field 1 of any line is not a long
     */
    public final EntrezGene.GeneIdAccessionSet[] parseEntrezGeneRefSeq(File file, Organism[] organismArr) {
        HashMap refSeqsByGeneId = new HashMap();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            String line = nextLineTrimless(reader);
            Set taxIds = Organism.getTaxIds(organismArr);
            this.log.debug("Allowed tax ids: " + taxIds);
            int lineCount = 0;
            while (line != null) {
                String[] fields = ParseUtils.string2strings(line, Filter.SEPARATOR, false);
                long taxId = Long.parseLong(fields[0]);
                // Round-trip through long so the map key is the canonical decimal form.
                String geneId = Long.parseLong(fields[1]) + "";
                String refSeq = fields[3];
                boolean accepted = taxIds.contains(taxId + "")
                        && refSeq != null
                        && refSeq.length() != 0
                        && !refSeq.equals(Constants.HYPHEN);
                if (accepted) {
                    String bareRefSeq = NamingConventions.removeExtension(refSeq);
                    Set refSeqs = (Set) refSeqsByGeneId.get(geneId);
                    if (refSeqs == null) {
                        refSeqs = new HashSet();
                        refSeqsByGeneId.put(geneId, refSeqs);
                    }
                    refSeqs.add(bareRefSeq);
                }
                line = nextLineTrimless(reader);
                lineCount++;
                if (lineCount % 100000 == 0) {
                    System.out.println("Done entrez gene refseqs line: " + lineCount + " #=" + refSeqsByGeneId.size());
                }
            }
        } finally {
            reader.close();
        }
        EntrezGene.GeneIdAccessionSet[] result = new EntrezGene.GeneIdAccessionSet[refSeqsByGeneId.size()];
        int next = 0;
        for (Iterator it = refSeqsByGeneId.keySet().iterator(); it.hasNext(); ) {
            String geneId = it.next().toString();
            result[next++] = new EntrezGene.GeneIdAccessionSet(Long.parseLong(geneId), (Set) refSeqsByGeneId.get(geneId));
        }
        this.log.info("Made GeneRefSeq: " + result.length);
        return result;
    }

    /**
     * Parses a gene2unigene file and groups UniGene ids by gene id.
     *
     * <p>Each line is split on {@code Filter.SEPARATOR} and must yield exactly two
     * fields: the gene id and the UniGene id. A row is kept when the UniGene id is
     * neither empty nor a hyphen and, after {@code NamingConventions.removeExtension},
     * matches one of the UniGene codes of the given organisms. The full UniGene id is
     * what gets stored.
     *
     * <p>Every line advances the reader, including rows with an empty or hyphen UniGene
     * id; previously such a row was re-examined forever. The reader is closed even when
     * a line fails to parse.
     *
     * @param file        the gene2unigene file
     * @param organismArr organisms whose UniGene codes are accepted
     * @return one id set per gene id that had at least one accepted row
     * @throws ParserException       if a line does not split into exactly two fields
     * @throws NumberFormatException if the gene id of any line is not a long
     */
    public final EntrezGene.GeneIdAccessionSet[] parseEntrezGeneUnigene(File file, Organism[] organismArr) {
        HashMap unigeneIdsByGeneId = new HashMap();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            String line = nextLineTrimless(reader);
            Set unigeneCodes = Organism.getUnigeneCodes(organismArr);
            this.log.debug("Allowed unigene codes: " + unigeneCodes);
            int lineCount = 0;
            while (line != null) {
                String[] fields = ParseUtils.string2strings(line, Filter.SEPARATOR, false);
                if (fields.length != 2) {
                    throw new ParserException("Expected 2 fields got: " + fields.length + " line >" + line + "<");
                }
                long geneId = Long.parseLong(fields[0]);
                String unigeneId = fields[1];
                boolean usable = unigeneId != null
                        && unigeneId.length() != 0
                        && !unigeneId.equals(Constants.HYPHEN);
                if (usable && unigeneCodes.contains(NamingConventions.removeExtension(unigeneId))) {
                    String key = "" + geneId;
                    Set unigeneIds = (Set) unigeneIdsByGeneId.get(key);
                    if (unigeneIds == null) {
                        unigeneIds = new HashSet();
                        unigeneIdsByGeneId.put(key, unigeneIds);
                    }
                    unigeneIds.add(unigeneId);
                }
                // Advance unconditionally so skipped rows cannot stall the loop.
                line = nextLineTrimless(reader);
                lineCount++;
                if (lineCount % 25000 == 0) {
                    System.out.println("Done entrez gene unigene line: " + lineCount + " #=" + unigeneIdsByGeneId.size());
                }
            }
        } finally {
            reader.close();
        }
        EntrezGene.GeneIdAccessionSet[] result = new EntrezGene.GeneIdAccessionSet[unigeneIdsByGeneId.size()];
        int next = 0;
        for (Iterator it = unigeneIdsByGeneId.keySet().iterator(); it.hasNext(); ) {
            String key = it.next().toString();
            result[next++] = new EntrezGene.GeneIdAccessionSet(Long.parseLong(key), (Set) unigeneIdsByGeneId.get(key));
        }
        this.log.info("Made GeneUnigene: " + result.length);
        return result;
    }
}
