package joelib2.algo.datamining.weka;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;
import joelib2.feature.NativeValue;
import joelib2.feature.data.MoleculeCache;
import joelib2.io.IOType;
import joelib2.molecule.BasicMoleculeVector;
import joelib2.molecule.Molecule;
import joelib2.molecule.MoleculeVector;
import joelib2.molecule.types.PairData;
import joelib2.process.types.DescriptorBinning;
import joelib2.process.types.DescriptorStatistic;
import joelib2.util.BasicMoleculeCacheHolder;
import org.apache.log4j.Category;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instances;
import wsi.ra.tool.ArrayBinning;
import wsi.ra.tool.ArrayStatistic;

/* loaded from: input_file:lib/joelib2.jar:joelib2/algo/datamining/weka/MolInstancesCache.class */
public class MolInstancesCache extends Instances implements MoleculeCache {
    /** Serialization version identifier. */
    private static final long serialVersionUID = 1;
    /** Log4j category named after this class. */
    private static Category logger = Category.getInstance(MolInstancesCache.class.getName());
    /** File extension constant; not referenced anywhere else in this class. */
    private static final String FILE_EXT = ".molcache";
    /** Class attribute name used by the constructor and loadMatrix overloads that do not take one. */
    private static final String DEFAULT_CLASS_ATTRIBUTE = "CLASS_ATTRIBUTE";
    /** Descriptor name -> ArrayBinning, filled by getBinning(int, boolean). */
    private Hashtable binning;
    /** Name of the attribute that update(FastVector) installs as the class attribute. */
    private String classAttributeName;
    /** Copied from the other cache in clone(MoleculeCache); not read anywhere else in this class. */
    private List desc2ignore;
    /** Identifier value found for the most recently processed molecule (written by getInstanceFor and buildIDs). */
    private String IdentifierValue;
    /** Descriptor key whose value identifies a molecule; set through setMolIdentifier. */
    private String moleculeIdentifier;
    /** Molecule container; setMoleculeDescriptors appends to it. */
    private MoleculeVector molecules;
    /** Identifier value -> Integer index, consulted by getDescFromMolByIdentifier. */
    private Hashtable molIDsIndex;
    /** Molecule title -> Integer instance index, consulted by getDescFromMolByName. */
    private Hashtable molNamesIndex;
    /** Descriptor statistic returned by getStatistic and used for normalization and binning. */
    private DescriptorStatistic statistic;

    /**
     * Creates an empty cache: the underlying {@code Instances} is named "Cache"
     * and starts with an empty attribute vector and capacity 0; the molecule
     * vector, the statistic and both lookup tables start out empty as well.
     */
    public MolInstancesCache() {
        super("Cache", new FastVector(), 0);
        this.molecules = new BasicMoleculeVector();
        this.statistic = new DescriptorStatistic();
        // Title -> index and identifier -> index lookups.
        this.molNamesIndex = new Hashtable();
        this.molIDsIndex = new Hashtable();
    }

    /**
     * Creates a cache and loads the descriptor matrix for the given file by
     * delegating to the three-argument constructor with the flag set to
     * {@code true} (the flag is passed on to loadMatrix as its caching switch).
     *
     * @param iOType input type, handed on unchanged
     * @param str    name of the file to load
     */
    public MolInstancesCache(IOType iOType, String str) {
        this(iOType, str, true);
    }

    /**
     * Creates a cache and loads the descriptor matrix for the given file using
     * the default class attribute name.
     *
     * @param iOType input type, handed on unchanged
     * @param str    name of the file to load
     * @param z      caching switch forwarded to loadMatrix; previously this
     *               argument was ignored and {@code true} was always used
     */
    public MolInstancesCache(IOType iOType, String str, boolean z) {
        this(iOType, str, z, DEFAULT_CLASS_ATTRIBUTE);
    }

    /**
     * Creates a cache and loads the descriptor matrix for the given file.
     * Loading problems are logged, never thrown; the cache is then left in
     * whatever state loadMatrix reached.
     *
     * @param iOType input type, handed on to loadMatrix
     * @param str    name of the file to load
     * @param z      caching switch forwarded to loadMatrix
     * @param str2   name of the class attribute
     */
    public MolInstancesCache(IOType iOType, String str, boolean z, String str2) {
        this();
        try {
            if (!loadMatrix(iOType, str, z, str2)) {
                logger.error("Error while loading Matrix from File " + str);
            }
        } catch (Exception e) {
            // Keep the cause in the log; without it the failure cannot be diagnosed.
            logger.error("Error while loading Matrix from File " + str, e);
        }
    }

    /**
     * Copy constructor: hands the other cache to the {@code Instances} copy
     * constructor and starts with an empty molecule vector and a fresh statistic.
     *
     * The lookup tables are created here as well; they used to stay
     * {@code null}, which made setMoleculeDescriptors and the
     * getDescFromMolBy... lookups fail with a NullPointerException on a
     * cache built through this constructor.
     *
     * @param molInstancesCache cache whose instances and header are copied
     */
    private MolInstancesCache(MolInstancesCache molInstancesCache) {
        super(molInstancesCache);
        this.molecules = new BasicMoleculeVector();
        this.statistic = new DescriptorStatistic();
        this.molIDsIndex = new Hashtable();
        this.molNamesIndex = new Hashtable();
    }

    private MolInstancesCache(MoleculeVector moleculeVector, Instances instances) {
        super(instances);
        this.molecules = moleculeVector;
        this.statistic = DescriptorStatistic.getDescStatistic(moleculeVector);
        this.molNamesIndex = new Hashtable();
        this.molIDsIndex = new Hashtable();
        buildIDs();
        for (int i = 0; i < moleculeVector.getSize(); i++) {
            this.molNamesIndex.put(moleculeVector.getMol(i).getTitle(), new Integer(i));
        }
    }

    /**
     * Applies variance normalization to every numeric descriptor column.
     *
     * For each descriptor the statistic is taken from the given
     * {@code descriptorStatistic}; if it has none, the statistic of this data
     * set is used (with a warning); if neither exists the descriptor is
     * skipped (with a warning).
     *
     * The previous implementation addressed {@code instance(i)} with the
     * descriptor index and only overwrote the class value, so it neither
     * normalized the descriptor columns nor stayed within bounds when there
     * were more descriptors than instances. Now each value of column
     * {@code col} is replaced in every instance.
     *
     * @param descriptorStatistic statistic to normalize with
     * @return always {@code true}
     */
    @Override // joelib2.feature.data.MoleculeCache
    public boolean calcVarianceNorm(DescriptorStatistic descriptorStatistic) {
        String[] names = getNames();
        int instanceCount = numInstances();
        for (int col = 0; col < names.length; col++) {
            if (attribute(col).isNominal()) {
                // Nominal values are stored as label indices; scaling them would corrupt the labels.
                continue;
            }
            ArrayStatistic stat = descriptorStatistic.getDescriptorStatistic(names[col]);
            if (stat == null) {
                stat = this.statistic.getDescriptorStatistic(names[col]);
                if (stat == null) {
                    logger.warn(" Skipping variance normalization for '" + names[col] + "'.");
                    continue;
                }
                logger.warn("Using internal data set statistic for variance normalization for '" + names[col] + "'.");
            }
            for (int row = 0; row < instanceCount; row++) {
                instance(row).setValue(col, stat.varianceNormalization(instance(row).value(col)));
            }
        }
        return true;
    }

    /**
     * Copies binning, attributes, molecules, class attribute and the
     * descriptor ignore list from the given cache into this cache.
     *
     * NOTE(review): the error message calls the argument "target" and
     * loadMatrix invokes {@code cached.clone(this)}, which suggests the copy
     * was meant to go from this cache into the argument. The direction
     * implemented here (argument -> this) is kept unchanged; please confirm
     * against the MoleculeCache contract.
     *
     * Fixes compared with the previous version: the existing attributes are
     * removed back to front after clearing the class index (the forward loop
     * skipped every other attribute and could hit the class attribute, which
     * cannot be deleted), and cloning a cache onto itself is a no-op instead
     * of destroying its header.
     *
     * @param moleculeCache the other cache; must be a MolInstancesCache
     * @return the given cache, or {@code null} if it has the wrong type
     */
    @Override // joelib2.feature.data.MoleculeCache
    public MoleculeCache clone(MoleculeCache moleculeCache) {
        if (!(moleculeCache instanceof MolInstancesCache)) {
            logger.error("target must be of type MolInstancesCache");
            return null;
        }
        MolInstancesCache other = (MolInstancesCache) moleculeCache;
        if (other == this) {
            return moleculeCache;
        }
        this.binning = other.binning;
        // The class attribute is protected against deletion, so unset it first.
        setClassIndex(-1);
        // Delete from the back: indices of the remaining attributes stay valid.
        for (int pos = numAttributes() - 1; pos >= 0; pos--) {
            deleteAttributeAt(pos);
        }
        for (int pos = 0; pos < other.numAttributes(); pos++) {
            insertAttributeAt(other.attribute(pos), pos);
        }
        for (int m = 0; m < other.molecules.getSize(); m++) {
            setMoleculeDescriptors(other.molecules.getMol(m), m);
        }
        this.statistic = DescriptorStatistic.getDescStatistic(this.molecules);
        this.classAttributeName = other.classAttribute().name();
        setClass(other.classAttribute());
        if (other.desc2ignore != null) {
            // Independent copy so later changes to either list do not leak.
            this.desc2ignore = new Vector(other.desc2ignore);
        }
        return moleculeCache;
    }

    /**
     * Tests whether the file with the given name can be opened for reading.
     * The probe stream is closed again; it used to be left open.
     *
     * @param str file name to probe
     * @return {@code true} if the file could be opened, {@code false} otherwise
     */
    @Override // joelib2.feature.data.MoleculeCache
    public boolean existsMatrixFileFor(String str) {
        FileInputStream probe = null;
        try {
            probe = new FileInputStream(str);
            return true;
        } catch (Exception e) {
            // Not openable (missing, unreadable, null name): treated as "does not exist".
            return false;
        } finally {
            if (probe != null) {
                try {
                    probe.close();
                } catch (Exception ignored) {
                    // Nothing was read or written; a failing close does not change the answer.
                }
            }
        }
    }

    /**
     * Logs the request and loads the descriptor matrix through fromFile.
     *
     * @param str base file name handed to fromFile
     * @return the result of fromFile
     */
    @Override // joelib2.feature.data.MoleculeCache
    public boolean fromFileFor(String str) {
        logger.info("Load descriptor matrix from " + str);
        boolean loaded = fromFile(str);
        return loaded;
    }

    /**
     * Returns the binning for the given number of bins; shorthand for
     * {@code getBinning(i, false)}.
     *
     * @param i number of bins
     * @return whatever getBinning(int, boolean) returns
     */
    @Override // joelib2.feature.data.MoleculeCache
    public Hashtable getBinning(int i) {
        return getBinning(i, false);
    }

    /**
     * Bins the values of every descriptor and returns the table
     * descriptor name -> ArrayBinning.
     *
     * A new table is created when none exists yet or when {@code z} is set;
     * otherwise the existing table is reused and its entries are overwritten.
     *
     * The previous implementation fed {@code instance(k).value(k)} (the
     * diagonal) into each binning and bounded the loop by the attribute
     * count; now all instance values of the descriptor's own column are
     * added. An empty instance set no longer causes an index error.
     *
     * @param i number of bins
     * @param z {@code true} to discard a previously built table
     * @return the binning table, or {@code null} if a descriptor has no statistic
     */
    @Override // joelib2.feature.data.MoleculeCache
    public Hashtable getBinning(int i, boolean z) {
        if (this.binning == null || z) {
            this.binning = new Hashtable(numAttributes());
        }
        String[] names = getNames();
        int instanceCount = numInstances();
        for (int col = 0; col < names.length; col++) {
            ArrayStatistic stat = this.statistic.getDescriptorStatistic(names[col]);
            if (stat == null) {
                logger.error("No statistic available for '" + names[col] + "'.");
                return null;
            }
            ArrayBinning bins = new ArrayBinning(i, stat);
            for (int row = 0; row < instanceCount; row++) {
                bins.add(instance(row).value(col));
            }
            this.binning.put(names[col], bins);
        }
        return this.binning;
    }

    /**
     * Returns the names of all descriptors that hold a NaN value in at least
     * one instance.
     *
     * The instance loop used to be bounded by the attribute count, so
     * instances beyond that count were never checked and an index error
     * occurred when there were fewer instances than attributes. It now runs
     * over all instances, and an empty instance set yields an empty array.
     * Names are reported in attribute order.
     *
     * @return descriptor names containing NaN; never {@code null}
     */
    @Override // joelib2.feature.data.MoleculeCache
    public String[] getDescContainsNaN() {
        String[] names = getNames();
        int instanceCount = numInstances();
        List<String> withNaN = new Vector<String>();
        for (int col = 0; col < names.length; col++) {
            for (int row = 0; row < instanceCount; row++) {
                if (Double.isNaN(instance(row).value(col))) {
                    withNaN.add(names[col]);
                    break; // one hit is enough for this descriptor
                }
            }
        }
        return withNaN.toArray(new String[withNaN.size()]);
    }

    /**
     * Looks up the attribute values of the molecule registered under the
     * given identifier.
     *
     * @param str molecule identifier
     * @return the attribute value array of the matching instance, or
     *         {@code null} (after logging an error) if the identifier is unknown
     */
    @Override // joelib2.feature.data.MoleculeCache
    public double[] getDescFromMolByIdentifier(String str) {
        Integer position = (Integer) this.molIDsIndex.get(str);
        if (position == null) {
            logger.error("Molecule identifier '" + str + "' not found in descriptor matrix.");
            return null;
        }
        MolInstance molInstance = (MolInstance) super.instance(position.intValue());
        return molInstance.m_AttValues;
    }

    /**
     * Returns the attribute value array of the instance at the given position.
     *
     * @param i instance index
     * @return the {@code m_AttValues} array of that instance (not a copy)
     */
    @Override // joelib2.feature.data.MoleculeCache
    public double[] getDescFromMolByIndex(int i) {
        MolInstance molInstance = (MolInstance) super.instance(i);
        return molInstance.m_AttValues;
    }

    /**
     * Looks up the attribute values of the molecule with the given title.
     *
     * An unknown title is now reported the same way as an unknown identifier
     * in getDescFromMolByIdentifier (error log, {@code null} result) instead
     * of failing with a NullPointerException.
     *
     * @param str molecule title
     * @return the attribute value array of the matching instance, or
     *         {@code null} if no molecule with that title is registered
     */
    @Override // joelib2.feature.data.MoleculeCache
    public double[] getDescFromMolByName(String str) {
        Integer position = (Integer) this.molNamesIndex.get(str);
        if (position == null) {
            logger.error("Molecule name '" + str + "' not found in descriptor matrix.");
            return null;
        }
        return ((MolInstance) super.instance(position.intValue())).m_AttValues;
    }

    /**
     * Returns the names of all attributes in attribute order.
     *
     * @return one name per attribute
     */
    @Override // joelib2.feature.data.MoleculeCache
    public String[] getDescNames() {
        FastVector attributeInfo = super.getAttributes();
        String[] descNames = new String[attributeInfo.size()];
        int idx = 0;
        while (idx < attributeInfo.size()) {
            Attribute current = (Attribute) attributeInfo.elementAt(idx);
            descNames[idx] = current.name();
            idx++;
        }
        return descNames;
    }

    /**
     * Collects the values of one descriptor across all instances, in
     * enumeration order.
     *
     * @param str descriptor (attribute) name
     * @return one value per instance
     */
    @Override // joelib2.feature.data.MoleculeCache
    public double[] getDescValues(String str) {
        double[] values = new double[super.numInstances()];
        Attribute column = super.attribute(str);
        int row = 0;
        for (Enumeration all = super.enumerateInstances(); all.hasMoreElements(); row++) {
            MolInstance molInstance = (MolInstance) all.nextElement();
            values[row] = molInstance.value(column);
        }
        return values;
    }

    /* JADX WARN: Type inference failed for: r0v2, types: [double[], double[][]] */
    @Override // joelib2.feature.data.MoleculeCache
    public double[][] getDescValues(String[] strArr) {
        ?? r0 = new double[strArr.length];
        for (int i = 0; i < r0.length; i++) {
            r0[i] = getDescValues(strArr[i]);
        }
        return r0;
    }

    /**
     * Collects the values of several descriptors for a selection of instances.
     *
     * Both index arrays are sorted in place (the caller's arrays are
     * modified); a {@code null} array is treated as empty. Only {@code iArr}
     * selects values; {@code iArr2} is sorted but otherwise unused here.
     *
     * @param strArr descriptor (attribute) names
     * @param iArr   instance indices to pick, may be {@code null}
     * @param iArr2  second index array, may be {@code null}
     * @return for each descriptor the values at the (sorted) indices of {@code iArr}
     */
    @Override // joelib2.feature.data.MoleculeCache
    public double[][] getDescValues(String[] strArr, int[] iArr, int[] iArr2) {
        int[] picked = (iArr == null) ? new int[0] : iArr;
        int[] second = (iArr2 == null) ? new int[0] : iArr2;
        Arrays.sort(picked);
        Arrays.sort(second);
        double[][] result = new double[strArr.length][picked.length];
        for (int d = 0; d < result.length; d++) {
            int attributeIndex = super.attribute(strArr[d]).index();
            double[] column = super.attributeToDoubleArray(attributeIndex);
            double[] target = result[d];
            for (int k = 0; k < target.length; k++) {
                target[k] = column[picked[k]];
            }
        }
        return result;
    }

    /**
     * Returns all values as a matrix indexed {@code [instance][attribute]}.
     * The values are fetched per attribute through getDescValues(String[])
     * and transposed.
     *
     * @return a newly allocated matrix
     */
    @Override // joelib2.feature.data.MoleculeCache
    public double[][] getMatrix() {
        double[][] matrix = new double[super.numInstances()][super.numAttributes()];
        String[] names = new String[super.numAttributes()];
        int n = 0;
        while (n < names.length) {
            names[n] = super.attribute(n).name();
            n++;
        }
        double[][] byDescriptor = getDescValues(names);
        for (int col = 0; col < byDescriptor.length; col++) {
            double[] columnValues = byDescriptor[col];
            for (int row = 0; row < columnValues.length; row++) {
                matrix[row][col] = columnValues[row];
            }
        }
        return matrix;
    }

    /**
     * Returns the title of the molecule behind every instance, in instance order.
     *
     * @return one title per instance
     */
    @Override // joelib2.feature.data.MoleculeCache
    public String[] getMolNames() {
        String[] titles = new String[super.numInstances()];
        int row = 0;
        while (row < titles.length) {
            MolInstance molInstance = (MolInstance) super.instance(row);
            titles[row] = molInstance.getMolecule().getTitle();
            row++;
        }
        return titles;
    }

    /**
     * Returns the stored descriptor statistic; if none is stored, one is
     * computed from the molecules (and not stored).
     *
     * @return the descriptor statistic
     */
    @Override // joelib2.feature.data.MoleculeCache
    public DescriptorStatistic getStatistic() {
        if (this.statistic != null) {
            return this.statistic;
        }
        return DescriptorStatistic.getDescStatistic(this.molecules);
    }

    /**
     * Loads the descriptor matrix for the given file with the caching switch
     * set to {@code true}.
     *
     * @param iOType input type, handed on unchanged
     * @param str    name of the file to load
     * @return the result of the three-argument loadMatrix
     * @throws Exception declared by the delegate
     */
    @Override // joelib2.feature.data.MoleculeCache
    public boolean loadMatrix(IOType iOType, String str) throws Exception {
        return loadMatrix(iOType, str, true);
    }

    /**
     * Loads the descriptor matrix for the given file using the default class
     * attribute name.
     *
     * @param iOType input type, handed on unchanged
     * @param str    name of the file to load
     * @param z      caching switch, handed on unchanged
     * @return the result of the four-argument loadMatrix
     * @throws Exception declared by the delegate
     */
    @Override // joelib2.feature.data.MoleculeCache
    public boolean loadMatrix(IOType iOType, String str, boolean z) throws Exception {
        return loadMatrix(iOType, str, z, DEFAULT_CLASS_ATTRIBUTE);
    }

    /**
     * Loads the descriptor matrix for the given file.
     *
     * Sources are tried in this order: the BasicMoleculeCacheHolder (only
     * when {@code z} is set), previously written matrix files (fromFileFor),
     * and finally the molecule file itself, from which all descriptors are
     * turned into attributes.
     *
     * Changes compared with the previous version: a successful fromFileFor
     * now ends the method instead of falling through and building the
     * attributes a second time on top of the loaded data; the molecule input
     * stream is closed; and failures are logged together with their cause.
     *
     * @param iOType input type; not used by this implementation
     * @param str    name of the molecule file
     * @param z      {@code true} to read from and store into the cache holder
     * @param str2   name of the descriptor to use as class attribute
     *               (compared case-insensitively)
     * @return {@code true} on success, {@code false} if loading from the
     *         molecule file failed
     * @throws Exception declared for interface compatibility
     */
    public boolean loadMatrix(IOType iOType, String str, boolean z, String str2) throws Exception {
        logger.info("Loading Matrix");
        if (z && BasicMoleculeCacheHolder.instance().contains(str)) {
            logger.info("Get " + str + " from Cache");
            // NOTE(review): clone(...) in this class copies from its argument into
            // its receiver, so this call does not fill `this` when the cached
            // object is a MolInstancesCache — confirm the intended direction.
            BasicMoleculeCacheHolder.instance().get(str).clone(this);
            return true;
        }
        if (existsMatrixFileFor(str) && fromFileFor(str)) {
            if (z) {
                BasicMoleculeCacheHolder.instance().put(str, this);
            }
            return true;
        }
        FileInputStream moleculeStream = null;
        try {
            this.classAttributeName = str2;
            moleculeStream = new FileInputStream(str);
            BasicMoleculeVector loaded = new BasicMoleculeVector(moleculeStream);
            DescriptorBinning descBinning = DescriptorBinning.getDescBinning(loaded);
            Enumeration descriptors = descBinning.getDescriptors();
            String[] descNames = new String[descBinning.numberOfDescriptors()];
            // 1 marks the class attribute (nominal), 0 every other descriptor (numeric).
            int[] descKinds = new int[descBinning.numberOfDescriptors()];
            int count = 0;
            setClassIndex(-1);
            while (descriptors.hasMoreElements()) {
                descNames[count] = (String) descriptors.nextElement();
                boolean isClass = this.classAttributeName != null
                        && descNames[count].equalsIgnoreCase(this.classAttributeName);
                descKinds[count] = isClass ? 1 : 0;
                count++;
            }
            buildInstances(loaded, descNames, descKinds);
            if (z) {
                BasicMoleculeCacheHolder.instance().put(str, this);
            }
            return true;
        } catch (Exception e) {
            logger.error("Unable to load descriptor matrix from " + str, e);
            return false;
        } finally {
            if (moleculeStream != null) {
                try {
                    moleculeStream.close();
                } catch (Exception ignored) {
                    // Input only; a failing close cannot lose data.
                }
            }
        }
    }

    /**
     * Returns the number of descriptors, which is the number of attributes of
     * the underlying instance set.
     */
    @Override // joelib2.feature.data.MoleculeCache
    public int numberOfDescriptors() {
        return super.numAttributes();
    }

    /**
     * Returns the number of molecules, which is the number of instances of
     * the underlying instance set.
     */
    @Override // joelib2.feature.data.MoleculeCache
    public int numberOfMolecules() {
        return super.numInstances();
    }

    /**
     * Appends a molecule: builds its instance from the molecule's descriptor
     * data, registers identifier and title under the new instance index and
     * adds instance and molecule to this cache.
     *
     * A molecule for which no instance can be built (getInstanceFor returns
     * {@code null}) is now rejected with {@code false}; before, it caused a
     * NullPointerException. A mismatch between instance and molecule count
     * is reported through the logger instead of standard output.
     *
     * @param molecule molecule to add
     * @param i        not used by this implementation; the molecule is always appended
     * @return {@code true} if the molecule was added, {@code false} if its
     *         instance could not be built or does not fit the attributes
     */
    @Override // joelib2.feature.data.MoleculeCache
    public boolean setMoleculeDescriptors(Molecule molecule, int i) {
        MolInstance molInstance = getInstanceFor(molecule);
        if (molInstance == null) {
            logger.error("No instance could be generated for " + molecule.getTitle() + "!");
            return false;
        }
        if (!super.checkInstance(molInstance)) {
            logger.error("Instance generated for " + molecule.getTitle() + " is not compatible with Instances!");
            return false;
        }
        int row = super.numInstances();
        this.molIDsIndex.put(this.IdentifierValue, Integer.valueOf(row));
        this.molNamesIndex.put(molecule.getTitle(), Integer.valueOf(row));
        super.add(molInstance);
        this.molecules.addMol(molecule);
        if (row + 1 != this.molecules.getSize()) {
            logger.warn("Number of instances and molecules differ: " + (row + 1) + "\t" + this.molecules.getSize());
        }
        return true;
    }

    /**
     * Sets the descriptor key whose value is used as molecule identifier;
     * getInstanceFor and buildIDs compare descriptor keys against it.
     *
     * @param str identifier descriptor key
     */
    @Override // joelib2.feature.data.MoleculeCache
    public void setMolIdentifier(String str) {
        this.moleculeIdentifier = str;
    }

    /**
     * Writes the cache to two files: {@code str + "_mols.sdf"} receives the
     * string form of every molecule, {@code str + ".arff"} the string form of
     * this instance set.
     *
     * Failures are logged (with their cause) and do not propagate; a failure
     * writing the molecules does not prevent the attempt to write the ARFF
     * file. Both writers are closed even when writing fails.
     *
     * @param str base name of the files to write
     */
    @Override // joelib2.feature.data.MoleculeCache
    public void writeMatrixFileFor(String str) {
        String moleculeFile = str + "_mols.sdf";
        String arffFile = str + ".arff";

        FileWriter moleculeWriter = null;
        try {
            moleculeWriter = new FileWriter(moleculeFile);
            for (int i = 0; i < this.molecules.getSize(); i++) {
                moleculeWriter.write(this.molecules.getMol(i).toString());
            }
        } catch (Exception e) {
            logger.error("Exception while writing molecules!", e);
        } finally {
            if (moleculeWriter != null) {
                try {
                    moleculeWriter.close();
                } catch (Exception e) {
                    // close() flushes; a failure here means data may be missing.
                    logger.error("Exception while writing molecules!", e);
                }
            }
        }

        FileWriter arffWriter = null;
        try {
            arffWriter = new FileWriter(arffFile);
            arffWriter.write(toString());
        } catch (Exception e) {
            logger.error("Exception while writing arff file!", e);
        } finally {
            if (arffWriter != null) {
                try {
                    arffWriter.close();
                } catch (Exception e) {
                    logger.error("Exception while writing arff file!", e);
                }
            }
        }
    }

    /**
     * Fills the identifier index: for every instance the molecule's
     * descriptor whose key equals the configured molecule identifier is
     * looked up and its value is mapped to the instance index. A molecule
     * without such a descriptor is registered under the empty string, as in
     * getInstanceFor.
     *
     * Fixes compared with the previous version: the index stored is the
     * instance index (it used to be the attribute index), the identifier is
     * reset for every molecule instead of being carried over from the
     * previous one, and missing descriptor data no longer causes a
     * NullPointerException.
     */
    private void buildIDs() {
        for (int row = 0; row < super.numInstances(); row++) {
            Molecule molecule = ((MolInstance) super.instance(row)).getMolecule();
            String id = "";
            for (int col = 0; col < super.numAttributes(); col++) {
                PairData data = molecule.getData(super.attribute(col).name(), true);
                if (data != null && data.getKey().equals(this.moleculeIdentifier)) {
                    id = (String) data.getKeyValue();
                }
            }
            this.IdentifierValue = id;
            this.molIDsIndex.put(id, Integer.valueOf(row));
        }
    }

    /**
     * Creates one attribute per descriptor and adds every molecule as an instance.
     *
     * A descriptor marked 0 becomes an attribute built from its name and
     * position only; a descriptor marked 1 becomes a nominal attribute whose
     * labels are the distinct values found for it across all molecules.
     *
     * Attribute names are now always taken from {@code strArr}. The previous
     * version took them from a separate descriptor enumeration that was not
     * advanced for the nominal descriptor, so every attribute after it
     * received the name of its predecessor.
     *
     * @param moleculeVector molecules to add
     * @param strArr         descriptor names
     * @param iArr           per descriptor: 0 = plain attribute, 1 = nominal
     *                       attribute; other values are skipped
     */
    private void buildInstances(MoleculeVector moleculeVector, String[] strArr, int[] iArr) {
        FastVector attributes = new FastVector(strArr.length);
        for (int i = 0; i < strArr.length; i++) {
            if (iArr[i] == 0) {
                attributes.addElement(new Attribute(strArr[i], attributes.size()));
            } else if (iArr[i] == 1) {
                // Hashtable used as a set of the distinct labels.
                Hashtable labels = new Hashtable();
                for (int m = 0; m < moleculeVector.getSize(); m++) {
                    PairData data = moleculeVector.getMol(m).getData(strArr[i], false);
                    if (data != null) {
                        if (data.getKeyValue() instanceof String) {
                            labels.put(data.getKeyValue(), "");
                        } else {
                            labels.put(data.toString(), "");
                        }
                    }
                }
                FastVector labelVector = new FastVector(labels.size());
                Enumeration keys = labels.keys();
                while (keys.hasMoreElements()) {
                    labelVector.addElement((String) keys.nextElement());
                }
                attributes.addElement(new Attribute(strArr[i], labelVector, attributes.size()));
            }
        }
        update(attributes);
        for (int m = 0; m < moleculeVector.getSize(); m++) {
            setMoleculeDescriptors(moleculeVector.getMol(m), 0);
        }
    }

    /**
     * Restores the cache from {@code str + ".arff"} (attribute header) and
     * {@code str + "_mols.sdf"} (molecules).
     *
     * The current instances and attributes are removed, the attributes of the
     * ARFF file are installed (a nominal attribute becomes the class
     * attribute), and every molecule is added through setMoleculeDescriptors,
     * i.e. the instance values are derived from the molecules' descriptor data.
     *
     * Changes compared with the previous version:
     * - both input streams are closed;
     * - existing attributes are removed back to front after clearing the
     *   class index (the forward loop skipped every other attribute and
     *   failed on the class attribute);
     * - the cause of a failure is logged;
     * - the second, throw-away cache that was built only for a log line is
     *   gone; its binning was always {@code null}, so the binning is reset to
     *   {@code null} directly.
     *
     * @param str base name of the two files
     * @return {@code true} if the cache was restored, {@code false} otherwise
     */
    private boolean fromFile(String str) {
        String moleculeFile = str + "_mols.sdf";
        String arffFile = str + ".arff";

        Instances header;
        BufferedReader arffReader = null;
        try {
            arffReader = new BufferedReader(new InputStreamReader(new FileInputStream(arffFile)));
            header = new Instances(arffReader);
        } catch (Exception e) {
            logger.error("Unable to read file: " + arffFile, e);
            return false;
        } finally {
            if (arffReader != null) {
                try {
                    arffReader.close();
                } catch (Exception ignored) {
                    // Input only; a failing close cannot lose data.
                }
            }
        }

        FileInputStream moleculeStream = null;
        try {
            moleculeStream = new FileInputStream(moleculeFile);
            BasicMoleculeVector loaded = new BasicMoleculeVector(moleculeStream);
            delete();
            // The class attribute is protected against deletion, so unset it first.
            setClassIndex(-1);
            for (int pos = numAttributes() - 1; pos >= 0; pos--) {
                deleteAttributeAt(pos);
            }
            for (int pos = 0; pos < header.numAttributes(); pos++) {
                insertAttributeAt(header.attribute(pos), pos);
                if (header.attribute(pos).isNominal()) {
                    setClass(header.attribute(pos));
                }
            }
            for (int m = 0; m < header.numInstances(); m++) {
                setMoleculeDescriptors(loaded.getMol(m), m);
            }
            this.statistic = DescriptorStatistic.getDescStatistic(loaded);
            // Any binning computed for the previous content is stale now.
            this.binning = null;
            this.classAttributeName = classAttribute().name();
            logger.info("" + numAttributes() + "\t" + numberOfDescriptors() + "\t" + numInstances() + "\t" + numberOfMolecules());
            return true;
        } catch (Exception e) {
            logger.error("Unable to read file: " + moleculeFile, e);
            return false;
        } finally {
            if (moleculeStream != null) {
                try {
                    moleculeStream.close();
                } catch (Exception ignored) {
                    // Input only; a failing close cannot lose data.
                }
            }
        }
    }

    /**
     * Builds the instance for a molecule from its descriptor data.
     *
     * For every attribute the molecule's descriptor of the same name is read:
     * missing data and NaN values become missing values, nominal attributes
     * store the index of the descriptor's string form among the attribute's
     * labels, and other attributes store the descriptor's double value. As a
     * side effect {@code IdentifierValue} is set to the value of the
     * descriptor whose key equals the configured molecule identifier, or to
     * the empty string if there is none.
     *
     * The check for missing data now happens before the descriptor is
     * dereferenced; it used to come afterwards, so a molecule lacking a
     * descriptor caused a NullPointerException instead of a missing value.
     *
     * @param molecule molecule to convert
     * @return the instance, or {@code null} if a nominal descriptor holds a
     *         value that is not among the attribute's labels
     */
    private MolInstance getInstanceFor(Molecule molecule) {
        double[] values = new double[super.numAttributes()];
        this.IdentifierValue = "";
        for (int i = 0; i < values.length; i++) {
            Attribute attribute = super.attribute(i);
            int index = attribute.index();
            PairData data = molecule.getData(attribute.name(), true);
            if (data == null) {
                values[index] = MolInstance.missingValue();
                continue;
            }
            if (data.getKey().equals(this.moleculeIdentifier)) {
                this.IdentifierValue = (String) data.getKeyValue();
            }
            if (attribute.isNominal()) {
                if (data.toString().trim().indexOf("\n") != -1) {
                    logger.error("Descriptor " + attribute.name() + " contains multiple lines and is not a valid nominal value.");
                } else {
                    values[index] = attribute.indexOfValue(data.toString());
                    if (values[index] == -1.0d) {
                        logger.error("Invalid nominal value");
                        return null;
                    }
                }
            } else if (data instanceof NativeValue) {
                double nativeValue = ((NativeValue) data).getDoubleNV();
                values[index] = Double.isNaN(nativeValue) ? MolInstance.missingValue() : nativeValue;
            } else {
                logger.error("Descriptor " + attribute.name() + " is not a native value.");
            }
        }
        return new MolInstance(molecule, 1.0d, values);
    }

    /**
     * Returns the names of all attributes in attribute order.
     *
     * @return one name per attribute
     */
    private String[] getNames() {
        FastVector attributeInfo = getAttributes();
        String[] names = new String[attributeInfo.size()];
        int idx = 0;
        while (idx < attributeInfo.size()) {
            Attribute current = (Attribute) attributeInfo.elementAt(idx);
            names[idx] = current.name();
            idx++;
        }
        return names;
    }

    /**
     * Installs the given attributes and selects the class attribute by the
     * configured class attribute name.
     *
     * An error is logged if attributes already exist; the new attributes are
     * inserted regardless. If no class attribute name is configured the class
     * is left unset. If a name is configured but no attribute carries it, an
     * IllegalStateException naming the attribute is thrown; this case used to
     * surface as a NullPointerException without any message.
     *
     * @param fastVector attributes to insert, in order
     * @throws IllegalStateException if the configured class attribute is not
     *         among the attributes
     */
    private void update(FastVector fastVector) {
        if (super.numAttributes() != 0) {
            logger.error("Instances not empty");
        }
        for (int i = 0; i < fastVector.size(); i++) {
            super.insertAttributeAt((Attribute) fastVector.elementAt(i), i);
        }
        if (this.classAttributeName == null) {
            // No class attribute requested.
            return;
        }
        Attribute classAttribute = super.attribute(this.classAttributeName);
        if (classAttribute == null) {
            throw new IllegalStateException(
                    "Class attribute '" + this.classAttributeName + "' is not among the loaded descriptors.");
        }
        // setClass stores the attribute's index as class index; no separate setClassIndex needed.
        super.setClass(classAttribute);
    }
}
