package org.fhcrc.cpl.viewer.amt.commandline;

import java.io.File;
import java.util.Set;
import org.apache.log4j.Logger;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.fhcrc.cpl.toolbox.commandline.CommandLineModule;
import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException;
import org.fhcrc.cpl.toolbox.commandline.arguments.ArgumentValidationException;
import org.fhcrc.cpl.toolbox.commandline.arguments.BooleanArgumentDefinition;
import org.fhcrc.cpl.toolbox.commandline.arguments.CommandLineArgumentDefinition;
import org.fhcrc.cpl.toolbox.commandline.arguments.DecimalArgumentDefinition;
import org.fhcrc.cpl.toolbox.commandline.arguments.EnumeratedValuesArgumentDefinition;
import org.fhcrc.cpl.toolbox.commandline.arguments.FileToReadArgumentDefinition;
import org.fhcrc.cpl.toolbox.commandline.arguments.FileToWriteArgumentDefinition;
import org.fhcrc.cpl.toolbox.commandline.arguments.IntegerArgumentDefinition;
import org.fhcrc.cpl.toolbox.commandline.arguments.StringArgumentDefinition;
import org.fhcrc.cpl.toolbox.proteomics.ProteinUtilities;
import org.fhcrc.cpl.toolbox.proteomics.commandline.arguments.FeatureFileArgumentDefinition;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet;
import org.fhcrc.cpl.viewer.amt.AmtDatabase;
import org.fhcrc.cpl.viewer.amt.AmtDatabaseManager;
import org.fhcrc.cpl.viewer.amt.AmtPeptideEntry;
import org.fhcrc.cpl.viewer.amt.AmtRunEntry;
import org.fhcrc.cpl.viewer.amt.AmtXmlReader;
import org.fhcrc.cpl.viewer.amt.AmtXmlWriter;
import org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl;

/* loaded from: input_file:org/fhcrc/cpl/viewer/amt/commandline/AmtDatabaseManagerCommandLineModule.class */
/**
 * Command-line module ({@code manageamt}) that loads an AMT database from an
 * amtxml file, applies one refinement operation selected by the {@code mode}
 * argument, and optionally writes the resulting database to the {@code out} file.
 *
 * <p>The {@code *_MODE} constants are indices into {@link #modeStrings} and
 * {@link #modeExplanations}; the three must be kept in the same order.
 */
public class AmtDatabaseManagerCommandLineModule extends BaseViewerCommandLineModuleImpl implements CommandLineModule {
    // Mode identifiers. Each value is the index of the matching entry in modeStrings.
    protected static final int REMOVE_OUTLIER_OBSERVATIONS_MODE = 0;
    protected static final int REMOVE_PREDICTED_H_OUTLIERS_MODE = 1;
    protected static final int REMOVE_FEW_OBSERVATIONS_MODE = 2;
    protected static final int REMOVE_RUNS_WITHOUT_MASS_MATCHES_MODE = 3;
    protected static final int ALIGN_ALL_RUNS_MODE = 4;
    protected static final int ADJUST_ACRYLAMIDE_MODE = 5;
    protected static final int REMOVE_PEPTIDES_WITH_RESIDUE_MODE = 6;
    protected static final int REMOVE_FASTA_PEPTIDES_MODE = 7;
    protected static final int FILTER_OBSERVATIONS_BY_PPROPHET_MODE = 8;

    protected static Logger _log = Logger.getLogger(AmtDatabaseManagerCommandLineModule.class);

    /** Accepted values of the {@code mode} argument, indexed by the {@code *_MODE} constants. */
    protected static final String[] modeStrings = {
        "removeoutlierobservations",
        "removepredictedhoutliers",
        "removefewobservations",
        "removerunswithoutmassmatches",
        "alignallruns",
        "adjustacrylamide",
        "removepeptideswithresidue",
        "removefastapeptides",
        "filterobservationsbypprophet"
    };

    /** User-facing help text for each entry of {@link #modeStrings}, in the same order. */
    protected static final String[] modeExplanations = {
        "Remove all individual observations that are at least 3 standard deviations from the median for that peptide, for peptides with at least three observations",
        "Remove all peptides with only one observation, for which that observation is at least 2 standard deviations away from the prediction",
        "Remove all peptides with fewer than minobservations observations",
        "Make mass matches between each run's entries and an MS1 feature file.  Remove all runs that don't mass-match at least minmassmatchpercent percent of peptides",
        "Nonlinearly align all runs in the database to a single run, starting with the run with the most peptide overlap with other runs in the database.  This is an extremely important step.",
        "Adjust all Cysteine-bearing observations to take the H contribution of acrylamide into account.  Direction of adjustment depends on the 'fromacryltonot' parameter",
        "Remove all peptides containing a given residue",
        "Remove all peptides that occur in a specified FASTA database",
        "Remove all observations below 'minpprophet' PeptideProphet probability"
    };

    /** Database loaded from the unnamed file argument; replaced by the result of some modes. */
    protected AmtDatabase amtDatabase = null;
    /** Destination amtxml file; when null, {@link #execute()} writes nothing. */
    protected File outFile = null;
    protected File fastaFile = null;
    boolean fromAcrylamideToNot = true;
    protected float minPeptideProphet = 0.0f;
    /** Selected mode index; -1 until {@link #assignArgumentValues()} has run. */
    protected int mode = -1;
    protected int matchingDegree = 5;
    // NOTE(review): the help text for removepredictedhoutliers says "2 standard deviations",
    // but this multiplier is 0.001 -- confirm which one is intended.
    protected float predictedHOutlierDeviationMultipleCutoff = 0.001f;
    protected int minObservations = 2;
    protected int minMassMatchPercent = 20;
    protected int maxEntriesInMassMatchedDatabase = Integer.MAX_VALUE;
    protected int maxRunsInMassMatchedDatabase = Integer.MAX_VALUE;
    protected FeatureSet ms1FeatureSet = null;
    protected boolean showCharts = false;
    protected String residueToRemove = null;
    protected float massMatchDeltaMass = 5.0f;
    // Passed through to AmtDatabaseManager.removeRunsWithoutMassMatches; the meaning of
    // the value 1 is defined by that API and is not visible here.
    protected int massMatchDeltaMassType = 1;

    public AmtDatabaseManagerCommandLineModule() {
        init();
    }

    /**
     * Sets the command name and help text and registers the argument definitions.
     * The second group of definitions is registered with the extra {@code true} flag
     * of the two-argument {@code addArgumentDefinitions} overload.
     */
    protected void init() {
        this.mCommandName = "manageamt";
        this.mShortDescription = "Tools for managing an AMT database";
        this.mHelpMessage = "Refine an AMT database by removing outlier observations or peptides, or nonlinearly aligning all runs to each other";

        CommandLineArgumentDefinition[] basicDefinitions = {
            new EnumeratedValuesArgumentDefinition("mode", true, modeStrings, modeExplanations),
            new FileToWriteArgumentDefinition("out", false, null),
            createUnnamedFileArgumentDefinition(true, "AMT database file"),
            new IntegerArgumentDefinition("minobservations", false, "Minimum number of observations for features kept in the database", this.minObservations),
            new BooleanArgumentDefinition("showcharts", false, "Show charts?", this.showCharts),
            new StringArgumentDefinition("residue", false, "Residue (for 'removepeptideswithresidue' mode)"),
            new DecimalArgumentDefinition("minpprophet", false, "Minimum PeptideProphet score (for 'filterobservationsbypprophet' mode)")
        };
        addArgumentDefinitions(basicDefinitions);

        CommandLineArgumentDefinition[] flaggedDefinitions = {
            new IntegerArgumentDefinition("minmassmatchpercent", false, "Minimum percent of peptides mass-matched to MS1, per run (removerunswithoutmassmatches mode only)", this.minMassMatchPercent),
            new IntegerArgumentDefinition("maxentries", false, "Maximum DB entries (removerunswithoutmassmatches mode only)", this.maxEntriesInMassMatchedDatabase),
            new IntegerArgumentDefinition("maxruns", false, "Maximum DB runs (removerunswithoutmassmatches mode only)", this.maxRunsInMassMatchedDatabase),
            new FeatureFileArgumentDefinition("ms1features", false, "MS1 features (removerunswithoutmassmatches mode only)"),
            new BooleanArgumentDefinition("fromacryltonot", false, "For mode adjustacrylamide.  If true, adjusts all observations to _remove_ the effect  of acrylamide.  If false, adjusts observations to _add_ the effect."),
            new FileToReadArgumentDefinition("fasta", false, "FASTA database (removefastapeptides mode only)"),
            new IntegerArgumentDefinition("alignmentdegree", false, "Degree of polynomial to use in alignment (for 'alignallruns' mode)", this.matchingDegree)
        };
        addArgumentDefinitions(flaggedDefinitions, true);
    }

    /**
     * Resolves the selected mode, loads the AMT database from the unnamed file
     * argument, and checks that the arguments supplied are consistent with the mode.
     *
     * @throws ArgumentValidationException if an argument check fails, or if any other
     *         exception occurs while loading the database or reading argument values
     *         (in which case the original exception is wrapped)
     */
    @Override
    public void assignArgumentValues() throws ArgumentValidationException {
        EnumeratedValuesArgumentDefinition modeDefinition =
                (EnumeratedValuesArgumentDefinition) getArgumentDefinition("mode");
        this.mode = modeDefinition.getIndexForArgumentValue(getStringArgumentValue("mode"));

        if (this.mode == FILTER_OBSERVATIONS_BY_PPROPHET_MODE) {
            assertArgumentPresent("minpprophet", "mode");
            this.minPeptideProphet = (float) getDoubleArgumentValue("minpprophet");
        }

        try {
            this.amtDatabase = new AmtXmlReader(getFileArgumentValue(CommandLineArgumentDefinition.UNNAMED_PARAMETER_VALUE_ARGUMENT)).getDatabase();
            _log.info("Loaded AMT database: " + this.amtDatabase);

            if (this.mode == REMOVE_FEW_OBSERVATIONS_MODE) {
                assertArgumentPresent("minobservations");
                this.minObservations = getIntegerArgumentValue("minobservations");
            } else {
                assertArgumentAbsent("minobservations");
            }

            if (this.mode == REMOVE_RUNS_WITHOUT_MASS_MATCHES_MODE) {
                assertArgumentPresent("minmassmatchpercent");
                this.minMassMatchPercent = getIntegerArgumentValue("minmassmatchpercent");
                assertArgumentPresent("ms1features");
                this.ms1FeatureSet = getFeatureSetArgumentValue("ms1features");
                // NOTE(review): "maxentries" is asserted present but "maxruns" is not,
                // and neither is asserted absent in other modes -- confirm this is intended.
                assertArgumentPresent("maxentries");
                this.maxEntriesInMassMatchedDatabase = getIntegerArgumentValue("maxentries");
                this.maxRunsInMassMatchedDatabase = getIntegerArgumentValue("maxruns");
            } else {
                assertArgumentAbsent("minmassmatchpercent");
                assertArgumentAbsent("ms1features");
            }

            if (this.mode == ADJUST_ACRYLAMIDE_MODE) {
                assertArgumentPresent("fromacryltonot");
                this.fromAcrylamideToNot = getBooleanArgumentValue("fromacryltonot");
            } else {
                assertArgumentAbsent("fromacryltonot");
            }

            this.fastaFile = getFileArgumentValue("fasta");
            if (this.mode == REMOVE_FASTA_PEPTIDES_MODE) {
                assertArgumentPresent("fasta", "mode");
            }

            this.residueToRemove = getStringArgumentValue("residue");
            if (this.mode == REMOVE_PEPTIDES_WITH_RESIDUE_MODE) {
                assertArgumentPresent("residue", "mode");
            }

            this.matchingDegree = getIntegerArgumentValue("alignmentdegree");
            this.outFile = getFileArgumentValue("out");
            this.showCharts = getBooleanArgumentValue("showcharts");
        } catch (Exception e) {
            // A validation failure raised above already carries the right type and
            // message; rethrow it unchanged instead of nesting it inside a second
            // ArgumentValidationException.
            if (e instanceof ArgumentValidationException) {
                throw (ArgumentValidationException) e;
            }
            throw new ArgumentValidationException(e);
        }
    }

    /**
     * Runs the operation selected by {@link #mode} against the loaded database and,
     * when both an output file and a database are available, writes the result.
     *
     * @throws CommandLineModuleExecutionException if propagated by an underlying operation
     */
    @Override
    public void execute() throws CommandLineModuleExecutionException {
        ApplicationContext.infoMessage("Read AMT Database with " + this.amtDatabase.numEntries() + " entries.");

        switch (this.mode) {
            case REMOVE_OUTLIER_OBSERVATIONS_MODE:
                removeOutlierObservations();
                break;
            case REMOVE_PREDICTED_H_OUTLIERS_MODE:
                removePredictedHOutliers();
                break;
            case REMOVE_FEW_OBSERVATIONS_MODE:
                removeEntriesWithFewObservations();
                break;
            case REMOVE_RUNS_WITHOUT_MASS_MATCHES_MODE:
                this.amtDatabase = AmtDatabaseManager.removeRunsWithoutMassMatches(
                        this.amtDatabase,
                        this.ms1FeatureSet.getFeatures(),
                        this.minMassMatchPercent,
                        this.massMatchDeltaMass,
                        this.massMatchDeltaMassType,
                        this.maxEntriesInMassMatchedDatabase,
                        this.maxRunsInMassMatchedDatabase,
                        AmtDatabaseMatcherCLM.defaultMS2ModificationsForMatching,
                        this.showCharts);
                break;
            case ALIGN_ALL_RUNS_MODE:
                // The literal 30 is passed straight to the alignment routine; its meaning
                // is defined by AmtDatabaseManager and is not visible in this file.
                this.amtDatabase = AmtDatabaseManager.alignAllRunsUsingCommonPeptides(
                        this.amtDatabase, 30, this.matchingDegree, this.showCharts);
                break;
            case ADJUST_ACRYLAMIDE_MODE:
                this.amtDatabase = AmtDatabaseManager.adjustEntriesForAcrylamide(
                        this.amtDatabase, this.fromAcrylamideToNot, this.showCharts);
                break;
            case REMOVE_PEPTIDES_WITH_RESIDUE_MODE:
                removePeptidesWithResidue();
                break;
            case REMOVE_FASTA_PEPTIDES_MODE:
                removeFastaPeptides();
                break;
            case FILTER_OBSERVATIONS_BY_PPROPHET_MODE:
                filterObservationsByPeptideProphet();
                break;
            default:
                // Unknown mode: no operation is applied.
                break;
        }

        if (this.outFile != null && this.amtDatabase != null) {
            writeAmtDatabase(this.amtDatabase, this.outFile);
        }
    }

    /** Returns the number of observations in the database, summed over all of its runs. */
    private int countObservations() throws CommandLineModuleExecutionException {
        int total = 0;
        for (AmtRunEntry run : this.amtDatabase.getRuns()) {
            total += this.amtDatabase.getObservationsForRun(run).length;
        }
        return total;
    }

    /** Removes hydrophobicity outliers (cutoff 3.0) and reports how many observations were dropped. */
    private void removeOutlierObservations() throws CommandLineModuleExecutionException {
        int observationsBefore = countObservations();
        AmtDatabaseManager.removeHydrophobicityOutliers(this.amtDatabase, 3.0d);
        int observationsAfter = countObservations();
        ApplicationContext.infoMessage("\nRemoved " + (observationsBefore - observationsAfter) + " observations (out of " + observationsBefore + ")");
    }

    /**
     * Removes predicted-hydrophobicity outliers using the cutoff
     * {@code mean + predictedHOutlierDeviationMultipleCutoff * standardDeviation} of the
     * database's difference from predicted hydrophobicity.
     */
    private void removePredictedHOutliers() throws CommandLineModuleExecutionException {
        // Kept as a single expression so the arithmetic types match the original computation.
        double maxDifference = this.amtDatabase.calculateMeanDifferenceFromPredictedHydro() + (this.predictedHOutlierDeviationMultipleCutoff * this.amtDatabase.calculateStandardDeviationDifferenceFromPredictedHydro());
        ApplicationContext.infoMessage("Removing entries with one observation and observed hydro > " + maxDifference + " different from predicted");
        AmtDatabaseManager.removePredictedHOutliers(this.amtDatabase, (float) maxDifference);
    }

    /** Removes every entry that has fewer than {@link #minObservations} observations. */
    private void removeEntriesWithFewObservations() throws CommandLineModuleExecutionException {
        ApplicationContext.infoMessage("Removing entries with less than " + this.minObservations + " observations");
        // assumes getEntries() returns a snapshot, so removing while iterating is safe -- TODO confirm
        for (AmtPeptideEntry entry : this.amtDatabase.getEntries()) {
            if (entry.getNumObservations() < this.minObservations) {
                this.amtDatabase.removeEntry(entry.getPeptideSequence());
            }
        }
    }

    /** Removes every entry whose peptide sequence contains {@link #residueToRemove}. */
    private void removePeptidesWithResidue() throws CommandLineModuleExecutionException {
        ApplicationContext.infoMessage("Removing all peptides containing '" + this.residueToRemove + "'...");
        // assumes getEntries() returns a snapshot, so removing while iterating is safe -- TODO confirm
        for (AmtPeptideEntry entry : this.amtDatabase.getEntries()) {
            String sequence = entry.getPeptideSequence();
            if (sequence.contains(this.residueToRemove)) {
                this.amtDatabase.removeEntry(sequence);
            }
        }
    }

    /** Removes every entry whose sequence is among the tryptic peptides loaded from {@link #fastaFile}. */
    private void removeFastaPeptides() throws CommandLineModuleExecutionException {
        ApplicationContext.infoMessage("Loading FASTA peptides...");
        Set<String> fastaPeptides = ProteinUtilities.loadTrypticPeptidesFromFasta(this.fastaFile);
        ApplicationContext.infoMessage("Loaded FASTA peptides.  Removing them...");
        // assumes getEntries() returns a snapshot, so removing while iterating is safe -- TODO confirm
        for (AmtPeptideEntry entry : this.amtDatabase.getEntries()) {
            if (fastaPeptides.contains(entry.getPeptideSequence())) {
                this.amtDatabase.removeEntry(entry.getPeptideSequence());
            }
        }
    }

    /**
     * Removes every observation whose PeptideProphet value is below
     * {@link #minPeptideProphet}, then removes entries left with no passing
     * observation. Reports both counts.
     */
    private void filterObservationsByPeptideProphet() throws CommandLineModuleExecutionException {
        ApplicationContext.infoMessage("Removing all peptide observations with pprophet < " + this.minPeptideProphet);
        int removedObservations = 0;
        int removedPeptides = 0;
        // assumes getEntries()/getObservations() return snapshots, so removing while
        // iterating is safe -- TODO confirm
        for (AmtPeptideEntry entry : this.amtDatabase.getEntries()) {
            boolean anyObservationKept = false;
            for (AmtPeptideEntry.AmtPeptideObservation observation : entry.getObservations()) {
                if (observation.getPeptideProphet() < this.minPeptideProphet) {
                    entry.removeObservation(observation);
                    removedObservations++;
                } else {
                    anyObservationKept = true;
                }
            }
            if (!anyObservationKept) {
                this.amtDatabase.removeEntry(entry.getPeptideSequence());
                removedPeptides++;
            }
        }
        ApplicationContext.infoMessage("Removed " + removedObservations + " observations.  Entirely removed " + removedPeptides + " peptides");
    }

    /**
     * Writes the database to the given amtxml file and reports the number of entries
     * written. This is best-effort: a failure is reported (stack trace on standard
     * error plus an info message) and is not propagated to the caller.
     *
     * @param amtDatabase database to serialize
     * @param file destination file
     */
    protected static void writeAmtDatabase(AmtDatabase amtDatabase, File file) {
        try {
            new AmtXmlWriter(amtDatabase).write(file);
            ApplicationContext.infoMessage("Wrote " + amtDatabase.numEntries() + " entries to amtxml file " + file.getAbsolutePath());
        } catch (Exception e) {
            e.printStackTrace(System.err);
            ApplicationContext.infoMessage("Error writing amt file " + file.getAbsolutePath());
        }
    }
}
