From 16caf521898a5f38b260dc2e0efe6797de441fb6 Mon Sep 17 00:00:00 2001
From: wfondrie
Date: Mon, 13 Jul 2020 16:29:47 -0700
Subject: [PATCH] Added __repr__ to major classes

---
 mokapot/confidence.py |  9 +++++++++
 mokapot/dataset.py    | 21 ++++++++++++++++++---
 mokapot/model.py      |  9 ++++++++-
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/mokapot/confidence.py b/mokapot/confidence.py
index a77ebd68..4709ff20 100644
--- a/mokapot/confidence.py
+++ b/mokapot/confidence.py
@@ -190,6 +190,15 @@ def __init__(self, psms, scores, desc=True, eval_fdr=0.01):
 
         self._assign_confidence(desc=desc)
 
+    def __repr__(self):
+        """Summarize how many PSMs and peptides pass the q-value cutoff."""
+        cutoff = self._eval_fdr
+        num_psms = (self.psms["mokapot q-value"] <= cutoff).sum()
+        num_peptides = (self.peptides["mokapot q-value"] <= cutoff).sum()
+        return ("A mokapot.confidence.LinearConfidence object:\n"
+                f"\t- PSMs at q<={cutoff:g}: {num_psms}\n"
+                f"\t- Peptides at q<={cutoff:g}: {num_peptides}")
+
     def _assign_confidence(self, desc=True):
         """
         Assign confidence to PSMs and peptides.
diff --git a/mokapot/dataset.py b/mokapot/dataset.py
index 0f6e1d7e..01bcc0f7 100644
--- a/mokapot/dataset.py
+++ b/mokapot/dataset.py
@@ -309,9 +309,9 @@ class LinearPsmDataset(PsmDataset):
     metadata : pandas.DataFrame
     features : pandas.DataFrame
     spectra : pandas.DataFrame
+    peptides : pandas.DataFrame
     targets : numpy.ndarray
     columns : list of str
-
     """
     def __init__(self,
                  psms,
@@ -349,16 +349,31 @@ def __init__(self,
 
         if not num_targets:
             raise ValueError("No target PSMs were detected.")
-        elif not num_decoys:
+        if not num_decoys:
             raise ValueError("No decoy PSMs were detected.")
-        elif not len(self.data):
+        if not self.data.shape[0]:
             raise ValueError("No PSMs were detected.")
 
+    def __repr__(self):
+        """Summarize the dataset; decoys are the PSMs that are not targets."""
+        n_psms, n_targets = len(self.data), self.targets.sum()
+        return (f"A mokapot.dataset.LinearPsmDataset with {n_psms} PSMs:\n"
+                f"\t- target PSMs: {n_targets}\n"
+                f"\t- decoy PSMs: {n_psms - n_targets}\n"
+                f"\t- unique spectra: {len(self.spectra.drop_duplicates())}\n"
+                f"\t- unique peptides: {len(self.peptides.drop_duplicates())}\n"
+                f"\t- features: {self._feature_columns}")
+
     @property
     def targets(self):
         """An array indicating whether each PSM is a target sequence."""
         return self.data[self._target_column].values
 
+    @property
+    def peptides(self):
+        """A :py:class:`pandas.DataFrame` of peptide columns."""
+        return self.data.loc[:, self._peptide_columns]
+
     def _update_labels(self, scores, fdr_threshold=0.01, desc=True):
         """
         Return the label for each PSM, given it's score.
diff --git a/mokapot/model.py b/mokapot/model.py
index e2182035..e220d74b 100644
--- a/mokapot/model.py
+++ b/mokapot/model.py
@@ -84,7 +84,6 @@ def __init__(self, estimator=None, scaler=None, is_trained=False):
         self.estimator = base.clone(estimator)
         self.features = None
         self.is_trained = is_trained
-        self._base_params = self.estimator.get_params()
 
         if scaler == "as-is":
             self.scaler = DummyScaler()
@@ -93,6 +92,14 @@ def __init__(self, estimator=None, scaler=None, is_trained=False):
         else:
             self.scaler = base.clone(scaler)
 
+    def __repr__(self):
+        """Describe the model: training state, estimator, scaler, features."""
+        # Truthiness, not a dict lookup: a non-bool flag must not raise here.
+        state = "A trained" if self.is_trained else "An untrained"
+        return (f"{state} mokapot.model.Model object:\n"
+                f"\testimator: {self.estimator}\n"
+                f"\tscaler: {self.scaler}\n"
+                f"\tfeatures: {self.features}")
 
     def save(self, out_file):
         """