diff --git a/deeprank2/dataset.py b/deeprank2/dataset.py index 3718f3018..18796ac26 100644 --- a/deeprank2/dataset.py +++ b/deeprank2/dataset.py @@ -194,9 +194,8 @@ def _check_inherited_params( """Check if the parameters for validation and/or testing are the same as in the pre-trained model or training set provided. Args: - inherited_params (List[str]): List of parameters that need to be checked for inheritance. - data (Union[dict, class:`GraphDataset`, class:`GridDataset`]): The parameters in `inherited_param` will be inherited - from the information contained in `data`. + inherited_params: List of parameters that need to be checked for inheritance. + data: The parameters in `inherited_params` will be inherited from the information contained in `data`. """ self_vars = vars(self) if not isinstance(data, dict): @@ -256,10 +255,10 @@ def _filter_targets(self, grp: h5py.Group) -> bool: of the form: { target_name : target_condition } or None. Args: - grp (:class:`h5py.Group`): The entry group in the .HDF5 file. + grp: The entry group in the .HDF5 file. Returns: - bool: True if we keep the entry False otherwise. + True if we keep the entry; False otherwise. Raises: ValueError: If an unsupported condition is provided. @@ -360,17 -359,16 @@ def save_hist( # noqa: C901 """After having generated a pd.DataFrame using hdf5_to_pandas method, histograms of the features can be saved in an image. Args: - features (str | list[str]): Features to be plotted. - fname (str): str or path-like or binary file-like object. - Defaults to 'features_hist.png'. - bins (int | list[float] | str, optional): If bins is an integer, it defines the number of equal-width bins in the range. + features: Features to be plotted. + fname: str or path-like or binary file-like object. Defaults to 'features_hist.png'. + bins: If bins is an integer, it defines the number of equal-width bins in the range. If bins is a sequence, it defines the bin edges, including the left edge of the first bin and the right edge of the last bin; in this case, bins may be unequally spaced. All but the last (righthand-most) bin is half-open. If bins is a string, it is one of the binning strategies supported by numpy.histogram_bin_edges: 'auto', 'fd', 'doane', 'scott', 'stone', 'rice', 'sturges', or 'sqrt'. Defaults to 10. - figsize (tuple, optional): Saved figure sizes. Defaults to (15, 15). - log (bool): Whether to apply log transformation to the data indicated by the `features` parameter. Defaults to False. + figsize: Saved figure sizes. Defaults to (15, 15). + log: Whether to apply log transformation to the data indicated by the `features` parameter. Defaults to False. """ if self.df is None: self.hdf5_to_pandas() @@ -474,18 +472,18 @@ class GridDataset(DeeprankDataset): """Class to load the .HDF5 files data into grids. Args: - hdf5_path (str | list): Path to .HDF5 file(s). For multiple .HDF5 files, insert the paths in a list. Defaults to None. - subset (list[str] | None, optional): list of keys from .HDF5 file to include. Defaults to None (meaning include all). - train_source (str | class:`GridDataset` | None, optional): data to inherit information from the training dataset or the pre-trained model. + hdf5_path: Path to .HDF5 file(s). For multiple .HDF5 files, insert the paths in a list. Defaults to None. + subset: list of keys from .HDF5 file to include. Defaults to None (meaning include all). + train_source: data to inherit information from the training dataset or the pre-trained model. If None, the current dataset is considered as the training set.
Otherwise, `train_source` needs to be a dataset of the same class or the path of a DeepRank2 pre-trained model. If set, the parameters `features`, `target`, `target_transform`, `task`, and `classes` will be inherited from `train_source`. Defaults to None. - features (list[str] | str | Literal["all"] | None, optional): Consider all pre-computed features ("all") or some defined node features + features: Consider all pre-computed features ("all") or some defined features (provide a list, example: ["res_type", "polarity", "bsa"]). The complete list can be found in `deeprank2.domain.gridstorage`. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to "all". - target (str | None, optional): Default options are irmsd, lrmsd, fnat, binary, capri_class, and dockq. It can also be + target: Default options are irmsd, lrmsd, fnat, binary, capri_class, and dockq. It can also be a custom-defined target given to the Query class as input (see: `deeprank2.query`); in this case, the task parameter needs to be explicitly specified as well. Only numerical target variables are supported, not categorical. @@ -493,28 +491,25 @@ class GridDataset(DeeprankDataset): numerical class indices before defining the :class:`GraphDataset` instance. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to None. - target_transform (bool, optional): Apply a log and then a sigmoid transformation to the target (for regression only). + target_transform: Apply a log and then a sigmoid transformation to the target (for regression only). This puts the target value between 0 and 1, and can result in a more uniform target distribution and speed up the optimization. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to False. - target_filter (dict[str, str] | None, optional): Dictionary of type [target: cond] to filter the molecules. + target_filter: Dictionary of type [target: cond] to filter the molecules. Note that you can filter on a different target than the one selected as the dataset target. Defaults to None. - task (Literal["regress", "classif"] | None, optional): 'regress' for regression or 'classif' for classification. Required if target not in + task: 'regress' for regression or 'classif' for classification. Required if target not in ['irmsd', 'lrmsd', 'fnat', 'binary', 'capri_class', or 'dockq'], otherwise this setting is ignored. Automatically set to 'classif' if the target is 'binary' or 'capri_class'. Automatically set to 'regress' if the target is 'irmsd', 'lrmsd', 'fnat', or 'dockq'. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to None. - classes (list[str] | list[int] | list[float] | None): Define the dataset target classes in classification mode. + classes: Define the dataset target classes in classification mode. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to None. - use_tqdm (bool, optional): Show progress bar. - Defaults to True. - root (str, optional): Root directory where the dataset should be saved. - Defaults to "./". - check_integrity (bool, optional): Whether to check the integrity of the hdf5 files. - Defaults to True. + use_tqdm: Show progress bar. Defaults to True. + root: Root directory where the dataset should be saved. Defaults to "./". + check_integrity: Whether to check the integrity of the hdf5 files. Defaults to True.
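To make the GridDataset arguments above concrete, here is a minimal usage sketch; the HDF5 file names are hypothetical, and the target/task/classes values are drawn from the documented options:

    from deeprank2.dataset import GridDataset

    # Hypothetical files produced earlier by QueryCollection.process().
    dataset_train = GridDataset(
        hdf5_path="train_grids.hdf5",
        target="binary",
        task="classif",
        classes=[0, 1],
    )

    # The validation set inherits `features`, `target`, `target_transform`,
    # `task`, and `classes` from the training set, as described above.
    dataset_val = GridDataset(
        hdf5_path="valid_grids.hdf5",
        train_source=dataset_train,
    )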
""" def __init__( @@ -658,7 +653,7 @@ def get(self, idx: int) -> Data: """Gets one grid item from its unique index. Args: - idx (int): Index of the item, ranging from 0 to len(dataset). + idx: Index of the item, ranging from 0 to len(dataset). Returns: :class:`torch_geometric.data.data.Data`: item with tensors x, y if present, entry_names. @@ -670,8 +665,8 @@ def load_one_grid(self, hdf5_path: str, entry_name: str) -> Data: """Loads one grid. Args: - hdf5_path (str): .HDF5 file name. - entry_name (str): Name of the entry. + hdf5_path: .HDF5 file name. + entry_name: Name of the entry. Returns: :class:`torch_geometric.data.data.Data`: item with tensors x, y if present, entry_names. @@ -716,26 +711,24 @@ class GraphDataset(DeeprankDataset): """Class to load the .HDF5 files data into graphs. Args: - hdf5_path (str | list): Path to .HDF5 file(s). For multiple .HDF5 files, insert the paths in a list. Defaults to None. - subset (list[str] | None, optional): list of keys from .HDF5 file to include. Defaults to None (meaning include all). - train_source (str | class:`GraphDataset` | None, optional): data to inherit information from the training dataset or the pre-trained model. + hdf5_path: Path to .HDF5 file(s). For multiple .HDF5 files, insert the paths in a list. Defaults to None. + subset: list of keys from .HDF5 file to include. Defaults to None (meaning include all). + train_source: data to inherit information from the training dataset or the pre-trained model. If None, the current dataset is considered as the training set. Otherwise, `train_source` needs to be a dataset of the same class or the path of a DeepRank2 pre-trained model. If set, the parameters `node_features`, `edge_features`, `features_transform`, `target`, `target_transform`, `task`, and `classes` will be inherited from `train_source`. If standardization was performed in the training dataset/step, also the attributes `means` and `devs` will be inherited from `train_source`, and they will be used to scale the validation/testing set. Defaults to None. - node_features (list[str] | str | Literal["all"] | None, optional): Consider all pre-computed node features ("all") or - some defined node features (provide a list, example: ["res_type", "polarity", "bsa"]). + node_features: Consider all pre-computed node features ("all") or some defined node features (provide a list, e.g.: ["res_type", "polarity", "bsa"]). The complete list can be found in `deeprank2.domain.nodestorage`. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to "all". - edge_features (list[str] | str | Literal["all"] | None, optional): Consider all pre-computed edge features ("all") or - some defined edge features (provide a list, example: ["dist", "coulomb"]). + edge_features: Consider all pre-computed edge features ("all") or some defined edge features (provide a list, e.g.: ["dist", "coulomb"]). The complete list can be found in `deeprank2.domain.edgestorage`. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to "all". - features_transform (dict | None, optional): Dictionary to indicate the transformations to apply to each feature in the dictionary, being the + features_transform: Dictionary to indicate the transformations to apply to each feature in the dictionary, being the transformations lambda functions and/or standardization. Example: `features_transform = {'bsa': {'transform': lambda t:np.log(t+1),' standardize': True}}` for the feature `bsa`. 
An `all` key can be set in the dictionary to indicate that the same `standardize` and `transform` should be applied to all the features. @@ -743,16 +736,15 @@ class GraphDataset(DeeprankDataset): If both `all` and feature name/s are present, the latter take priority over what is indicated in `all`. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to None. - clustering_method (str | None, optional): "mcl" for Markov cluster algorithm (see https://micans.org/mcl/), - or "louvain" for Louvain method (see https://en.wikipedia.org/wiki/Louvain_method). + clustering_method: "mcl" for Markov cluster algorithm (see https://micans.org/mcl/), or "louvain" for Louvain method (see https://en.wikipedia.org/wiki/Louvain_method). In both options, for each graph, the chosen method first finds communities (clusters) of nodes and generates - a torch tensor whose elements represent the cluster to which the node belongs to. Each tensor is then saved in - the .HDF5 file as a :class:`Dataset` called "depth_0". Then, all cluster members beloging to the same community are - pooled into a single node, and the resulting tensor is used to find communities among the pooled clusters. - The latter tensor is saved into the .HDF5 file as a :class:`Dataset` called "depth_1". Both "depth_0" and "depth_1" - :class:`Datasets` belong to the "cluster" Group. They are saved in the .HDF5 file to make them available to networks - that make use of clustering methods. Defaults to None. - target (str | None, optional): Default options are irmsd, lrmsd, fnat, binary, capri_class, and dockq. + a torch tensor whose elements represent the cluster to which the node belongs. Each tensor is then saved + in the .HDF5 file as a :class:`Dataset` called "depth_0". Then, all cluster members belonging to the same + community are pooled into a single node, and the resulting tensor is used to find communities among the + pooled clusters. The latter tensor is saved into the .HDF5 file as a :class:`Dataset` called "depth_1". Both + "depth_0" and "depth_1" :class:`Datasets` belong to the "cluster" Group. They are saved in the .HDF5 file to + make them available to networks that make use of clustering methods. Defaults to None. + target: Default options are irmsd, lrmsd, fnat, binary, capri_class, and dockq. It can also be a custom-defined target given to the Query class as input (see: `deeprank2.query`); in this case, the task parameter needs to be explicitly specified as well. Only numerical target variables are supported, not categorical. @@ -760,26 +752,25 @@ class GraphDataset(DeeprankDataset): numerical class indices before defining the :class:`GraphDataset` instance. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to None. - target_transform (bool, optional): Apply a log and then a sigmoid transformation to the target (for regression only). + target_transform: Apply a log and then a sigmoid transformation to the target (for regression only). This puts the target value between 0 and 1, and can result in a more uniform target distribution and speed up the optimization. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to False. - target_filter (dict[str, str] | None, optional): Dictionary of type [target: cond] to filter the molecules. + target_filter: Dictionary of type [target: cond] to filter the molecules. Note that you can filter on a different target than the one selected as the dataset target.
Defaults to None. - task (Literal["regress", "classif"] | None, optional): 'regress' for regression or 'classif' for classification. Required if target not in + task: 'regress' for regression or 'classif' for classification. Required if target not in ['irmsd', 'lrmsd', 'fnat', 'binary', 'capri_class', or 'dockq'], otherwise this setting is ignored. Automatically set to 'classif' if the target is 'binary' or 'capri_class'. Automatically set to 'regress' if the target is 'irmsd', 'lrmsd', 'fnat', or 'dockq'. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to None. - classes (list[str] | list[int] | list[float] | None): Define the dataset target classes in classification mode. + classes: Define the dataset target classes in classification mode. Value will be ignored and inherited from `train_source` if `train_source` is assigned. Defaults to None. - use_tqdm (bool, optional): Show progress bar. Defaults to True. - root (str, optional): Root directory where the dataset should be saved. Defaults to "./". - check_integrity (bool, optional): Whether to check the integrity of the hdf5 files. - Defaults to True. + use_tqdm: Show progress bar. Defaults to True. + root: Root directory where the dataset should be saved. Defaults to "./". + check_integrity: Whether to check the integrity of the hdf5 files. Defaults to True. """ def __init__( # noqa: C901 @@ -881,7 +872,7 @@ def get(self, idx: int) -> Data: """Gets one graph item from its unique index. Args: - idx (int): Index of the item, ranging from 0 to len(dataset). + idx: Index of the item, ranging from 0 to len(dataset). Returns: :class:`torch_geometric.data.data.Data`: item with tensors x, y if present, edge_index, edge_attr, pos, entry_names. @@ -893,8 +884,8 @@ def load_one_graph(self, fname: str, entry_name: str) -> Data: # noqa: PLR0915, """Loads one graph. Args: - fname (str): .HDF5 file name. - entry_name (str): Name of the entry. + fname: .HDF5 file name. + entry_name: Name of the entry. Returns: :class:`torch_geometric.data.data.Data`: item with tensors x, y if present, edge_index, edge_attr, pos, entry_names. @@ -1140,10 +1131,10 @@ def save_hdf5_keys( """Save references to keys in src_ids in a new .HDF5 file. Args: - f_src_path (str): The path to the .HDF5 file containing the keys. - src_ids (list[str]): Keys to be saved in the new .HDF5 file. It should be a list containing at least one key. - f_dest_path (str): The path to the new .HDF5 file. - hardcopy (bool, optional): If False, the new file contains only references (external links, see :class:`ExternalLink` class from `h5py`) + f_src_path: The path to the .HDF5 file containing the keys. + src_ids: Keys to be saved in the new .HDF5 file. It should be a list containing at least one key. + f_dest_path: The path to the new .HDF5 file. + hardcopy: If False, the new file contains only references (external links, see :class:`ExternalLink` class from `h5py`) to the original .HDF5 file. If True, the new file contains a copy of the objects specified in src_ids (see :class:`HardLink` from `h5py`). Defaults to False. diff --git a/deeprank2/domain/aminoacidlist.py b/deeprank2/domain/aminoacidlist.py index 36a799184..1597033fc 100644 --- a/deeprank2/domain/aminoacidlist.py +++ b/deeprank2/domain/aminoacidlist.py @@ -386,9 +386,8 @@ def convert_aa_nomenclature(aa: str, output_format: Literal[0, 1, 3] = 0) -> str codes, and full names of amino acids. Args: - aa (str): The amino acid to be converted in any of its formats.
The - length of the string is used to determine which format is used. - output_format (Literal[0, 1, 3], optional): Nomenclature style to return: + aa: The amino acid to be converted in any of its formats. The length of the string is used to determine which format is used. + output_format: Nomenclature style to return: 0 (default) returns the full name, 1 returns the 1-letter code, 3 returns the 3-letter code. @@ -397,7 +396,7 @@ def convert_aa_nomenclature(aa: str, output_format: Literal[0, 1, 3] = 0) -> str ValueError: If aa is not recognized or an invalid output format was given Returns: - str: amino acid in the selected nomenclature system. + Amino acid identifier in the selected nomenclature system. """ try: if len(aa) == 1: diff --git a/deeprank2/features/contact.py b/deeprank2/features/contact.py index 47085da38..addb32908 100644 --- a/deeprank2/features/contact.py +++ b/deeprank2/features/contact.py @@ -32,8 +32,8 @@ def _get_nonbonded_energy( However, the potential tends to 0 at large distance. Args: - atoms (list[Atom]): list of all atoms in the structure - distances (NDArray[np.float64]): matrix of pairwise distances between all atoms in the structure + atoms: list of all atoms in the structure + distances: matrix of pairwise distances between all atoms in the structure in the format that is the output of scipy.spatial's distance_matrix (i.e. a diagonally symmetric matrix) Returns: diff --git a/deeprank2/features/irc.py b/deeprank2/features/irc.py index f1d0f4c07..a43246727 100644 --- a/deeprank2/features/irc.py +++ b/deeprank2/features/irc.py @@ -18,7 +18,7 @@ def _id_from_residue(residue: tuple[str, int, str]) -> str: """Create an id from pdb2sql rendered residues that is similar to the id of residue nodes. Args: - residue (tuple): Input residue as rendered by pdb2sql: ( str(), int(), str( ) + residue: Input residue as rendered by pdb2sql, a tuple of the form (str, int, str). For example: ('A', 27, 'GLU'). Returns: @@ -44,9 +44,9 @@ def get_IRCs(pdb_path: str, chains: list[str], cutoff: float = 5.5) -> dict[str, """Get all close contact residues from the opposite chain. Args: - pdb_path (str): Path to pdb file to read molecular information from. - chains (Sequence[str]): list (or list-like object) containing strings of the chains to be considered. - cutoff (float, optional): Cutoff distance (in Ångström) to be considered a close contact. Defaults to 10. + pdb_path: Path to pdb file to read molecular information from. + chains: list (or list-like object) containing strings of the chains to be considered. + cutoff: Cutoff distance (in Ångström) to be considered a close contact. Defaults to 5.5. Returns: Dict[str, _ContactDensity]: diff --git a/deeprank2/features/secondary_structure.py b/deeprank2/features/secondary_structure.py index ae080272f..1a606d83d 100644 --- a/deeprank2/features/secondary_structure.py +++ b/deeprank2/features/secondary_structure.py @@ -91,7 +91,7 @@ def _get_secstructure(pdb_path: str) -> dict: """Process the DSSP output to extract secondary structure information. Args: - pdb_path (str): The file path of the PDB file to be processed. + pdb_path: The file path of the PDB file to be processed. Returns: dict: A dictionary containing secondary structure information for each chain and residue.
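As a quick illustration of convert_aa_nomenclature's length-based dispatch documented above; the exact spelling of the returned full names depends on the library's amino acid table, so the commented outputs are assumptions:

    from deeprank2.domain.aminoacidlist import convert_aa_nomenclature

    # The input format is inferred from the string length;
    # output_format selects the style of the result.
    convert_aa_nomenclature("R")                          # full name, e.g. 'Arginine'
    convert_aa_nomenclature("ARG", output_format=1)       # 1-letter code: 'R'
    convert_aa_nomenclature("Arginine", output_format=3)  # 3-letter code: 'ARG'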
diff --git a/deeprank2/molstruct/aminoacid.py b/deeprank2/molstruct/aminoacid.py index 2a9f33c4c..3e70d3c65 100644 --- a/deeprank2/molstruct/aminoacid.py +++ b/deeprank2/molstruct/aminoacid.py @@ -24,17 +24,17 @@ class AminoAcid: """An amino acid represents the type of `Residue` in a `PDBStructure`. Args: - name (str): Full name of the amino acid. - three_letter_code (str): Three-letter code of the amino acid (as in PDB). - one_letter_code (str): One-letter of the amino acid (as in fasta). - charge (int): Charge of the amino acid. - polarity (:class:`Polarity`): The polarity of the amino acid. - size (int): The number of non-hydrogen atoms in the side chain. - mass (float): Average residue mass (i.e. mass of amino acid - H20) in Daltons. - pI (float): Isolectric point; pH at which the molecule has no net electric charge. - hydrogen_bond_donors (int): Number of hydrogen bond donors. - hydrogen_bond_acceptors (int): Number of hydrogen bond acceptors. - index (int): The rank of the amino acid, used for computing one-hot encoding. + name: Full name of the amino acid. + three_letter_code: Three-letter code of the amino acid (as in PDB). + one_letter_code: One-letter code of the amino acid (as in fasta). + charge: Charge of the amino acid. + polarity: The polarity of the amino acid. + size: The number of non-hydrogen atoms in the side chain. + mass: Average residue mass (i.e. mass of amino acid - H2O) in Daltons. + pI: Isoelectric point; pH at which the molecule has no net electric charge. + hydrogen_bond_donors: Number of hydrogen bond donors. + hydrogen_bond_acceptors: Number of hydrogen bond acceptors. + index: The rank of the amino acid, used for computing one-hot encoding. """ def __init__( diff --git a/deeprank2/molstruct/atom.py b/deeprank2/molstruct/atom.py index d26f0cc13..e100a4226 100644 --- a/deeprank2/molstruct/atom.py +++ b/deeprank2/molstruct/atom.py @@ -33,11 +33,11 @@ class Atom: """One atom in a PDBStructure. Args: - residue (:class:`Residue`): The residue that this atom belongs to. - name (str): Pdb atom name. - element (:class:`AtomicElement`): The chemical element. - position (np.array): Pdb position xyz of this atom. - occupancy (float): Pdb occupancy value. + residue: The residue that this atom belongs to. + name: Pdb atom name. + element: The chemical element. + position: Pdb position xyz of this atom. + occupancy: Pdb occupancy value. This represents the proportion of structures where the atom is detected at a given position. Sometimes a single atom can be detected at multiple positions. In that case separate structures exist where sum(occupancy) == 1. Note that only the highest occupancy atom is used by deeprank2 (see tools.pdb._add_atom_to_residue). diff --git a/deeprank2/molstruct/pair.py b/deeprank2/molstruct/pair.py index 463db4200..809b3b7af 100644 --- a/deeprank2/molstruct/pair.py +++ b/deeprank2/molstruct/pair.py @@ -11,8 +11,8 @@ class Pair: """A hashable, comparable object for any set of two inputs where order doesn't matter. Args: - item1 (Any object): The pair's first object, must be convertable to string. - item2 (Any object): The pair's second object, must be convertable to string. + item1: The pair's first object, must be convertible to string. + item2: The pair's second object, must be convertible to string.
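The unordered-pair contract documented for Pair implies symmetric equality and hashing; a small sketch of the expected behaviour:

    from deeprank2.molstruct.pair import Pair

    pair1 = Pair("A", "B")
    pair2 = Pair("B", "A")

    # Order doesn't matter: both orderings compare and hash as equal,
    # so the two pairs collapse to a single set element.
    assert pair1 == pair2
    assert len({pair1, pair2}) == 1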
""" def __init__(self, item1: Any, item2: Any): # noqa: ANN401 diff --git a/deeprank2/molstruct/residue.py b/deeprank2/molstruct/residue.py index 17553b3fb..a2553a70a 100644 --- a/deeprank2/molstruct/residue.py +++ b/deeprank2/molstruct/residue.py @@ -30,11 +30,10 @@ def __init__( Each `Residue` is of a certain `AminoAcid` type and consists of multiple `Atom`s. Args: - chain (:class:`Chain`): The chain that this residue belongs to. - number (int): the residue number - amino_acid (:class:`AminoAcid`, optional): The residue's amino acid (if it's part of a protein). - Defaults to None. - insertion_code (str, optional): The pdb insertion code, if any. Defaults to None. + chain: The chain that this residue belongs to. + number: the residue number + amino_acid: The residue's amino acid (if it's part of a protein). Defaults to None. + insertion_code: The pdb insertion code, if any. Defaults to None. """ self._chain = chain self._number = number @@ -122,8 +121,8 @@ class SingleResidueVariant: """A single residue mutation of a PDBStrcture. Args: - residue (Residue): the `Residue` object from the PDBStructure that is mutated. - variant_amino_acid (AminoAcid): the amino acid that the `Residue` is mutated into. + residue: the `Residue` object from the PDBStructure that is mutated. + variant_amino_acid: the amino acid that the `Residue` is mutated into. """ def __init__(self, residue: Residue, variant_amino_acid: AminoAcid): diff --git a/deeprank2/molstruct/structure.py b/deeprank2/molstruct/structure.py index cbbcc0a40..1eb3e9b71 100644 --- a/deeprank2/molstruct/structure.py +++ b/deeprank2/molstruct/structure.py @@ -20,8 +20,7 @@ def __init__(self, id_: str | None = None): One PDBStructure consists of a number of `Residue`s, each of which is of a particular `AminoAcid` type and in turn consists of a number of `Atom`s. Args: - id_ (str, optional): An unique identifier for this structure, can be the pdb accession code. - Defaults to None. + id_: An unique identifier for this structure, can be the pdb accession code. Defaults to None. """ self._id = id_ self._chains = {} @@ -76,8 +75,8 @@ def __init__(self, model: PDBStructure, id_: str | None): """One chain of a PDBStructure. Args: - model (:class:`PDBStructure`): The model that this chain is part of. - id_ (str): The pdb identifier of this chain. + model: The model that this chain is part of. + id_: The pdb identifier of this chain. """ self._model = model self._id = id_ diff --git a/deeprank2/neuralnets/gnn/foutnet.py b/deeprank2/neuralnets/gnn/foutnet.py index 54ad2f912..8e4c4fadb 100644 --- a/deeprank2/neuralnets/gnn/foutnet.py +++ b/deeprank2/neuralnets/gnn/foutnet.py @@ -18,10 +18,9 @@ class FoutLayer(nn.Module): by Alex Fout et al. NIPS 2018. Args: - in_channels (int): Size of each input sample. - out_channels (int): Size of each output sample. - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. Defaults to True. + in_channels: Size of each input sample. + out_channels: Size of each output sample. + bias: If set to :obj:`False`, the layer will not learn an additive bias. Defaults to True. """ def __init__(self, in_channels: int, out_channels: int, bias: bool = True): diff --git a/deeprank2/neuralnets/gnn/naive_gnn.py b/deeprank2/neuralnets/gnn/naive_gnn.py index 9b5cc0432..8f936045e 100644 --- a/deeprank2/neuralnets/gnn/naive_gnn.py +++ b/deeprank2/neuralnets/gnn/naive_gnn.py @@ -36,9 +36,9 @@ def __init__(self, input_shape: int, output_shape: int, input_shape_edge: int): """NaiveNetwork. 
Args: - input_shape (int): Number of node input features. - output_shape (int): Number of output value per graph. - input_shape_edge (int): Number of edge input features. + input_shape: Number of node input features. + output_shape: Number of output values per graph. + input_shape_edge: Number of edge input features. """ super().__init__() self._external1 = NaiveConvolutionalLayer(input_shape, input_shape_edge) diff --git a/deeprank2/neuralnets/gnn/sgat.py b/deeprank2/neuralnets/gnn/sgat.py index 1546ef0e6..10ca17309 100644 --- a/deeprank2/neuralnets/gnn/sgat.py +++ b/deeprank2/neuralnets/gnn/sgat.py @@ -19,11 +19,11 @@ class SGraphAttentionLayer(nn.Module): Ni is the number of neighbors of node i \\Sum_j runs over the neighbors of node i a_ij is the edge attribute between node i and j + Args: - in_channels (int): Size of each input sample. - out_channels (int): Size of each output sample. - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. Defaults to True. + in_channels: Size of each input sample. + out_channels: Size of each output sample. + bias: If set to :obj:`False`, the layer will not learn an additive bias. Defaults to True. """ # noqa: D301 def __init__( diff --git a/deeprank2/query.py b/deeprank2/query.py index 50b3e5549..89f171a4d 100644 --- a/deeprank2/query.py +++ b/deeprank2/query.py @@ -108,10 +108,10 @@ def _check_pssm(self, verbosity: Literal[0, 1, 2] = 0) -> None: # noqa: C901 """Checks whether information stored in pssm file matches the corresponding pdb file. Args: - pdb_path (str): Path to the PDB file. - pssm_paths (dict[str, str]): The paths to the PSSM files, per chain identifier. - suppress (bool): Suppress errors and throw warnings instead. - verbosity (int): Level of verbosity of error/warning. Defaults to 0. + pdb_path: Path to the PDB file. + pssm_paths: The paths to the PSSM files, per chain identifier. + suppress: Suppress errors and throw warnings instead. + verbosity: Level of verbosity of error/warning. Defaults to 0. 0 (low): Only state file name where error occurred; 1 (medium): Also state number of incorrect and missing residues; 2 (high): Also list the incorrect residues @@ -180,8 +180,7 @@ def build( """Builds the graph from the .PDB structure. Args: - feature_modules (list[str]): the feature modules used to build the graph. - These must be filenames existing inside `deeprank2.features` subpackage. + feature_modules: the feature modules used to build the graph. These must be filenames existing inside `deeprank2.features` subpackage. Returns: :class:`Graph`: The resulting :class:`Graph` object with all the features and targets. @@ -213,23 +212,20 @@ class SingleResidueVariantQuery(Query): """A query that builds a single residue variant graph. Args: - pdb_path (str): the path to the PDB file to query. - resolution (Literal['residue', 'atom']): sets whether each node is a residue or atom. - chain_ids (list[str] | str): the chain identifier of the variant residue (generally a single capital letter). + pdb_path: the path to the PDB file to query. + resolution: sets whether each node is a residue or atom. + chain_ids: the chain identifier of the variant residue (generally a single capital letter). Note that this does not limit the structure to residues from this chain. - pssm_paths (dict[str, str]): the name of the chain(s) (key) and path to the pssm file(s) (value). - targets (dict[str, float]) = Name(s) (key) and target value(s) (value) associated with this query.
- influence_radius (float | None): all residues within this radius from the variant residue - will be included in the graph, irrespective of the chain they are on. - max_edge_length (float | None): the maximum distance between two nodes to generate an edge connecting them. - suppress_pssm_errors (bool): Whether or not to suppress the error raised if the .pssm files do not - match the .pdb files. If True, a warning is returned instead. - variant_residue_number (int): the residue number of the variant residue. - insertion_code (str | None): the insertion code of the variant residue. - wildtype_amino_acid (AminoAcid): the amino acid at above position in the wildtype protein. - variant_amino_acid (AminoAcid): the amino acid at above position in the variant protein. - radius (float): all Residues within this radius (in Å) from the variant residue will - be included in the graph. + pssm_paths: the name of the chain(s) (key) and path to the pssm file(s) (value). + targets: Name(s) (key) and target value(s) (value) associated with this query. + influence_radius: all residues within this radius from the variant residue will be included in the graph, irrespective of the chain they are on. + max_edge_length: the maximum distance between two nodes to generate an edge connecting them. + suppress_pssm_errors: Whether to suppress the error raised if the .pssm files do not match the .pdb files. If True, a warning is issued instead. + variant_residue_number: the residue number of the variant residue. + insertion_code: the insertion code of the variant residue. + wildtype_amino_acid: the amino acid at the above position in the wildtype protein. + variant_amino_acid: the amino acid at the above position in the variant protein. + radius: all Residues within this radius (in Å) from the variant residue will be included in the graph. """ variant_residue_number: int @@ -315,17 +311,15 @@ class ProteinProteinInterfaceQuery(Query): """A query that builds a protein-protein interface graph. Args: - pdb_path (str): the path to the PDB file to query. - resolution (Literal['residue', 'atom']): sets whether each node is a residue or atom. - chain_ids (list[str] | str): the chain identifiers of the interacting interfaces (generally a single capital letter each). + pdb_path: the path to the PDB file to query. + resolution: sets whether each node is a residue or atom. + chain_ids: the chain identifiers of the interacting interfaces (generally a single capital letter each). Note that this does not limit the structure to residues from these chains. - pssm_paths (dict[str, str]): the name of the chain(s) (key) and path to the pssm file(s) (value). - targets (dict[str, float]) = Name(s) (key) and target value(s) (value) associated with this query. - influence_radius (float | None): all residues within this radius from the interacting interface - will be included in the graph, irrespective of the chain they are on. - max_edge_length (float | None): the maximum distance between two nodes to generate an edge connecting them. - suppress_pssm_errors (bool): Whether or not to suppress the error raised if the .pssm files do not - match the .pdb files. If True, a warning is returned instead. + pssm_paths: the name of the chain(s) (key) and path to the pssm file(s) (value). + targets: Name(s) (key) and target value(s) (value) associated with this query. + influence_radius: all residues within this radius from the interacting interface will be included in the graph, irrespective of the chain they are on.
+ max_edge_length: the maximum distance between two nodes to generate an edge connecting them. + suppress_pssm_errors: Whether to suppress the error raised if the .pssm files do not match the .pdb files. If True, a warning is issued instead. """ def __post_init__(self): @@ -417,9 +411,9 @@ def add( """Add a new query to the collection. Args: - query(:class:`Query`): The `Query` to add to the collection. - verbose(bool): For logging query IDs added. Defaults to `False`. - warn_duplicate (bool): Log a warning before renaming if a duplicate query is identified. Defaults to `True`. + query: The `Query` to add to the collection. + verbose: For logging query IDs added. Defaults to `False`. + warn_duplicate: Log a warning before renaming if a duplicate query is identified. Defaults to `True`. """ query_id = query.get_query_id() if verbose: @@ -440,7 +434,7 @@ def export_dict(self, dataset_path: str) -> None: """Exports the collection of all queries to a dictionary file. Args: - dataset_path (str): The path where to save the list of queries. + dataset_path: The path where to save the list of queries. """ with open(dataset_path, "wb") as pkl_file: pickle.dump(self, pkl_file) @@ -503,27 +497,22 @@ def process( """Render queries into graphs (and optionally grids). Args: - prefix (str | None, optional): Prefix for naming the output files. Defaults to "processed-queries". - feature_modules (list[ModuleType] | list[str] | Literal ['all'], optional): Feature module or list of feature modules - used to generate features (given as string or as an imported module). + prefix: Prefix for naming the output files. Defaults to "processed-queries". + feature_modules: Feature module or list of feature modules used to generate features (given as string or as an imported module). Each module must implement the :py:func:`add_features` function, and all feature modules must exist inside `deeprank2.features` folder. If set to 'all', all available modules in `deeprank2.features` are used to generate the features. Defaults to the two primary feature modules `deeprank2.features.components` and `deeprank2.features.contact`. - cpu_count (int | None, optional): The number of processes to be run in parallel (i.e. number of CPUs used), capped by - the number of CPUs available to the system. + cpu_count: The number of processes to be run in parallel (i.e. number of CPUs used), capped by the number of CPUs available to the system. Defaults to None, which takes all available cpu cores. - combine_output (bool, optional): + combine_output: If `True` (default): all processes are combined into a single HDF5 file. If `False`: separate HDF5 files are created for each process (i.e. for each CPU used). - grid_settings (:class:`GridSettings` | None, optional): If valid together with `grid_map_method`, the grid data will be stored as well. - Defaults to None. - grid_map_method (:class:`MapMethod` | None, optional): If valid together with `grid_settings`, the grid data will be stored as well. - Defaults to None. - grid_augmentation_count (int, optional): Number of grid data augmentations (must be >= 0). - Defaults to 0. + grid_settings: If valid together with `grid_map_method`, the grid data will be stored as well. Defaults to None. + grid_map_method: If valid together with `grid_settings`, the grid data will be stored as well. Defaults to None. + grid_augmentation_count: Number of grid data augmentations (must be >= 0). Defaults to 0. Returns: - list[str]: The list of paths of the generated HDF5 files.
+ The list of paths of the generated HDF5 files. """ # set defaults feature_modules = feature_modules or [components, contact] diff --git a/deeprank2/tools/target.py b/deeprank2/tools/target.py index 81f43ff29..35e5e05df 100644 --- a/deeprank2/tools/target.py +++ b/deeprank2/tools/target.py @@ -21,12 +21,10 @@ def add_target( # noqa: C901 """Add a target to all the graphs in hdf5 files. Args: - graph_path (str | list(str)): Either a directory containing all the hdf5 files, - or a single hdf5 filename - or a list of hdf5 filenames. - target_name (str): The name of the new target. - target_list (str): Name of the file containing the data. - sep (str, optional): Separator in target list. Defaults to " ". + graph_path: Either a directory containing all the hdf5 files, a single hdf5 filename, or a list of hdf5 filenames. + target_name: The name of the new target. + target_list: Name of the file containing the data. + sep: Separator in target list. Defaults to " " (single space). Notes: The input target list should respect the following format: @@ -91,10 +89,10 @@ def compute_ppi_scores( 4 - incorrect). See https://deeprank2.readthedocs.io/en/latest/docking.html for more details about the scores. Args: - pdb_path (str): Path to the decoy. - reference_pdb_path (str): Path to the reference (native) structure. + pdb_path: Path to the decoy. + reference_pdb_path: Path to the reference (native) structure. - Returns: a dictionary containing values for lrmsd, irmsd, fnat, dockq, binary, capri_class. + Returns: dict containing values for lrmsd, irmsd, fnat, dockq, binary, capri_class. """ ref_name = os.path.splitext(os.path.basename(reference_pdb_path))[0] sim = StructureSimilarity( diff --git a/deeprank2/trainer.py b/deeprank2/trainer.py index 2f5f60008..8f5b5d647 100644 --- a/deeprank2/trainer.py +++ b/deeprank2/trainer.py @@ -32,33 +32,31 @@ class Trainer: """Class from which the network is trained, evaluated and tested. Args: - neuralnet (child class of :class:`torch.nn.Module`, optional): Neural network class (ex. :class:`GINet`, :class:`Foutnet` etc.). + neuralnet: Neural network class (ex. :class:`GINet`, :class:`Foutnet` etc.). It should subclass :class:`torch.nn.Module`, and it shouldn't be specific to regression or classification in terms of output shape (:class:`Trainer` class takes care of formatting the output shape according to the task). More specifically, in classification task cases, softmax shouldn't be used as the last activation function. Defaults to None. - dataset_train (:class:`GraphDataset` | :class:`GridDataset` | None, optional): Training set used during training. - Can't be None if pretrained_model is also None. Defaults to None. - dataset_val (:class:`GraphDataset` | :class:`GridDataset` | None, optional): Evaluation set used during training. - If None, training set will be split randomly into training set and validation set during training, using val_size parameter. - Defaults to None. - dataset_test (:class:`GraphDataset` | :class:`GridDataset` | None, optional): Independent evaluation set. Defaults to None. - val_size (float | int | None, optional): Fraction of dataset (if float) or number of datapoints (if int) to use for validation. + dataset_train: Training set used during training. Can't be None if pretrained_model is also None. Defaults to None. + dataset_val: Evaluation set used during training. If None, training set will be split randomly into training set and validation set during training, using val_size parameter. Defaults to None.
+ dataset_test: Independent evaluation set. Defaults to None. + val_size: Fraction of dataset (if float) or number of datapoints (if int) to use for validation. Only used if dataset_val is not specified. Can be set to 0 if no validation set is needed. Defaults to None (in _divide_dataset function). - test_size (float | int | None, optional): Fraction of dataset (if float) or number of datapoints (if int) to use for test dataset. + test_size: Fraction of dataset (if float) or number of datapoints (if int) to use for test dataset. Only used if dataset_test is not specified. Can be set to 0 if no test set is needed. Defaults to None. - class_weights (bool, optional): Assign class weights based on the dataset content. Defaults to False. - pretrained_model (str | None, optional): Path to pre-trained model. Defaults to None. - cuda (bool, optional): Whether to use CUDA. Defaults to False. - ngpu (int, optional): Number of GPU to be used. Defaults to 0. - output_exporters (list[OutputExporter] | None, optional): The output exporters to use for saving/exploring/plotting predictions/targets/losses - over the epochs. If None, defaults to :class:`HDF5OutputExporter`, which saves all the results in an .HDF5 file stored in ./output directory. + class_weights: Assign class weights based on the dataset content. Defaults to False. + pretrained_model: Path to pre-trained model. Defaults to None. + cuda: Whether to use CUDA. Defaults to False. + ngpu: Number of GPUs to be used. Defaults to 0. + output_exporters: The output exporters to use for saving/exploring/plotting predictions/targets/losses over the epochs. If None, defaults to + :class:`HDF5OutputExporter`, which saves all the results in an .HDF5 file stored in ./output directory. Defaults to None. """ def __init__( # noqa: PLR0915, C901 self, - neuralnet: nn.Module = None, + neuralnet: nn.Module | None = None, dataset_train: GraphDataset | GridDataset | None = None, dataset_val: GraphDataset | GridDataset | None = None, dataset_test: GraphDataset | GridDataset | None = None, @@ -353,7 +351,7 @@ def _put_model_to_device(self, dataset: GraphDataset | GridDataset) -> None: """Puts the model on the available device. Args: - dataset (:class:`GraphDataset` | :class:`GridDataset`): GraphDataset object. + dataset: GraphDataset or GridDataset object. Raises: ValueError: Incorrect output shape @@ -409,13 +407,10 @@ def configure_optimizers( """Configure optimizer and its main parameters. Args: - optimizer (:class:`torch.optim`, optional): PyTorch optimizer object. If none, defaults to :class:`torch.optim.Adam`. - Defaults to None. - - lr (float, optional): Learning rate. Defaults to 0.001. - - weight_decay (float, optional): Weight decay (L2 penalty). - Weight decay is fundamental for GNNs, otherwise, parameters can become too big and the gradient may explode. Defaults to 1e-05. + optimizer: PyTorch optimizer object. If None, defaults to :class:`torch.optim.Adam`. Defaults to None. + lr: Learning rate. Defaults to 0.001. + weight_decay: Weight decay (L2 penalty). This is fundamental for GNNs, otherwise, parameters can become too big and the gradient may explode. + Defaults to 1e-05. """ self.lr = lr self.weight_decay = weight_decay @@ -438,16 +433,11 @@ def set_lossfunction( # noqa: C901 """Set the loss function. Args: - lossfunction (optional): Make sure to use a loss function that is appropriate for - your task (classification or regression).
All loss functions - from torch.nn.modules.loss are listed as belonging to either - category (or to neither) and an exception is raised if an invalid - loss function is chosen for the set task. - Default for regression: MSELoss. - Default for classification: CrossEntropyLoss. - Defaults to None. - override_invalid (bool, optional): If True, loss functions that are considered - invalid for the task do no longer automaticallt raise an exception. + lossfunction: Make sure to use a loss function that is appropriate for your task (classification or + regression). All loss functions from torch.nn.modules.loss are listed as belonging to either category + (or to neither) and an exception is raised if an invalid loss function is chosen for the set task. + Default for regression: MSELoss. Default for classification: CrossEntropyLoss. + override_invalid: If True, loss functions that are considered invalid for the task no longer automatically raise an exception. Defaults to False. """ default_regression_loss = nn.MSELoss @@ -526,27 +516,18 @@ def train( # noqa: PLR0915, C901 """Performs the training of the model. Args: - nepoch (int, optional): Maximum number of epochs to run. - Defaults to 1. - batch_size (int, optional): Sets the size of the batch. - Defaults to 32. - shuffle (bool, optional): Whether to shuffle the training dataloaders data (train set and validation set). - Default: True. - earlystop_patience (int | None, optional): Training ends if the model has run for this number of epochs without improving the validation loss. - Defaults to None. - earlystop_maxgap (float | None, optional): Training ends if the difference between validation and training loss exceeds this value. - Defaults to None. - min_epoch (float, optional): Minimum epoch to be reached before looking at maxgap. - Defaults to 10. - validate (bool, optional): Perform validation on independent data set (requires a validation data set). - Defaults to False. - num_workers (int, optional): How many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. - Defaults to 0. - best_model (bool, optional): - If True, the best model (in terms of validation loss) is selected for later testing or saving. - If False, the last model tried is selected. - Defaults to True. - filename (str, optional): Name of the file where to save the selected model. If not None, the model is saved to `filename`. + nepoch: Maximum number of epochs to run. Defaults to 1. + batch_size: Sets the size of the batch. Defaults to 32. + shuffle: Whether to shuffle the training dataloader data (train set and validation set). Defaults to True. + earlystop_patience: Training ends if the model has run for this number of epochs without improving the validation loss. Defaults to None. + earlystop_maxgap: Training ends if the difference between validation and training loss exceeds this value. Defaults to None. + min_epoch: Minimum epoch to be reached before looking at maxgap. Defaults to 10. + validate: Perform validation on independent data set (requires a validation data set). Defaults to False. + num_workers: How many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. Defaults to 0. + best_model: + If True (default), the best model (in terms of validation loss) is selected for later testing or saving. + If False, the last model tried is selected. + filename: Name of the file in which to save the selected model. If not None, the model is saved to `filename`. If None, the model is not saved.
Defaults to 'model.pth.tar'. """ if self.dataset_train is None: @@ -686,8 +667,8 @@ def _epoch(self, epoch_number: int, pass_name: str) -> float | None: """Runs a single epoch. Args: - epoch_number (int): the current epoch number - pass_name (str): 'training', 'validation' or 'testing' + epoch_number: the current epoch number + pass_name: 'training', 'validation' or 'testing' Returns: Running loss. @@ -751,9 +732,9 @@ def _eval( """Evaluates the model. Args: - loader (Dataloader): Data to evaluate on. - epoch_number (int): Number for this epoch, used for storing the outputs. - pass_name (str): 'training', 'validation' or 'testing' + loader: Data to evaluate on. + epoch_number: Number for this epoch, used for storing the outputs. + pass_name: 'training', 'validation' or 'testing' Returns: Running loss. @@ -817,9 +798,9 @@ def _log_epoch_data(stage: str, loss: float, time: float) -> None: """Prints the data of each epoch. Args: - stage (str): Train or valid. - loss (float): Loss during that epoch. - time (float): Timing of the epoch. + stage: Train or valid. + loss: Loss during that epoch. + time: Timing of the epoch. """ _log.info(f"{stage} loss {loss} | time {time}") @@ -861,10 +842,8 @@ def test( """Performs the testing of the model. Args: - batch_size (int, optional): Sets the size of the batch. - Defaults to 32. - num_workers (int, optional): How many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. - Defaults to 0. + batch_size: Sets the size of the batch. Defaults to 32. + num_workers: How many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. Defaults to 0. """ if (not self.pretrained_model) and (not self.model_load_state_dict): msg = "No pretrained model provided and no training performed. Please provide a pretrained model or train the model before testing." @@ -929,11 +908,7 @@ def _load_params(self) -> None: self.ngpu = state["ngpu"] def _save_model(self) -> dict[str, Any]: - """Saves the model to a file. - - Args: - filename (str, optional): Name of the file. Defaults to None. - """ + """Saves the model to a file.""" features_transform_to_save = copy.deepcopy(self.features_transform) # prepare transform dictionary for being saved if features_transform_to_save: diff --git a/deeprank2/utils/buildgraph.py b/deeprank2/utils/buildgraph.py index a6e9b3c63..16c6f4e76 100644 --- a/deeprank2/utils/buildgraph.py +++ b/deeprank2/utils/buildgraph.py @@ -38,8 +38,8 @@ def _add_atom_data_to_structure( This function should be called for one atom at a time. Args: - structure (:class:`PDBStructure`): The structure to which this atom should be added to. - pdb_obj (pdb2sql_object): The `pdb2sql` object to retrieve the data from. + structure: The structure to which this atom should be added. + pdb_obj: The `pdb2sql` object to retrieve the data from. kwargs: as required by the get function for the `pdb2sql` object. """ pdb2sql_columns = "x,y,z,name,altLoc,occ,element,chainID,resSeq,resName,iCode" @@ -81,8 +81,8 @@ def get_structure(pdb_obj: pdb2sql_object, id_: str) -> PDBStructure: """Builds a structure from rows in a pdb file. Args: - pdb_obj (pdb2sql object): The pdb structure that we're investigating. - id_ (str): Unique id for the pdb structure. + pdb_obj: The pdb structure that we're investigating. + id_: Unique id for the pdb structure. Returns: PDBStructure: The structure object, giving access to chains, residues, atoms.
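A minimal sketch of building a PDBStructure with get_structure; the PDB path and id are illustrative, and the _close() cleanup mirrors the pdb2sql handling used in the tests further below:

    from pdb2sql import pdb2sql
    from deeprank2.utils.buildgraph import get_structure

    pdb = pdb2sql("1ATN.pdb")  # illustrative path
    try:
        structure = get_structure(pdb, "1ATN")
    finally:
        pdb._close()

    # The returned object gives access to chains, residues, and atoms.
    chains = structure.chains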
@@ -126,14 +126,14 @@ def get_residue_contact_pairs( """Find all residue pairs that may influence each other. Args: - pdb_path (str): The path of the pdb file, that the structure was built from. - structure (:class:`PDBStructure`): From which to take the residues. - chain_id1 (str): First protein chain identifier. - chain_id2 (str): Second protein chain identifier. - influence_radius (float): Maximum distance between residues to consider them as interacting. + pdb_path: The path of the pdb file that the structure was built from. + structure: From which to take the residues. + chain_id1: First protein chain identifier. + chain_id2: Second protein chain identifier. + influence_radius: Maximum distance between residues to consider them as interacting. Returns: - list[Pair]: The pairs of contacting residues. + list of Pair objects of contacting residues. """ # Find out which residues are pairs interface = pdb2sql_interface(pdb_path) @@ -181,12 +181,12 @@ def get_surrounding_residues( """Get the residues that lie within a radius around a residue. Args: - structure (:class:`Chain` | :class:`PDBStructure`): The structure to take residues from. - residue (:class:`Residue`): The residue in the structure. - radius (float): Max distance in Ångström between atoms of the residue and the other residues. + structure: The structure to take residues from. + residue: The residue in the structure. + radius: Max distance in Ångström between atoms of the residue and the other residues. Returns: - list[:class:`Residue`]: The surrounding residues. + list of surrounding Residue objects. """ structure_atoms = structure.get_atoms() structure_atom_positions = [atom.position for atom in structure_atoms] diff --git a/deeprank2/utils/community_pooling.py b/deeprank2/utils/community_pooling.py index 553ab4b4f..b22653521 100644 --- a/deeprank2/utils/community_pooling.py +++ b/deeprank2/utils/community_pooling.py @@ -39,9 +39,9 @@ def community_detection_per_batch( Args: edge_index (Tensor): Edge index. batch (?): ? - num_nodes (int): Number of nodes. + num_nodes: Number of nodes. edge_attr (Tensor, optional): Edge attributes. Defaults to None. - method (str, optional): Method. Defaults to "mcl". + method: Clustering method ("mcl" or "louvain"). Defaults to "mcl". Raises: ValueError: Requires a valid clustering method ('mcl' or 'louvain') @@ -103,9 +103,9 @@ def community_detection( Args: edge_index (Tensor): Edge index. - num_nodes (int): Number of nodes. + num_nodes: Number of nodes. edge_attr (Tensor, optional): Edge attributes. Defaults to None. - method (str, optional): Method. Defaults to "mcl". + method: Clustering method ("mcl" or "louvain"). Defaults to "mcl". Raises: ValueError: Requires a valid clustering method ('mcl' or 'louvain') diff --git a/deeprank2/utils/earlystopping.py b/deeprank2/utils/earlystopping.py index 7b08c6605..04f5be8b0 100644 --- a/deeprank2/utils/earlystopping.py +++ b/deeprank2/utils/earlystopping.py @@ -7,18 +7,12 @@ class EarlyStopping: Triggered if validation loss doesn't improve after a given patience or if a maximum gap between validation and training loss is reached. Args: - patience (int, optional): How long to wait after last time validation loss improved. - Defaults to 10. - delta (float, optional): Minimum change required to reset the early stopping counter. - Defaults to 0. - maxgap (float, optional): Maximum difference between between training and validation loss. - Defaults to None. - min_epoch (float, optional): Minimum epoch to be reached before looking at maxgap. - Defaults to 10.
- verbose (bool, optional): If True, prints a message for each validation loss improvement. - Defaults to True. - trace_func (Callable, optional): Function used for recording EarlyStopping status. - Defaults to print. + patience: How long to wait after the last time validation loss improved. Defaults to 10. + delta: Minimum change required to reset the early stopping counter. Defaults to 0. + maxgap: Maximum difference between training and validation loss. Defaults to None. + min_epoch: Minimum epoch to be reached before looking at maxgap. Defaults to 10. + verbose: If True, prints a message for each validation loss improvement. Defaults to True. + trace_func: Function used for recording EarlyStopping status. Defaults to print. """ def __init__( diff --git a/deeprank2/utils/exporters.py b/deeprank2/utils/exporters.py index 04756e145..bdefec733 100644 --- a/deeprank2/utils/exporters.py +++ b/deeprank2/utils/exporters.py @@ -183,8 +183,8 @@ class ScatterPlotExporter(OutputExporter): On the Y-axis: output values Args: - directory_path (str): Where to store the plots. - epoch_interval (int, optional): How often to make a plot, 5 means: every 5 epochs. Defaults to 1. + directory_path: Where to store the plots. + epoch_interval: How often to make a plot, 5 means: every 5 epochs. Defaults to 1. """ def __init__(self, directory_path: str, epoch_interval: int = 1): diff --git a/deeprank2/utils/graph.py b/deeprank2/utils/graph.py index e131421bb..89540f574 100644 --- a/deeprank2/utils/graph.py +++ b/deeprank2/utils/graph.py @@ -326,10 +326,9 @@ def build_graph( """Builds a graph. Args: - nodes (list[Atom] | list[Residue]): List of `Atom`s or `Residue`s to include in graph. - All nodes must be of same type. - graph_id (str): Human readable identifier for graph. - max_edge_length (float): Maximum distance between two nodes to connect them with an edge. + nodes: List of `Atom`s or `Residue`s to include in graph. All nodes must be of same type. + graph_id: Human readable identifier for graph. + max_edge_length: Maximum distance between two nodes to connect them with an edge. Returns: Graph: Containing nodes (with positions) and edges. diff --git a/deeprank2/utils/parsing/__init__.py b/deeprank2/utils/parsing/__init__.py index f22f4e52a..7e8aa56fa 100644 --- a/deeprank2/utils/parsing/__init__.py +++ b/deeprank2/utils/parsing/__init__.py @@ -77,8 +77,10 @@ def get_charge(self, atom: Atom) -> float: """Get the charge of a given `Atom`. Args: - atom(Atom): the atom to get the charge for - Returns(float): the charge of the given atom. + atom: the atom to get the charge for + + Returns: + the charge of the given atom. """ atom_name = atom.name amino_acid_code = atom.residue.amino_acid.three_letter_code diff --git a/deeprank2/utils/parsing/pssm.py b/deeprank2/utils/parsing/pssm.py index 26a99f1d0..6a480d9fb 100644 --- a/deeprank2/utils/parsing/pssm.py +++ b/deeprank2/utils/parsing/pssm.py @@ -10,11 +10,11 @@ def parse_pssm(file_: TextIO, chain: Chain) -> PssmTable: """Read the PSSM data. Args: - file_ (python text file object): The pssm file. - chain (:class:`Chain`): The chain that the pssm file represents, residues from this chain must match the pssm file. + file_: The pssm file. + chain: The chain that the pssm file represents, residues from this chain must match the pssm file. Returns: - PssmTable: The position-specific scoring table, parsed from the pssm file. + The position-specific scoring table, parsed from the pssm file.
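Tying parse_pssm to the structures above, a usage sketch; the file paths, chain id, and the get_chain accessor are assumptions for illustration:

    from pdb2sql import pdb2sql

    from deeprank2.utils.buildgraph import get_structure
    from deeprank2.utils.parsing.pssm import parse_pssm

    pdb = pdb2sql("1ATN.pdb")  # hypothetical pdb file
    try:
        structure = get_structure(pdb, "1ATN")
    finally:
        pdb._close()

    chain = structure.get_chain("A")
    with open("1ATN.A.pdb.pssm", encoding="utf-8") as pssm_file:  # hypothetical pssm file
        pssm_table = parse_pssm(pssm_file, chain)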
""" conservation_rows = {} diff --git a/tests/features/__init__.py b/tests/features/__init__.py index 921444cac..ebcd22655 100644 --- a/tests/features/__init__.py +++ b/tests/features/__init__.py @@ -32,24 +32,21 @@ def build_testgraph( # noqa: C901 """Creates a Graph object for feature tests. Args: - pdb_path (str): Path of pdb file. - detail (Literal['atom', 'residue']): Type of graph to create. - influence_radius (float): max distance to include in graph. - max_edge_length (float): max distance to create an edge. - central_res (int | None, optional): Residue to center a single-chain graph around. - Use None to create a 2-chain graph, or any value for a single-chain graph. + pdb_path: Path of pdb file. + detail: Type of graph to create. + influence_radius: max distance to include in graph. + max_edge_length: max distance to create an edge. + central_res: Residue to center a single-chain graph around. Use None to create a 2-chain graph, or any value for a single-chain graph. Defaults to None. - variant (AminoAcid | None, optional): Amino acid to use as a variant amino acid. - Defaults to None. - chain_ids (str | tuple[str, str] | None, optional): Explicitly specify which chain(s) to use. - Defaults to None, which will use the first (two) chain(s) from the structure. - - Raises: - TypeError: if detail is set to anything other than 'residue' or 'atom' + variant: Amino acid to use as a variant amino acid. Defaults to None. + chain_ids: Explicitly specify which chain(s) to use. Defaults the first (two) chain(s) from the structure. Returns: Graph: As generated by Graph.build_graph SingleResidueVariant: returns None if central_res is None + + Raises: + TypeError: if detail is set to anything other than 'residue' or 'atom' """ pdb = pdb2sql(pdb_path) try: diff --git a/tests/test_querycollection.py b/tests/test_querycollection.py index 39a64fbea..f4e4d7281 100644 --- a/tests/test_querycollection.py +++ b/tests/test_querycollection.py @@ -3,6 +3,7 @@ from shutil import rmtree from tempfile import mkdtemp from types import ModuleType +from typing import Literal import h5py import pytest @@ -16,7 +17,7 @@ def _querycollection_tester( - query_type: str, + query_type: Literal["ppi", "srv"], n_queries: int = 3, feature_modules: ModuleType | list[ModuleType] | None = None, cpu_count: int = 1, @@ -25,12 +26,11 @@ def _querycollection_tester( """Generic function to test QueryCollection class. Args: - query_type (str): query type to be generated. It accepts only 'ppi' (ProteinProteinInterface) or 'srv' (SingleResidueVariant). - Defaults to 'ppi'. - n_queries (int): number of queries to be generated. + query_type: query type to be generated. It accepts only 'ppi' (ProteinProteinInterface) or 'srv' (SingleResidueVariant). + n_queries: number of queries to be generated. Defaults to 3. feature_modules: module or list of feature modules (from deeprank2.features) to be passed to process. Defaults to components and contact, which are the defaults for `query.process` - cpu_count (int): number of cpus to be used during the queries processing. + cpu_count: number of cpus to be used during the queries processing. Defaults to 1. combine_output (bool): boolean for combining the hdf5 files generated by the processes. By default, the hdf5 files generated are combined into one, and then deleted. """ @@ -102,9 +102,9 @@ def _assert_correct_modules( """Helper function to assert inclusion of correct features. 
Args: - output_paths (str): output_paths as returned from _querycollection_tester - features (str | list[str]): feature(s) that should be present - absent (str): feature that should be absent + output_paths: output_paths as returned from _querycollection_tester. + features: feature(s) that should be present. + absent: feature that should be absent. """ if isinstance(features, str): features = [features]
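To ground the helper above, here is a sketch of the kind of feature-presence check _assert_correct_modules performs; the "node_features" group name follows deeprank2's usual HDF5 layout and is an assumption here:

    import h5py

    def assert_feature_present(hdf5_path: str, feature: str) -> None:
        # Every entry in the file should carry the feature in its
        # "node_features" group (layout assumed from deeprank2 conventions).
        with h5py.File(hdf5_path, "r") as f5:
            for entry_name in f5:
                assert feature in f5[entry_name]["node_features"], f"{feature} missing from {entry_name}"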