From 8048a3fbecf2c476744c9887f3c4ffb35ad40196 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Mon, 14 Aug 2023 09:56:10 +0200 Subject: [PATCH] Move more explanations into code * change header underlining --- docs/src/index.rst | 10 +------ docs/src/references/decomposition.rst | 12 +++----- docs/src/references/linear_models.rst | 14 ++++------ docs/src/references/metrics.rst | 29 ++++---------------- docs/src/references/preprocessing.rst | 1 + docs/src/references/selection.rst | 5 +--- docs/src/references/utils.rst | 22 ++++++--------- src/skmatter/__init__.py | 9 ++++++ src/skmatter/_selection.py | 27 +++++++++--------- src/skmatter/metrics/__init__.py | 38 ++++++++++++++++---------- src/skmatter/preprocessing/__init__.py | 5 +--- src/skmatter/sample_selection/_base.py | 1 - 12 files changed, 73 insertions(+), 100 deletions(-) diff --git a/docs/src/index.rst b/docs/src/index.rst index d52639b5d9..c35729b0ea 100644 --- a/docs/src/index.rst +++ b/docs/src/index.rst @@ -1,11 +1,4 @@ -scikit-matter -============= - -scikit-matter is a toolbox of methods developed in the -computational chemical and materials science community, following the -`scikit-learn `_ API -and coding guidelines to promote usability and interoperability with existing workflows. - +.. automodule:: skmatter .. include:: ../../README.rst :start-after: marker-issues @@ -22,6 +15,5 @@ and coding guidelines to promote usability and interoperability with existing wo contributing bibliography - If you would like to contribute to scikit-matter, check out our :ref:`contributing` page! diff --git a/docs/src/references/decomposition.rst b/docs/src/references/decomposition.rst index da794507a1..8ae92be4bf 100644 --- a/docs/src/references/decomposition.rst +++ b/docs/src/references/decomposition.rst @@ -4,11 +4,9 @@ Principal Covariates Regression (PCovR) .. _PCovR-api: PCovR -##### +----- -.. currentmodule:: skmatter.decomposition - -.. autoclass:: PCovR +.. 
autoclass:: skmatter.decomposition.PCovR :show-inheritance: :special-members: @@ -25,11 +23,9 @@ PCovR .. _KPCovR-api: Kernel PCovR -############ - -.. currentmodule:: skmatter.decomposition +------------ -.. autoclass:: KernelPCovR +.. autoclass:: skmatter.decomposition.KernelPCovR :show-inheritance: :special-members: diff --git a/docs/src/references/linear_models.rst b/docs/src/references/linear_models.rst index 4833c844d0..52e55a94f1 100644 --- a/docs/src/references/linear_models.rst +++ b/docs/src/references/linear_models.rst @@ -1,21 +1,17 @@ Linear Models ============= -.. currentmodule:: skmatter.linear_model._base - Orthogonal Regression -##################### - -.. autoclass:: OrthogonalRegression +--------------------- -.. currentmodule:: skmatter.linear_model._ridge +.. autoclass:: skmatter.linear_model.OrthogonalRegression Ridge Regression with Two-fold Cross Validation -############################################### +----------------------------------------------- -.. autoclass:: RidgeRegression2FoldCV +.. autoclass:: skmatter.linear_model.RidgeRegression2FoldCV PCovR -##### +----- Principal Covariates Regression is a linear model, see :ref:`PCovR-api`. diff --git a/docs/src/references/metrics.rst b/docs/src/references/metrics.rst index ff4164b199..f0e1e1b486 100644 --- a/docs/src/references/metrics.rst +++ b/docs/src/references/metrics.rst @@ -1,45 +1,28 @@ -.. _gfrm: - Reconstruction Measures ======================= -.. marker-reconstruction-introduction-begin - .. automodule:: skmatter.metrics -These reconstruction measures are available: - -* :ref:`GRE-api` (GRE) computes the amount of linearly-decodable information - recovered through a global linear reconstruction. -* :ref:`GRD-api` (GRD) computes the amount of distortion contained in a global linear - reconstruction. -* :ref:`LRE-api` (LRE) computes the amount of decodable information recovered through - a local linear reconstruction for the k-nearest neighborhood of each sample. - -.. 
marker-reconstruction-introduction-end - -.. currentmodule:: skmatter.metrics - .. _GRE-api: Global Reconstruction Error --------------------------- -.. autofunction:: pointwise_global_reconstruction_error -.. autofunction:: global_reconstruction_error +.. autofunction:: skmatter.metrics.pointwise_global_reconstruction_error +.. autofunction:: skmatter.metrics.global_reconstruction_error .. _GRD-api: Global Reconstruction Distortion -------------------------------- -.. autofunction:: pointwise_global_reconstruction_distortion -.. autofunction:: global_reconstruction_distortion +.. autofunction:: skmatter.metrics.pointwise_global_reconstruction_distortion +.. autofunction:: skmatter.metrics.global_reconstruction_distortion .. _LRE-api: Local Reconstruction Error -------------------------- -.. autofunction:: pointwise_local_reconstruction_error -.. autofunction:: local_reconstruction_error +.. autofunction:: skmatter.metrics.pointwise_local_reconstruction_error +.. autofunction:: skmatter.metrics.local_reconstruction_error diff --git a/docs/src/references/preprocessing.rst b/docs/src/references/preprocessing.rst index 7252ddc600..f0bfcb4d6f 100644 --- a/docs/src/references/preprocessing.rst +++ b/docs/src/references/preprocessing.rst @@ -1,6 +1,7 @@ Preprocessing ============= +.. automodule:: skmatter.preprocessing KernelNormalizer ---------------- diff --git a/docs/src/references/selection.rst b/docs/src/references/selection.rst index 3bb8b37479..a9149d4abb 100644 --- a/docs/src/references/selection.rst +++ b/docs/src/references/selection.rst @@ -10,7 +10,6 @@ Feature and Sample Selection CUR --- - CUR decomposition begins by approximating a matrix :math:`{\mathbf{X}}` using a subset of columns and rows @@ -72,7 +71,6 @@ computation of :math:`\pi`. S :undoc-members: :inherited-members: - .. 
_FPS-api: Farthest Point-Sampling (FPS) @@ -93,7 +91,6 @@ row-wise), and are built off of the same base class, These selectors can be instantiated using :py:class:`skmatter.feature_selection.FPS` and :py:class:`skmatter.sample_selection.FPS`. - .. autoclass:: skmatter.feature_selection.FPS :members: :undoc-members: @@ -139,7 +136,7 @@ When *Not* to Use Voronoi FPS In many cases, this algorithm may not increase upon the efficiency. For example, for simple metrics (such as Euclidean distance), Voronoi FPS will likely not accelerate, and -may decelerate, computations when compared to FPS. The sweet spot for Voronoi FPS is +may decelerate, computations when compared to FPS. The sweet spot for Voronoi FPS is when the number of selectable samples is already enough to divide the space with Voronoi polyhedrons, but not yet comparable to the total number of samples, when the cost of bookkeeping significantly degrades the speed of work compared to FPS. diff --git a/docs/src/references/utils.rst b/docs/src/references/utils.rst index 8b2c6560a8..41a017156a 100644 --- a/docs/src/references/utils.rst +++ b/docs/src/references/utils.rst @@ -3,34 +3,30 @@ Utility Classes .. _PCovR_dist-api: -.. currentmodule:: skmatter.utils._pcovr_utils - Modified Gram Matrix :math:`\mathbf{\tilde{K}}` -############################################### +----------------------------------------------- -.. autofunction:: pcovr_kernel +.. autofunction:: skmatter.utils.pcovr_kernel Modified Covariance Matrix :math:`\mathbf{\tilde{C}}` -##################################################### +----------------------------------------------------- -.. autofunction:: pcovr_covariance +.. autofunction:: skmatter.utils.pcovr_covariance Orthogonalizers for CUR -####################### - -.. currentmodule:: skmatter.utils._orthogonalizers +----------------------- When computing non-iterative CUR, it is necessary to orthogonalize the input matrices after each selection. 
For this, we have supplied a feature and a sample orthogonalizer for feature and sample selection. -.. autofunction:: X_orthogonalizer -.. autofunction:: Y_feature_orthogonalizer -.. autofunction:: Y_sample_orthogonalizer +.. autofunction:: skmatter.utils.X_orthogonalizer +.. autofunction:: skmatter.utils.Y_feature_orthogonalizer +.. autofunction:: skmatter.utils.Y_sample_orthogonalizer Random Partitioning with Overlaps -################################# +--------------------------------- .. autofunction:: skmatter.model_selection.train_test_split diff --git a/src/skmatter/__init__.py b/src/skmatter/__init__.py index bbab0242f6..1cbb426927 100644 --- a/src/skmatter/__init__.py +++ b/src/skmatter/__init__.py @@ -1 +1,10 @@ +""" +scikit-matter +============= + +scikit-matter is a toolbox of methods developed in the computational chemical and +materials science community, following the `scikit-learn <https://scikit-learn.org/>`_ API and +coding guidelines to promote usability and interoperability with existing workflows. +""" + __version__ = "0.1.4" diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index ecaf8ccea0..fc0bf01139 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -1,14 +1,13 @@ -r""" -This module contains data sub-selection modules primarily corresponding to -methods derived from CUR matrix decomposition and Farthest Point Sampling. In -their classical form, CUR and FPS determine a data subset that maximizes the -variance (CUR) or distribution (FPS) of the features or samples. These methods -can be modified to combine supervised target information denoted by the methods -`PCov-CUR` and `PCov-FPS`. For further reading, refer to [Imbalzano2018]_ and -[Cersonsky2021]_. These selectors can be used for both feature and sample -selection, with similar instantiations. All sub-selection methods scores each -feature or sample (without an estimator) and chooses that with the maximum -score.
A simple example of usage: +""" +This module contains data sub-selection modules primarily corresponding to methods +derived from CUR matrix decomposition and Farthest Point Sampling. In their classical +form, CUR and FPS determine a data subset that maximizes the variance (CUR) or +distribution (FPS) of the features or samples. These methods can be modified to combine +supervised target information denoted by the methods `PCov-CUR` and `PCov-FPS`. For +further reading, refer to [Imbalzano2018]_ and [Cersonsky2021]_. These selectors can be +used for both feature and sample selection, with similar instantiations. All +sub-selection methods score each feature or sample (without an estimator) and choose +that with the maximum score. A simple example of usage: .. doctest:: @@ -64,9 +63,9 @@ singular value decoposition. * :ref:`PCov-CUR-api` decomposition extends upon CUR by using augmented right or left singular vectors inspired by Principal Covariates Regression. -* :ref:`FPS-api`: a common selection technique intended to exploit the diversity of - the input space. The selection of the first point is made at random or by a - separate metric +* :ref:`FPS-api`: a common selection technique intended to exploit the diversity of the + input space. The selection of the first point is made at random or by a separate + metric. * :ref:`PCov-FPS-api` extends upon FPS much like PCov-CUR does to CUR. * :ref:`Voronoi-FPS-api`: conduct FPS selection, taking advantage of Voronoi tessellations to accelerate selection. diff --git a/src/skmatter/metrics/__init__.py b/src/skmatter/metrics/__init__.py index e6047b72ae..ed003b82a8 100644 --- a/src/skmatter/metrics/__init__.py +++ b/src/skmatter/metrics/__init__.py @@ -1,18 +1,26 @@ -r""" -This module contains a set of easily-interpretable error measures of the -relative information capacity of feature space `F` with respect to feature -space `F'`.
The methods returns a value between 0 and 1, where 0 means that -`F` and `F'` are completey distinct in terms of linearly-decodable -information, and where 1 means that `F'` is contained in `F`. All methods -are implemented as the root mean-square error for the regression of the -feature matrix `X_F'` (or sometimes called `Y` in the doc) from `X_F` (or -sometimes called `X` in the doc) for transformations with different -constraints (linear, orthogonal, locally-linear). By default a custom 2-fold -cross-validation :py:class:`skosmo.linear_model.RidgeRegression2FoldCV` is -used to ensure the generalization of the transformation and efficiency of -the computation, since we deal with a multi-target regression problem. -Methods were applied to compare different forms of featurizations through -different hyperparameters and induced metrics and kernels [Goscinski2021]_ . +""" +This module contains a set of easily-interpretable error measures of the relative +information capacity of feature space `F` with respect to feature space `F'`. The +methods return a value between 0 and 1, where 0 means that `F` and `F'` are completely +distinct in terms of linearly-decodable information, and where 1 means that `F'` is +contained in `F`. All methods are implemented as the root mean-square error for the +regression of the feature matrix `X_F'` (or sometimes called `Y` in the doc) from `X_F` +(or sometimes called `X` in the doc) for transformations with different constraints +(linear, orthogonal, locally-linear). By default a custom 2-fold cross-validation +:py:class:`skmatter.linear_model.RidgeRegression2FoldCV` is used to ensure the +generalization of the transformation and efficiency of the computation, since we deal +with a multi-target regression problem. Methods were applied to compare different forms +of featurizations through different hyperparameters and induced metrics and kernels +[Goscinski2021]_.
+ +These reconstruction measures are available: + +* :ref:`GRE-api` (GRE) computes the amount of linearly-decodable information + recovered through a global linear reconstruction. +* :ref:`GRD-api` (GRD) computes the amount of distortion contained in a global linear + reconstruction. +* :ref:`LRE-api` (LRE) computes the amount of decodable information recovered through + a local linear reconstruction for the k-nearest neighborhood of each sample. """ from ._reconstruction_measures import ( diff --git a/src/skmatter/preprocessing/__init__.py b/src/skmatter/preprocessing/__init__.py index 1ef32de384..b81735a391 100644 --- a/src/skmatter/preprocessing/__init__.py +++ b/src/skmatter/preprocessing/__init__.py @@ -1,7 +1,4 @@ -""" -The :mod:`sklearn.preprocessing` module includes scaling, centering and -normalization methods. -""" +"""This module includes scaling, centering and normalization methods.""" from ._data import ( KernelNormalizer, diff --git a/src/skmatter/sample_selection/_base.py b/src/skmatter/sample_selection/_base.py index cf5229f09e..c4d4ee859f 100644 --- a/src/skmatter/sample_selection/_base.py +++ b/src/skmatter/sample_selection/_base.py @@ -355,7 +355,6 @@ class CUR(_CUR): >>> X[selector.selected_idx_] array([[-0.03, -0.53, 0.08], [ 0.12, 0.21, 0.02]]) - """ def __init__(