From 8441ac0e891f155d6954986df7c630e481240a74 Mon Sep 17 00:00:00 2001 From: "Rose K. Cersonsky" <47536110+rosecers@users.noreply.github.com> Date: Tue, 7 Dec 2021 14:39:59 +0100 Subject: [PATCH] Replacing csd in sample selection with california because the larger dataset means a few redundant samples, which is not good for stable testing --- docs/source/conf.py | 2 +- examples/PlotLFRE.ipynb | 2 +- skcosmo/_selection.py | 2 +- skcosmo/decomposition/_kernel_pcovr.py | 2 +- skcosmo/metrics/_reconstruction_measures.py | 4 ++-- skcosmo/sample_selection/_voronoi_fps.py | 2 +- skcosmo/utils/_pcovr_utils.py | 4 ++-- tests/test_sample_simple_cur.py | 7 ++++--- 8 files changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 77640dcae1..7315bdb74f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -209,7 +209,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, "skcosmo.tex", u"scikit-COSMO Documentation", author, "manual"), + (master_doc, "skcosmo.tex", "scikit-COSMO Documentation", author, "manual"), ] # The name of an image file (relative to this directory) to place at the top of diff --git a/examples/PlotLFRE.ipynb b/examples/PlotLFRE.ipynb index fa6a505c49..c546544d76 100644 --- a/examples/PlotLFRE.ipynb +++ b/examples/PlotLFRE.ipynb @@ -140,7 +140,7 @@ "ax34.set_title(r\"$X^-$ LFRE(3-body, 4-body)\")\n", "ax43.set_title(r\"$X^-$ LFRE(4-body, 3-body)\")\n", "\n", - "cbar = fig.colorbar(pcm, ax=(ax34, ax43), label=\"LFRE\", location=\"bottom\")\n", + "cbar = fig.colorbar(pcm, ax=[ax34, ax43], label=\"LFRE\", location=\"bottom\")\n", "\n", "plt.show()" ] diff --git a/skcosmo/_selection.py b/skcosmo/_selection.py index 39cd207335..57a5708406 100644 --- a/skcosmo/_selection.py +++ b/skcosmo/_selection.py @@ -932,7 +932,7 @@ def _init_greedy_search(self, X, y, n_to_select): super()._init_greedy_search(X, y, n_to_select) - self.norms_ = (X ** 2).sum(axis=abs(self._axis - 1)) + self.norms_ = (X**2).sum(axis=abs(self._axis - 1)) self.haussdorf_ = np.full(X.shape[self._axis], np.inf) self.haussdorf_at_select_ = np.full(X.shape[self._axis], np.inf) diff --git a/skcosmo/decomposition/_kernel_pcovr.py b/skcosmo/decomposition/_kernel_pcovr.py index 6b3a179ca4..2a55d0759f 100644 --- a/skcosmo/decomposition/_kernel_pcovr.py +++ b/skcosmo/decomposition/_kernel_pcovr.py @@ -624,7 +624,7 @@ def _decompose_full(self, mat): U, Vt = svd_flip(U, Vt) # Get variance explained by singular values - explained_variance_ = (S ** 2) / (self.n_samples_ - 1) + explained_variance_ = (S**2) / (self.n_samples_ - 1) total_var = explained_variance_.sum() explained_variance_ratio_ = explained_variance_ / total_var diff --git a/skcosmo/metrics/_reconstruction_measures.py b/skcosmo/metrics/_reconstruction_measures.py index aaeb7b4290..2eb55e8824 100644 --- a/skcosmo/metrics/_reconstruction_measures.py +++ b/skcosmo/metrics/_reconstruction_measures.py @@ -441,8 +441,8 @@ def pointwise_local_reconstruction_error( Y_test = scaler.transform(Y_test) squared_dist = ( - np.sum(X_train ** 2, axis=1) - + np.sum(X_test ** 2, axis=1)[:, np.newaxis] + np.sum(X_train**2, axis=1) + + np.sum(X_test**2, axis=1)[:, np.newaxis] - 2 * X_test @ X_train.T ) diff --git a/skcosmo/sample_selection/_voronoi_fps.py b/skcosmo/sample_selection/_voronoi_fps.py index c411076bcd..6469daa3d0 100644 --- a/skcosmo/sample_selection/_voronoi_fps.py +++ b/skcosmo/sample_selection/_voronoi_fps.py @@ -195,7 +195,7 @@ def _init_greedy_search(self, X, y, n_to_select): super()._init_greedy_search(X, y, n_to_select) - self.norms_ = (X ** 2).sum(axis=abs(self._axis - 1)) + self.norms_ = (X**2).sum(axis=abs(self._axis - 1)) if self.initialize == "random": random_state = check_random_state(self.random_state) diff --git a/skcosmo/utils/_pcovr_utils.py b/skcosmo/utils/_pcovr_utils.py index 7d5a27f9a7..2a641e6719 100644 --- a/skcosmo/utils/_pcovr_utils.py +++ b/skcosmo/utils/_pcovr_utils.py @@ -177,8 +177,8 @@ def pcovr_covariance( random_state=random_state, ) - UC = UC.T[:, (vC ** 2) > rcond] - vC = vC[(vC ** 2) > rcond] + UC = UC.T[:, (vC**2) > rcond] + vC = vC[(vC**2) > rcond] C_isqrt = UC @ np.diagflat(1.0 / vC) @ UC.T diff --git a/tests/test_sample_simple_cur.py b/tests/test_sample_simple_cur.py index 72655d1fb4..a79a71aa8e 100644 --- a/tests/test_sample_simple_cur.py +++ b/tests/test_sample_simple_cur.py @@ -3,14 +3,15 @@ import numpy as np from sklearn import exceptions -from skcosmo.datasets import load_csd_1000r as load +from sklearn.datasets import fetch_california_housing as load from skcosmo.sample_selection import CUR class TestCUR(unittest.TestCase): def setUp(self): - self.X, self.y = load(return_X_y=True) - self.n_select = 20 + self.X, _ = load(return_X_y=True) + self.X = self.X[:1000] + self.n_select = min(20, min(self.X.shape) // 2) def test_bad_transform(self): selector = CUR(n_to_select=2)