Skip to content

Commit

Permalink
Replacing csd in sample selection with california because the larger dataset means a few redundant samples, which is not good for stable testing
Browse files Browse the repository at this point in the history
  • Loading branch information
rosecers committed May 16, 2022
1 parent f6278b6 commit 0a86d48
Show file tree
Hide file tree
Showing 11 changed files with 55 additions and 161 deletions.
22 changes: 4 additions & 18 deletions skcosmo/_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,18 +333,12 @@ def _continue_greedy_search(self, X, y, n_to_select):
n_pad[self._axis] = (0, n_to_select - self.n_selected_)

self.X_selected_ = np.pad(
self.X_selected_,
n_pad,
"constant",
constant_values=0.0,
self.X_selected_, n_pad, "constant", constant_values=0.0,
)

if hasattr(self, "y_selected_"):
self.y_selected_ = np.pad(
self.y_selected_,
n_pad,
"constant",
constant_values=0.0,
self.y_selected_, n_pad, "constant", constant_values=0.0,
)

old_idx = self.selected_idx_.copy()
Expand Down Expand Up @@ -775,18 +769,10 @@ def _compute_pi(self, X, y=None):
"""

if self._axis == 0:
pcovr_distance = pcovr_kernel(
self.mixing,
X,
y,
)
pcovr_distance = pcovr_kernel(self.mixing, X, y,)
else:
pcovr_distance = pcovr_covariance(
self.mixing,
X,
y,
rcond=1e-12,
rank=None,
self.mixing, X, y, rcond=1e-12, rank=None,
)

if self.k < pcovr_distance.shape[0] - 1:
Expand Down
5 changes: 1 addition & 4 deletions skcosmo/datasets/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,7 @@ def load_csd_1000r(return_X_y=False):
target_filename = join(module_path, "data", "csd-1000r.npz")
raw_data = np.load(target_filename)
if not return_X_y:
data = Bunch(
X=raw_data["X"],
y=raw_data["Y"],
)
data = Bunch(X=raw_data["X"], y=raw_data["Y"],)
with open(join(module_path, "descr", "csd-1000r.rst")) as rst_file:
fdescr = rst_file.read()

Expand Down
5 changes: 1 addition & 4 deletions skcosmo/datasets/make_csd_1000r.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@

# read all of the frames and book-keep the centers and species
filename = "/path/to/CSD-1000R.xyz"
frames = np.asarray(
read(filename, ":"),
dtype=object,
)
frames = np.asarray(read(filename, ":"), dtype=object,)

n_centers = np.array([len(frame) for frame in frames])
center_idx = np.array([i for i, f in enumerate(frames) for p in f])
Expand Down
18 changes: 3 additions & 15 deletions skcosmo/decomposition/_kernel_pcovr.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,11 +541,7 @@ def _decompose_truncated(self, mat):
"n_components=%r must be between 1 and "
"n_samples=%r with "
"svd_solver='%s'"
% (
self.n_components,
self.n_samples_,
self.svd_solver,
)
% (self.n_components, self.n_samples_, self.svd_solver,)
)
elif not isinstance(self.n_components, numbers.Integral):
raise ValueError(
Expand All @@ -558,11 +554,7 @@ def _decompose_truncated(self, mat):
"n_components=%r must be strictly less than "
"n_samples=%r with "
"svd_solver='%s'"
% (
self.n_components,
self.n_samples_,
self.svd_solver,
)
% (self.n_components, self.n_samples_, self.svd_solver,)
)

random_state = check_random_state(self.random_state)
Expand Down Expand Up @@ -601,11 +593,7 @@ def _decompose_full(self, mat):
"n_components=%r must be between 1 and "
"n_samples=%r with "
"svd_solver='%s'"
% (
self.n_components,
self.n_samples_,
self.svd_solver,
)
% (self.n_components, self.n_samples_, self.svd_solver,)
)
elif self.n_components >= 1:
if not isinstance(self.n_components, numbers.Integral):
Expand Down
14 changes: 3 additions & 11 deletions skcosmo/decomposition/_pcovr.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,12 +333,8 @@ def fit(self, X, Y):

self.pxy_ = self.pxt_ @ self.pty_
if len(Y.shape) == 1:
self.pxy_ = self.pxy_.reshape(
X.shape[1],
)
self.pty_ = self.pty_.reshape(
self.n_components,
)
self.pxy_ = self.pxy_.reshape(X.shape[1],)
self.pty_ = self.pty_.reshape(self.n_components,)

self.components_ = self.pxt_.T # for sklearn compatibility
return self
Expand Down Expand Up @@ -379,11 +375,7 @@ def _fit_feature_space(self, X, Y, Yhat):
"""

Ct, iCsqrt = pcovr_covariance(
mixing=self.mixing,
X=X,
Y=Yhat,
rcond=self.tol,
return_isqrt=True,
mixing=self.mixing, X=X, Y=Yhat, rcond=self.tol, return_isqrt=True,
)
try:
Csqrt = np.linalg.lstsq(iCsqrt, np.eye(len(iCsqrt)), rcond=None)[0]
Expand Down
63 changes: 19 additions & 44 deletions skcosmo/metrics/_reconstruction_measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,7 @@


def pointwise_global_reconstruction_error(
X,
Y,
train_idx=None,
test_idx=None,
scaler=None,
estimator=None,
X, Y, train_idx=None, test_idx=None, scaler=None, estimator=None,
):
"""Computes the pointwise global reconstruction error using the source X
to reconstruct the features or samples of target Y based on a minimization
Expand Down Expand Up @@ -102,12 +97,7 @@ def pointwise_global_reconstruction_error(


def global_reconstruction_error(
X,
Y,
test_idx=None,
train_idx=None,
scaler=None,
estimator=None,
X, Y, test_idx=None, train_idx=None, scaler=None, estimator=None,
):
"""Computes the global reconstruction error using the source X
to reconstruct the features or samples of target Y based on a minimization
Expand Down Expand Up @@ -163,28 +153,21 @@ def global_reconstruction_error(
The global reconstruction error
"""
pointwise_global_reconstruction_error_values = (
pointwise_global_reconstruction_error(
X,
Y,
train_idx=train_idx,
test_idx=test_idx,
scaler=scaler,
estimator=estimator,
)
pointwise_global_reconstruction_error_values = pointwise_global_reconstruction_error(
X,
Y,
train_idx=train_idx,
test_idx=test_idx,
scaler=scaler,
estimator=estimator,
)
return np.linalg.norm(pointwise_global_reconstruction_error_values) / np.sqrt(
len(pointwise_global_reconstruction_error_values)
)


def pointwise_global_reconstruction_distortion(
X,
Y,
test_idx=None,
train_idx=None,
scaler=None,
estimator=None,
X, Y, test_idx=None, train_idx=None, scaler=None, estimator=None,
):
"""Computes the pointwise global reconstruction distortion using the source X
to reconstruct the features or samples of target Y based on a minimization
Expand Down Expand Up @@ -273,12 +256,7 @@ def pointwise_global_reconstruction_distortion(


def global_reconstruction_distortion(
X,
Y,
test_idx=None,
train_idx=None,
scaler=None,
estimator=None,
X, Y, test_idx=None, train_idx=None, scaler=None, estimator=None,
):
"""Computes the global reconstruction distortion using the source X
to reconstruct the features or samples of target Y based on a minimization
Expand Down Expand Up @@ -334,15 +312,13 @@ def global_reconstruction_distortion(
The global reconstruction distortion
"""
pointwise_global_reconstruction_distortion_values = (
pointwise_global_reconstruction_distortion(
X,
Y,
train_idx=train_idx,
test_idx=test_idx,
scaler=scaler,
estimator=estimator,
)
pointwise_global_reconstruction_distortion_values = pointwise_global_reconstruction_distortion(
X,
Y,
train_idx=train_idx,
test_idx=test_idx,
scaler=scaler,
estimator=estimator,
)
return np.linalg.norm(pointwise_global_reconstruction_distortion_values) / np.sqrt(
len(pointwise_global_reconstruction_distortion_values)
Expand Down Expand Up @@ -461,8 +437,7 @@ def local_reconstruction_error_i(i):
local_Y_train_mean = np.mean(Y_train[local_env_idx], axis=0)
# P_{FF'}
estimator.fit(
local_X_train - local_X_train_mean,
local_Y_train - local_Y_train_mean,
local_X_train - local_X_train_mean, local_Y_train - local_Y_train_mean,
)
# \tilde{x}_i' = \bar{x}_{F'} + (x_i - \bar{x}_F)P_{FF'}
tilde_x_i_dash_test = local_Y_train_mean + estimator.predict(
Expand Down
32 changes: 7 additions & 25 deletions tests/test_kernel_pcovr.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,7 @@ def test_incompatible_regressor(self):
with self.assertRaises(ValueError) as cm:
kpcovr.fit(self.X, self.Y)
self.assertTrue(
str(cm.message),
"Regressor must be an instance of `KernelRidge`",
str(cm.message), "Regressor must be an instance of `KernelRidge`",
)

def test_none_regressor(self):
Expand Down Expand Up @@ -328,10 +327,7 @@ def test_linear_matches_pcovr(self):
ridge.fit(self.X, self.Y)

# common instantiation parameters for the two models
hypers = dict(
mixing=0.5,
n_components=1,
)
hypers = dict(mixing=0.5, n_components=1,)

# computing projection and prediction loss with linear KernelPCovR
# and use the alpha from RidgeCV for level regression comparisons
Expand Down Expand Up @@ -368,13 +364,11 @@ def test_linear_matches_pcovr(self):

rounding = 3
self.assertEqual(
round(ly, rounding),
round(ly_ref, rounding),
round(ly, rounding), round(ly_ref, rounding),
)

self.assertEqual(
round(lk, rounding),
round(lk_ref, rounding),
round(lk, rounding), round(lk_ref, rounding),
)


Expand Down Expand Up @@ -441,11 +435,7 @@ def test_bad_n_components(self):
"self.n_components=%r must be between 0 and "
"min(n_samples, n_features)=%r with "
"svd_solver='%s'"
% (
kpcovr.n_components,
self.X.shape[0],
kpcovr.svd_solver,
),
% (kpcovr.n_components, self.X.shape[0], kpcovr.svd_solver,),
)
with self.subTest(type="0_ncomponents"):
with self.assertRaises(ValueError) as cm:
Expand All @@ -457,11 +447,7 @@ def test_bad_n_components(self):
"self.n_components=%r must be between 1 and "
"min(n_samples, n_features)=%r with "
"svd_solver='%s'"
% (
kpcovr.n_components,
self.X.shape[0],
kpcovr.svd_solver,
),
% (kpcovr.n_components, self.X.shape[0], kpcovr.svd_solver,),
)
with self.subTest(type="arpack_X_ncomponents"):
with self.assertRaises(ValueError) as cm:
Expand All @@ -472,11 +458,7 @@ def test_bad_n_components(self):
"self.n_components=%r must be strictly less than "
"min(n_samples, n_features)=%r with "
"svd_solver='%s'"
% (
kpcovr.n_components,
self.X.shape[0],
kpcovr.svd_solver,
),
% (kpcovr.n_components, self.X.shape[0], kpcovr.svd_solver,),
)

for svd_solver in ["auto", "full"]:
Expand Down
15 changes: 7 additions & 8 deletions tests/test_linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,16 +111,16 @@ def setUpClass(cls):
def test_ridge_regression_2fold_regularization_method_raise_error(self):
# tests if wrong regularization_method in RidgeRegression2FoldCV raises error
with self.assertRaises(ValueError):
RidgeRegression2FoldCV(
regularization_method="dummy",
).fit(self.features_small, self.features_small)
RidgeRegression2FoldCV(regularization_method="dummy",).fit(
self.features_small, self.features_small
)

def test_ridge_regression_2fold_alpha_type_raise_error(self):
# tests if wrong alpha type in RidgeRegression2FoldCV raises error
with self.assertRaises(ValueError):
RidgeRegression2FoldCV(
alpha_type="dummy",
).fit(self.features_small, self.features_small)
RidgeRegression2FoldCV(alpha_type="dummy",).fit(
self.features_small, self.features_small
)

def test_ridge_regression_2fold_relative_alpha_type_raise_error(self):
# tests if an error is raised if alpha not in [0,1)
Expand Down Expand Up @@ -179,8 +179,7 @@ def test_ridge_regression_2fold_cv_small_to_large(
.predict(self.features_small)
)
self.assertTrue(
abs(err) < self.eps,
f"error {err} surpasses threshold for zero {self.eps}",
abs(err) < self.eps, f"error {err} surpasses threshold for zero {self.eps}",
)

@parameterized.expand(ridge_parameters)
Expand Down
18 changes: 3 additions & 15 deletions tests/test_pcovr.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,11 +319,7 @@ def test_bad_n_components(self):
"self.n_components=%r must be between 0 and "
"min(n_samples, n_features)=%r with "
"svd_solver='%s'"
% (
pcovr.n_components,
min(self.X.shape),
pcovr.svd_solver,
),
% (pcovr.n_components, min(self.X.shape), pcovr.svd_solver,),
)
with self.subTest(type="0_ncomponents"):
with self.assertRaises(ValueError) as cm:
Expand All @@ -335,11 +331,7 @@ def test_bad_n_components(self):
"self.n_components=%r must be between 1 and "
"min(n_samples, n_features)=%r with "
"svd_solver='%s'"
% (
pcovr.n_components,
min(self.X.shape),
pcovr.svd_solver,
),
% (pcovr.n_components, min(self.X.shape), pcovr.svd_solver,),
)
with self.subTest(type="arpack_X_ncomponents"):
with self.assertRaises(ValueError) as cm:
Expand All @@ -350,11 +342,7 @@ def test_bad_n_components(self):
"self.n_components=%r must be strictly less than "
"min(n_samples, n_features)=%r with "
"svd_solver='%s'"
% (
pcovr.n_components,
min(self.X.shape),
pcovr.svd_solver,
),
% (pcovr.n_components, min(self.X.shape), pcovr.svd_solver,),
)

for svd_solver in ["auto", "full"]:
Expand Down
Loading

0 comments on commit 0a86d48

Please sign in to comment.