From 07045060ab0236cdebe6bffab4f18ff7964c2c7b Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 10:21:15 +0200 Subject: [PATCH 01/13] init commit --- skglm/solvers/bfgs.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 skglm/solvers/bfgs.py diff --git a/skglm/solvers/bfgs.py b/skglm/solvers/bfgs.py new file mode 100644 index 000000000..64dcd9678 --- /dev/null +++ b/skglm/solvers/bfgs.py @@ -0,0 +1,12 @@ +from skglm.solvers import BaseSolver + + +class BFGS(BaseSolver): + """A wrapper for scipy BFGS solver.""" + + def __init__(self, max_iter=50, tol=1e-4): + self.max_iter = max_iter + self.tol = tol + + def solve(X, y, datafit, penalty, w_init=None, Xw_init=None): + return From 7e364212fe059a9dc67acbec239e5167d002c2f3 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 10:57:28 +0200 Subject: [PATCH 02/13] bfgs wrapper implem --- skglm/solvers/bfgs.py | 47 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/skglm/solvers/bfgs.py b/skglm/solvers/bfgs.py index 64dcd9678..4ccf62358 100644 --- a/skglm/solvers/bfgs.py +++ b/skglm/solvers/bfgs.py @@ -1,12 +1,53 @@ +import numpy as np +import scipy.optimize +from numpy.linalg import norm + from skglm.solvers import BaseSolver +from skglm.datafits import BaseDatafit class BFGS(BaseSolver): """A wrapper for scipy BFGS solver.""" - def __init__(self, max_iter=50, tol=1e-4): + def __init__(self, max_iter=50, tol=1e-4, verbose=False): self.max_iter = max_iter self.tol = tol + self.verbose = verbose + + def solve(self, X, y, datafit: BaseDatafit, penalty, w_init=None, Xw_init=None): + + def objective_function(w): + Xw = X @ w + datafit_value = datafit.value(y, w, Xw) + penalty_value = penalty.value(w) + + return datafit_value + penalty_value + + def jacobian_function(w): + Xw = X @ w + datafit_grad = datafit.gradient(y, w, Xw) + penalty_grad = penalty.gradient(w) + + return datafit_grad + penalty_grad + + n_features = 
X.shape[1] + w = np.zeros(n_features) if w_init is not None else w_init + p_objs_out = [] + + result = scipy.optimize.minimize( + fun=objective_function, + jac=jacobian_function, + x0=w, + method="BFGS", + options=dict( + gtol=self.tol, + maxiter=self.max_iter, + disp=self.verbose + ), + callback=lambda w_k: p_objs_out.append(objective_function(w_k)) + ) + + w = result.x + stop_crit = norm(result.jac) - def solve(X, y, datafit, penalty, w_init=None, Xw_init=None): - return + return w, np.asarray(p_objs_out), stop_crit From 0fdbcde76637202c807204ef78b45617dc6aedbb Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 11:14:52 +0200 Subject: [PATCH 03/13] implement L2 penalty --- skglm/penalties/separable.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/skglm/penalties/separable.py b/skglm/penalties/separable.py index 2c1429a87..a2e6468cb 100644 --- a/skglm/penalties/separable.py +++ b/skglm/penalties/separable.py @@ -557,3 +557,34 @@ def is_penalized(self, n_features): def generalized_support(self, w): """Return a mask with non-zero coefficients.""" return w != 0 + + +class L2(BasePenalty): + r""":math:`ell_2` penalty. + + The penalty reads:: + + .. 
math:: + + \alpha xx \lVert w \rVert_2 + """ + + def __init__(self, alpha): + self.alpha = alpha + + def get_spec(self): + spec = ( + ('alpha', float64), + ) + return spec + + def params_to_dict(self): + return dict(alpha=self.alpha) + + def value(self, w): + """Compute the value of the L2 penalty.""" + return self.alpha * (w ** 2).sum() / 2 + + def gradient(self, w): + """Compute the gradient of the L2 penalty.""" + return self.alpha * w From c75b21f1ceb69e3ac9a7c2f59374655bf8e1ff8c Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 11:19:13 +0200 Subject: [PATCH 04/13] gradient of logistic --- skglm/datafits/single_task.py | 3 +++ skglm/solvers/bfgs.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/skglm/datafits/single_task.py b/skglm/datafits/single_task.py index f21484fd6..457f63501 100644 --- a/skglm/datafits/single_task.py +++ b/skglm/datafits/single_task.py @@ -182,6 +182,9 @@ def value(self, y, w, Xw): def gradient_scalar(self, X, y, w, Xw, j): return (- X[:, j] @ (y * sigmoid(- y * Xw))) / len(y) + def gradient(self, X, y, Xw): + return X.T @ self.raw_grad(y, Xw) + def full_grad_sparse( self, X_data, X_indptr, X_indices, y, Xw): n_features = X_indptr.shape[0] - 1 diff --git a/skglm/solvers/bfgs.py b/skglm/solvers/bfgs.py index 4ccf62358..ec2702113 100644 --- a/skglm/solvers/bfgs.py +++ b/skglm/solvers/bfgs.py @@ -25,7 +25,7 @@ def objective_function(w): def jacobian_function(w): Xw = X @ w - datafit_grad = datafit.gradient(y, w, Xw) + datafit_grad = datafit.gradient(X, y, Xw) penalty_grad = penalty.gradient(w) return datafit_grad + penalty_grad @@ -40,8 +40,8 @@ def jacobian_function(w): x0=w, method="BFGS", options=dict( - gtol=self.tol, maxiter=self.max_iter, + gtol=self.tol, disp=self.verbose ), callback=lambda w_k: p_objs_out.append(objective_function(w_k)) From d6358e12c3bf6b244d977a79a6dc48a091b14698 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 11:50:45 +0200 Subject: [PATCH 05/13] unittest 
against scikit learn --- skglm/penalties/__init__.py | 4 ++-- skglm/solvers/__init__.py | 3 ++- skglm/solvers/bfgs.py | 8 +++++-- skglm/tests/test_bfgs_solver.py | 41 +++++++++++++++++++++++++++++++++ 4 files changed, 51 insertions(+), 5 deletions(-) create mode 100644 skglm/tests/test_bfgs_solver.py diff --git a/skglm/penalties/__init__.py b/skglm/penalties/__init__.py index 3fb0771ec..a08b1b859 100644 --- a/skglm/penalties/__init__.py +++ b/skglm/penalties/__init__.py @@ -1,6 +1,6 @@ from .base import BasePenalty from .separable import ( - L1_plus_L2, L0_5, L1, L2_3, MCPenalty, SCAD, WeightedL1, IndicatorBox, + L1_plus_L2, L0_5, L1, L2, L2_3, MCPenalty, SCAD, WeightedL1, IndicatorBox, PositiveConstraint ) from .block_separable import ( @@ -12,6 +12,6 @@ __all__ = [ BasePenalty, - L1_plus_L2, L0_5, L1, L2_3, MCPenalty, SCAD, WeightedL1, IndicatorBox, + L1_plus_L2, L0_5, L1, L2, L2_3, MCPenalty, SCAD, WeightedL1, IndicatorBox, PositiveConstraint, L2_05, L2_1, BlockMCPenalty, BlockSCAD, WeightedGroupL2, SLOPE ] diff --git a/skglm/solvers/__init__.py b/skglm/solvers/__init__.py index 457367018..0a1d6ba0c 100644 --- a/skglm/solvers/__init__.py +++ b/skglm/solvers/__init__.py @@ -6,7 +6,8 @@ from .multitask_bcd import MultiTaskBCD from .prox_newton import ProxNewton from .group_prox_newton import GroupProxNewton +from .bfgs import BFGS __all__ = [AndersonCD, BaseSolver, FISTA, GramCD, GroupBCD, MultiTaskBCD, ProxNewton, - GroupProxNewton] + GroupProxNewton, BFGS] diff --git a/skglm/solvers/bfgs.py b/skglm/solvers/bfgs.py index ec2702113..b1ed50d72 100644 --- a/skglm/solvers/bfgs.py +++ b/skglm/solvers/bfgs.py @@ -30,8 +30,12 @@ def jacobian_function(w): return datafit_grad + penalty_grad + def save_p_obj(w_k): + p_obj = objective_function(w_k) + p_objs_out.append(p_obj) + n_features = X.shape[1] - w = np.zeros(n_features) if w_init is not None else w_init + w = np.zeros(n_features) if w_init is None else w_init p_objs_out = [] result = scipy.optimize.minimize( @@ -44,7 
+48,7 @@ def jacobian_function(w): gtol=self.tol, disp=self.verbose ), - callback=lambda w_k: p_objs_out.append(objective_function(w_k)) + callback=save_p_obj, ) w = result.x diff --git a/skglm/tests/test_bfgs_solver.py b/skglm/tests/test_bfgs_solver.py new file mode 100644 index 000000000..57429cef6 --- /dev/null +++ b/skglm/tests/test_bfgs_solver.py @@ -0,0 +1,41 @@ +import numpy as np + +from skglm.solvers import BFGS +from skglm.penalties import L2 +from skglm.datafits import Logistic + +from sklearn.linear_model import LogisticRegression + +from skglm.utils.data import make_correlated_data +from skglm.utils.jit_compilation import compiled_clone + + +def test_bfgs_L2_logreg(): + reg = 1. + n_samples, n_features = 50, 10 + + X, y, _ = make_correlated_data( + n_samples, n_features, random_state=0) + y = np.sign(y) + + datafit = compiled_clone(Logistic()) + penalty = compiled_clone(L2(reg)) + solver = BFGS() + + w, *_ = solver.solve(X, y, datafit, penalty) + + # fit scikit learn + estimator = LogisticRegression( + penalty='l2', + C=1 / (n_samples * reg), + fit_intercept=False + ) + estimator.fit(X, y) + + np.testing.assert_allclose( + w, estimator.coef_.flatten(), atol=1e-4 + ) + + +if __name__ == "__main__": + test_bfgs_L2_logreg() From ebbf0bc6a7c636425ed7f7dd666bc5c890d2f4b9 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 13:18:33 +0200 Subject: [PATCH 06/13] fix verbosity and p_pbj computation --- skglm/solvers/bfgs.py | 18 ++++++++++++++---- skglm/tests/test_bfgs_solver.py | 3 ++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/skglm/solvers/bfgs.py b/skglm/solvers/bfgs.py index b1ed50d72..47e57b440 100644 --- a/skglm/solvers/bfgs.py +++ b/skglm/solvers/bfgs.py @@ -30,10 +30,21 @@ def jacobian_function(w): return datafit_grad + penalty_grad - def save_p_obj(w_k): + def callback_post_iter(w_k): + # save p_obj p_obj = objective_function(w_k) p_objs_out.append(p_obj) + if self.verbose: + grad = jacobian_function(w_k) + stop_crit 
= norm(grad) + + it = len(p_objs_out) + print( + f"Iteration {it}: {p_obj:.10f}, " + f"stopping crit: {stop_crit:.2e}" + ) + n_features = X.shape[1] w = np.zeros(n_features) if w_init is None else w_init p_objs_out = [] @@ -45,10 +56,9 @@ def save_p_obj(w_k): method="BFGS", options=dict( maxiter=self.max_iter, - gtol=self.tol, - disp=self.verbose + gtol=self.tol ), - callback=save_p_obj, + callback=callback_post_iter, ) w = result.x diff --git a/skglm/tests/test_bfgs_solver.py b/skglm/tests/test_bfgs_solver.py index 57429cef6..027682f3c 100644 --- a/skglm/tests/test_bfgs_solver.py +++ b/skglm/tests/test_bfgs_solver.py @@ -20,7 +20,7 @@ def test_bfgs_L2_logreg(): datafit = compiled_clone(Logistic()) penalty = compiled_clone(L2(reg)) - solver = BFGS() + solver = BFGS(verbose=1) w, *_ = solver.solve(X, y, datafit, penalty) @@ -39,3 +39,4 @@ def test_bfgs_L2_logreg(): if __name__ == "__main__": test_bfgs_L2_logreg() + pass From 92d6eda805a69f3f2eacda53d6cdcf3db0502c95 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 13:37:01 +0200 Subject: [PATCH 07/13] BFGS --> L-BFGS --- skglm/solvers/__init__.py | 4 ++-- skglm/solvers/{bfgs.py => lbfgs.py} | 6 +++--- skglm/tests/{test_bfgs_solver.py => test_lbfgs_solver.py} | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) rename skglm/solvers/{bfgs.py => lbfgs.py} (94%) rename skglm/tests/{test_bfgs_solver.py => test_lbfgs_solver.py} (87%) diff --git a/skglm/solvers/__init__.py b/skglm/solvers/__init__.py index 0a1d6ba0c..9089128d5 100644 --- a/skglm/solvers/__init__.py +++ b/skglm/solvers/__init__.py @@ -6,8 +6,8 @@ from .multitask_bcd import MultiTaskBCD from .prox_newton import ProxNewton from .group_prox_newton import GroupProxNewton -from .bfgs import BFGS +from .lbfgs import LBFGS __all__ = [AndersonCD, BaseSolver, FISTA, GramCD, GroupBCD, MultiTaskBCD, ProxNewton, - GroupProxNewton, BFGS] + GroupProxNewton, LBFGS] diff --git a/skglm/solvers/bfgs.py b/skglm/solvers/lbfgs.py similarity index 94% 
rename from skglm/solvers/bfgs.py rename to skglm/solvers/lbfgs.py index 47e57b440..25677e205 100644 --- a/skglm/solvers/bfgs.py +++ b/skglm/solvers/lbfgs.py @@ -6,8 +6,8 @@ from skglm.datafits import BaseDatafit -class BFGS(BaseSolver): - """A wrapper for scipy BFGS solver.""" +class LBFGS(BaseSolver): + """A wrapper for scipy L-BFGS solver.""" def __init__(self, max_iter=50, tol=1e-4, verbose=False): self.max_iter = max_iter @@ -53,7 +53,7 @@ def callback_post_iter(w_k): fun=objective_function, jac=jacobian_function, x0=w, - method="BFGS", + method="L-BFGS-B", options=dict( maxiter=self.max_iter, gtol=self.tol diff --git a/skglm/tests/test_bfgs_solver.py b/skglm/tests/test_lbfgs_solver.py similarity index 87% rename from skglm/tests/test_bfgs_solver.py rename to skglm/tests/test_lbfgs_solver.py index 027682f3c..2eaf26ad8 100644 --- a/skglm/tests/test_bfgs_solver.py +++ b/skglm/tests/test_lbfgs_solver.py @@ -1,6 +1,6 @@ import numpy as np -from skglm.solvers import BFGS +from skglm.solvers import LBFGS from skglm.penalties import L2 from skglm.datafits import Logistic @@ -10,7 +10,7 @@ from skglm.utils.jit_compilation import compiled_clone -def test_bfgs_L2_logreg(): +def test_lbfgs_L2_logreg(): reg = 1. 
n_samples, n_features = 50, 10 @@ -20,7 +20,7 @@ def test_bfgs_L2_logreg(): datafit = compiled_clone(Logistic()) penalty = compiled_clone(L2(reg)) - solver = BFGS(verbose=1) + solver = LBFGS(verbose=1) w, *_ = solver.solve(X, y, datafit, penalty) @@ -38,5 +38,5 @@ def test_bfgs_L2_logreg(): if __name__ == "__main__": - test_bfgs_L2_logreg() + test_lbfgs_L2_logreg() pass From c130ca32d7899054847d78062e9c64b5adf628d2 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 13:41:12 +0200 Subject: [PATCH 08/13] add convergence warning --- skglm/solvers/lbfgs.py | 11 +++++++++++ skglm/tests/test_lbfgs_solver.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/skglm/solvers/lbfgs.py b/skglm/solvers/lbfgs.py index 25677e205..b4e2fd263 100644 --- a/skglm/solvers/lbfgs.py +++ b/skglm/solvers/lbfgs.py @@ -1,3 +1,6 @@ +import warnings +from sklearn.exceptions import ConvergenceWarning + import numpy as np import scipy.optimize from numpy.linalg import norm @@ -61,6 +64,14 @@ def callback_post_iter(w_k): callback=callback_post_iter, ) + if not result.success: + warnings.warn( + f"`LBFGS` did not converge for tol={self.tol:.3e} " + f"and max_iter={self.max_iter}.\n" + "Consider increasing `max_iter` and/or `tol`.", + category=ConvergenceWarning + ) + w = result.x stop_crit = norm(result.jac) diff --git a/skglm/tests/test_lbfgs_solver.py b/skglm/tests/test_lbfgs_solver.py index 2eaf26ad8..a4984c9f8 100644 --- a/skglm/tests/test_lbfgs_solver.py +++ b/skglm/tests/test_lbfgs_solver.py @@ -20,7 +20,7 @@ def test_lbfgs_L2_logreg(): datafit = compiled_clone(Logistic()) penalty = compiled_clone(L2(reg)) - solver = LBFGS(verbose=1) + solver = LBFGS() w, *_ = solver.solve(X, y, datafit, penalty) From 7161dccb1e0cf126b2c41a8a9a3018b9f9068c71 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 13:47:35 +0200 Subject: [PATCH 09/13] add to docs --- doc/api.rst | 1 + skglm/solvers/lbfgs.py | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 
deletion(-) diff --git a/doc/api.rst b/doc/api.rst index ef738558d..04ff040b0 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -78,6 +78,7 @@ Solvers GramCD GroupBCD GroupProxNewton + LBFGS MultiTaskBCD ProxNewton diff --git a/skglm/solvers/lbfgs.py b/skglm/solvers/lbfgs.py index b4e2fd263..a1b8a1bf8 100644 --- a/skglm/solvers/lbfgs.py +++ b/skglm/solvers/lbfgs.py @@ -10,7 +10,22 @@ class LBFGS(BaseSolver): - """A wrapper for scipy L-BFGS solver.""" + """A wrapper for scipy L-BFGS solver. + + Refer to `scipy L-BFGS-B <https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html>`_ documentation for details. + + Parameters + ---------- + max_iter : int, default 50 + Maximum number of outer iterations. + + tol : float, default 1e-4 + Tolerance for convergence. + + verbose : bool, default False + Amount of verbosity. 0/False is silent. + """ def __init__(self, max_iter=50, tol=1e-4, verbose=False): self.max_iter = max_iter From 5e3afdbd579074294ee96b6fe92aae108f288373 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 13:50:47 +0200 Subject: [PATCH 10/13] cleanups --- skglm/solvers/lbfgs.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/skglm/solvers/lbfgs.py b/skglm/solvers/lbfgs.py index a1b8a1bf8..a9dcd3c8c 100644 --- a/skglm/solvers/lbfgs.py +++ b/skglm/solvers/lbfgs.py @@ -6,7 +6,6 @@ import numpy as np import scipy.optimize from numpy.linalg import norm from skglm.solvers import BaseSolver -from skglm.datafits import BaseDatafit class LBFGS(BaseSolver): @@ -18,7 +17,7 @@ class LBFGS(BaseSolver): Parameters ---------- max_iter : int, default 50 - Maximum number of outer iterations. + Maximum number of iterations. tol : float, default 1e-4 Tolerance for convergence. 
@@ -32,7 +31,7 @@ def __init__(self, max_iter=50, tol=1e-4, verbose=False): self.tol = tol self.verbose = verbose - def solve(self, X, y, datafit: BaseDatafit, penalty, w_init=None, Xw_init=None): + def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): def objective_function(w): Xw = X @ w From 63b3ad0eef02fbb02aacf636cb0ec7d4f49efdb2 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 13:54:38 +0200 Subject: [PATCH 11/13] L2 to doc --- doc/api.rst | 1 + skglm/penalties/separable.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 04ff040b0..0f3aac341 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -36,6 +36,7 @@ Penalties IndicatorBox L0_5 L1 + L2 L1_plus_L2 L2_3 MCPenalty diff --git a/skglm/penalties/separable.py b/skglm/penalties/separable.py index a2e6468cb..b1046216e 100644 --- a/skglm/penalties/separable.py +++ b/skglm/penalties/separable.py @@ -562,11 +562,11 @@ def generalized_support(self, w): class L2(BasePenalty): r""":math:`ell_2` penalty. - The penalty reads:: + The penalty reads .. 
math:: - \alpha xx \lVert w \rVert_2 + \alpha / 2 ||w||_2^2 """ def __init__(self, alpha): From eae3b81504f7452f5afa46bb7b0680cf4287d958 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 13:56:40 +0200 Subject: [PATCH 12/13] cleanups tests --- skglm/tests/test_lbfgs_solver.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/skglm/tests/test_lbfgs_solver.py b/skglm/tests/test_lbfgs_solver.py index a4984c9f8..3146aeab2 100644 --- a/skglm/tests/test_lbfgs_solver.py +++ b/skglm/tests/test_lbfgs_solver.py @@ -18,11 +18,10 @@ def test_lbfgs_L2_logreg(): n_samples, n_features, random_state=0) y = np.sign(y) + # fit L-BFGS datafit = compiled_clone(Logistic()) penalty = compiled_clone(L2(reg)) - solver = LBFGS() - - w, *_ = solver.solve(X, y, datafit, penalty) + w, *_ = LBFGS().solve(X, y, datafit, penalty) # fit scikit learn estimator = LogisticRegression( @@ -38,5 +37,4 @@ def test_lbfgs_L2_logreg(): if __name__ == "__main__": - test_lbfgs_L2_logreg() pass From 251865ea20de06d9c290c6029cc1cdb9da729867 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 12 Jun 2023 14:06:30 +0200 Subject: [PATCH 13/13] lexicographic order --- doc/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 0f3aac341..9c3e1d21f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -36,8 +36,8 @@ Penalties IndicatorBox L0_5 L1 - L2 L1_plus_L2 + L2 L2_3 MCPenalty WeightedL1