Solver - add a wrapper for scipy L-BFGS solver #165

Merged
13 commits merged on Jun 12, 2023
skglm/datafits/single_task.py (3 additions, 0 deletions)

@@ -182,6 +182,9 @@ def value(self, y, w, Xw):
    def gradient_scalar(self, X, y, w, Xw, j):
        return (- X[:, j] @ (y * sigmoid(- y * Xw))) / len(y)

    def gradient(self, X, y, Xw):
        return X.T @ self.raw_grad(y, Xw)

    def full_grad_sparse(
            self, X_data, X_indptr, X_indices, y, Xw):
        n_features = X_indptr.shape[0] - 1
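
For context, the new gradient method computes the full datafit gradient in a single matrix product, X.T @ raw_grad(y, Xw), instead of one gradient_scalar call per feature. A minimal consistency check (a sketch, assuming the Logistic datafit is importable from skglm.datafits and already exposes raw_grad and gradient_scalar):

import numpy as np
from skglm.datafits import Logistic

rng = np.random.default_rng(0)
X = rng.standard_normal((20, 5))
y = np.where(rng.standard_normal(20) > 0, 1.0, -1.0)  # labels in {-1, 1}
w = rng.standard_normal(5)
Xw = X @ w

datafit = Logistic()
# full gradient in one shot
full_grad = datafit.gradient(X, y, Xw)
# coordinate-wise gradients, stacked feature by feature
per_coordinate = np.array(
    [datafit.gradient_scalar(X, y, w, Xw, j) for j in range(X.shape[1])]
)
np.testing.assert_allclose(full_grad, per_coordinate)
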
skglm/penalties/separable.py (31 additions, 0 deletions)

@@ -557,3 +557,34 @@ def is_penalized(self, n_features):
    def generalized_support(self, w):
        """Return a mask with non-zero coefficients."""
        return w != 0


class L2(BasePenalty):
    r""":math:`\ell_2` penalty.

    The penalty reads

    .. math::

        \frac{\alpha}{2} \lVert w \rVert_2^2
    """

    def __init__(self, alpha):
        self.alpha = alpha

    def get_spec(self):
        spec = (
            ('alpha', float64),
        )
        return spec

    def params_to_dict(self):
        return dict(alpha=self.alpha)

    def value(self, w):
        """Compute the value of the L2 penalty."""
        return self.alpha * (w ** 2).sum() / 2

    def gradient(self, w):
        """Compute the gradient of the L2 penalty."""
        return self.alpha * w
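
As a quick sanity check (a sketch, not part of the diff; it assumes L2 is exported from skglm.penalties once this change lands), the penalty's gradient can be verified against a centered finite-difference approximation of its value:

import numpy as np
from skglm.penalties import L2  # assumed export after this PR

alpha = 0.5
penalty = L2(alpha=alpha)
w = np.array([1.0, -2.0, 3.0])

# value is alpha / 2 * ||w||_2^2
assert np.isclose(penalty.value(w), alpha / 2 * np.sum(w ** 2))

# gradient is alpha * w; compare against centered finite differences
eps = 1e-6
fd_grad = np.array([
    (penalty.value(w + eps * e) - penalty.value(w - eps * e)) / (2 * eps)
    for e in np.eye(len(w))
])
np.testing.assert_allclose(penalty.gradient(w), fd_grad, rtol=1e-5)
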
skglm/solvers/bfgs.py (53 additions, 0 deletions)

@@ -0,0 +1,53 @@
import numpy as np
import scipy.optimize
from numpy.linalg import norm

from skglm.solvers import BaseSolver
from skglm.datafits import BaseDatafit


class BFGS(BaseSolver):
"""A wrapper for scipy BFGS solver."""

    def __init__(self, max_iter=50, tol=1e-4, verbose=False):
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose

    def solve(self, X, y, datafit: BaseDatafit, penalty, w_init=None, Xw_init=None):

        def objective_function(w):
            Xw = X @ w
            datafit_value = datafit.value(y, w, Xw)
            penalty_value = penalty.value(w)

            return datafit_value + penalty_value

        def jacobian_function(w):
            Xw = X @ w
            datafit_grad = datafit.gradient(X, y, Xw)
            penalty_grad = penalty.gradient(w)

            return datafit_grad + penalty_grad

        n_features = X.shape[1]
        # start from zeros unless an initial point is provided
        w = np.zeros(n_features) if w_init is None else w_init
        p_objs_out = []

        result = scipy.optimize.minimize(
            fun=objective_function,
            jac=jacobian_function,
            x0=w,
            method="BFGS",
            options=dict(
                maxiter=self.max_iter,
                gtol=self.tol,
                disp=self.verbose
            ),
            # record the objective at every iterate to report convergence
            callback=lambda w_k: p_objs_out.append(objective_function(w_k))
        )

        w = result.x
        # stopping criterion: norm of the objective gradient at the solution
        stop_crit = norm(result.jac)

        return w, np.asarray(p_objs_out), stop_crit
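
End to end, the solver is meant to be driven with a smooth datafit and the new L2 penalty, which gives an L2-regularized logistic regression. A usage sketch (it assumes BFGS is exported from skglm.solvers, and that plain, uncompiled Logistic and L2 instances suffice here, since solve only calls their value and gradient methods):

import numpy as np
from skglm.datafits import Logistic
from skglm.penalties import L2
from skglm.solvers import BFGS  # assumed export after this PR

rng = np.random.default_rng(42)
X = rng.standard_normal((100, 10))
y = np.where(rng.standard_normal(100) > 0, 1.0, -1.0)

solver = BFGS(max_iter=50, tol=1e-6)
w, p_objs, stop_crit = solver.solve(X, y, Logistic(), L2(alpha=0.1))

print(w.shape)      # (10,)
print(stop_crit)    # gradient norm at the returned solution
print(p_objs[-1])   # objective value at the last recorded iterate
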