
Commit

Merge pull request #116 from dj-gauthier/master
Add uniform regularized regression
wilsonrljr authored Nov 15, 2023
2 parents a32cccc + b6584d8 commit 55727d4
Showing 2 changed files with 57 additions and 2 deletions.
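
In practice, the new option would be enabled roughly as below. This is a hedged sketch: the estimator="ridge_regression" selector string and the surrounding model setup are assumed from FROLS conventions, not shown in this diff.

from sysidentpy.model_structure_selection import FROLS
from sysidentpy.basis_function import Polynomial

# Assumed usage: select the new ridge estimator and pass ridge_param through
# FROLS; a value near the noise level of the data is the suggested start.
model = FROLS(
    order_selection=True,
    basis_function=Polynomial(degree=2),
    estimator="ridge_regression",  # assumed selector string for the new method
    ridge_param=1e-2,
)
# model.fit(X=x_train, y=y_train) then uses ridge-regularized estimation.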
@@ -88,6 +88,8 @@ class FROLS(Estimators, BaseMSS):
The convergence coefficient (learning rate) of the filter.
eps : float
Normalization factor of the normalized filters.
ridge_param : float
Regularization parameter used in ridge regression.
gama : float, default=0.2
The leakage factor of the Leaky LMS method.
weight : float, default=0.02
@@ -164,6 +166,7 @@ def __init__(
offset_covariance: float = 0.2,
mu: float = 0.01,
eps: np.float64 = np.finfo(np.float64).eps,
ridge_param: np.float64 = np.finfo(np.float64).eps, # default is machine eps
gama: float = 0.2,
weight: float = 0.02,
basis_function: Union[Polynomial, Fourier] = Polynomial(),
@@ -191,6 +194,7 @@ def __init__(
offset_covariance=offset_covariance,
mu=mu,
eps=eps,
ridge_param=ridge_param, # ridge regression parameter
gama=gama,
weight=weight,
basis_function=basis_function,
@@ -294,9 +298,16 @@ def error_reduction_ratio(self, psi, y, process_term_number):
for j in np.arange(i, dimension):
# Add `eps` to avoid division by zero when the denominator is zero.
# To implement regularized (ridge) regression, add ridge_param to
# psi.T @ psi. See S. Chen, Local regularization assisted orthogonal
# least squares regression, Neurocomputing 69 (2006) 559-585.
# The version implemented below uses the same regularization for every
# feature, which Chen calls uniform regularized orthogonal least
# squares (UROLS). Set ridge_param to a tiny value (self.eps) when not
# regularizing; ridge_param = eps is the default.
tmp_err[j] = (np.dot(tmp_psi[i:, j].T, tmp_y[i:]) ** 2) / (
-    np.dot(tmp_psi[i:, j].T, tmp_psi[i:, j]) * squared_y + self.eps
-)
+    (np.dot(tmp_psi[i:, j].T, tmp_psi[i:, j]) + self.ridge_param) * squared_y
+) + self.eps

if i == process_term_number:
break
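
For reference, a minimal standalone sketch of the regularized ERR term that the hunk above computes. Function and variable names here are illustrative, not the library's internals.

import numpy as np

def regularized_err(psi_col, y, ridge_param, eps=np.finfo(np.float64).eps):
    # ERR of one orthogonalized regressor column, with the uniform (UROLS)
    # regularization ridge_param added to the squared norm in the denominator.
    squared_y = np.dot(y.T, y)
    numerator = np.dot(psi_col.T, y) ** 2
    denominator = (np.dot(psi_col.T, psi_col) + ridge_param) * squared_y
    return numerator / denominator + eps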
44 changes: 44 additions & 0 deletions sysidentpy/parameter_estimation/estimators.py
@@ -23,11 +23,13 @@ def __init__(
offset_covariance=0.2,
mu=0.01,
eps=np.finfo(np.float64).eps,
ridge_param=np.finfo(np.float64).eps,  # for regularized ridge regression
gama=0.2,
weight=0.02,
basis_function=None,
):
self.eps = eps
self.ridge_param = ridge_param # for regularized ridge regression
self.mu = mu
self.offset_covariance = offset_covariance
self.max_lag = max_lag
@@ -49,6 +51,7 @@ def _validate_params(self):
"offset_covariance": self.offset_covariance,
"mu": self.mu,
"eps": self.eps,
"ridge_param": self.ridge_param, # for regularized ridge regression
"gama": self.gama,
"weight": self.weight,
}
@@ -116,6 +119,47 @@ def least_squares(self, psi, y):
y = y[self.max_lag :, 0].reshape(-1, 1)
theta = np.linalg.lstsq(psi, y, rcond=None)[0]
return theta

def ridge_regression(self, psi, y):
    """Estimate the model parameters using the regularized least squares
    method known as ridge regression.

    Builds on the least_squares method and uses the same data format,
    but ridge_param must be passed in the call to FROLS.

    Parameters
    ----------
    psi : ndarray of floats
        The information matrix of the model.
    y : array-like of shape = y_training
        The data used to train the model.

    Returns
    -------
    theta : array-like of shape = number_of_model_elements
        The estimated parameters of the model.

    Notes
    -----
    self.ridge_param is the regularization parameter that prevents
    overfitting. If the input is a noisy signal, setting it close to the
    noise level is a reasonable starting point. Multiplying ridge_param
    by the identity matrix (np.eye) penalizes the L2 norm of theta,
    which favors models with small parameters and prevents overfitting.
    For an application where preventing overfitting is important, see
    D. J. Gauthier, E. Bollt, A. Griffith, W. A. S. Barbosa, "Next
    generation reservoir computing," Nat. Commun. 12, 5564 (2021).
    https://www.nature.com/articles/s41467-021-25801-2

    References
    ----------
    - Wikipedia entry on ridge regression
      https://en.wikipedia.org/wiki/Ridge_regression
    """
    self._check_linear_dependence_rows(psi)

    y = y[self.max_lag :, 0].reshape(-1, 1)
    theta = (
        np.linalg.pinv(psi.T @ psi + self.ridge_param * np.eye(psi.shape[1]))
        @ psi.T
        @ y
    )
    return theta
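
As a quick sanity check of the closed-form solution above, here is a standalone sketch with made-up data; it is not part of the library.

import numpy as np

rng = np.random.default_rng(0)
psi = rng.normal(size=(100, 5))  # stand-in information matrix
theta_true = np.array([[1.0], [0.5], [0.0], [-0.3], [2.0]])
y = psi @ theta_true + 0.01 * rng.normal(size=(100, 1))

ridge_param = 1e-2  # near the noise level, per the docstring's guidance
theta = np.linalg.pinv(psi.T @ psi + ridge_param * np.eye(psi.shape[1])) @ psi.T @ y
# theta should be close to theta_true; larger ridge_param shrinks it toward zero.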

def _unbiased_estimator(self, psi, y, theta, elag, max_lag, estimator):
"""Estimate the model parameters using Extended Least Squares method.
