diff --git a/.github/workflows/ci_tox.yml b/.github/workflows/ci_tox.yml
index f464451..f75230c 100644
--- a/.github/workflows/ci_tox.yml
+++ b/.github/workflows/ci_tox.yml
@@ -11,13 +11,13 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
           architecture: x64
diff --git a/procrustes/psdp.py b/procrustes/psdp.py
index 4d15464..92d160e 100644
--- a/procrustes/psdp.py
+++ b/procrustes/psdp.py
@@ -34,10 +34,194 @@
 __all__ = [
     "psdp_woodgate",
     "psdp_peng",
-    "psdp_opt"
+    "psdp_opt",
+    "psdp_projgrad",
 ]


+def psdp_projgrad(
+    a: np.ndarray,
+    b: np.ndarray,
+    options_dict: Optional[Dict] = None,
+    pad: bool = True,
+    translate: bool = False,
+    scale: bool = False,
+    unpad_col: bool = False,
+    unpad_row: bool = False,
+    check_finite: bool = True,
+    weight: Optional[np.ndarray] = None,
+) -> ProcrustesResult:
+    r"""
+    Projected gradient method for the positive semi-definite Procrustes problem.
+
+    We want to minimize the function F = ||SAT - B||_F, where A and B are n*m matrices, S is the
+    n*n transformation we want to find, and T is just an identity matrix for now. Only the left
+    transformation is considered.
+
+    The paper minimizes the function ||AX - B||_F, so the mapping between the two problem
+    statements is:
+
+        Term used in paper      Term used in our implementation
+        ------------------------------------------------------
+        A                       S
+        X                       A
+        B                       B
+
+    Parameters
+    ----------
+    a : np.ndarray
+        The matrix :math:`\mathbf{A}` which is to be transformed.
+
+    b : np.ndarray
+        The target matrix :math:`\mathbf{B}`.
+
+    options_dict : Dict, optional
+        Dictionary with fields that serve as parameters for the algorithm.
+
+        max_iter : int
+            Maximum number of iterations.
+            Default value is 10000.
+
+        s_tol : float
+            Stop control for ||S_i - S_{i-1}||_F / ||S_1 - S_0||_F.
+            Default value is 1e-5. Should be kept below 1.
+
+        f_tol : float
+            Stop control for ||F_i - F_{i-1}||_F / (1 + ||F_{i-1}||_F).
+            Default value is 1e-12. Should be kept much smaller than 1.
+
+    pad : bool, optional
+        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
+        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
+
+    translate : bool, optional
+        If True, both arrays are centered at origin (columns of the arrays will have mean zero).
+
+    scale : bool, optional
+        If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
+        :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
+        :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
+
+    unpad_col : bool, optional
+        If True, zero columns (with values less than 1.0e-8) on the right-hand side of the initial
+        :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
+
+    unpad_row : bool, optional
+        If True, zero rows (with values less than 1.0e-8) at the bottom of the initial
+        :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
+
+    check_finite : bool, optional
+        If True, convert the input to an array, checking for NaNs or Infs.
+
+    weight : ndarray, optional
+        The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines the
+        elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by the matrix
+        :math:`\mathbf{A}`, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.
+
+    Returns
+    -------
+    ProcrustesResult
+        The result of the Procrustes transformation.
+
+    Notes
+    -----
+    The projected gradient algorithm (on which this implementation is based) is described on
+    p. 131 of [1]_.
+
+    References
+    ----------
+    .. [1] Nicolas Gillis, Punit Sharma, "A semi-analytical approach for the positive semidefinite
+       Procrustes problem", Linear Algebra and its Applications, Volume 540, 2018, Pages 112-137.
+    """
+    a, b = setup_input_arrays(
+        a,
+        b,
+        unpad_col,
+        unpad_row,
+        pad,
+        translate,
+        scale,
+        check_finite,
+        weight,
+    )
+
+    if a.shape != b.shape:
+        raise ValueError(
+            f"Shape of A and B does not match: {a.shape} != {b.shape} "
+            "Check pad, unpad_col, and unpad_row arguments."
+        )
+
+    _, m = a.shape
+
+    # Specifies the default parameters of the algorithm
+    options = {
+        # The maximum number of iterations the gradient descent algorithm may run for
+        "max_iter": 10000,
+        # The minimum ratio of ||S_i - S_{i-1}||_F to ||S_1 - S_0||_F, below which the algorithm
+        # terminates
+        "s_tol": 1e-5,
+        # The minimum ratio of ||F_i - F_{i-1}||_F to 1 + ||F_{i-1}||_F, below which the algorithm
+        # terminates
+        "f_tol": 1e-12,
+    }
+
+    if options_dict:
+        options.update(options_dict)
+
+    # Performing some precomputations
+    aat = a @ a.conj().T
+    x, _ = np.linalg.eig(aat)
+    max_eig = np.max(x) ** 2  # if the eigenvalues are complex, their norm should be taken
+    bat = b @ a.conj().T
+
+    # Initialization of the algorithm
+    i = 1
+    err = np.zeros((options["max_iter"] + 1, 1))
+    # S is the transformation we want to find in our problem statement
+    s = _init_procustes_projgrad(a, b)
+    # F is the function whose norm we want to minimize, F = S@A - B
+    f = s @ a - b
+    # eps = ||S_i - S_{i-1}||_F
+    eps = 1
+    # eps0 = ||S_1 - S_0||_F
+    eps0 = 0
+
+    # The algorithm runs for at most max_iter iterations
+    while i <= options["max_iter"]:
+        s_old = s
+        f_old = f
+        # Projected gradient step
+        s = _psd_proj(s - (s @ aat - bat) / max_eig)
+        f = s @ a - b
+        err[i] = np.linalg.norm(f, 'fro')
+
+        # Stop conditions to check if the algorithm should be terminated
+
+        # If ||F_i - F_{i-1}||_F / (1 + ||F_{i-1}||_F) is less than some tolerance level, we
+        # terminate the algorithm, as there is little improvement to be gained from the extra
+        # iterations we would perform
+        if np.linalg.norm(f - f_old, 'fro') / (1 + np.linalg.norm(f_old, 'fro')) < options["f_tol"]:
+            break
+
+        # If ||S_i - S_{i-1}||_F / ||S_1 - S_0||_F is less than some tolerance level, we
+        # terminate the algorithm. TODO: decide if this is useful or not.
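+        # (eps0 is captured on the first pass below, so that the later step sizes
+        # ||S_i - S_{i-1}||_F can be compared against the very first step ||S_1 - S_0||_F.)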
+        if i == 1:
+            eps0 = np.linalg.norm(s - s_old, 'fro')
+        eps = np.linalg.norm(s - s_old, 'fro')
+        if eps < options["s_tol"] * eps0:
+            break
+
+        i += 1
+
+    return ProcrustesResult(
+        new_a=a,
+        new_b=b,
+        error=compute_error(a=a, b=b, t=np.eye(m), s=s),
+        t=np.eye(m),
+        s=s,
+    )
+
+
 def psdp_opt(
     a: np.ndarray,
     b: np.ndarray,
@@ -814,3 +998,39 @@ def _psd_proj(arr: np.ndarray, do_cholesky: bool = True) -> np.ndarray:
         return arr
     except np.linalg.LinAlgError:
         return _make_positive(arr)
+
+
+def _init_procustes_projgrad(
+    a: np.ndarray,
+    b: np.ndarray,
+    choice: int = 0
+) -> np.ndarray:
+    r"""Return the starting point of the transformation S for the projected gradient algorithm."""
+    n, _ = a.shape
+
+    # We will compute two candidates S1 and S2 and return the one that gives a lower error in the
+    # minimization function
+
+    # Finding S1
+    s1t = np.linalg.lstsq(a.conj().T, b.conj().T, rcond=1)[0]
+    s1 = s1t.conj().T
+    s1 = _psd_proj(s1)
+    e1 = np.linalg.norm(s1 @ a - b, 'fro')
+
+    # Finding S2
+    eps = 1e-6  # added to the denominator to avoid division by zero
+    s2 = np.zeros((n, n))
+    for i in range(n):
+        s2[i, i] = max(0, (a[i, :] @ b[i, :].conj().T) / (np.linalg.norm(a[i, :]) ** 2 + eps))
+    e2 = np.linalg.norm(s2 @ a - b, "fro")
+
+    # An explicit choice overrides the error comparison; otherwise the candidate with the lower
+    # error is returned (ties go to S1).
+    if choice == 1:
+        s_init = s1
+    elif choice == 2:
+        s_init = s2
+    elif e1 <= e2:
+        s_init = s1
+    else:
+        s_init = s2
+
+    return s_init
diff --git a/procrustes/test/test_psdp.py b/procrustes/test/test_psdp.py
index cf6c903..fd458c8 100644
--- a/procrustes/test/test_psdp.py
+++ b/procrustes/test/test_psdp.py
@@ -23,8 +23,97 @@
 r"""Testings for PSDP (positive semi-definite Procrustes) module."""

 import numpy as np
-from numpy.testing import assert_almost_equal
-from procrustes.psdp import psdp_opt, psdp_peng, psdp_woodgate
+from numpy.testing import assert_almost_equal, assert_array_less
+from procrustes.psdp import psdp_opt, psdp_peng, psdp_projgrad, psdp_woodgate
+
+
+def test_psdp_projgrad_identity(n=np.random.randint(50, 100)):
+    r"""Test Projected Gradient's algorithm for PSDP with identity matrix."""
+    a = np.eye(n)
+    b = np.eye(n)
+    res = psdp_projgrad(a=a, b=b)
+    s, error = res["s"], res["error"]
+    assert_almost_equal(s, np.eye(n))
+    assert_almost_equal(error, 0.0)
+
+
+def test_psdp_projgrad_diagonal():
+    r"""Test Projected Gradient's algorithm for PSDP with diagonal matrix."""
+    a = np.diag([1, 2, 3, 4])
+    b = np.eye(4)
+    res = psdp_projgrad(a=a, b=b)
+    s, error = res["s"], res["error"]
+    actual_result = np.diag([0.99999, 0.5, 0.33333, 0.25])
+    assert_almost_equal(s, actual_result, decimal=5)
+    assert_almost_equal(error, 0.0, decimal=3)
+
+
+def test_psdp_projgrad_generic_square():
+    r"""Test Projected Gradient's algorithm for PSDP with 2 generic square matrices."""
+    a = np.array([[1, 6, 0], [4, 3, 0], [0, 0, -0.5]])
+    b = np.array([[1, 0, 0], [0, -2, 3], [0, 2, 4]])
+    res = psdp_projgrad(a=a, b=b)
+    s, error = res["s"], res["error"]
+    actual_result = np.array(
+        [
+            [0.22351489, -0.11059539, 0.24342428],
+            [-0.11059539, 0.05472271, -0.12044658],
+            [0.24342428, -0.12044658, 0.26510708],
+        ]
+    )
+    assert_almost_equal(s, actual_result, decimal=4)
+    assert_almost_equal(error, 31.371190566800497, decimal=3)
+
+
+def test_psdp_projgrad_generic_non_square():
+    r"""Test Projected Gradient's algorithm for PSDP with 2 generic non-square matrices."""
+    a = np.array([[5, 1, 6, -1], [3, 2, 0, 2], [2, 4, 3, -3]])
+    b = np.array([[15, 1, 15 - 3, 2 + 5], [10, 5, 6, 3], [-3, 3, -3, -2 + 4]])
+    res = psdp_projgrad(a=a, b=b)
+    s, error = res["s"], res["error"]
+    actual_result = np.array([
+        [2.58262946, 1.10868691, -1.08661918],
+        [1.10868691, 1.67636517, 0.13242428],
+        [-1.08661918, 0.13242428, 0.75597659]]
+    )
+    assert_almost_equal(s, actual_result, decimal=5)
+    assert_array_less(error, 32.200295757989856)
+
+
+def test_psdp_projgrad_non_full_rank():
+    r"""Test Projected Gradient's algorithm for PSDP with a non-full-rank matrix."""
+    a = np.array(
+        [
+            [0.3452, -0.9897, 0.8082, -0.1739, -1.4692, -0.2531, 1.0339],
+            [0.2472, -1.4457, -0.6672, -0.5790, 1.2516, -0.8184, -0.4790],
+            [-1.3567, -0.9348, 0.7573, 1.7002, -0.9627, -0.5655, 2.5222],
+            [1.6639, 0.6111, -0.1858, 0.0485, 0.1136, 0.1372, -0.0149],
+            [-0.1400, -0.3303, -0.2965, 0.0218, 0.0565, -0.1907, -0.2544],
+            [-1.2662, 0.1905, 0.3302, -0.4041, 1.1479, -1.4716, 0.0857],
+        ]
+    )
+    b = np.array(
+        [
+            [6.3043, -6.5364, 1.2659, -2.7625, -2.9861, -5.4362, 2.7422],
+            [-0.5694, -9.4371, -5.5455, -15.6041, 24.4958, -20.4567, -11.4576],
+            [-0.1030, 2.3164, 3.0813, 8.1280, -10.6447, 6.6903, 7.9874],
+            [8.1678, -4.5977, 0.0559, -2.6948, 1.1326, -6.5904, 2.0167],
+            [6.3043, -6.5364, 1.2659, -2.7625, -2.9861, -5.4362, 2.7422],
+            [-0.5694, -9.4371, -5.5455, -15.6041, 24.4958, -20.4567, -11.4576],
+        ]
+    )
+    res = psdp_projgrad(a=a, b=b)
+    s, error = res["s"], res["error"]
+    actual_result = np.array([
+        [5.40878932, 1.63338805, -0.30680274, 3.87229356, 5.40863988, 1.63366874],
+        [1.63338805, 9.63678713, -3.53016912, 2.47908485, 1.63323779, 9.63660762],
+        [-0.30680274, -3.53016912, 2.71131028, 0.02464064, -0.30684737, -3.53027101],
+        [3.87229356, 2.47908485, 0.02464064, 5.9697877, 3.87199514, 2.47930511],
+        [5.40863988, 1.63323779, -0.30684737, 3.87199514, 5.40849846, 1.63356974],
+        [1.63366874, 9.63660762, -3.53027101, 2.47930511, 1.63356974, 9.63675614]
+    ])
+    assert_almost_equal(s, actual_result, decimal=2)
+    assert_almost_equal(error, 0.0, decimal=5)


 def test_psdp_woodgate_identity(n=np.random.randint(50, 100)):
diff --git a/tox.ini b/tox.ini
index 20e71f3..1c40b7e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -182,6 +182,8 @@ extension-pkg-whitelist=numpy
 [FORMAT]
 # Maximum number of characters on a single line.
 max-line-length=100
+# Maximum number of lines in a module
+max-module-lines=2000

 [MESSAGES CONTROL]
 disable=
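Usage sketch: a minimal example of calling the new psdp_projgrad solver, assuming the procrustes package with this patch is installed. The input matrices are the ones used in test_psdp_projgrad_generic_square above, and the options_dict values shown are illustrative overrides of the defaults documented in the docstring.

import numpy as np

from procrustes.psdp import psdp_projgrad

# Find a positive semi-definite S that (approximately) minimizes ||S A - B||_F.
a = np.array([[1, 6, 0], [4, 3, 0], [0, 0, -0.5]])
b = np.array([[1, 0, 0], [0, -2, 3], [0, 2, 4]])

# The stopping controls can be tightened or loosened through options_dict (all keys optional).
res = psdp_projgrad(a=a, b=b, options_dict={"max_iter": 5000, "f_tol": 1e-10})

print(res["s"])      # the positive semi-definite transformation S
print(res["error"])  # the Procrustes error reported for S A versus B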