Merge pull request #198 from abhishekmittal15/psdp-algorithms
Add projected gradient method for the positive semi-definite Procrustes problem
FanwangM authored Jan 3, 2023
2 parents 025426f + ea5941b commit 39d90f9
Showing 4 changed files with 316 additions and 5 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci_tox.yml
@@ -11,13 +11,13 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: x64
222 changes: 221 additions & 1 deletion procrustes/psdp.py
@@ -34,10 +34,194 @@
__all__ = [
"psdp_woodgate",
"psdp_peng",
"psdp_opt"
"psdp_opt",
"psdp_projgrad",
]


def psdp_projgrad(
a: np.ndarray,
b: np.ndarray,
options_dict: Dict = None,
pad: bool = True,
translate: bool = False,
scale: bool = False,
unpad_col: bool = False,
unpad_row: bool = False,
check_finite: bool = True,
weight: Optional[np.ndarray] = None,
) -> ProcrustesResult:
r"""
Projected gradient method for the positive semi-definite Procrustes problem.

We minimize F(S) = ||SAT - B||_F, where A and B are n*m matrices, S is the n*n positive
semi-definite transformation to be determined, and T is fixed to the identity matrix for
now; that is, only the left transformation is considered.
The reference paper minimizes ||AX - B||_F, so the terms map between the two problem
statements as follows:

Term used in paper      Term used in our implementation
-------------------------------------------------------
A                       S
X                       A
B                       B

Parameters
----------
a : np.ndarray
The matrix :math:`\mathbf{A}` which is to be transformed.
b : np.ndarray
The target matrix :math:`\mathbf{B}`.
options_dict : Dict, optional
Dictionary with fields that serve as parameters for the algorithm.
max_iter : int
Maximum number of iterations.
Default value is 10000.
s_tol : float
Stopping tolerance for ||S_i - S_{i-1}||_F / ||S_1 - S_0||_F.
Default value is 1e-5. Should be kept below 1.
f_tol : float
Stopping tolerance for ||F_i - F_{i-1}||_F / (1 + ||F_{i-1}||_F).
Default value is 1e-12. Should be kept much smaller than 1.
pad : bool, optional
Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
:math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
translate : bool, optional
If True, both arrays are centered at origin (columns of the arrays will have mean zero).
scale : bool, optional
If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
:math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
:math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
unpad_col : bool, optional
If True, zero columns (with values less than 1.0e-8) on the right-hand side of the initial
:math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
unpad_row : bool, optional
If True, zero rows (with values less than 1.0e-8) at the bottom of the initial
:math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
check_finite : bool, optional
If True, convert the input to an array, checking for NaNs or Infs.
weight : ndarray, optional
The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines the
elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by :math:`\mathbf{A}`
matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.
Returns
-------
ProcrustesResult
The result of the Procrustes transformation.
Notes
-----
The projected gradient algorithm that this implementation follows is described in detail on
p. 131 of [1]_.
References
----------
.. [1] Nicolas Gillis, Punit Sharma, "A semi-analytical approach for the positive semidefinite
Procrustes problem", Linear Algebra and its Applications, Volume 540, 2018, Pages 112-137.
"""
a, b = setup_input_arrays(
a,
b,
unpad_col,
unpad_row,
pad,
translate,
scale,
check_finite,
weight,
)

if a.shape != b.shape:
raise ValueError(
f"Shape of A and B does not match: {a.shape} != {b.shape} "
"Check pad, unpad_col, and unpad_row arguments."
)

_, m = a.shape

# Default parameters of the algorithm
options = {
# The maximum number of iterations the gradient descent algorithm is allowed to run
"max_iter": 10000,
# The threshold for the ratio ||S_i - S_{i-1}||_F / ||S_1 - S_0||_F below which the
# algorithm terminates
"s_tol": 1e-5,
# The threshold for the ratio ||F_i - F_{i-1}||_F / (1 + ||F_{i-1}||_F) below which the
# algorithm terminates
"f_tol": 1e-12,
}

if options_dict:
options.update(options_dict)

# Perform some precomputations
aat = a @ a.conj().T
x, _ = np.linalg.eig(aat)
# Square of the largest eigenvalue of A A^H; if the eigenvalues come back complex, their
# absolute values would need to be used instead.
max_eig = np.max(x) ** 2
bat = b @ a.conj().T

# Initialization of the algorithm
i = 1
err = np.zeros((options["max_iter"] + 1, 1))
# S is the left transformation matrix we are solving for
s = _init_procustes_projgrad(a, b)
# F is the residual whose Frobenius norm we minimize, F = S @ A - B
f = s @ a - b
# eps = ||S_i - S_{i - 1}||_F
eps = 1
# eps0 = ||S_1 - S_0||_F
eps0 = 0

# Run the gradient iterations until convergence or until the iteration cap is reached
while i <= options["max_iter"]:
s_old = s
f_old = f
# Projected gradient step
s = _psd_proj(s - (s@aat - bat)/max_eig)
f = s @ a - b
err[i] = np.linalg.norm(f, 'fro')

# Stop conditions to check if the algorithm should be terminated

# If ||F_i - F_{i-1}||_F / (1 + ||F_{i-1}||_F) falls below the tolerance, terminate:
# further iterations would yield little additional improvement in the objective.
if np.linalg.norm(f - f_old, 'fro') / (1 + np.linalg.norm(f_old, 'fro')) < options["f_tol"]:
break

# If ||S_i - S_{i-1}||_F / ||S_1 - S_0||_F falls below the tolerance, terminate.
# TODO: decide whether this criterion is useful.
if i == 1:
eps0 = np.linalg.norm(s - s_old, 'fro')
eps = np.linalg.norm(s - s_old, 'fro')
if eps < options["s_tol"] * eps0:
break

i += 1

return ProcrustesResult(
new_a=a,
new_b=b,
error=compute_error(a=a, b=b, t=np.eye(m), s=s),
t=np.eye(m),
s=s,
)
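
A minimal usage sketch of the new function, mirroring the diagonal test added in
procrustes/test/test_psdp.py below; it assumes this branch of the procrustes package is
installed and importable:

import numpy as np
from procrustes.psdp import psdp_projgrad

a = np.diag([1, 2, 3, 4])
b = np.eye(4)
# Tolerances and the iteration cap are passed through options_dict; unspecified keys keep
# the defaults documented above (max_iter=10000, s_tol=1e-5, f_tol=1e-12).
res = psdp_projgrad(a=a, b=b, options_dict={"f_tol": 1e-14})
print(res["s"])      # close to diag([1, 0.5, 0.33333, 0.25]), as in the diagonal test
print(res["error"])  # near zero for this exactly solvable instance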


def psdp_opt(
a: np.ndarray,
b: np.ndarray,
@@ -814,3 +998,39 @@ def _psd_proj(arr: np.ndarray, do_cholesky: bool = True) -> np.ndarray:
return arr
except np.linalg.LinAlgError:
return _make_positive(arr)
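
The diff shows only the tail of _psd_proj. A common way to project a matrix onto the
positive semi-definite cone is to symmetrize it and clip its negative eigenvalues; a
standalone sketch of that standard construction (an illustration, not the library's own
implementation):

import numpy as np

def project_psd(arr: np.ndarray) -> np.ndarray:
    """Project a square matrix onto the PSD cone by eigenvalue clipping."""
    sym = (arr + arr.conj().T) / 2          # work with the symmetric/Hermitian part
    eigvals, eigvecs = np.linalg.eigh(sym)  # real eigenvalues for a Hermitian matrix
    eigvals = np.clip(eigvals, 0, None)     # drop the negative part of the spectrum
    return eigvecs @ np.diag(eigvals) @ eigvecs.conj().T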


def _init_procustes_projgrad(
a: np.ndarray,
b: np.ndarray,
choice: int = 0
) -> np.ndarray:
r"""Return the starting point of the transformation S of the projection gradient algorithm."""

n, _ = a.shape

# Compute two candidate starting points S1 and S2 and return the one that gives the lower
# error in the objective function

# Finding S1
s1t = np.linalg.lstsq(a.conj().T, b.conj().T, rcond=1)[0]
s1 = s1t.conj().T
s1 = _psd_proj(s1)
e1 = np.linalg.norm(s1@a-b, 'fro')

# Finding S2
eps = 1e-6
s2 = np.zeros((n, n))
for i in range(n):
s2[i, i] = max(0, (a[i, :]@b[i, :].conj().T) / (np.linalg.norm(a[i, :])**2 + eps))
# Adding eps to avoid 0 division
e2 = np.linalg.norm(s2@a-b, "fro")

# An explicit choice overrides the error comparison; ties default to S1.
if choice == 1:
s_init = s1
elif choice == 2:
s_init = s2
elif e1 <= e2:
s_init = s1
else:
s_init = s2

return s_init
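
The diagonal candidate S2 above picks, for each row, the best non-negative scalar multiple
of the corresponding row of A for matching the row of B. A small self-contained check of
that closed form (illustration only; the eps regularizer is omitted):

import numpy as np

rng = np.random.default_rng(1)
a_row = rng.standard_normal(5)
b_row = rng.standard_normal(5)

# Closed-form minimizer of ||c * a_row - b_row||^2 over c >= 0, as used on the diagonal of S2.
c_star = max(0.0, float(a_row @ b_row) / (np.linalg.norm(a_row) ** 2))

# The same value is the unconstrained least-squares coefficient clipped at zero.
c_ls = np.linalg.lstsq(a_row.reshape(-1, 1), b_row, rcond=None)[0][0]
print(np.isclose(c_star, max(0.0, c_ls)))  # True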
93 changes: 91 additions & 2 deletions procrustes/test/test_psdp.py
@@ -23,8 +23,97 @@
r"""Testings for PSDP (positive semi-definite Procrustes) module."""

import numpy as np
from numpy.testing import assert_almost_equal
from procrustes.psdp import psdp_opt, psdp_peng, psdp_woodgate
from numpy.testing import assert_almost_equal, assert_array_less
from procrustes.psdp import psdp_opt, psdp_peng, psdp_projgrad, psdp_woodgate


def test_psdp_projgrad_identity(n=np.random.randint(50, 100)):
r"""Test Projected Gradient's algorithm for PSDP with identity matrix."""
a = np.eye(n)
b = np.eye(n)
res = psdp_projgrad(a=a, b=b)
s, error = res["s"], res["error"]
assert_almost_equal(s, np.eye(n))
assert_almost_equal(error, 0.0)


def test_psdp_projgrad_diagonal():
r"""Test Projected Gradient's algorithm for PSDP with diagonal matrix."""
a = np.diag([1, 2, 3, 4])
b = np.eye(4)
res = psdp_projgrad(a=a, b=b)
s, error = res["s"], res["error"]
actual_result = np.diag([0.99999, 0.5, 0.33333, 0.25])
assert_almost_equal(s, actual_result, decimal=5)
assert_almost_equal(error, 0.0, decimal=3)


def test_psdp_projgrad_generic_square():
r"""Test Projected Gradient's algorithm for PSDP with 2 generic square matrices."""
a = np.array([[1, 6, 0], [4, 3, 0], [0, 0, -0.5]])
b = np.array([[1, 0, 0], [0, -2, 3], [0, 2, 4]])
res = psdp_projgrad(a=a, b=b)
s, error = res["s"], res["error"]
actual_result = np.array(
[
[0.22351489, -0.11059539, 0.24342428],
[-0.11059539, 0.05472271, -0.12044658],
[0.24342428, -0.12044658, 0.26510708],
]
)
assert_almost_equal(s, actual_result, decimal=4)
assert_almost_equal(error, 31.371190566800497, decimal=3)


def test_psdp_projgrad_generic_non_square():
r"""Test Projected Grdient's algorithm for PSDP with 2 generic non-square matrices."""
a = np.array([[5, 1, 6, -1], [3, 2, 0, 2], [2, 4, 3, -3]])
b = np.array([[15, 1, 15 - 3, 2 + 5], [10, 5, 6, 3], [-3, 3, -3, -2 + 4]])
res = psdp_projgrad(a=a, b=b)
s, error = res["s"], res["error"]
actual_result = np.array([
[2.58262946, 1.10868691, -1.08661918],
[1.10868691, 1.67636517, 0.13242428],
[-1.08661918, 0.13242428, 0.75597659]]
)
assert_almost_equal(s, actual_result, decimal=5)
assert_array_less(error, 32.200295757989856)


def test_psdp_projgrad_non_full_rank():
r"""Test Projected Gradient's algorithm for PSDP for full rank matrix."""
a = np.array(
[
[0.3452, -0.9897, 0.8082, -0.1739, -1.4692, -0.2531, 1.0339],
[0.2472, -1.4457, -0.6672, -0.5790, 1.2516, -0.8184, -0.4790],
[-1.3567, -0.9348, 0.7573, 1.7002, -0.9627, -0.5655, 2.5222],
[1.6639, 0.6111, -0.1858, 0.0485, 0.1136, 0.1372, -0.0149],
[-0.1400, -0.3303, -0.2965, 0.0218, 0.0565, -0.1907, -0.2544],
[-1.2662, 0.1905, 0.3302, -0.4041, 1.1479, -1.4716, 0.0857],
]
)
b = np.array(
[
[6.3043, -6.5364, 1.2659, -2.7625, -2.9861, -5.4362, 2.7422],
[-0.5694, -9.4371, -5.5455, -15.6041, 24.4958, -20.4567, -11.4576],
[-0.1030, 2.3164, 3.0813, 8.1280, -10.6447, 6.6903, 7.9874],
[8.1678, -4.5977, 0.0559, -2.6948, 1.1326, -6.5904, 2.0167],
[6.3043, -6.5364, 1.2659, -2.7625, -2.9861, -5.4362, 2.7422],
[-0.5694, -9.4371, -5.5455, -15.6041, 24.4958, -20.4567, -11.4576],
]
)
res = psdp_projgrad(a=a, b=b)
s, error = res["s"], res["error"]
actual_result = np.array([
[5.40878932, 1.63338805, -0.30680274, 3.87229356, 5.40863988, 1.63366874],
[1.63338805, 9.63678713, -3.53016912, 2.47908485, 1.63323779, 9.63660762],
[-0.30680274, -3.53016912, 2.71131028, 0.02464064, -0.30684737, -3.53027101],
[3.87229356, 2.47908485, 0.02464064, 5.9697877, 3.87199514, 2.47930511],
[5.40863988, 1.63323779, -0.30684737, 3.87199514, 5.40849846, 1.63356974],
[1.63366874, 9.63660762, -3.53027101, 2.47930511, 1.63356974, 9.63675614]
])
assert_almost_equal(s, actual_result, decimal=2)
assert_almost_equal(error, 0.0, decimal=5)
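
A further test in the same style could exercise a uniformly scaled identity; a sketch of
such a check (not part of the committed tests):

def test_psdp_projgrad_scaled_identity():
    r"""Sketch: recover S = 0.5 * I from A = 2 * I and B = I."""
    a = 2 * np.eye(3)
    b = np.eye(3)
    res = psdp_projgrad(a=a, b=b)
    s, error = res["s"], res["error"]
    assert_almost_equal(s, 0.5 * np.eye(3), decimal=5)
    assert_almost_equal(error, 0.0, decimal=5)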


def test_psdp_woodgate_identity(n=np.random.randint(50, 100)):
2 changes: 2 additions & 0 deletions tox.ini
@@ -182,6 +182,8 @@ extension-pkg-whitelist=numpy
[FORMAT]
# Maximum number of characters on a single line.
max-line-length=100
# Maximum number of lines in a module
max-module-lines=2000

[MESSAGES CONTROL]
disable=
