Merge pull request #198 from abhishekmittal15/psdp-algorithms
Add projected gradient method for the positive semi-definite Procrustes problem
FanwangM authored Jan 3, 2023
2 parents 025426f + ea5941b commit 39d90f9
Showing 4 changed files with 316 additions and 5 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci_tox.yml
@@ -11,13 +11,13 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: x64
222 changes: 221 additions & 1 deletion procrustes/psdp.py
@@ -34,10 +34,194 @@
__all__ = [
"psdp_woodgate",
"psdp_peng",
"psdp_opt"
"psdp_opt",
"psdp_projgrad",
]


def psdp_projgrad(
a: np.ndarray,
b: np.ndarray,
options_dict: Dict = None,
pad: bool = True,
translate: bool = False,
scale: bool = False,
unpad_col: bool = False,
unpad_row: bool = False,
check_finite: bool = True,
weight: Optional[np.ndarray] = None,
) -> ProcrustesResult:
r"""
Projected gradient method for the positive semi-definite Procrustes problem.

We minimize F(S) = ||SAT - B||_F, where A and B are n*m matrices, S is the n*n positive
semi-definite transformation to be determined, and T is fixed to the identity matrix for
now; that is, only the left transformation is considered.
The reference paper minimizes ||AX - B||_F, so the terms map between the two problem
statements as follows:

Term used in paper      Term used in our implementation
-------------------------------------------------------
A                       S
X                       A
B                       B

Parameters
----------
a : np.ndarray
The matrix :math:`\mathbf{A}` which is to be transformed.
b : np.ndarray
The target matrix :math:`\mathbf{B}`.
options_dict : Dict, optional
Dictionary with fields that serve as parameters for the algorithm.
max_iter : int
Maximum number of iterations.
Default value is 10000.
s_tol : float
Stopping tolerance for ||S_i - S_{i-1}||_F / ||S_1 - S_0||_F.
Default value is 1e-5. Should be kept below 1.
f_tol : float
Stopping tolerance for ||F_i - F_{i-1}||_F / (1 + ||F_{i-1}||_F).
Default value is 1e-12. Should be kept much smaller than 1.
pad : bool, optional
Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
:math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
translate : bool, optional
If True, both arrays are centered at origin (columns of the arrays will have mean zero).
scale : bool, optional
If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
:math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
:math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
unpad_col : bool, optional
If True, zero columns (with values less than 1.0e-8) on the right-hand side of the initial
:math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
unpad_row : bool, optional
If True, zero rows (with values less than 1.0e-8) at the bottom of the initial
:math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
check_finite : bool, optional
If True, convert the input to an array, checking for NaNs or Infs.
weight : ndarray, optional
The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines the
elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by :math:`\mathbf{A}`
matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.
Returns
-------
ProcrustesResult
The result of the Procrustes transformation.
Notes
-----
The projected gradient algorithm that this implementation follows is described in detail on
p. 131 of [1]_.
References
----------
.. [1] Nicolas Gillis, Punit Sharma, "A semi-analytical approach for the positive semidefinite
Procrustes problem", Linear Algebra and its Applications, Volume 540, 2018, Pages 112-137.
"""
a, b = setup_input_arrays(
a,
b,
unpad_col,
unpad_row,
pad,
translate,
scale,
check_finite,
weight,
)

if a.shape != b.shape:
raise ValueError(
f"Shape of A and B does not match: {a.shape} != {b.shape} "
"Check pad, unpad_col, and unpad_row arguments."
)

_, m = a.shape

# Default parameters of the algorithm
options = {
# The maximum number of iterations the gradient descent algorithm is allowed to run
"max_iter": 10000,
# The threshold for the ratio ||S_i - S_{i-1}||_F / ||S_1 - S_0||_F below which the
# algorithm terminates
"s_tol": 1e-5,
# The threshold for the ratio ||F_i - F_{i-1}||_F / (1 + ||F_{i-1}||_F) below which the
# algorithm terminates
"f_tol": 1e-12,
}

if options_dict:
options.update(options_dict)

# Perform some precomputations
aat = a @ a.conj().T
x, _ = np.linalg.eig(aat)
# Square of the largest eigenvalue of A A^H; if the eigenvalues come back complex, their
# absolute values would need to be used instead.
max_eig = np.max(x) ** 2
bat = b @ a.conj().T

# Initialization of the algorithm
i = 1
err = np.zeros((options["max_iter"] + 1, 1))
# S is the left transformation matrix we are solving for
s = _init_procustes_projgrad(a, b)
# F is the residual whose Frobenius norm we minimize, F = S @ A - B
f = s @ a - b
# eps = ||S_i - S_{i - 1}||_F
eps = 1
# eps0 = ||S_1 - S_0||_F
eps0 = 0

# Run the gradient iterations until convergence or until the iteration cap is reached
while i <= options["max_iter"]:
s_old = s
f_old = f
# Projected gradient step
s = _psd_proj(s - (s@aat - bat)/max_eig)
f = s @ a - b
err[i] = np.linalg.norm(f, 'fro')

# Stop conditions to check if the algorithm should be terminated

# If ||F_i - F_{i-1}||_F / (1 + ||F_{i-1}||_F) falls below the tolerance, terminate:
# further iterations would yield little additional improvement in the objective.
if np.linalg.norm(f - f_old, 'fro') / (1 + np.linalg.norm(f_old, 'fro')) < options["f_tol"]:
break

# If ||S_i - S_{i-1}||_F / ||S_1 - S_0||_F falls below the tolerance, terminate.
# TODO: decide whether this criterion is useful.
if i == 1:
eps0 = np.linalg.norm(s - s_old, 'fro')
eps = np.linalg.norm(s - s_old, 'fro')
if eps < options["s_tol"] * eps0:
break

i += 1

return ProcrustesResult(
new_a=a,
new_b=b,
error=compute_error(a=a, b=b, t=np.eye(m), s=s),
t=np.eye(m),
s=s,
)
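
A minimal usage sketch of the new function, mirroring the diagonal test added in
procrustes/test/test_psdp.py below; it assumes this branch of the procrustes package is
installed and importable:

import numpy as np
from procrustes.psdp import psdp_projgrad

a = np.diag([1, 2, 3, 4])
b = np.eye(4)
# Tolerances and the iteration cap are passed through options_dict; unspecified keys keep
# the defaults documented above (max_iter=10000, s_tol=1e-5, f_tol=1e-12).
res = psdp_projgrad(a=a, b=b, options_dict={"f_tol": 1e-14})
print(res["s"])      # close to diag([1, 0.5, 0.33333, 0.25]), as in the diagonal test
print(res["error"])  # near zero for this exactly solvable instance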


def psdp_opt(
a: np.ndarray,
b: np.ndarray,
@@ -814,3 +998,39 @@ def _psd_proj(arr: np.ndarray, do_cholesky: bool = True) -> np.ndarray:
return arr
except np.linalg.LinAlgError:
return _make_positive(arr)
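
The diff shows only the tail of _psd_proj. A common way to project a matrix onto the
positive semi-definite cone is to symmetrize it and clip its negative eigenvalues; a
standalone sketch of that standard construction (an illustration, not the library's own
implementation):

import numpy as np

def project_psd(arr: np.ndarray) -> np.ndarray:
    """Project a square matrix onto the PSD cone by eigenvalue clipping."""
    sym = (arr + arr.conj().T) / 2          # work with the symmetric/Hermitian part
    eigvals, eigvecs = np.linalg.eigh(sym)  # real eigenvalues for a Hermitian matrix
    eigvals = np.clip(eigvals, 0, None)     # drop the negative part of the spectrum
    return eigvecs @ np.diag(eigvals) @ eigvecs.conj().T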


def _init_procustes_projgrad(
a: np.ndarray,
b: np.ndarray,
choice: int = 0
) -> np.ndarray:
r"""Return the starting point of the transformation S of the projection gradient algorithm."""

n, _ = a.shape

# Compute two candidate starting points S1 and S2 and return the one that gives the lower
# error in the objective function

# Finding S1
s1t = np.linalg.lstsq(a.conj().T, b.conj().T, rcond=1)[0]
s1 = s1t.conj().T
s1 = _psd_proj(s1)
e1 = np.linalg.norm(s1@a-b, 'fro')

# Finding S2
eps = 1e-6
s2 = np.zeros((n, n))
for i in range(n):
s2[i, i] = max(0, (a[i, :]@b[i, :].conj().T) / (np.linalg.norm(a[i, :])**2 + eps))
# Adding eps to avoid 0 division
e2 = np.linalg.norm(s2@a-b, "fro")

# An explicit choice overrides the error comparison; ties default to S1.
if choice == 1:
s_init = s1
elif choice == 2:
s_init = s2
elif e1 <= e2:
s_init = s1
else:
s_init = s2

return s_init
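
The diagonal candidate S2 above picks, for each row, the best non-negative scalar multiple
of the corresponding row of A for matching the row of B. A small self-contained check of
that closed form (illustration only; the eps regularizer is omitted):

import numpy as np

rng = np.random.default_rng(1)
a_row = rng.standard_normal(5)
b_row = rng.standard_normal(5)

# Closed-form minimizer of ||c * a_row - b_row||^2 over c >= 0, as used on the diagonal of S2.
c_star = max(0.0, float(a_row @ b_row) / (np.linalg.norm(a_row) ** 2))

# The same value is the unconstrained least-squares coefficient clipped at zero.
c_ls = np.linalg.lstsq(a_row.reshape(-1, 1), b_row, rcond=None)[0][0]
print(np.isclose(c_star, max(0.0, c_ls)))  # True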
93 changes: 91 additions & 2 deletions procrustes/test/test_psdp.py
@@ -23,8 +23,97 @@
r"""Testings for PSDP (positive semi-definite Procrustes) module."""

import numpy as np
from numpy.testing import assert_almost_equal
from procrustes.psdp import psdp_opt, psdp_peng, psdp_woodgate
from numpy.testing import assert_almost_equal, assert_array_less
from procrustes.psdp import psdp_opt, psdp_peng, psdp_projgrad, psdp_woodgate


def test_psdp_projgrad_identity(n=np.random.randint(50, 100)):
r"""Test Projected Gradient's algorithm for PSDP with identity matrix."""
a = np.eye(n)
b = np.eye(n)
res = psdp_projgrad(a=a, b=b)
s, error = res["s"], res["error"]
assert_almost_equal(s, np.eye(n))
assert_almost_equal(error, 0.0)


def test_psdp_projgrad_diagonal():
r"""Test Projected Gradient's algorithm for PSDP with diagonal matrix."""
a = np.diag([1, 2, 3, 4])
b = np.eye(4)
res = psdp_projgrad(a=a, b=b)
s, error = res["s"], res["error"]
actual_result = np.diag([0.99999, 0.5, 0.33333, 0.25])
assert_almost_equal(s, actual_result, decimal=5)
assert_almost_equal(error, 0.0, decimal=3)


def test_psdp_projgrad_generic_square():
r"""Test Projected Gradient's algorithm for PSDP with 2 generic square matrices."""
a = np.array([[1, 6, 0], [4, 3, 0], [0, 0, -0.5]])
b = np.array([[1, 0, 0], [0, -2, 3], [0, 2, 4]])
res = psdp_projgrad(a=a, b=b)
s, error = res["s"], res["error"]
actual_result = np.array(
[
[0.22351489, -0.11059539, 0.24342428],
[-0.11059539, 0.05472271, -0.12044658],
[0.24342428, -0.12044658, 0.26510708],
]
)
assert_almost_equal(s, actual_result, decimal=4)
assert_almost_equal(error, 31.371190566800497, decimal=3)


def test_psdp_projgrad_generic_non_square():
r"""Test Projected Grdient's algorithm for PSDP with 2 generic non-square matrices."""
a = np.array([[5, 1, 6, -1], [3, 2, 0, 2], [2, 4, 3, -3]])
b = np.array([[15, 1, 15 - 3, 2 + 5], [10, 5, 6, 3], [-3, 3, -3, -2 + 4]])
res = psdp_projgrad(a=a, b=b)
s, error = res["s"], res["error"]
actual_result = np.array([
[2.58262946, 1.10868691, -1.08661918],
[1.10868691, 1.67636517, 0.13242428],
[-1.08661918, 0.13242428, 0.75597659]]
)
assert_almost_equal(s, actual_result, decimal=5)
assert_array_less(error, 32.200295757989856)


def test_psdp_projgrad_non_full_rank():
r"""Test Projected Gradient's algorithm for PSDP for full rank matrix."""
a = np.array(
[
[0.3452, -0.9897, 0.8082, -0.1739, -1.4692, -0.2531, 1.0339],
[0.2472, -1.4457, -0.6672, -0.5790, 1.2516, -0.8184, -0.4790],
[-1.3567, -0.9348, 0.7573, 1.7002, -0.9627, -0.5655, 2.5222],
[1.6639, 0.6111, -0.1858, 0.0485, 0.1136, 0.1372, -0.0149],
[-0.1400, -0.3303, -0.2965, 0.0218, 0.0565, -0.1907, -0.2544],
[-1.2662, 0.1905, 0.3302, -0.4041, 1.1479, -1.4716, 0.0857],
]
)
b = np.array(
[
[6.3043, -6.5364, 1.2659, -2.7625, -2.9861, -5.4362, 2.7422],
[-0.5694, -9.4371, -5.5455, -15.6041, 24.4958, -20.4567, -11.4576],
[-0.1030, 2.3164, 3.0813, 8.1280, -10.6447, 6.6903, 7.9874],
[8.1678, -4.5977, 0.0559, -2.6948, 1.1326, -6.5904, 2.0167],
[6.3043, -6.5364, 1.2659, -2.7625, -2.9861, -5.4362, 2.7422],
[-0.5694, -9.4371, -5.5455, -15.6041, 24.4958, -20.4567, -11.4576],
]
)
res = psdp_projgrad(a=a, b=b)
s, error = res["s"], res["error"]
actual_result = np.array([
[5.40878932, 1.63338805, -0.30680274, 3.87229356, 5.40863988, 1.63366874],
[1.63338805, 9.63678713, -3.53016912, 2.47908485, 1.63323779, 9.63660762],
[-0.30680274, -3.53016912, 2.71131028, 0.02464064, -0.30684737, -3.53027101],
[3.87229356, 2.47908485, 0.02464064, 5.9697877, 3.87199514, 2.47930511],
[5.40863988, 1.63323779, -0.30684737, 3.87199514, 5.40849846, 1.63356974],
[1.63366874, 9.63660762, -3.53027101, 2.47930511, 1.63356974, 9.63675614]
])
assert_almost_equal(s, actual_result, decimal=2)
assert_almost_equal(error, 0.0, decimal=5)
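
A further test in the same style could exercise a uniformly scaled identity; a sketch of
such a check (not part of the committed tests):

def test_psdp_projgrad_scaled_identity():
    r"""Sketch: recover S = 0.5 * I from A = 2 * I and B = I."""
    a = 2 * np.eye(3)
    b = np.eye(3)
    res = psdp_projgrad(a=a, b=b)
    s, error = res["s"], res["error"]
    assert_almost_equal(s, 0.5 * np.eye(3), decimal=5)
    assert_almost_equal(error, 0.0, decimal=5)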


def test_psdp_woodgate_identity(n=np.random.randint(50, 100)):
2 changes: 2 additions & 0 deletions tox.ini
@@ -182,6 +182,8 @@ extension-pkg-whitelist=numpy
[FORMAT]
# Maximum number of characters on a single line.
max-line-length=100
# Maximum number of lines in a module
max-module-lines=2000

[MESSAGES CONTROL]
disable=
