-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchapter4.py
348 lines (287 loc) · 12.1 KB
/
chapter4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
import sympy as sp
from typing import cast
from chapter3 import *
def det(A: np.ndarray) -> float | sp.Expr:
""" Returns the determinant of A.
A: (n, n) matrix.
Returns: float or `sympy.Expr`.
"""
assert A.shape[0] == A.shape[1], 'Matrix must be square.'
n = A.shape[0]
if n == 1:
# noinspection PyTypeChecker
return A[0, 0]
else:
# Laplace's expansion along row 0
return sum((-1) ** j * A[0, j] * det(np.delete(np.delete(A, 0, 0), j, 1)) for j in range(n))
def tr(A: np.ndarray) -> float:
""" Returns the trace of A.
A: (n, n) matrix.
Returns: float.
"""
assert A.shape[0] == A.shape[1], 'Matrix must be square.'
return np.diag(A).sum()
def characteristic_polynomial(A: np.ndarray) -> np.polynomial.Polynomial:
""" Returns the characteristic polynomial of A.
A: (n, n) matrix.
Returns: `np.polynomial.Polynomial`.
"""
assert A.shape[0] == A.shape[1], 'Matrix must be square.'
sympy_poly = cast(sp.Expr, det(A - sp.symbols('lambda') * np.eye(A.shape[0])))
sympy_poly = cast(sp.Poly, sympy_poly.expand().as_poly())
coeffs = np.array(sympy_poly.all_coeffs()[::-1], dtype=np.float64)
return np.polynomial.Polynomial(coeffs)
def eigen_values(A: np.ndarray, rounding_tol: float | None = 1e-7) -> np.ndarray:
""" Returns the real eigenvalues of A sorted in descending order.
A: (m, n)
rounding_tol: float, the tolerance for rounding eigenvalues to the nearest integer.
Returns: (k) where k is the number of real eigenvalues of A, k <= min(m, n).
"""
m, n = A.shape
assert m == n, 'Matrix must be square.'
if m >= 7:
# Use the QR algorithm for large matrices.
v = eigen_values_qr(A)
else:
# Solve the characteristic polynomial for small matrices.
v = characteristic_polynomial(A).roots()
# Keep only the real eigenvalues.
if np.any(np.iscomplex(v)):
if rounding_tol is not None:
v.imag = round_within(v.imag, rounding_tol)
v = v[v.imag == 0].real
# Round to nearest integer.
if rounding_tol is not None:
v = round_within(v, rounding_tol)
v = np.sort(v)[::-1]
return v
def eigen_space(A: np.ndarray, eigenvalue: float) -> np.ndarray:
""" Returns an orthonormal basis for the eigenspace of A associated with the eigenvalue.
A: (m, n)
eigenvalue: float, an eigenvalue of A.
Returns: (m, k) where k is the geometric multiplicity of the eigenvalue.
"""
assert A.shape[0] == A.shape[1], 'Matrix must be square.'
return kernel_space(A - eigenvalue * np.eye(A.shape[0]))
def eigen_decomposition(A: np.ndarray, allow_partial: bool = False, orthonormalize: bool = False) -> Tuple[np.ndarray, np.ndarray]:
""" Returns (D, P) from the factorization of A as A = PDP^(-1).
A: (n, n) matrix with k eigenvectors.
allow_partial: bool, if True, the function returns the eigenvalues and eigenvectors of A even if A is defective.
orthonormalize: bool, if True, the eigenvectors are orthonormalized using the Gram-Schmidt
Returns:
D: (n, k) diagonal matrix of eigenvalues in descending order.
P: (n, k) matrix whose columns are the eigenvectors of A.
"""
assert A.shape[0] == A.shape[1], 'Matrix must be square.'
n = A.shape[0]
eigenvalues = np.sort(np.unique(eigen_values(A)))[::-1]
eigen_spaces = [eigen_space(A, v) for v in eigenvalues]
geometric_multiplicities = [B.shape[1] for B in eigen_spaces]
assert allow_partial or sum(
geometric_multiplicities) == n, f'The matrix is defective, sum(ga) <> n: {sum(geometric_multiplicities)} <> {n}.'
eigenvalues = np.repeat(eigenvalues, geometric_multiplicities)
D = np.diag(eigenvalues)
P = np.hstack(eigen_spaces)
if orthonormalize:
P, _ = qr_decomposition_gram_schmidt(P)
return D, P
def singular_value_decomposition(A: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
""" Returns (U, D, V) from the factorization of A as A = UDV^T where U and V are orthogonal matrices and D is a diagonal matrix.
A: m x n matrix.
Returns:
U: m x k orthogonal matrix.
D: k x k diagonal matrix.
V: n x k orthogonal matrix.
k = min(m, n)
"""
m, n = A.shape
if m >= n:
D, V = eigen_decomposition(A.T @ A, orthonormalize=True)
D = np.sqrt(D)
_, U = eigen_decomposition(A @ A.T, orthonormalize=True)
else:
D, U = eigen_decomposition(A @ A.T, orthonormalize=True)
D = np.sqrt(D)
_, V = eigen_decomposition(A.T @ A, orthonormalize=True)
return U, D, V
# Additional Functions
def pseudo_invert(A: np.ndarray) -> np.ndarray:
""" Returns the Moore-Penrose pseudo-inverse of A.
A: (m, n)
Returns: (n, m)
The Moore-Penrose pseudo-inverse of A is the unique matrix A^+ such that:
1. A A^+ A = A
2. A^+ A A^+ = A^+
3. (A A^+)^T = A A^+
If A is of full rank, then A^+ = A^(-1).
"""
U, D, V = singular_value_decomposition(A)
inv_D = np.zeros_like(D)
non_zero_indices = D != 0
inv_D[non_zero_indices] = 1 / D[non_zero_indices]
return V @ inv_D @ U.T
def solve_linear_systems_with_pseudo_inverse(A: np.ndarray, B: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
""" Returns the least squares general solutions to AX = B in the form (particular solutions, basis of null space).
A: m x n matrix.
B: m x p matrix.
Returns:
particular solutions: (n, p)
basis of null space: (n, k) where k is the dimension of the null space of A.
"""
particular_solutions = pseudo_invert(A) @ B
null_space_basis = kernel_space(A)
return particular_solutions, null_space_basis
def givens_rotation_to_zero(A: np.ndarray, target_idx: tuple[int, int], other_dim: int) -> np.ndarray:
""" Returns the givens rotation matrix that rotates the target_dim to zero in the 2d-plane.
A: (m, n) the matrix to be rotated
target_idx: the index of the element to be rotated to zero
other_dim: the index of the other dimension in the 2d-plane to rotate in
Returns: (dims, dims)
"""
R = np.eye(A.shape[1], dtype=np.float64)
v = A[:, target_idx[1]].astype(np.float64)
i, j = other_dim, target_idx[0]
vi, vj = v[i], v[j]
# Solving the below system for c and s.
# vi c - vj s = sqrt(vi^2 + vj^2) [preserve ||v||]
# vi s + vj c = 0 [rotate vj to 0]
c = vi / np.sqrt(vi ** 2 + vj ** 2)
s = -vj / np.sqrt(vi ** 2 + vj ** 2)
R[i, i], R[i, j] = c, -s
R[j, i], R[j, j] = s, c
return R
def qr_decomposition_gram_schmidt(A: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
""" Returns the QR decomposition of A using the Gram-Schmidt process.
A: (m, n) the matrix to be decomposed
Returns: (m, n), (n, n)
"""
m, n = A.shape
assert m == n, 'A must be square'
A = A.astype(np.float64)
Q = np.zeros((n, n), dtype=np.float64)
R = np.zeros((n, n), dtype=np.float64)
for j in range(n):
v = A[:, j]
for i in range(j):
R[i, j] = Q[:, i] @ A[:, j]
v -= R[i, j] * Q[:, i]
R[j, j] = norm(v)
if R[j, j] != 0:
Q[:, j] = v / R[j, j]
return Q, R
def eigen_values_qr(A: np.ndarray, max_iter: int = 1000, tol: float = 1e-6) -> np.ndarray:
""" Returns the eigenvalues of A using the QR algorithm.
A: (n, n) the matrix to find the eigenvalues of
max_iter: the maximum number of iterations
tol: the tolerance for convergence
Returns: (n)
"""
A = A.astype(np.float64)
excess_frob_norm = np.inf
for _ in range(max_iter):
Q, R = qr_decomposition_gram_schmidt(A)
A = R @ Q
excess_frob_norm = np.sqrt(np.sum(np.tril(A, k=-1) ** 2))
if excess_frob_norm < tol:
break
if excess_frob_norm > tol:
print(f'QR algorithm did not converge after {max_iter} iterations.')
v = np.diag(A)
v = np.sort(v)[::-1]
return v
# Helper Functions
def round_within(x: np.ndarray, tol: float = 1e-7) -> np.ndarray:
""" Rounds the elements of x to the nearest integer if they are within tol of an integer. """
return np.where(np.abs(x - np.round(x)) < tol, np.round(x), x)
# Additional Functions
def householder_transform_to_zero(A: np.ndarray, column: int, row: int, B: np.ndarray = None) -> np.ndarray:
""" Returns a Householder transformation matrix that zeros out the elements in the given column below the given row.
If B is given, the transformation is directly applied to it and the resulting matrix is returned. """
x = A[:, column].astype(np.float64).copy()
x[:row] = 0
# add the norm of the sub-vector to the element above
# the sign of the element is used to determine the sign of the norm
# this is done to avoid cancellation
add = norm(x[row:])
x[row] += np.sign(x[row]) * add
if B is not None:
if add == 0:
# the row is already in the correct form
return B
# directly evaluate the transformation without creating the matrix
# evaluates to: b - 2 * (x @ b) / (x @ x) * x
# tile x to match the shape of B
vectors = np.tile(2 * x, (B.shape[1], 1)).T
# evaluate the dot product of x with each column of B
scalars = (x[None, :] @ B) / dot_product(x, x)
return B - vectors * scalars
if add == 0:
# the row is already in the correct form
return np.eye(A.shape[0], dtype=np.float64)
# create the transformation matrix
Q = np.eye(A.shape[0], dtype=np.float64)
Q -= 2 * np.outer(x, x) / dot_product(x, x)
return Q
def qr_decomposition_householder(A: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
""" Returns the QR decomposition of the given matrix using the Householder transformation. """
m, n = A.shape
assert m == n, "Matrix must be square"
# Q = H_1 @ H_2 @ ... @ H_n-1 @ I
# R = H_n-1 @ ... @ H_1 @ A
Q = np.eye(n, dtype=np.float64)
# store each step to use for computing Q
Rs = [A.astype(np.float64).copy()]
for i in range(n-1):
Rs.append(householder_transform_to_zero(Rs[-1], i, i, Rs[-1]))
for i in range(n-2, -1, -1):
Q = householder_transform_to_zero(Rs[i], i, i, Q)
return Q, Rs[-1]
def cholesky_factorization(A: np.ndarray) -> np.ndarray:
""" Returns the Cholesky factorization of A, i.e. A = LL^T where L is a lower triangular matrix.
Input:
A: n x n symmetric positive definite matrix.
Output:
L: n x n lower triangular matrix.
"""
n = A.shape[0]
L = np.zeros((n, n), dtype=np.float64)
for j in range(n):
for i in range(j, n):
if i == j:
L[i, j] = np.sqrt(A[i, j] - np.sum(L[i, :j] ** 2))
else:
L[i, j] = (A[i, j] - np.sum(L[i, :j] * L[j, :j])) / L[j, j]
return L
def orthogonal_iteration(A: np.ndarray, max_iterations: int = 1000) -> Tuple[np.ndarray, np.ndarray]:
""" Returns the eigenvalues and eigenvectors of the given symmetric matrix using the orthogonal iteration algorithm. """
assert A.shape[0] == A.shape[1], "A must be a square matrix"
n = A.shape[0]
Q = np.eye(n, dtype=np.float64)
for _ in range(max_iterations):
Q = A @ Q
Q, R = qr_decomposition_householder(Q)
A = Q.T @ A @ Q
return np.diag(A), Q
def qr_algorithm(A: np.ndarray, max_iterations: int = 1000) -> Tuple[np.ndarray, np.ndarray]:
""" Returns the eigenvalues and eigenvectors of the given symmetric matrix using the QR algorithm. """
assert A.shape[0] == A.shape[1], "A must be a square matrix"
n = A.shape[0]
U = np.eye(n, dtype=np.float64)
for _ in range(max_iterations):
Q, R = qr_decomposition_householder(A)
A = R @ Q
U = U @ Q
return np.diag(A), U
def find_similarity_transform_hessenberg(A: np.ndarray) -> np.ndarray:
""" Returns the similarity transformation matrix of A in Hessenberg form (upper triangular with one sub-diagonal).
A: (n, n) the matrix to be transformed
Returns: (n, n)
"""
m, n = A.shape
assert m == n, 'A must be square'
A = A.astype(np.float64)
for j in range(n - 2):
for i in range(j + 2, n):
R = givens_rotation_to_zero(A, (i, j), j + 1)
A = R @ A @ R.T
return A