Feature/338 matrix inverse #875

Merged · 20 commits · Jan 31, 2022
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -28,7 +28,7 @@
- [#846](https://github.com/helmholtz-analytics/heat/pull/846) New features `norm`, `vector_norm`, `matrix_norm`
- [#850](https://github.com/helmholtz-analytics/heat/pull/850) New Feature `cross`
- [#877](https://github.com/helmholtz-analytics/heat/pull/877) New feature `det`

- [#875](https://github.com/helmholtz-analytics/heat/pull/875) New feature `inv`
### Logical
- [#862](https://github.com/helmholtz-analytics/heat/pull/862) New feature `signbit`
### Manipulations
113 changes: 113 additions & 0 deletions heat/core/linalg/basics.py
@@ -28,6 +28,7 @@
"cross",
"det",
"dot",
"inv",
"matmul",
"matrix_norm",
"norm",
@@ -308,6 +309,118 @@ def dot(a: DNDarray, b: DNDarray, out: Optional[DNDarray] = None) -> Union[DNDar
        raise NotImplementedError("ht.dot not implemented for N-D dot M-D arrays")


def inv(a: DNDarray) -> DNDarray:
    """
    Computes the multiplicative inverse of a square matrix.

    Parameters
    ----------
    a : DNDarray
        Square matrix of floating-point data type, or a stack of square matrices. Shape = (..., M, M)

    Raises
    ------
    RuntimeError
        If the inverse does not exist.
    RuntimeError
        If the dtype is not floating-point.
    RuntimeError
        If ``a`` is not at least two-dimensional, or if the lengths of the last two dimensions are not the same.

    Examples
    --------
    >>> a = ht.array([[1., 2], [2, 3]])
    >>> ht.linalg.inv(a)
    DNDarray([[-3.,  2.],
              [ 2., -1.]], dtype=ht.float32, device=cpu:0, split=None)
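
    A stacked example (matrix values borrowed from the accompanying tests):

    >>> b = ht.array([[[2.0, 1], [6, 4]], [[1.0, 2], [2, 3]]], split=0)
    >>> ht.linalg.inv(b).shape
    (2, 2, 2)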
"""
sanitation.sanitize_in(a) # pragma: no cover

if a.ndim < 2:
raise RuntimeError("DNDarray must be at least two-dimensional.")

m, n = a.shape[-2:]
if m != n:
raise RuntimeError("Last two dimensions of the DNDarray must be square.")

if types.heat_type_is_exact(a.dtype):
raise RuntimeError("dtype of DNDarray must be floating-point.")

# no split in the square matrices
    if not a.is_distributed() or a.split < a.ndim - 2:
        data = torch.inverse(a.larray)
        return DNDarray(
            data,
            a.shape,
            types.heat_type_of(data),
            split=a.split,
            device=a.device,
            comm=a.comm,
            balanced=a.balanced,
        )

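    # The split axis cuts the square matrices themselves, so invert with a
    # distributed Gauss-Jordan elimination: flatten the leading dimensions into a
    # batch, start ainv as the identity, and apply every row operation to acopy
    # and ainv alike.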
    acopy = a.copy()
    acopy = manipulations.reshape(acopy, (-1, m, m), new_split=a.split - a.ndim + 3)
    ainv = factories.zeros_like(acopy)
    for i in range(m):
        ainv[:, i, i] = 1

    _, displs = acopy.counts_displs()
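    # When the rows are distributed, displs[r] is the first global row index held
    # by process r; it is used below to map a global row index to a local one.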

    for k in range(ainv.shape[0]):
        rank = 0
        for i in range(n):
            # partial pivoting: a (numerically) zero pivot forces a row swap
            if np.isclose(acopy[k, i, i].item(), 0):
                abort = True
                for j in range(i + 1, n):
                    if not np.isclose(acopy[k, j, i].item(), 0):
                        if a.split == a.ndim - 2:  # split=0 on square matrix
                            ainv[k, i, :], ainv[k, j, :] = ainv[k, j, :], ainv[k, i, :].copy()
                            acopy[k, i, :], acopy[k, j, :] = acopy[k, j, :], acopy[k, i, :].copy()
                        else:  # split=1
                            acopy.larray[k, i, :], acopy.larray[k, j, :] = (
                                acopy.larray[k, j, :],
                                acopy.larray[k, i, :].clone(),
                            )
                            ainv.larray[k, i, :], ainv.larray[k, j, :] = (
                                ainv.larray[k, j, :],
                                ainv.larray[k, i, :].clone(),
                            )
                        abort = False
                        break
                if abort:
                    raise RuntimeError("Inverse does not exist")

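            # normalize the pivot row so that acopy[k, i, i] becomes 1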
            scale = acopy[k, i, i].item()

            # Circumvent an issue with DNDarray setter and getter that caused precision errors
            if a.split == a.ndim - 2:
                if rank < acopy.comm.size - 1:
                    if i >= displs[rank + 1]:
                        rank += 1
                if acopy.comm.rank == rank:
                    ainv.larray[k, i - displs[rank], :] /= scale
                    acopy.larray[k, i - displs[rank], :] /= scale
            else:
                ainv[k, i, :].larray /= scale
                acopy[k, i, :].larray /= scale

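            # forward elimination: zero out the entries below the pivot in column i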
            factor = acopy[k, i + 1 :, i, None].larray
            ainv[k, i + 1 :, :].larray -= factor * ainv[k, i, :].larray
            acopy[k, i + 1 :, :].larray -= factor * acopy[k, i, :].larray

        # backward pass: eliminate the entries above each pivot, so that acopy
        # ends up as the identity and ainv as the inverse
        for i in range(n - 1, 0, -1):
            factor = acopy[k, :i, i, None].larray
            ainv[k, :i, :].larray -= factor * ainv[k, i, :].larray
            acopy[k, :i, :].larray -= factor * acopy[k, i, :].larray

    ainv = manipulations.reshape(ainv, a.shape, new_split=a.split)

    return ainv


def matmul(a: DNDarray, b: DNDarray, allow_resplit: bool = False) -> DNDarray:
"""
Matrix multiplication of two ``DNDarrays``: ``a@b=c`` or ``A@B=c``.
95 changes: 95 additions & 0 deletions heat/core/linalg/tests/test_basics.py
@@ -224,6 +224,101 @@ def test_dot(self):
        with self.assertRaises(NotImplementedError):
            ht.dot(ht.array(data3d), ht.array(data1d))

    def test_inv(self):
        # single 2D array, not distributed: handled directly by torch.inverse
        ares = ht.array([[2.0, 2, 1], [3, 4, 1], [0, 1, -1]])

        a = ht.array([[5.0, -3, 2], [-3, 2, -1], [-3, 2, -2]])
        ainv = ht.linalg.inv(a)

        self.assertEqual(ainv.split, a.split)
        self.assertEqual(ainv.device, a.device)
        self.assertTupleEqual(ainv.shape, a.shape)
        self.assertTrue(ht.allclose(ainv, ares))

        # distributed
        a = ht.array([[5.0, -3, 2], [-3, 2, -1], [-3, 2, -2]], split=0)
        ainv = ht.linalg.inv(a)
        self.assertEqual(ainv.split, a.split)
        self.assertEqual(ainv.device, a.device)
        self.assertTupleEqual(ainv.shape, a.shape)
        self.assertTrue(ht.allclose(ainv, ares))

        a = ht.array([[5.0, -3, 2], [-3, 2, -1], [-3, 2, -2]], split=1)
        ainv = ht.linalg.inv(a)
        self.assertEqual(ainv.split, a.split)
        self.assertEqual(ainv.device, a.device)
        self.assertTupleEqual(ainv.shape, a.shape)
        self.assertTrue(ht.allclose(ainv, ares))

        # stack of four 2x2 matrices, shape (4, 2, 2)
        ares = ht.array(
            [[[2, -0.5], [-3, 1]], [[-3, 2], [2, -1]], [[-3, 2], [2, -1]], [[2, -0.5], [-3, 1]]],
            dtype=ht.float,
        )

        a = ht.array(
            [[[2, 1], [6, 4]], [[1, 2], [2, 3]], [[1, 2], [2, 3]], [[2, 1], [6, 4]]],
            dtype=ht.float,
            split=1,
        )
        ainv = ht.linalg.inv(a)
        self.assertEqual(ainv.split, a.split)
        self.assertEqual(ainv.device, a.device)
        self.assertTupleEqual(ainv.shape, a.shape)
        self.assertTrue(ht.allclose(ainv, ares))

        a = ht.array(
            [[[2, 1], [6, 4]], [[1, 2], [2, 3]], [[1, 2], [2, 3]], [[2, 1], [6, 4]]],
            dtype=ht.float,
            split=2,
        )
        ainv = ht.linalg.inv(a)
        self.assertEqual(ainv.split, a.split)
        self.assertEqual(ainv.device, a.device)
        self.assertTupleEqual(ainv.shape, a.shape)
        self.assertTrue(ht.allclose(ainv, ares))

        # pivoting: a zero pivot during elimination forces a row swap
        ares = ht.array([[-1, 0, 2], [2, 0, -1], [-6, 3, 0]], dtype=ht.double) / 3.0
        a = ht.array([[1, 2, 0], [2, 4, 1], [2, 1, 0]], dtype=ht.double, split=0)
        ainv = ht.linalg.inv(a)
        self.assertEqual(ainv.split, a.split)
        self.assertEqual(ainv.device, a.device)
        self.assertTupleEqual(ainv.shape, a.shape)
        self.assertTrue(ht.allclose(ainv, ares))

        a = ht.array([[1, 2, 0], [2, 4, 1], [2, 1, 0]], dtype=ht.double, split=1)
        ainv = ht.linalg.inv(a)
        self.assertEqual(ainv.split, a.split)
        self.assertEqual(ainv.device, a.device)
        self.assertTupleEqual(ainv.shape, a.shape)
        self.assertTrue(ht.allclose(ainv, ares))

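        # round-trip check on a random matrix: a @ inv(a) should be close to the identity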
        ht.random.seed(42)
        a = ht.random.random((20, 20), dtype=ht.float64, split=1)
        ainv = ht.linalg.inv(a)
        i = ht.eye(a.shape, split=1, dtype=a.dtype)
        self.assertTrue(ht.allclose(a @ ainv, i))

        # ht.random.seed(42)
        # a = ht.random.random((20, 20), dtype=ht.float64, split=0)
        # ainv = ht.linalg.inv(a)
        # i = ht.eye(a.shape, split=0, dtype=a.dtype)
        # self.assertTrue(ht.allclose(a @ ainv, i))

        with self.assertRaises(RuntimeError):
            # not at least two-dimensional
            ht.linalg.inv(ht.array([1, 2, 3], split=0))
        with self.assertRaises(RuntimeError):
            # last two dimensions are not square
            ht.linalg.inv(ht.zeros((1, 2, 3), split=1))
        with self.assertRaises(RuntimeError):
            # dtype is not floating-point
            ht.linalg.inv(ht.zeros((2, 2), dtype=ht.int, split=1))
        with self.assertRaises(RuntimeError):
            # singular: the zero matrix has no inverse
            ht.linalg.inv(ht.zeros((3, 3), split=0))
        with self.assertRaises(RuntimeError):
            # singular: the all-ones matrix has no inverse
            ht.linalg.inv(ht.ones((3, 3), split=1))

    def test_matmul(self):
        with self.assertRaises(ValueError):
            ht.matmul(ht.ones((25, 25)), ht.ones((42, 42)))