From 7cf400f59547a0b92101b2623736d68058438caf Mon Sep 17 00:00:00 2001
From: Debus <Charlotte.Debus@dlr.de>
Date: Fri, 24 Jan 2020 14:40:35 +0100
Subject: [PATCH 01/49] Implement Utilities

---
 heat/core/factories.py |  45 ++++++++++++
 heat/core/linalg.py    | 157 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 201 insertions(+), 1 deletion(-)

diff --git a/heat/core/factories.py b/heat/core/factories.py
index 7e8ee248cb..303617a0f9 100644
--- a/heat/core/factories.py
+++ b/heat/core/factories.py
@@ -374,6 +374,51 @@ def array(
 
     return dndarray.DNDarray(obj, tuple(int(ele) for ele in gshape), dtype, split, device, comm)
 
+def diagonal(v, dtype=types.float32, split=None, device=None, comm=None):
+    """
+    Returns a new 2D array with elements of vector v on the diagonal.
+
+    Parameters
+    ----------
+    v : ht.dndarray
+        1D vector
+    dtype : ht.dtype
+        The desired HeAT data type for the array, defaults to ht.float32.
+    split: int, optional
+        The axis along which the array is split and distributed, defaults to None (no distribution).
+    device : str, ht.Device or None, optional
+        Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device).
+    comm: Communication, optional
+        Handle to the nodes holding distributed parts or copies of this tensor. Must be either None or the same as v.comm
+
+    Returns
+    -------
+    out : ht.DNDarray
+        Diagonal matrix of size v.shape[0] x v.shape[0]
+
+    """
+    shape = (v.shape[0], v.shape[0])
+    diag = zeros(shape, dtype, split, device, comm)
+    if comm is not None and comm != v.comm:
+        raise NotImplementedError("Differing communcators are currently not supported")
+
+    if v.split is not None and v.comm.is_distributed():
+        counts, displ, _ = v.comm.counts_displs_shape(v.shape, v.split)
+        c1 = displ[v.comm.rank]
+        if v.comm.rank != v.comm.size-1:
+            c2 =displ[v.comm.rank+1]
+        else:
+            c2 = v.shape[0]
+        if split is None:
+            diag._DNDarray__array[c1:c2, c1:c2] = torch.diag(v._DNDarray__array)
+        elif split==0:
+            diag._DNDarray__array[:, c1:c2] = torch.diag(v._DNDarray__array)
+        elif split==1:
+            diag._DNDarray__array[c1:c2,:] = torch.diag(v._DNDarray__array)
+    else:
+        diag._DNDarray__array = torch.diag(v._DNDarray__array)
+
+    return diag
 
 def empty(shape, dtype=types.float32, split=None, device=None, comm=None, order="C"):
     """
diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index ce55c1e7c7..228ea118e5 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -1,13 +1,15 @@
 import itertools
+import math
 import torch
 
 from .communication import MPI
 from . import dndarray
 from . import factories
 from . import manipulations
+from . import random
 from . import types
 
-__all__ = ["dot", "matmul", "transpose", "tril", "triu"]
+__all__ = ["dot", "matmul", "projection", "transpose", "tril", "triu"]
 
 
 def dot(a, b, out=None):
@@ -67,6 +69,129 @@ def dot(a, b, out=None):
     else:
         raise NotImplementedError("ht.dot not implemented for N-D dot M-D arrays")
 
+def cg(A, b, x0, out=None):
+    """
+    Conjugate gradients method for solving a system of linear equations Ax = b
+
+    Parameters
+    ----------
+    A : ht.DNDarray
+        2D symmetric, positive definite Matrix
+    b : ht.DNDarray
+        1D vector
+    x0 : ht.DNDarray
+        Arbitrary 1D starting vector
+
+    Returns
+    -------
+    ht.DNDarray
+        Returns the solution x of the system of linear equations. If out is given, it is returned
+    """
+
+    r = b - matmul(A , x0)
+    p = r
+    rsold = matmul(r , r)
+    x = x0
+
+    for i in range(len(b)):
+        Ap = matmul(A , p)
+        alpha = rsold / matmul(p , Ap)
+        x = x + (alpha * p)
+        r = r - (alpha * Ap)
+        rsnew = matmul(r , r)
+        if(math.sqrt(rsnew)< 1e-10):
+            print("Residual r = {} reaches tolerance in it = {}".format(math.sqrt(rsnew), i))
+            if out is not None:
+                out = x
+                return out
+            return x
+
+        p = r + ((rsnew / rsold) * p)
+        rsold = rsnew
+
+    if out is not None:
+        out = x
+        return out
+    return x
+
+def lanczos(A, m, v0=None, V_out=None, T_out=None):
+    """
+    Lanczos algorithm for iterative approximation of the solution to the eigenvalue problem,  an adaptation of power methods to find the m "most useful" (tending towards extreme highest/lowest) eigenvalues and eigenvectors of an n×n Hermitian matrix, where often m<<n
+    Parameters
+    ----------
+    A : ht.DNDarray
+        2D symmetric, positive definite Matrix
+    m : int
+        number of Lanczos iterations
+    v0 : ht.DNDarray (optional)
+        1D starting vector of euclidian norm 1
+
+    Returns
+    -------
+    V ht.DNDarray
+        Matrix of size nxm, with orthonormal columns, that span the Krylow subspace. If out is given, it is returned
+    T ht.DNDarray
+        Tridiagonal matrix of size mxm, with coefficients alpha_1,...alpha_n on the diagonal and coefficients beta_1,...,beta_n-1 on the side-diagonals. If out is given, it is returned
+
+    """
+
+    n, column = A.shape
+    if n!=column:
+        raise TypeError("Input Matrix A needs to be symmetric.")
+    T = factories.zeros((m, m))
+    if A.split==0:
+        V = factories.zeros((n,m), split=A.split, dtype=types.float64)
+    else:
+        V = factories.zeros((n,m), dtype=types.float64)
+
+    if v0 is None:
+        vr = random.rand(n)
+        v0 = vr / norm(vr)
+
+    # # 0th iteration
+    # # vector v0 has euklidian norm = 1
+    w = matmul(A, v0)
+    alpha = dot(w, v0)
+    w = w - alpha*v0
+    T[0,0] = alpha
+    V[:,0] = v0
+
+    for i in range(1, m):
+        beta = norm(w)
+        if beta == 0.:
+            # Lanczos Breakdown, pick a random vector to continue
+            vr = random.rand(n)
+            # orthogonalize v_r with respect to all vectors v[i]
+            for j in range(i):
+                vr = vr - projection(v[:,j], vr)
+            # normalize v_r to Euclidian norm 1 and set as ith vector v
+            vi = vr/norm(vr)
+        else:
+            vi = w/beta
+
+        w = matmul(A, vi)
+        alpha = dot(w, vi)
+        w = w - alpha*vi - beta*v[:,i-1]
+        T[i-1, i] = beta
+        T[i, i-1] = beta
+        T[i, i] = alpha
+        V[:, i] = vi
+
+    if V.split is not None:
+        V.resplit_(axis=None)
+
+    if T_out is not None:
+        T_out = T
+        if V_out is not None:
+            V_out = V
+            return V_out, T_out
+        return V, T_out
+    elif V_out is not None:
+        V_out = V
+        return V_out, T
+
+    return V, T
+
 
 def matmul(a, b):
     """
@@ -730,6 +855,36 @@ def c_block_setter(b_proc, a_proc, a_data, b_data):
             c = factories.array(res, split=split, device=a.device)
             return c
 
+def norm(a):
+    """
+    Euklidian norm of vector a
+
+    Parameters
+    ----------
+    a : ht.DNDarray (1D)
+
+    Returns
+    -------
+    float
+        Returns the vector norm (lenght) of a
+    """
+    return math.sqrt(dot(a, a))
+
+def projection(a, b):
+    """
+    Projection of vector b onto vector a
+
+    Parameters
+    ----------
+    a : ht.DNDarray (1D)
+    b : ht.DNDarray (1D)
+
+    Returns
+    -------
+    ht.DNDarray
+        Returns the vector projection of b in the direction of a
+    """
+    return (dot(b, a)/dot(a,a))*a
 
 def transpose(a, axes=None):
     """

From 477d840f4a054e82b47f81e801e5f14c4e378ba8 Mon Sep 17 00:00:00 2001
From: Debus <Charlotte.Debus@dlr.de>
Date: Fri, 24 Jan 2020 14:51:33 +0100
Subject: [PATCH 02/49] Run Hooks

---
 heat/core/factories.py | 12 ++++++-----
 heat/core/linalg.py    | 47 +++++++++++++++++++++++-------------------
 2 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/heat/core/factories.py b/heat/core/factories.py
index 303617a0f9..fe4bbdd040 100644
--- a/heat/core/factories.py
+++ b/heat/core/factories.py
@@ -374,6 +374,7 @@ def array(
 
     return dndarray.DNDarray(obj, tuple(int(ele) for ele in gshape), dtype, split, device, comm)
 
+
 def diagonal(v, dtype=types.float32, split=None, device=None, comm=None):
     """
     Returns a new 2D array with elements of vector v on the diagonal.
@@ -405,21 +406,22 @@ def diagonal(v, dtype=types.float32, split=None, device=None, comm=None):
     if v.split is not None and v.comm.is_distributed():
         counts, displ, _ = v.comm.counts_displs_shape(v.shape, v.split)
         c1 = displ[v.comm.rank]
-        if v.comm.rank != v.comm.size-1:
-            c2 =displ[v.comm.rank+1]
+        if v.comm.rank != v.comm.size - 1:
+            c2 = displ[v.comm.rank + 1]
         else:
             c2 = v.shape[0]
         if split is None:
             diag._DNDarray__array[c1:c2, c1:c2] = torch.diag(v._DNDarray__array)
-        elif split==0:
+        elif split == 0:
             diag._DNDarray__array[:, c1:c2] = torch.diag(v._DNDarray__array)
-        elif split==1:
-            diag._DNDarray__array[c1:c2,:] = torch.diag(v._DNDarray__array)
+        elif split == 1:
+            diag._DNDarray__array[c1:c2, :] = torch.diag(v._DNDarray__array)
     else:
         diag._DNDarray__array = torch.diag(v._DNDarray__array)
 
     return diag
 
+
 def empty(shape, dtype=types.float32, split=None, device=None, comm=None, order="C"):
     """
     Returns a new uninitialized array of given shape and data type. May be allocated split up across multiple
diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index 228ea118e5..b869b47f94 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -69,6 +69,7 @@ def dot(a, b, out=None):
     else:
         raise NotImplementedError("ht.dot not implemented for N-D dot M-D arrays")
 
+
 def cg(A, b, x0, out=None):
     """
     Conjugate gradients method for solving a system of linear equations Ax = b
@@ -88,18 +89,18 @@ def cg(A, b, x0, out=None):
         Returns the solution x of the system of linear equations. If out is given, it is returned
     """
 
-    r = b - matmul(A , x0)
+    r = b - matmul(A, x0)
     p = r
-    rsold = matmul(r , r)
+    rsold = matmul(r, r)
     x = x0
 
     for i in range(len(b)):
-        Ap = matmul(A , p)
-        alpha = rsold / matmul(p , Ap)
+        Ap = matmul(A, p)
+        alpha = rsold / matmul(p, Ap)
         x = x + (alpha * p)
         r = r - (alpha * Ap)
-        rsnew = matmul(r , r)
-        if(math.sqrt(rsnew)< 1e-10):
+        rsnew = matmul(r, r)
+        if math.sqrt(rsnew) < 1e-10:
             print("Residual r = {} reaches tolerance in it = {}".format(math.sqrt(rsnew), i))
             if out is not None:
                 out = x
@@ -114,6 +115,7 @@ def cg(A, b, x0, out=None):
         return out
     return x
 
+
 def lanczos(A, m, v0=None, V_out=None, T_out=None):
     """
     Lanczos algorithm for iterative approximation of the solution to the eigenvalue problem,  an adaptation of power methods to find the m "most useful" (tending towards extreme highest/lowest) eigenvalues and eigenvectors of an n×n Hermitian matrix, where often m<<n
@@ -136,13 +138,13 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
     """
 
     n, column = A.shape
-    if n!=column:
+    if n != column:
         raise TypeError("Input Matrix A needs to be symmetric.")
     T = factories.zeros((m, m))
-    if A.split==0:
-        V = factories.zeros((n,m), split=A.split, dtype=types.float64)
+    if A.split == 0:
+        V = factories.zeros((n, m), split=A.split, dtype=types.float64)
     else:
-        V = factories.zeros((n,m), dtype=types.float64)
+        V = factories.zeros((n, m), dtype=types.float64)
 
     if v0 is None:
         vr = random.rand(n)
@@ -152,28 +154,28 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
     # # vector v0 has euklidian norm = 1
     w = matmul(A, v0)
     alpha = dot(w, v0)
-    w = w - alpha*v0
-    T[0,0] = alpha
-    V[:,0] = v0
+    w = w - alpha * v0
+    T[0, 0] = alpha
+    V[:, 0] = v0
 
     for i in range(1, m):
         beta = norm(w)
-        if beta == 0.:
+        if beta == 0.0:
             # Lanczos Breakdown, pick a random vector to continue
             vr = random.rand(n)
             # orthogonalize v_r with respect to all vectors v[i]
             for j in range(i):
-                vr = vr - projection(v[:,j], vr)
+                vr = vr - projection(v[:, j], vr)
             # normalize v_r to Euclidian norm 1 and set as ith vector v
-            vi = vr/norm(vr)
+            vi = vr / norm(vr)
         else:
-            vi = w/beta
+            vi = w / beta
 
         w = matmul(A, vi)
         alpha = dot(w, vi)
-        w = w - alpha*vi - beta*v[:,i-1]
-        T[i-1, i] = beta
-        T[i, i-1] = beta
+        w = w - alpha * vi - beta * v[:, i - 1]
+        T[i - 1, i] = beta
+        T[i, i - 1] = beta
         T[i, i] = alpha
         V[:, i] = vi
 
@@ -855,6 +857,7 @@ def c_block_setter(b_proc, a_proc, a_data, b_data):
             c = factories.array(res, split=split, device=a.device)
             return c
 
+
 def norm(a):
     """
     Euklidian norm of vector a
@@ -870,6 +873,7 @@ def norm(a):
     """
     return math.sqrt(dot(a, a))
 
+
 def projection(a, b):
     """
     Projection of vector b onto vector a
@@ -884,7 +888,8 @@ def projection(a, b):
     ht.DNDarray
         Returns the vector projection of b in the direction of a
     """
-    return (dot(b, a)/dot(a,a))*a
+    return (dot(b, a) / dot(a, a)) * a
+
 
 def transpose(a, axes=None):
     """

From 36707287127fb46626fd371a5118cc75578f4703 Mon Sep 17 00:00:00 2001
From: Debus <Charlotte.Debus@dlr.de>
Date: Mon, 27 Jan 2020 13:33:30 +0100
Subject: [PATCH 03/49] Implementation of metrics class and spectral clustering

---
 heat/core/cluster/spectral.py |  99 +++++++++++++++++++
 heat/core/linalg.py           |   4 +-
 heat/core/metrics.py          | 173 ++++++++++++++++++++++++++++++++++
 3 files changed, 274 insertions(+), 2 deletions(-)
 create mode 100644 heat/core/cluster/spectral.py
 create mode 100644 heat/core/metrics.py

diff --git a/heat/core/cluster/spectral.py b/heat/core/cluster/spectral.py
new file mode 100644
index 0000000000..598e22d132
--- /dev/null
+++ b/heat/core/cluster/spectral.py
@@ -0,0 +1,99 @@
+import torch
+import numpy as np
+
+from .. import factories
+from .. import types
+from .. import operations
+from .. import linalg
+from .. import metrics
+from .. import random
+from . import KMeans
+
+
+def laplacian(X, kernel, adjacency=None, norm=True, mode="fc", upper=None, lower=None):
+    if adjacency is not None:
+        S = adjacency
+    else:
+        S = metrics.similarity(X, kernel)
+    if mode == "eNeighbour":
+        if (upper is not None) and (lower is not None):
+            raise ValueError(
+                "Epsilon neighborhood with either upper or lower threshold, both is not supported"
+            )
+
+        if upper is not None:
+            A = types.int(S < upper) - factories.eye(S.shape, dtype=types.int, split=S.split)
+        elif lower is not None:
+            A = types.int(S > upper) - factories.eye(S.shape, dtype=types.int, split=S.split)
+    elif mode == "fc":
+        A = S
+    else:
+        raise NotImplementedError(
+            "Only eNeighborhood and fully-connected graphs supported at the moment."
+        )
+    if norm:
+        degree = operations.sqrt(1.0 / operations.sum(A, axis=1))
+    else:
+        degree = sum(A, axis=1)
+
+    D = factories.diagonal(degree, dtype=S.dtype, split=S.split, device=S.device, comm=S.comm)
+
+    if norm:
+        L = factories.eye(A.shape, split=A.split) - linalg.matmul(D, linalg.matmul(S, D))
+    else:
+        L = D - A
+
+    return L
+
+
+class spectral:
+    def __init__(self, kernel=metrics.GaussianDistance(sigma=1.0), n_clusters=None, iter=300):
+        """
+        K-Means clustering algorithm. An implementation of Lloyd's algorithm [1].
+
+        Parameters
+        ----------
+        n_clusters : int, optional, default: 8
+            The number of clusters to form as well as the number of centroids to generate.
+        iter : int, default: 300
+            Number of Lanczos iterations
+
+        """
+        self.iter = iter
+        self.kernel = kernel
+        self.n_clusters = n_clusters
+
+        # in-place properties
+        self._labels = None
+        self._adjacency = None
+        self._eigenvectors = None
+        self._eigenvalues = None
+
+    def fit(self, X):
+        # 2. Calculation of Laplacian
+        L = laplacian(X, self.kernel)
+
+        # 3. Eigenvalue and -vector Calculation
+        vr = random.rand(L.shape[0], split=L.split, dtype=types.float64)
+        v0 = vr / linalg.norm(vr)
+        Vg_norm, Tg_norm = linalg.lanczos(L, self.iter, v0)
+
+        # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
+        eval, evec = torch.eig(Tg_norm._DNDarray__array, eigenvectors=True)
+        # If x is an Eigenvector of T, then y = V@x is the corresponding Eigenvector of L
+        self._eigenvectors = linalg.matmul(Vg_norm, factories.array(evec))
+        self._eigenvalues, idx = torch.sort(eval[:, 0], dim=0)
+        self._eigenvalues = self._eigenvectors[:, idx]
+
+        if self.n_clusters is None:
+            temp = np.diff(self._eigenvalues.numpy())
+            self.n_clusters = np.where(temp == temp.max())[0][0] + 1
+
+        components = self._eigenvectors[:, : self.n_clusters].copy()
+
+        kmeans = KMeans(n_clusters=self.n_clusters, init="kmeans++")
+        kmeans.fit(components)
+        # cluster = kmeans.predict(self.n_clusters)
+        # centroids = kmeans.cluster_centers_
+
+        return
diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index b869b47f94..32a4c1fa2c 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -165,7 +165,7 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
             vr = random.rand(n)
             # orthogonalize v_r with respect to all vectors v[i]
             for j in range(i):
-                vr = vr - projection(v[:, j], vr)
+                vr = vr - projection(V[:, j], vr)
             # normalize v_r to Euclidian norm 1 and set as ith vector v
             vi = vr / norm(vr)
         else:
@@ -173,7 +173,7 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
 
         w = matmul(A, vi)
         alpha = dot(w, vi)
-        w = w - alpha * vi - beta * v[:, i - 1]
+        w = w - alpha * vi - beta * V[:, i - 1]
         T[i - 1, i] = beta
         T[i, i - 1] = beta
         T[i, i] = alpha
diff --git a/heat/core/metrics.py b/heat/core/metrics.py
new file mode 100644
index 0000000000..082291a3b7
--- /dev/null
+++ b/heat/core/metrics.py
@@ -0,0 +1,173 @@
+import torch
+
+from .. import communication
+from .. import factories
+from .. import types
+
+
+class EuclidianDistance:
+    def __call__(self, X, Y):
+        # X and Y are torch tensors
+        k1, f1 = X.shape
+        k2, f2 = Y.shape
+        if f1 != f2:
+            raise RuntimeError(
+                "X and Y have differing feature dimensions (dim = 1), should be equal, but are {} and {}".format(
+                    f1, f2
+                )
+            )
+
+        Xd = X.unsqueeze(dim=1)
+        Yd = Y.unsqueeze(dim=0)
+        result = torch.zeros((k1, k2), dtype=torch.float64)
+
+        for i in range(Xd.shape[0]):
+            result[i, :] = ((Yd - Xd[i, :, :]) ** 2).sum(dim=-1).sqrt()
+
+        return result
+
+
+class GaussianDistance:
+    def __init__(self, sigma=1.0):
+        self.sigma = sigma
+
+    def __call__(self, X, Y):
+        # X and Y are torch tensors
+        k1, f1 = X.shape
+        k2, f2 = Y.shape
+        if f1 != f2:
+            raise RuntimeError(
+                "X and Y have differing feature dimensions (dim = 1), should be equal, but are {} and {}".format(
+                    f1, f2
+                )
+            )
+        Xd = X.unsqueeze(dim=1)
+        Yd = Y.unsqueeze(dim=0)
+        result = torch.zeros((k1, k2), dtype=torch.float64)
+        for i in range(Xd.shape[0]):
+            result[i, :] = torch.exp(
+                -((Yd - Xd[i, :, :]) ** 2).sum(dim=-1) / (2 * self.sigma * self.sigma)
+            )
+
+        return result
+
+
+def similarity(X, metric=EuclidianDistance()):
+    if (X.split is not None) and (X.split != 0):
+        raise NotImplementedError("Feature Splitting is not supported")
+    if len(X.shape) > 2:
+        raise NotImplementedError("Only 2D data matrices are supported")
+
+    comm = X.comm
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+
+    K, f = X.shape
+    k1, _ = X.lshape
+
+    S = factories.zeros((K, K), dtype=types.float64, split=0)
+
+    counts, displ, _ = comm.counts_displs_shape(X.shape, X.split)
+    num_iter = (size + 1) // 2
+
+    stationary = X._DNDarray__array
+    rows = (displ[rank], displ[rank + 1] if (rank + 1) != size else K)
+
+    # 0th iteration, calculate diagonal
+    d_ij = metric(stationary, stationary)
+    S[rows[0] : rows[1], rows[0] : rows[1]] = d_ij
+
+    for iter in range(1, num_iter):
+        # Send rank's part of the matrix to the next process in a circular fashion
+        receiver = (rank + iter) % size
+        sender = (rank - iter) % size
+
+        col1 = displ[sender]
+        if sender != size - 1:
+            col2 = displ[sender + 1]
+        else:
+            col2 = K
+        columns = (col1, col2)
+
+        # All but the first iter processes are receiving, then sending
+        if (rank // iter) != 0:
+            stat = communication.Status()
+            comm.handle.Probe(source=sender, tag=iter, status=stat)
+            count = int(stat.Get_count(communication.FLOAT) / f)
+            moving = torch.zeros((count, f), dtype=torch.float32)
+            comm.Recv(moving, source=sender, tag=iter)
+
+        # Sending to next Process
+        comm.Send(stationary, dest=receiver, tag=iter)
+
+        # The first iter processes can now receive after sending
+        if (rank // iter) == 0:
+            stat = communication.Status()
+            comm.handle.Probe(source=sender, tag=iter, status=stat)
+            count = int(stat.Get_count(communication.FLOAT) / f)
+            moving = torch.zeros((count, f), dtype=torch.float32)
+            comm.Recv(moving, source=sender, tag=iter)
+
+        d_ij = metric(stationary, moving)
+        S[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
+
+        # Receive calculated tile
+        scol1 = displ[receiver]
+        if receiver != size - 1:
+            scol2 = displ[receiver + 1]
+        else:
+            scol2 = K
+        scolumns = (scol1, scol2)
+        symmetric = torch.zeros(scolumns[1] - scolumns[0], (rows[1] - rows[0]), dtype=torch.float64)
+        # Receive calculated tile
+        if (rank // iter) != 0:
+            comm.Recv(symmetric, source=receiver, tag=iter)
+
+        # sending result back to sender of moving matrix (for symmetry)
+        comm.Send(d_ij, dest=sender, tag=iter)
+        if (rank // iter) == 0:
+            comm.Recv(symmetric, source=receiver, tag=iter)
+        S[rows[0] : rows[1], scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
+
+    if (size + 1) % 2 != 0:  # we need one mor iteration for the first n/2 processes
+        receiver = (rank + num_iter) % size
+        sender = (rank - num_iter) % size
+
+        # Case 1: only receiving
+        if rank < (size // 2):
+            stat = communication.Status()
+            comm.handle.Probe(source=sender, tag=num_iter, status=stat)
+            count = int(stat.Get_count(communication.FLOAT) / f)
+            moving = torch.zeros((count, f), dtype=torch.float32)
+            comm.Recv(moving, source=sender, tag=num_iter)
+
+            col1 = displ[sender]
+            if sender != size - 1:
+                col2 = displ[sender + 1]
+            else:
+                col2 = K
+            columns = (col1, col2)
+
+            d_ij = metric(stationary, moving)
+            S[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
+            # sending result back to sender of moving matrix (for symmetry)
+            comm.Send(d_ij, dest=sender, tag=num_iter)
+
+        # Case 2 : only sending processes
+        else:
+            comm.Send(stationary, dest=receiver, tag=num_iter)
+
+            # Receiving back result
+            scol1 = displ[receiver]
+            if receiver != size - 1:
+                scol2 = displ[receiver + 1]
+            else:
+                scol2 = K
+            scolumns = (scol1, scol2)
+            symmetric = torch.zeros(
+                (scolumns[1] - scolumns[0], rows[1] - rows[0]), dtype=torch.float64
+            )
+            comm.Recv(symmetric, source=receiver, tag=num_iter)
+            S[rows[0] : rows[1], scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
+
+    return S

From 5411b45036477f1c47114c06d76813025ee7bfdb Mon Sep 17 00:00:00 2001
From: Debus <Charlotte.Debus@dlr.de>
Date: Tue, 28 Jan 2020 16:10:32 +0100
Subject: [PATCH 04/49] Rework draft: - Restructure metrics into
 spatial.distance module - Draft implementation of pairwise distance
 calculation for two different tensors - Adjust spectral fit to mimic SciKit
 API

---
 heat/core/cluster/spectral.py                 |  94 ++++++++--
 heat/core/spatial/__init__.py                 |   1 +
 heat/core/{metrics.py => spatial/distance.py} | 162 ++++++++++++------
 3 files changed, 191 insertions(+), 66 deletions(-)
 create mode 100644 heat/core/spatial/__init__.py
 rename heat/core/{metrics.py => spatial/distance.py} (53%)

diff --git a/heat/core/cluster/spectral.py b/heat/core/cluster/spectral.py
index 598e22d132..92322a9d4f 100644
--- a/heat/core/cluster/spectral.py
+++ b/heat/core/cluster/spectral.py
@@ -4,17 +4,13 @@
 from .. import factories
 from .. import types
 from .. import operations
+from ..spatial import distance
 from .. import linalg
-from .. import metrics
 from .. import random
 from . import KMeans
 
 
-def laplacian(X, kernel, adjacency=None, norm=True, mode="fc", upper=None, lower=None):
-    if adjacency is not None:
-        S = adjacency
-    else:
-        S = metrics.similarity(X, kernel)
+def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
     if mode == "eNeighbour":
         if (upper is not None) and (lower is not None):
             raise ValueError(
@@ -47,36 +43,100 @@ def laplacian(X, kernel, adjacency=None, norm=True, mode="fc", upper=None, lower
 
 
 class spectral:
-    def __init__(self, kernel=metrics.GaussianDistance(sigma=1.0), n_clusters=None, iter=300):
+    def __init__(
+        self,
+        n_clusters=None,
+        gamma=1.0,
+        metric="rbf",
+        laplacian="fully_connected",
+        threshold=1.0,
+        boundary="upper",
+        normalize=True,
+        n_lanczos=300,
+    ):
         """
-        K-Means clustering algorithm. An implementation of Lloyd's algorithm [1].
+        Spectral clustering
 
         Parameters
         ----------
-        n_clusters : int, optional, default: 8
-            The number of clusters to form as well as the number of centroids to generate.
-        iter : int, default: 300
-            Number of Lanczos iterations
+        n_clusters : int, optional
+        gamma : float, default=1.0
+            Kernel coefficient sigma for 'rbf', ignored for affinity='euclidean'
+        metric : string
+            How to construct the similarity matrix.
+            ‘rbf’ : construct the similarity matrix using a radial basis function (RBF) kernel.
+            'euclidean' : construct the similarity matrix as only euclidean distance
+            ‘precomputed’ : interpret X as a precomputed similarity matrix.
+        laplacian : string
+            How to calculate the graph laplacian (affinity)
+            'fully_connected'
+            'eNeighborhood'
+        theshold : float
+            Threshold for affinity matrix if laplacian='eNeighborhood'
+            Ignorded for laplacian='fully_connected'
+        boundary : string
+            How to interpret threshold
+            'upper'
+            'lower'
+        normalize : bool, default = True
+            normalized vs. unnormalized Laplacian
+        n_lanczos : int
+            number of Lanczos iterations for Eigenvalue decomposition
 
         """
-        self.iter = iter
-        self.kernel = kernel
         self.n_clusters = n_clusters
+        self.n_lanczos = n_lanczos
+        self.metric = metric
+        self.gamma = gamma
+        self.normalize = normalize
+        self.laplacian = laplacian
+        self.epsilon = (threshold, boundary)
 
         # in-place properties
         self._labels = None
-        self._adjacency = None
+        self._similarity = None
         self._eigenvectors = None
         self._eigenvalues = None
 
     def fit(self, X):
+        # 1. Calculation of Adjacency Matrix
+        if self.kernel == "rbf":
+            if self.sigma is None:
+                raise ValueError(
+                    "For usage of RBF kernel please specify the scaling factor sigma in class instantiation"
+                )
+            self._similarity = distance.similarity(
+                X, lambda x, y: distance._gaussian(x, y, self.sigma)
+            )
+        elif self.kernel == "euclidean":
+            self._similarity = distance.similarity(X, lambda x, y: distance._euclidian(x, y))
+        else:
+            raise NotImplementedError("Other kernels currently not implemented")
+
         # 2. Calculation of Laplacian
-        L = laplacian(X, self.kernel)
+        if self.laplacian == "eNeighborhood":
+            if self.epsilon[1] == "upper":
+                L = laplacian(
+                    self._similarity, norm=self.normalize, mode="eNeighbour", upper=self.epsilon[0]
+                )
+            elif self.epsilon[1] == "lower":
+                L = laplacian(
+                    self._similarity, norm=self.normalize, mode="eNeighbour", lower=self.epsilon[0]
+                )
+            else:
+                raise ValueError(
+                    "Boundary needs to be 'upper' or 'lower' and threshold needs to be set, if laplacian = eNeighborhood"
+                )
+
+        elif self.laplacian == "fully_connected":
+            L = laplacian(self._similarity, norm=self.normalize, mode="fc")
+        else:
+            raise NotImplementedError("Other approaches currently not implemented")
 
         # 3. Eigenvalue and -vector Calculation
         vr = random.rand(L.shape[0], split=L.split, dtype=types.float64)
         v0 = vr / linalg.norm(vr)
-        Vg_norm, Tg_norm = linalg.lanczos(L, self.iter, v0)
+        Vg_norm, Tg_norm = linalg.lanczos(L, self.n_lanczos, v0)
 
         # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
         eval, evec = torch.eig(Tg_norm._DNDarray__array, eigenvectors=True)
diff --git a/heat/core/spatial/__init__.py b/heat/core/spatial/__init__.py
new file mode 100644
index 0000000000..c3dbe9d6cd
--- /dev/null
+++ b/heat/core/spatial/__init__.py
@@ -0,0 +1 @@
+from .distance import *
diff --git a/heat/core/metrics.py b/heat/core/spatial/distance.py
similarity index 53%
rename from heat/core/metrics.py
rename to heat/core/spatial/distance.py
index 082291a3b7..943df36e89 100644
--- a/heat/core/metrics.py
+++ b/heat/core/spatial/distance.py
@@ -5,54 +5,49 @@
 from .. import types
 
 
-class EuclidianDistance:
-    def __call__(self, X, Y):
-        # X and Y are torch tensors
-        k1, f1 = X.shape
-        k2, f2 = Y.shape
-        if f1 != f2:
-            raise RuntimeError(
-                "X and Y have differing feature dimensions (dim = 1), should be equal, but are {} and {}".format(
-                    f1, f2
-                )
-            )
-
-        Xd = X.unsqueeze(dim=1)
-        Yd = Y.unsqueeze(dim=0)
-        result = torch.zeros((k1, k2), dtype=torch.float64)
-
-        for i in range(Xd.shape[0]):
-            result[i, :] = ((Yd - Xd[i, :, :]) ** 2).sum(dim=-1).sqrt()
-
-        return result
-
-
-class GaussianDistance:
-    def __init__(self, sigma=1.0):
-        self.sigma = sigma
-
-    def __call__(self, X, Y):
-        # X and Y are torch tensors
-        k1, f1 = X.shape
-        k2, f2 = Y.shape
-        if f1 != f2:
-            raise RuntimeError(
-                "X and Y have differing feature dimensions (dim = 1), should be equal, but are {} and {}".format(
-                    f1, f2
-                )
-            )
-        Xd = X.unsqueeze(dim=1)
-        Yd = Y.unsqueeze(dim=0)
-        result = torch.zeros((k1, k2), dtype=torch.float64)
-        for i in range(Xd.shape[0]):
-            result[i, :] = torch.exp(
-                -((Yd - Xd[i, :, :]) ** 2).sum(dim=-1) / (2 * self.sigma * self.sigma)
-            )
-
-        return result
-
-
-def similarity(X, metric=EuclidianDistance()):
+def _euclidian(x, y):
+    # X and Y are torch tensors
+    # k1, f1 = x.shape
+    # k2, f2 = y.shape
+    # if f1 != f2:
+    #    raise RuntimeError(
+    #        "X and Y have differing feature dimensions (dim = 1), should be equal, but are {} and {}".format(
+    #            f1, f2
+    #        )
+    #    )
+    # xd = x.unsqueeze(dim=1)
+    # yd = y.unsqueeze(dim=0)
+    # result = torch.zeros((k1, k2), dtype=torch.float64)
+
+    # for i in range(xd.shape[0]):
+    #    result[i, :] = ((yd - xd[i, :, :]) ** 2).sum(dim=-1).sqrt()
+    result = torch.cdist(x, y)
+    return result
+
+
+def _gaussian(x, y, sigma=1.0):
+    # X and Y are torch tensors
+    # k1, f1 = x.shape
+    # k2, f2 = y.shape
+    # if f1 != f2:
+    #    raise RuntimeError(
+    #        "X and Y have differing feature dimensions (dim = 1), should be equal, but are {} and {}".format(
+    #            f1, f2
+    #        )
+    #    )
+    # xd = x.unsqueeze(dim=1)
+    # yd = y.unsqueeze(dim=0)
+    # result = torch.zeros((k1, k2), dtype=torch.float64)
+    # for i in range(xd.shape[0]):
+    #   result[i, :] = torch.exp(
+    #      -((yd - yd[i, :, :]) ** 2).sum(dim=-1) / (2 * sigma * sigma)
+    # )
+    d = torch.cdist(x, y)
+    result = torch.exp(-d ** 2 / (2 * sigma * sigma))
+    return result
+
+
+def similarity(X, metric=_euclidian):
     if (X.split is not None) and (X.split != 0):
         raise NotImplementedError("Feature Splitting is not supported")
     if len(X.shape) > 2:
@@ -63,7 +58,6 @@ def similarity(X, metric=EuclidianDistance()):
     size = comm.Get_size()
 
     K, f = X.shape
-    k1, _ = X.lshape
 
     S = factories.zeros((K, K), dtype=types.float64, split=0)
 
@@ -171,3 +165,73 @@ def similarity(X, metric=EuclidianDistance()):
             S[rows[0] : rows[1], scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
 
     return S
+
+
+def pairwise(X, Y, metric=_euclidian):
+    if ((X.split is not None) and (X.split != 0)) or ((X.split is not None) and (X.split != 0)):
+        raise NotImplementedError("Feature Splitting is not supported")
+    if len(X.shape) > 2 or len(Y.shape) > 2:
+        raise NotImplementedError("Only 2D data matrices are supported")
+    if X.comm != Y.comm:
+        raise NotImplementedError("Differing communicators not supported")
+
+    if X.shape[1] != Y.shape[1]:
+        raise ValueError("inputs must have same number of features")
+
+    comm = X.comm
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+
+    m, f = X.shape
+    n = Y.shape[0]
+
+    S = factories.zeros((m, n), dtype=types.float64, split=0)
+
+    xcounts, xdispl, _ = X.comm.counts_displs_shape(X.shape, X.split)
+    ycounts, ydispl, _ = Y.comm.counts_displs_shape(Y.shape, Y.split)
+    num_iter = size
+
+    x_ = X._DNDarray__array
+    stationary = Y._DNDarray__array
+    rows = (xdispl[rank], xdispl[rank + 1] if (rank + 1) != size else m)
+    cols = (ydispl[rank], ydispl[rank + 1] if (rank + 1) != size else n)
+
+    # 0th iteration, calculate diagonal
+    d_ij = metric(x_, stationary)
+    S[rows[0] : rows[1], cols[0] : cols[1]] = d_ij
+
+    for iter in range(1, num_iter):
+        # Send rank's part of the matrix to the next process in a circular fashion
+        receiver = (rank + iter) % size
+        sender = (rank - iter) % size
+
+        col1 = ydispl[sender]
+        if sender != size - 1:
+            col2 = ydispl[sender + 1]
+        else:
+            col2 = n
+        columns = (col1, col2)
+
+        # All but the first iter processes are receiving, then sending
+        if (rank // iter) != 0:
+            stat = communication.Status()
+            Y.comm.handle.Probe(source=sender, tag=iter, status=stat)
+            count = int(stat.Get_count(communication.FLOAT) / f)
+            moving = torch.zeros((count, f), dtype=torch.float32)
+            Y.comm.Recv(moving, source=sender, tag=iter)
+
+        # Sending to next Process
+        Y.comm.Send(stationary, dest=receiver, tag=iter)
+
+        # The first iter processes can now receive after sending
+        if (rank // iter) == 0:
+            stat = communication.Status()
+            Y.comm.handle.Probe(source=sender, tag=iter, status=stat)
+            count = int(stat.Get_count(communication.FLOAT) / f)
+            moving = torch.zeros((count, f), dtype=torch.float32)
+            Y.comm.Recv(moving, source=sender, tag=iter)
+
+        d_ij = metric(stationary, moving)
+        S[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
+
+    return S

From 440fc81887fef79105f498ded93c5ba74ecb4fba Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 31 Jan 2020 16:02:50 +0100
Subject: [PATCH 05/49] Implementation of heat cdist(X,y,metric) - Case X.split
 is None and Y.split = 0 still needs design - currenly only 2D tensors
 supported - Bugfix for linalg.dot

---
 heat/core/linalg.py       |   7 +-
 heat/spatial/distances.py | 274 ++++++++++++++++++++++++++++++++++----
 2 files changed, 257 insertions(+), 24 deletions(-)

diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index 32a4c1fa2c..d13561518a 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -44,8 +44,13 @@ def dot(a, b, out=None):
         return a * b
     elif a.numdims == 1 and b.numdims == 1:
         # 1. If both a and b are 1-D arrays, it is inner product of vectors.
-        if a.split is not None or b.split is not None:
+        if a.split is None and b.split is None:
+            sl = slice(None)
+        elif a.split is not None and b.split is not None:
+            sl = a.comm.chunk(a.shape, a.split)[2]
+        else:  # a.split is not None or b.split is not None:
             sl = a.comm.chunk(a.shape, a.split if a.split is not None else b.split)[2]
+
         ret = torch.dot(a[sl]._DNDarray__array, b[sl]._DNDarray__array)
         if a.is_distributed() or b.is_distributed():
             a.comm.Allreduce(MPI.IN_PLACE, ret, MPI.SUM)
diff --git a/heat/spatial/distances.py b/heat/spatial/distances.py
index 58d61d40e7..e79e7db632 100644
--- a/heat/spatial/distances.py
+++ b/heat/spatial/distances.py
@@ -1,38 +1,266 @@
 import torch
 import numpy as np
+from mpi4py import MPI
 
 from .. import core
 
 __all__ = ["cdist"]
 
 
-def cdist(X, Y, quadratic_expansion=False):
-    # ToDo Case X==Y
-    result = core.factories.zeros(
-        (X.shape[0], Y.shape[0]),
-        dtype=core.types.float32,
-        split=X.split,
-        device=X.device,
-        comm=X.comm,
-    )
+def _euclidian(x, y):
+    return torch.cdist(x, y)
+
+
+def _euclidian_fast(x, y):
+    return torch.sqrt(_quadratic_expand(x, y))
+
+
+def _quadratic_expand(x, y):
+    x_norm = (x ** 2).sum(1).view(-1, 1)
+    y_t = torch.transpose(y, 0, 1)
+    y_norm = (y ** 2).sum(1).view(1, -1)
+
+    dist = x_norm + y_norm - 2.0 * torch.mm(x, y_t)
+    return torch.clamp(dist, 0.0, np.inf)
 
-    if X.split is not None:
-        if X.split != 0:
-            # ToDo: Find out if even possible
-            raise NotImplementedError("Splittings other than 0 or None currently not supported.")
-        if Y.split is not None:
-            # ToDo: This requires communication of calculated blocks, will be implemented with Similarity Matrix Calculation
-            raise NotImplementedError("Currently not supported")
 
+def _gaussian(x, y, sigma=1.0):
+    d2 = _quadratic_expand(x, y)
+    result = torch.exp(-d2 / (2 * sigma * sigma))
+    return result
+
+
+def cdist(X, Y=None, quadratic_expansion=False):
     if quadratic_expansion:
-        x_norm = (X._DNDarray__array ** 2).sum(1).view(-1, 1)
-        y_t = torch.transpose(Y._DNDarray__array, 0, 1)
-        y_norm = (Y._DNDarray__array ** 2).sum(1).view(1, -1)
+        return _dist(X, Y, _euclidian_fast)
+    else:
+        return _dist(X, Y, _euclidian)
+
+
+def _dist(X, Y=None, metric=_euclidian):
+    if len(X.shape) > 2:
+        raise NotImplementedError("Only 2D data matrices are supported")
+
+    if Y is None:
+        d = core.factories.zeros(
+            (X.shape[0], X.shape[0]), dtype=X.dtype, split=X.split, device=X.device, comm=X.comm
+        )
+        if X.split is None:
+            d._DNDarray__array = metric(X._DNDarray__array, X._DNDarray__array)
+
+        elif X.split == 0:
+            comm = X.comm
+            rank = comm.Get_rank()
+            size = comm.Get_size()
+            K, f = X.shape
+
+            counts, displ, _ = comm.counts_displs_shape(X.shape, X.split)
+            num_iter = (size + 1) // 2
+
+            stationary = X._DNDarray__array
+            rows = (displ[rank], displ[rank + 1] if (rank + 1) != size else K)
+            # 0th iteration, calculate diagonal
+            d_ij = metric(stationary, stationary)
+            d[rows[0] : rows[1], rows[0] : rows[1]] = d_ij
+            for iter in range(1, num_iter):
+                # Send rank's part of the matrix to the next process in a circular fashion
+                receiver = (rank + iter) % size
+                sender = (rank - iter) % size
+
+                col1 = displ[sender]
+                if sender != size - 1:
+                    col2 = displ[sender + 1]
+                else:
+                    col2 = K
+                columns = (col1, col2)
+
+                # All but the first iter processes are receiving, then sending
+                if (rank // iter) != 0:
+                    stat = MPI.Status()
+                    comm.handle.Probe(source=sender, tag=iter, status=stat)
+                    count = int(stat.Get_count(MPI.FLOAT) / f)
+                    moving = torch.zeros((count, f), dtype=torch.float32)
+                    comm.Recv(moving, source=sender, tag=iter)
+
+                # Sending to next Process
+                comm.Send(stationary, dest=receiver, tag=iter)
+
+                # The first iter processes can now receive after sending
+                if (rank // iter) == 0:
+                    stat = MPI.Status()
+                    comm.handle.Probe(source=sender, tag=iter, status=stat)
+                    count = int(stat.Get_count(MPI.FLOAT) / f)
+                    moving = torch.zeros((count, f), dtype=torch.float32)
+                    comm.Recv(moving, source=sender, tag=iter)
+
+                d_ij = metric(stationary, moving)
+                d[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
+
+                # Receive calculated tile
+                scol1 = displ[receiver]
+                if receiver != size - 1:
+                    scol2 = displ[receiver + 1]
+                else:
+                    scol2 = K
+                scolumns = (scol1, scol2)
+                symmetric = torch.zeros(
+                    scolumns[1] - scolumns[0], (rows[1] - rows[0]), dtype=torch.float64
+                )
+                # Receive calculated tile
+                if (rank // iter) != 0:
+                    comm.Recv(symmetric, source=receiver, tag=iter)
+
+                # sending result back to sender of moving matrix (for symmetry)
+                comm.Send(d_ij, dest=sender, tag=iter)
+                if (rank // iter) == 0:
+                    comm.Recv(symmetric, source=receiver, tag=iter)
+                d[rows[0] : rows[1], scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
+
+            if (size + 1) % 2 != 0:  # we need one mor iteration for the first n/2 processes
+                receiver = (rank + num_iter) % size
+                sender = (rank - num_iter) % size
+
+                # Case 1: only receiving
+                if rank < (size // 2):
+                    stat = MPI.Status()
+                    comm.handle.Probe(source=sender, tag=num_iter, status=stat)
+                    count = int(stat.Get_count(MPI.FLOAT) / f)
+                    moving = torch.zeros((count, f), dtype=torch.float32)
+                    comm.Recv(moving, source=sender, tag=num_iter)
+
+                    col1 = displ[sender]
+                    if sender != size - 1:
+                        col2 = displ[sender + 1]
+                    else:
+                        col2 = K
+                    columns = (col1, col2)
+
+                    d_ij = metric(stationary, moving)
+                    d[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
 
-        dist = x_norm + y_norm - 2.0 * torch.mm(X._DNDarray__array, y_t)
-        result._DNDarray__array = torch.sqrt(torch.clamp(dist, 0.0, np.inf))
+                    # sending result back to sender of moving matrix (for symmetry)
+                    comm.Send(d_ij, dest=sender, tag=num_iter)
 
+                # Case 2 : only sending processes
+                else:
+                    comm.Send(stationary, dest=receiver, tag=num_iter)
+
+                    # Receiving back result
+                    scol1 = displ[receiver]
+                    if receiver != size - 1:
+                        scol2 = displ[receiver + 1]
+                    else:
+                        scol2 = K
+                    scolumns = (scol1, scol2)
+                    symmetric = torch.zeros(
+                        (scolumns[1] - scolumns[0], rows[1] - rows[0]), dtype=torch.float32
+                    )
+                    comm.Recv(symmetric, source=receiver, tag=num_iter)
+                    d[rows[0] : rows[1], scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
+
+        else:
+            raise NotImplementedError("Splittings other than 0 or None currently not supported.")
+
+    #########################################################################
     else:
-        result._DNDarray__array = torch.cdist(X._DNDarray__array, Y._DNDarray__array)
+        if len(Y.shape) > 2:
+            raise NotImplementedError("Only 2D data matrices are supported")
 
-    return result
+        if X.comm != Y.comm:
+            raise NotImplementedError("Differing communicators not supported")
+
+        if X.split is None:
+            if Y.split is None:
+                split = None
+            else:
+                split = Y.split
+        else:
+            # ToDo Für split implementation >1: split = min(X,split, Y.split)
+            split = X.split
+
+        d = core.factories.zeros(
+            (X.shape[0], Y.shape[0]), dtype=X.dtype, split=split, device=X.device, comm=X.comm
+        )
+
+        if X.split is None:
+            if Y.split == 0 or Y.split is None:
+                d._DNDarray__array = metric(X._DNDarray__array, Y._DNDarray__array)
+
+            else:
+                raise NotImplementedError(
+                    "Splittings other than 0 or None currently not supported."
+                )
+
+        elif X.split == 0:
+            if Y.split is None:
+                d._DNDarray__array = metric(X._DNDarray__array, Y._DNDarray__array)
+
+            elif Y.split == 0:
+                if X.shape[1] != Y.shape[1]:
+                    raise ValueError("Inputs must have same shape[1]")
+
+                # print("Rank {}\n X._DNDarray__array  = {}, \nY._DNDarray__array = {}".format(X.comm.rank,X._DNDarray__array, Y._DNDarray__array))
+                comm = X.comm
+                rank = comm.Get_rank()
+                size = comm.Get_size()
+
+                m, f = X.shape
+                n = Y.shape[0]
+
+                xcounts, xdispl, _ = X.comm.counts_displs_shape(X.shape, X.split)
+                ycounts, ydispl, _ = Y.comm.counts_displs_shape(Y.shape, Y.split)
+                num_iter = size
+
+                x_ = X._DNDarray__array
+                stationary = Y._DNDarray__array
+                rows = (xdispl[rank], xdispl[rank + 1] if (rank + 1) != size else m)
+                cols = (ydispl[rank], ydispl[rank + 1] if (rank + 1) != size else n)
+
+                # 0th iteration, calculate diagonal
+                d_ij = metric(x_, stationary)
+                d[rows[0] : rows[1], cols[0] : cols[1]] = d_ij
+
+                for iter in range(1, num_iter):
+                    # Send rank's part of the matrix to the next process in a circular fashion
+                    receiver = (rank + iter) % size
+                    sender = (rank - iter) % size
+
+                    col1 = ydispl[sender]
+                    if sender != size - 1:
+                        col2 = ydispl[sender + 1]
+                    else:
+                        col2 = n
+                    columns = (col1, col2)
+
+                    # All but the first iter processes are receiving, then sending
+                    if (rank // iter) != 0:
+                        stat = MPI.Status()
+                        Y.comm.handle.Probe(source=sender, tag=iter, status=stat)
+                        count = int(stat.Get_count(MPI.FLOAT) / f)
+                        moving = torch.zeros((count, f), dtype=torch.float32)
+                        Y.comm.Recv(moving, source=sender, tag=iter)
+
+                    # Sending to next Process
+                    Y.comm.Send(stationary, dest=receiver, tag=iter)
+
+                    # The first iter processes can now receive after sending
+                    if (rank // iter) == 0:
+                        stat = MPI.Status()
+                        Y.comm.handle.Probe(source=sender, tag=iter, status=stat)
+                        count = int(stat.Get_count(MPI.FLOAT) / f)
+                        moving = torch.zeros((count, f), dtype=torch.float32)
+                        Y.comm.Recv(moving, source=sender, tag=iter)
+
+                    d_ij = metric(x_, moving)
+                    d[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
+
+            else:
+                raise NotImplementedError(
+                    "Splittings other than 0 or None currently not supported."
+                )
+
+        else:
+            # ToDo: Find out if even possible
+            raise NotImplementedError("Splittings other than 0 or None currently not supported.")
+
+    return d

From 560fbbad82939f947ad88fcb4bcc80bb46c39d40 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Mon, 3 Feb 2020 10:45:40 +0100
Subject: [PATCH 06/49] Removed diag function, was already available under
 ht.manipulations

---
 heat/core/factories.py | 47 ------------------------------------------
 1 file changed, 47 deletions(-)

diff --git a/heat/core/factories.py b/heat/core/factories.py
index fe4bbdd040..7e8ee248cb 100644
--- a/heat/core/factories.py
+++ b/heat/core/factories.py
@@ -375,53 +375,6 @@ def array(
     return dndarray.DNDarray(obj, tuple(int(ele) for ele in gshape), dtype, split, device, comm)
 
 
-def diagonal(v, dtype=types.float32, split=None, device=None, comm=None):
-    """
-    Returns a new 2D array with elements of vector v on the diagonal.
-
-    Parameters
-    ----------
-    v : ht.dndarray
-        1D vector
-    dtype : ht.dtype
-        The desired HeAT data type for the array, defaults to ht.float32.
-    split: int, optional
-        The axis along which the array is split and distributed, defaults to None (no distribution).
-    device : str, ht.Device or None, optional
-        Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device).
-    comm: Communication, optional
-        Handle to the nodes holding distributed parts or copies of this tensor. Must be either None or the same as v.comm
-
-    Returns
-    -------
-    out : ht.DNDarray
-        Diagonal matrix of size v.shape[0] x v.shape[0]
-
-    """
-    shape = (v.shape[0], v.shape[0])
-    diag = zeros(shape, dtype, split, device, comm)
-    if comm is not None and comm != v.comm:
-        raise NotImplementedError("Differing communcators are currently not supported")
-
-    if v.split is not None and v.comm.is_distributed():
-        counts, displ, _ = v.comm.counts_displs_shape(v.shape, v.split)
-        c1 = displ[v.comm.rank]
-        if v.comm.rank != v.comm.size - 1:
-            c2 = displ[v.comm.rank + 1]
-        else:
-            c2 = v.shape[0]
-        if split is None:
-            diag._DNDarray__array[c1:c2, c1:c2] = torch.diag(v._DNDarray__array)
-        elif split == 0:
-            diag._DNDarray__array[:, c1:c2] = torch.diag(v._DNDarray__array)
-        elif split == 1:
-            diag._DNDarray__array[c1:c2, :] = torch.diag(v._DNDarray__array)
-    else:
-        diag._DNDarray__array = torch.diag(v._DNDarray__array)
-
-    return diag
-
-
 def empty(shape, dtype=types.float32, split=None, device=None, comm=None, order="C"):
     """
     Returns a new uninitialized array of given shape and data type. May be allocated split up across multiple

From 6165b3cac3605a3d38b488cf8eb56c8ae5f08877 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Mon, 3 Feb 2020 15:32:50 +0100
Subject: [PATCH 07/49] Some cosmetics and input sanitation

---
 heat/{core => }/cluster/spectral.py |  11 +-
 heat/core/linalg.py                 | 141 ++++++----
 heat/ml/cluster/kmeans.py.orig      | 385 ++++++++++++++++++++++++++++
 setup.py                            |   2 +-
 4 files changed, 482 insertions(+), 57 deletions(-)
 rename heat/{core => }/cluster/spectral.py (94%)
 create mode 100644 heat/ml/cluster/kmeans.py.orig

diff --git a/heat/core/cluster/spectral.py b/heat/cluster/spectral.py
similarity index 94%
rename from heat/core/cluster/spectral.py
rename to heat/cluster/spectral.py
index 92322a9d4f..ca141bd50b 100644
--- a/heat/core/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -2,9 +2,10 @@
 import numpy as np
 
 from .. import factories
+from .. import manipulations
 from .. import types
 from .. import operations
-from ..spatial import distance
+from ..spatial import distances
 from .. import linalg
 from .. import random
 from . import KMeans
@@ -32,7 +33,7 @@ def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
     else:
         degree = sum(A, axis=1)
 
-    D = factories.diagonal(degree, dtype=S.dtype, split=S.split, device=S.device, comm=S.comm)
+    D = manipulations.diag(degree, dtype=S.dtype, split=S.split, device=S.device, comm=S.comm)
 
     if norm:
         L = factories.eye(A.shape, split=A.split) - linalg.matmul(D, linalg.matmul(S, D))
@@ -105,11 +106,11 @@ def fit(self, X):
                 raise ValueError(
                     "For usage of RBF kernel please specify the scaling factor sigma in class instantiation"
                 )
-            self._similarity = distance.similarity(
-                X, lambda x, y: distance._gaussian(x, y, self.sigma)
+            self._similarity = distances.similarity(
+                X, lambda x, y: distances._gaussian(x, y, self.sigma)
             )
         elif self.kernel == "euclidean":
-            self._similarity = distance.similarity(X, lambda x, y: distance._euclidian(x, y))
+            self._similarity = distances.similarity(X, lambda x, y: distances._euclidian(x, y))
         else:
             raise NotImplementedError("Other kernels currently not implemented")
 
diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index d13561518a..69344a4dee 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -3,13 +3,74 @@
 import torch
 
 from .communication import MPI
+from . import arithmetics
 from . import dndarray
 from . import factories
 from . import manipulations
 from . import random
 from . import types
 
-__all__ = ["dot", "matmul", "projection", "transpose", "tril", "triu"]
+__all__ = ["cg", "dot", "lanczos", "matmul", "projection", "transpose", "tril", "triu"]
+
+
+def cg(A, b, x0, out=None):
+    """
+    Conjugate gradients method for solving a system of linear equations Ax = b
+
+    Parameters
+    ----------
+    A : ht.DNDarray
+        2D symmetric, positive definite Matrix
+    b : ht.DNDarray
+        1D vector
+    x0 : ht.DNDarray
+        Arbitrary 1D starting vector
+
+    Returns
+    -------
+    ht.DNDarray
+        Returns the solution x of the system of linear equations. If out is given, it is returned
+    """
+
+    if not isinstance(A, dndarray) or not isinstance(b, dndarray) or not isinstance(x0, dndarray):
+        raise TypeError(
+            "A, b and x0 need to be of type ht.dndarra, but were {}, {}, {}".format(
+                type(A), type(b), type(x0)
+            )
+        )
+
+    if not (A.numdims == 2):
+        raise RuntimeError("A needs to be a 2D matrix")
+    if not (b.numdims == 1):
+        raise RuntimeError("b needs to be a 1D vector")
+    if not (x0.numdims == 1):
+        raise RuntimeError("c needs to be a 1D vector")
+
+    r = b - matmul(A, x0)
+    p = r
+    rsold = matmul(r, r)
+    x = x0
+
+    for i in range(len(b)):
+        Ap = matmul(A, p)
+        alpha = rsold / matmul(p, Ap)
+        x = x + (alpha * p)
+        r = r - (alpha * Ap)
+        rsnew = matmul(r, r)
+        if math.sqrt(rsnew) < 1e-10:
+            print("Residual r = {} reaches tolerance in it = {}".format(math.sqrt(rsnew), i))
+            if out is not None:
+                out = x
+                return out
+            return x
+
+        p = r + ((rsnew / rsold) * p)
+        rsold = rsnew
+
+    if out is not None:
+        out = x
+        return out
+    return x
 
 
 def dot(a, b, out=None):
@@ -75,52 +136,6 @@ def dot(a, b, out=None):
         raise NotImplementedError("ht.dot not implemented for N-D dot M-D arrays")
 
 
-def cg(A, b, x0, out=None):
-    """
-    Conjugate gradients method for solving a system of linear equations Ax = b
-
-    Parameters
-    ----------
-    A : ht.DNDarray
-        2D symmetric, positive definite Matrix
-    b : ht.DNDarray
-        1D vector
-    x0 : ht.DNDarray
-        Arbitrary 1D starting vector
-
-    Returns
-    -------
-    ht.DNDarray
-        Returns the solution x of the system of linear equations. If out is given, it is returned
-    """
-
-    r = b - matmul(A, x0)
-    p = r
-    rsold = matmul(r, r)
-    x = x0
-
-    for i in range(len(b)):
-        Ap = matmul(A, p)
-        alpha = rsold / matmul(p, Ap)
-        x = x + (alpha * p)
-        r = r - (alpha * Ap)
-        rsnew = matmul(r, r)
-        if math.sqrt(rsnew) < 1e-10:
-            print("Residual r = {} reaches tolerance in it = {}".format(math.sqrt(rsnew), i))
-            if out is not None:
-                out = x
-                return out
-            return x
-
-        p = r + ((rsnew / rsold) * p)
-        rsold = rsnew
-
-    if out is not None:
-        out = x
-        return out
-    return x
-
-
 def lanczos(A, m, v0=None, V_out=None, T_out=None):
     """
     Lanczos algorithm for iterative approximation of the solution to the eigenvalue problem,  an adaptation of power methods to find the m "most useful" (tending towards extreme highest/lowest) eigenvalues and eigenvectors of an n×n Hermitian matrix, where often m<<n
@@ -141,6 +156,13 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         Tridiagonal matrix of size mxm, with coefficients alpha_1,...alpha_n on the diagonal and coefficients beta_1,...,beta_n-1 on the side-diagonals. If out is given, it is returned
 
     """
+    if not isinstance(A, dndarray):
+        raise TypeError("A needs to be of type ht.dndarra, but was {}".format(type(A)))
+
+    if not (A.numdims == 2):
+        raise RuntimeError("A needs to be a 2D matrix")
+    if not isinstance(m, (int, float)):
+        raise TypeError("m must be eiter int or float, but was {}".format(type(m)))
 
     n, column = A.shape
     if n != column:
@@ -163,7 +185,7 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
     T[0, 0] = alpha
     V[:, 0] = v0
 
-    for i in range(1, m):
+    for i in range(1, int(m)):
         beta = norm(w)
         if beta == 0.0:
             # Lanczos Breakdown, pick a random vector to continue
@@ -865,18 +887,25 @@ def c_block_setter(b_proc, a_proc, a_data, b_data):
 
 def norm(a):
     """
-    Euklidian norm of vector a
+    Frobenius norm of vector a
 
     Parameters
     ----------
-    a : ht.DNDarray (1D)
+    a : ht.DNDarray
 
     Returns
     -------
     float
         Returns the vector norm (lenght) of a
     """
-    return math.sqrt(dot(a, a))
+    if not isinstance(a, dndarray.DNDarray):
+        raise TypeError("a must be of type ht.DNDarray, but was {}".format(type(a)))
+
+    d = a ** 2
+    for i in range(len(a.shape) - 1, -1, -1):
+        d = arithmetics.sum(d, axis=i)
+
+    return math.sqrt(d)
 
 
 def projection(a, b):
@@ -893,6 +922,16 @@ def projection(a, b):
     ht.DNDarray
         Returns the vector projection of b in the direction of a
     """
+    if not isinstance(a, dndarray.DNDarray) or not isinstance(b, dndarray.DNDarray):
+        raise TypeError(
+            "a, b must be of type ht.DNDarray, but were {}, {}".format(type(a), type(b))
+        )
+
+    if len(a.shape) != 1 or len(b.shape) != 1:
+        raise RuntimeError(
+            "a, b must be vectors of length 1, but were {}, {}".format(len(a.shape), len(b.shape))
+        )
+
     return (dot(b, a) / dot(a, a)) * a
 
 
diff --git a/heat/ml/cluster/kmeans.py.orig b/heat/ml/cluster/kmeans.py.orig
new file mode 100644
index 0000000000..e6157e38de
--- /dev/null
+++ b/heat/ml/cluster/kmeans.py.orig
@@ -0,0 +1,385 @@
+import sys
+
+import heat as ht
+
+
+class KMeans:
+    def __init__(self, n_clusters=8, init="random", max_iter=300, tol=1e-4, random_state=None):
+        """
+        K-Means clustering algorithm. An implementation of Lloyd's algorithm [1].
+
+        Parameters
+        ----------
+        n_clusters : int, optional, default: 8
+            The number of clusters to form as well as the number of centroids to generate.
+        init : {‘random’ or an ndarray}
+            Method for initialization, defaults to ‘random’:
+            ‘k-means++’ : selects initial cluster centers for the clustering in a smart way to speed up convergence [2].
+            ‘random’: choose k observations (rows) at random from data for the initial centroids.
+            If an ht.DNDarray is passed, it should be of shape (n_clusters, n_features) and gives the initial centers.
+        max_iter : int, default: 300
+            Maximum number of iterations of the k-means algorithm for a single run.
+        tol : float, default: 1e-4
+            Relative tolerance with regards to inertia to declare convergence.
+        random_state : int
+            Determines random number generation for centroid initialization.
+
+        Notes
+        -----
+        The average complexity is given by O(k*n*T), were n is the number of samples and T is the number of iterations.
+
+        In practice, the k-means algorithm is very fast, but it may fall into local minima. That is why it can be useful
+        to restart it several times. If the algorithm stops before fully converging (because of tol or max_iter),
+        labels_ and cluster_centers_ will not be consistent, i.e. the cluster_centers_ will not be the means of the
+        points in each cluster. Also, the estimator will reassign labels_ after the last iteration to make labels_
+        consistent with predict on the training set.
+
+        References
+        ----------
+        [1] Lloyd, Stuart P., "Least squares quantization in PCM", IEEE Transactions on Information Theory, 28 (2), pp.
+            129–137, 1982.
+        [2] Arthur, D., Vassilvitskii, S., "k-means++: The Advantages of Careful Seeding", Proceedings of the Eighteenth
+            Annual ACM-SIAM Symposium on Discrete Algorithms, Society for Industrial and Applied Mathematics
+            Philadelphia, PA, USA. pp. 1027–1035, 2007.
+        """
+        self.init = init
+        self.max_iter = max_iter
+        self.n_clusters = n_clusters
+        self.random_state = random_state
+        self.tol = tol
+
+        # in-place properties
+        self._cluster_centers = None
+        self._labels = None
+        self._inertia = None
+        self._n_iter = None
+
+    @property
+    def cluster_centers_(self):
+        """
+        Returns
+        -------
+        ht.DNDarray, shape=(n_clusters, n_features):
+            Coordinates of cluster centers. If the algorithm stops before fully converging (see tol and max_iter),
+            these will not be consistent with labels_.
+        """
+        return self._cluster_centers.squeeze(axis=0).T
+
+    @property
+    def labels_(self):
+        """
+        Returns
+        -------
+        ht.DNDarray, shape=(n_points):
+            Labels of each point.
+        """
+        return self._labels
+
+    @property
+    def inertia_(self):
+        """
+        Returns
+        -------
+        float:
+            Sum of squared distances of samples to their closest cluster center.
+        """
+        return self._inertia
+
+    @property
+    def n_iter_(self):
+        """
+        Returns
+        -------
+        int:
+            Number of iterations run.
+        """
+        return self._n_iter
+
+    def _initialize_cluster_centers(self, X):
+        """
+        Initializes the K-Means centroids.
+
+        Parameters
+        ----------
+        X : ht.DNDarray, shape=(n_point, n_features)
+            The data to initialize the clusters for.
+        """
+        # always initialize the random state
+        if self.random_state is not None:
+            ht.random.seed(self.random_state)
+
+        # initialize the centroids by randomly picking some of the points
+        if self.init == "random":
+            # Samples will be equally distributed drawn from all involved processes
+            _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
+            centroids = ht.empty(
+                (X.shape[1], self.n_clusters), split=None, device=X.device, comm=X.comm
+            )
+
+            if (X.split is None) or (X.split == 0):
+                for i in range(self.n_clusters):
+                    samplerange = (
+                        X.gshape[0] // self.n_clusters * i,
+                        X.gshape[0] // self.n_clusters * (i + 1),
+                    )
+                    sample = ht.random.randint(samplerange[0], samplerange[1]).item()
+                    proc = 0
+                    for p in range(X.comm.size):
+                        if displ[p] > sample:
+                            break
+                        proc = p
+                    xi = ht.zeros(X.shape[1], dtype=X.dtype)
+                    if X.comm.rank == proc:
+                        idx = sample - displ[proc]
+                        xi = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
+                    xi.comm.Bcast(xi, root=proc)
+                    centroids[:, i] = xi
+
+            else:
+                raise NotImplementedError("Not implemented for other splitting-axes")
+
+<<<<<<< HEAD
+            #self._cluster_centers = centroids.expand_dims(axis=0)
+            self._cluster_centers = centroids
+=======
+            self._cluster_centers = centroids.expand_dims(axis=0)
+>>>>>>> fc71e7317d79438e8650775c2da59333328676b1
+
+        # directly passed centroids
+        elif isinstance(self.init, ht.DNDarray):
+            if len(self.init.shape) != 2:
+                raise ValueError(
+                    "passed centroids need to be two-dimensional, but are {}".format(len(self.init))
+                )
+            if self.init.shape[0] != self.n_clusters or self.init.shape[1] != X.shape[1]:
+                raise ValueError("passed centroids do not match cluster count or data shape")
+            self._cluster_centers = self.init.resplit(None).T.expand_dims(axis=0)
+
+        # kmeans++, smart centroid guessing
+        elif self.init == "kmeans++":
+            if (X.split is None) or (X.split == 0):
+                # X = X.expand_dims(axis=2)
+                centroids = ht.empty(
+                    (1, X.shape[1], self.n_clusters), split=None, device=X.device, comm=X.comm
+                )
+                sample = ht.random.randint(0, X.shape[0] - 1).item()
+                _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
+                proc = 0
+                for p in range(X.comm.size):
+                    if displ[p] > sample:
+                        break
+                    proc = p
+                x0 = ht.zeros(X.shape[1], dtype=X.dtype, device=X.device, comm=X.comm)
+                if X.comm.rank == proc:
+                    idx = sample - displ[proc]
+                    x0 = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
+                x0.comm.Bcast(x0, root=proc)
+                centroids[:, :, 0] = x0
+
+                for i in range(1, self.n_clusters):
+                    distances = ht.zeros(
+                        (X.shape[0], i), dtype=X.dtype, split=X.split, device=X.device, comm=X.comm
+                    )
+                    for k in range(i):
+                        distances[:, k] = ((X - centroids[:, :, k]) ** 2).sum(axis=1, keepdim=False)
+                    D2 = distances.min(axis=1)
+                    D2.resplit_(axis=None)
+                    D2 = D2.squeeze()
+                    prob = D2 / D2.sum()
+                    x = ht.random.rand().item()
+                    sample = 0
+                    sum = 0
+                    for j in range(len(prob)):
+                        if sum > x:
+                            break
+                        sum += prob[j].item()
+                        sample = j
+                    proc = 0
+                    for p in range(X.comm.size):
+                        if displ[p] > sample:
+                            break
+                        proc = p
+                    xi = ht.zeros(X.shape[1], dtype=X.dtype)
+                    if X.comm.rank == proc:
+                        idx = sample - displ[proc]
+                        xi = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
+                    xi.comm.Bcast(xi, root=proc)
+                    centroids[:, :, i] = xi
+            else:
+                raise NotImplementedError("Not implemented for other splitting-axes")
+
+            self._cluster_centers = centroids
+
+        else:
+            raise ValueError(
+                'init needs to be one of "random", ht.DNDarray or "kmeans++", but was {}'.format(
+                    self.init
+                )
+            )
+
+    def _fit_to_cluster(self, X):
+        """
+        Assigns the passed data points to
+
+        Parameters
+        ----------
+        X : ht.DNDarray, shape=(n_samples, n_features)
+            Training instances to cluster.
+        """
+        # calculate the distance matrix and determine the closest centroid
+        distances = ht.zeros(
+            (X.shape[0], self.n_clusters),
+            dtype=X.dtype,
+            split=X.split,
+            device=X.device,
+            comm=X.comm,
+        )
+        for k in range(self._cluster_centers.shape[2]):
+            distances[:, k] = ((X - self._cluster_centers[:, :, k]) ** 2).sum(axis=1, keepdim=False)
+        matching_centroids = distances.argmin(axis=1, keepdim=True)
+
+        return matching_centroids
+
+    def fit(self, X):
+        """
+        Computes the centroid of a k-means clustering.
+
+        Parameters
+        ----------
+        X : ht.DNDarray, shape=(n_samples, n_features)
+            Training instances to cluster.
+        """
+        # input sanitation
+        if not isinstance(X, ht.DNDarray):
+            raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
+
+        # initialize the clustering
+        self._initialize_cluster_centers(X)
+        self._n_iter = 0
+        matching_centroids = ht.zeros((X.shape[0]), split=X.split, device=X.device, comm=X.comm)
+
+        # X = X.expand_dims(axis=2)
+        new_cluster_centers = self._cluster_centers.copy()
+
+        # iteratively fit the points to the centroids
+        for epoch in range(self.max_iter):
+            # increment the iteration count
+            self._n_iter += 1
+            # determine the centroids
+            matching_centroids = self._fit_to_cluster(X)
+
+            # update the centroids
+            for i in range(self.n_clusters):
+                # points in current cluster
+                selection = (matching_centroids == i).astype(ht.int64)
+
+                # accumulate points and total number of points in cluster
+                assigned_points = (X * selection).sum(axis=0, keepdim=True)
+                points_in_cluster = selection.sum(axis=0, keepdim=True).clip(
+                    1.0, ht.iinfo(ht.int64).max
+                )
+
+                # compute the new centroids
+                new_cluster_centers[:, :, i] = assigned_points / points_in_cluster
+
+            # check whether centroid movement has converged
+            self._inertia = ((self._cluster_centers - new_cluster_centers) ** 2).sum()
+            self._cluster_centers = new_cluster_centers.copy()
+            if self.tol is not None and self._inertia <= self.tol:
+                break
+
+        self._labels = matching_centroids.squeeze()
+
+        return self
+
+    def fit_predict(self, X):
+        """
+        Compute cluster centers and predict cluster index for each sample.
+
+        Convenience method; equivalent to calling fit(X) followed by predict(X).
+
+        Parameters
+        ----------
+        X : ht.DNDarray, shape = [n_samples, n_features]
+            Input data to be clustered.
+
+        Returns
+        -------
+        labels : ht.DNDarray, shape [n_samples,]
+            Index of the cluster each sample belongs to.
+        """
+        self.fit(X)
+        return self.predict(X)
+
+    def get_params(self, deep=True):
+        """
+        Get parameters for this estimator.
+
+        Parameters
+        ----------
+        deep : boolean, optional
+            If True, will return the parameters for this estimator and contained sub-objects that are estimators.
+            Defaults to true.
+
+        Returns
+        -------
+        params : dict of string to any
+            Parameter names mapped to their values.
+        """
+        # unused
+        _ = deep
+
+        return {
+            "init": self.init,
+            "max_iter": self.max_iter,
+            "n_clusters": self.n_clusters,
+            "random_state": self.random_state,
+            "tol": self.tol,
+        }
+
+    def predict(self, X):
+        """
+        Predict the closest cluster each sample in X belongs to.
+
+        In the vector quantization literature, cluster_centers_ is called the code book and each value returned by
+        predict is the index of the closest code in the code book.
+
+        Parameters
+        ----------
+        X : ht.DNDarray, shape = [n_samples, n_features]
+            New data to predict.
+
+        Returns
+        -------
+        labels : ht.DNDarray, shape = [n_samples,]
+            Index of the cluster each sample belongs to.
+        """
+        # input sanitation
+        if not isinstance(X, ht.DNDarray):
+            raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
+
+        # determine the centroids
+        # return self._fit_to_cluster(X.expand_dims(axis=2)).squeeze()
+        return self._fit_to_cluster(X).squeeze()
+
+    def set_params(self, **params):
+        """
+        Set the parameters of this estimator.
+
+        Parameters
+        ----------
+        params : dict
+            The parameters of the estimator to be modified.
+
+        Returns
+        -------
+        self : ht.ml.KMeans
+            This estimator instance for chaining.
+        """
+        self.init = params.get(params["init"], self.init)
+        self.max_iter = params.get(params["max_iter"], self.max_iter)
+        self.n_clusters = params.get(params["n_clusters"], self.n_clusters)
+        self.random_state = params.get(params["random_state"], self.random_state)
+        self.tol = params.get(params["tol"], self.tol)
+
+        return self
diff --git a/setup.py b/setup.py
index b70fd95e55..ab4cb67139 100644
--- a/setup.py
+++ b/setup.py
@@ -18,9 +18,9 @@
     packages=[
         "heat",
         "heat.core",
-        "heat.core.cluster",
         "heat.core.regression",
         "heat.core.regression.lasso",
+        "heat.cluster",
         "heat.utils",
     ],
     data_files=["README.md", "LICENSE"],

From a3f82b89b4bd03575c9aacb49555ca2627042e2d Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Mon, 3 Feb 2020 16:27:34 +0100
Subject: [PATCH 08/49] Bugfixes in the Linalg Utilites

---
 heat/core/linalg.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index 69344a4dee..a8111c9d7f 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -32,7 +32,11 @@ def cg(A, b, x0, out=None):
         Returns the solution x of the system of linear equations. If out is given, it is returned
     """
 
-    if not isinstance(A, dndarray) or not isinstance(b, dndarray) or not isinstance(x0, dndarray):
+    if (
+        not isinstance(A, dndarray.DNDarray)
+        or not isinstance(b, dndarray.DNDarray)
+        or not isinstance(x0, dndarray.DNDarray)
+    ):
         raise TypeError(
             "A, b and x0 need to be of type ht.dndarra, but were {}, {}, {}".format(
                 type(A), type(b), type(x0)
@@ -48,6 +52,7 @@ def cg(A, b, x0, out=None):
 
     r = b - matmul(A, x0)
     p = r
+    print(A.shape, p.shape)
     rsold = matmul(r, r)
     x = x0
 
@@ -156,7 +161,7 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         Tridiagonal matrix of size mxm, with coefficients alpha_1,...alpha_n on the diagonal and coefficients beta_1,...,beta_n-1 on the side-diagonals. If out is given, it is returned
 
     """
-    if not isinstance(A, dndarray):
+    if not isinstance(A, dndarray.DNDarray):
         raise TypeError("A needs to be of type ht.dndarra, but was {}".format(type(A)))
 
     if not (A.numdims == 2):
@@ -316,13 +321,17 @@ def matmul(a, b):
     else:
         # if they are vectors they need to be expanded to be the proper dimensions
         vector_flag = False  # flag to run squeeze at the end of the function
+        both_vec = 0
         if len(a.gshape) < 2:
             a = manipulations.expand_dims(a, axis=0)
             vector_flag = True
+            both_vec += 1
         if len(b.gshape) < 2:
             b = manipulations.expand_dims(b, axis=1)
             vector_flag = True
+            both_vec += 1
 
+        both_vec = True if both_vec == 2 else False
         split_0_flag = False
         split_1_flag = False
         split_01_flag = False
@@ -701,7 +710,6 @@ def c_block_setter(b_proc, a_proc, a_data, b_data):
                         c._DNDarray__array[: a_node_rem_s0.shape[0]] += a_node_rem_s0 @ b_rem
 
                     del b_lp_data[pr]
-
             c = (
                 c
                 if not vector_flag
@@ -881,7 +889,7 @@ def c_block_setter(b_proc, a_proc, a_data, b_data):
             split = a.split if b.gshape[1] > 1 else 0
             split = split if not vector_flag else 0
             res = res if not vector_flag else res.squeeze()
-            c = factories.array(res, split=split, device=a.device)
+            c = factories.array(res, split=split if not both_vec else None, device=a.device)
             return c
 
 

From 0c7cc4a5f0fdbf18ded0b65b687a29cbaa6f6632 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Wed, 5 Feb 2020 13:11:03 +0100
Subject: [PATCH 09/49] Renaming of distance module; implementation of rbf
 kernel

---
 heat/cluster/kmeans.py                     |  4 ++--
 heat/cluster/spectral.py                   |  9 ++++-----
 heat/spatial/__init__.py                   |  2 +-
 heat/spatial/{distances.py => distance.py} | 15 ++++++++++++++-
 4 files changed, 21 insertions(+), 9 deletions(-)
 rename heat/spatial/{distances.py => distance.py} (96%)

diff --git a/heat/cluster/kmeans.py b/heat/cluster/kmeans.py
index 5820f73294..59ff067a9e 100644
--- a/heat/cluster/kmeans.py
+++ b/heat/cluster/kmeans.py
@@ -170,7 +170,7 @@ def _initialize_cluster_centers(self, X):
                 x0.comm.Bcast(x0, root=proc)
                 centroids[0, :] = x0
                 for i in range(1, self.n_clusters):
-                    distances = ht.spatial.distances.cdist(X, centroids, quadratic_expansion=True)
+                    distances = ht.spatial.distance.cdist(X, centroids, quadratic_expansion=True)
                     D2 = distances.min(axis=1)
                     D2.resplit_(axis=None)
                     prob = D2 / D2.sum()
@@ -216,7 +216,7 @@ def _fit_to_cluster(self, X):
             Training instances to cluster.
         """
         # calculate the distance matrix and determine the closest centroid
-        distances = ht.spatial.distances.cdist(X, self._cluster_centers, quadratic_expansion=True)
+        distances = ht.spatial.distance.cdist(X, self._cluster_centers, quadratic_expansion=True)
         matching_centroids = distances.argmin(axis=1, keepdim=True)
 
         return matching_centroids
diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index ca141bd50b..9c0fbee3fa 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -5,7 +5,7 @@
 from .. import manipulations
 from .. import types
 from .. import operations
-from ..spatial import distances
+from ..spatial import distance
 from .. import linalg
 from .. import random
 from . import KMeans
@@ -106,11 +106,10 @@ def fit(self, X):
                 raise ValueError(
                     "For usage of RBF kernel please specify the scaling factor sigma in class instantiation"
                 )
-            self._similarity = distances.similarity(
-                X, lambda x, y: distances._gaussian(x, y, self.sigma)
-            )
+            self._similarity = distance.rbf(X, self.sigma)
+
         elif self.kernel == "euclidean":
-            self._similarity = distances.similarity(X, lambda x, y: distances._euclidian(x, y))
+            self._similarity = distance.cdist(X)
         else:
             raise NotImplementedError("Other kernels currently not implemented")
 
diff --git a/heat/spatial/__init__.py b/heat/spatial/__init__.py
index 5d350fd2d4..c3dbe9d6cd 100644
--- a/heat/spatial/__init__.py
+++ b/heat/spatial/__init__.py
@@ -1 +1 @@
-from .distances import *
+from .distance import *
diff --git a/heat/spatial/distances.py b/heat/spatial/distance.py
similarity index 96%
rename from heat/spatial/distances.py
rename to heat/spatial/distance.py
index e79e7db632..a7344b848e 100644
--- a/heat/spatial/distances.py
+++ b/heat/spatial/distance.py
@@ -4,7 +4,7 @@
 
 from .. import core
 
-__all__ = ["cdist"]
+__all__ = ["cdist", "rbf"]
 
 
 def _euclidian(x, y):
@@ -25,6 +25,12 @@ def _quadratic_expand(x, y):
 
 
 def _gaussian(x, y, sigma=1.0):
+    d2 = _euclidian(x, y) ** 2
+    result = torch.exp(-d2 / (2 * sigma * sigma))
+    return result
+
+
+def _gaussian_fast(x, y, sigma=1.0):
     d2 = _quadratic_expand(x, y)
     result = torch.exp(-d2 / (2 * sigma * sigma))
     return result
@@ -37,6 +43,13 @@ def cdist(X, Y=None, quadratic_expansion=False):
         return _dist(X, Y, _euclidian)
 
 
+def rbf(X, Y=None, sigma=1.0, quadratic_expansion=False):
+    if quadratic_expansion:
+        return _dist(X, Y, lambda x, y: _gaussian_fast(x, y, sigma))
+    else:
+        return _dist(X, Y, lambda x, y: _gaussian(x, y, sigma))
+
+
 def _dist(X, Y=None, metric=_euclidian):
     if len(X.shape) > 2:
         raise NotImplementedError("Only 2D data matrices are supported")

From b4cddd95a07b0446f7d171d502ada6da2d2c90f2 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Wed, 5 Feb 2020 15:05:09 +0100
Subject: [PATCH 10/49] Unit tests for norm and projection

---
 heat/core/linalg.py            | 10 ++++----
 heat/core/tests/test_linalg.py | 46 ++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index a8111c9d7f..4fe9f90c5d 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -10,7 +10,7 @@
 from . import random
 from . import types
 
-__all__ = ["cg", "dot", "lanczos", "matmul", "projection", "transpose", "tril", "triu"]
+__all__ = ["cg", "dot", "lanczos", "matmul", "norm", "projection", "transpose", "tril", "triu"]
 
 
 def cg(A, b, x0, out=None):
@@ -112,9 +112,9 @@ def dot(a, b, out=None):
         # 1. If both a and b are 1-D arrays, it is inner product of vectors.
         if a.split is None and b.split is None:
             sl = slice(None)
-        elif a.split is not None and b.split is not None:
+        elif (a.split is not None) and (b.split is not None):  # both are split
             sl = a.comm.chunk(a.shape, a.split)[2]
-        else:  # a.split is not None or b.split is not None:
+        else:
             sl = a.comm.chunk(a.shape, a.split if a.split is not None else b.split)[2]
 
         ret = torch.dot(a[sl]._DNDarray__array, b[sl]._DNDarray__array)
@@ -918,7 +918,7 @@ def norm(a):
 
 def projection(a, b):
     """
-    Projection of vector b onto vector a
+    Projection of vector a onto vector b
 
     Parameters
     ----------
@@ -940,7 +940,7 @@ def projection(a, b):
             "a, b must be vectors of length 1, but were {}, {}".format(len(a.shape), len(b.shape))
         )
 
-    return (dot(b, a) / dot(a, a)) * a
+    return (dot(a, b) / dot(b, b)) * b
 
 
 def transpose(a, axes=None):
diff --git a/heat/core/tests/test_linalg.py b/heat/core/tests/test_linalg.py
index 810995428b..6c51bb78fd 100644
--- a/heat/core/tests/test_linalg.py
+++ b/heat/core/tests/test_linalg.py
@@ -434,6 +434,52 @@ def test_matmul(self):
             self.assertEqual(ret00.dtype, ht.float)
             self.assertEqual(ret00.split, 0)
 
+    def test_norm(self):
+        a = ht.arange(9, dtype=ht.float32, split=0) - 4
+        self.assertTrue(
+            ht.allclose(ht.linalg.norm(a), ht.float32(np.linalg.norm(a.numpy())).item(), atol=1e-5)
+        )
+        a.resplit_(axis=None)
+        self.assertTrue(
+            ht.allclose(ht.linalg.norm(a), ht.float32(np.linalg.norm(a.numpy())).item(), atol=1e-5)
+        )
+
+        b = ht.array([[-4.0, -3.0, -2.0], [-1.0, 0.0, 1.0], [2.0, 3.0, 4.0]], split=0)
+        self.assertTrue(
+            ht.allclose(ht.linalg.norm(b), ht.float32(np.linalg.norm(b.numpy())).item(), atol=1e-5)
+        )
+        b.resplit_(axis=1)
+        self.assertTrue(
+            ht.allclose(ht.linalg.norm(b), ht.float32(np.linalg.norm(b.numpy())).item(), atol=1e-5)
+        )
+
+        with self.assertRaises(TypeError):
+            c = np.arange(9) - 4
+            ht.linalg.norm(c)
+
+    def test_projection(self):
+        a = ht.arange(1, 4, dtype=ht.float32, split=None)
+        e1 = ht.array([1, 0, 0], dtype=ht.float32, split=None)
+        self.assertTrue(ht.equal(ht.linalg.projection(a, e1), e1))
+
+        a.resplit_(axis=0)
+        self.assertTrue(ht.equal(ht.linalg.projection(a, e1), e1))
+
+        e2 = ht.array([0, 1, 0], dtype=ht.float32, split=0)
+        self.assertTrue(ht.equal(ht.linalg.projection(a, e2), e2 * 2))
+
+        a = ht.arange(1, 4, dtype=ht.float32, split=None)
+        e3 = ht.array([0, 0, 1], dtype=ht.float32, split=0)
+        self.assertTrue(ht.equal(ht.linalg.projection(a, e3), e3 * 3))
+
+        a = np.arange(1, 4)
+        with self.assertRaises(TypeError):
+            ht.linalg.projection(a, e1)
+
+        a = ht.array([[1], [2], [3]], dtype=ht.float32, split=None)
+        with self.assertRaises(RuntimeError):
+            ht.linalg.projection(a, e1)
+
     def test_transpose(self):
         # vector transpose, not distributed
         vector = ht.arange(10, device=ht_device)

From ecfc9c1454bb1edb6eb708420919a3d25fb04556 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Wed, 5 Feb 2020 15:50:17 +0100
Subject: [PATCH 11/49] Testing for CG Algorithm

---
 heat/core/tests/test_linalg.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/heat/core/tests/test_linalg.py b/heat/core/tests/test_linalg.py
index 6c51bb78fd..742f9ac1dd 100644
--- a/heat/core/tests/test_linalg.py
+++ b/heat/core/tests/test_linalg.py
@@ -18,6 +18,28 @@
 
 
 class TestLinalg(unittest.TestCase):
+    def test_cg(self):
+        size = ht.communication.MPI_WORLD.size * 3
+        b = ht.arange(1, size + 1, dtype=ht.float32, split=0)
+        A = ht.manipulations.diag(b)
+        x0 = ht.random.rand(size, dtype=b.dtype, split=b.split)
+
+        x = ht.ones(b.shape, dtype=b.dtype, split=b.split)
+
+        res = ht.linalg.cg(A, b, x0)
+        self.assertTrue(ht.allclose(x, res, atol=1e-3))
+
+        b_np = np.arange(1, size + 1)
+        with self.assertRaises(TypeError):
+            res = ht.linalg.cg(A, b_np, x0)
+
+        with self.assertRaises(RuntimeError):
+            res = ht.linalg.cg(A, A, x0)
+        with self.assertRaises(RuntimeError):
+            res = ht.linalg.cg(b, b, x0)
+        with self.assertRaises(RuntimeError):
+            res = ht.linalg.cg(A, b, A)
+
     def test_dot(self):
         # ONLY TESTING CORRECTNESS! ALL CALLS IN DOT ARE PREVIOUSLY TESTED
         # cases to test:

From 9af8bed0677d84ccdefa16edaef09e00ddf3e505 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Thu, 6 Feb 2020 12:31:31 +0100
Subject: [PATCH 12/49] Extended Unit tests for spatial/distance.py module

---
 heat/spatial/distance.py             |  14 ++-
 heat/spatial/tests/test_distances.py | 142 +++++++++++++++++++++++----
 2 files changed, 133 insertions(+), 23 deletions(-)

diff --git a/heat/spatial/distance.py b/heat/spatial/distance.py
index a7344b848e..586a25eaa5 100644
--- a/heat/spatial/distance.py
+++ b/heat/spatial/distance.py
@@ -52,7 +52,10 @@ def rbf(X, Y=None, sigma=1.0, quadratic_expansion=False):
 
 def _dist(X, Y=None, metric=_euclidian):
     if len(X.shape) > 2:
-        raise NotImplementedError("Only 2D data matrices are supported")
+        raise NotImplementedError("Only 2D data matrices are currently supported")
+
+    if X.dtype != core.float32:
+        raise NotImplementedError("Currently only float32 is supported as input datatype")
 
     if Y is None:
         d = core.factories.zeros(
@@ -182,11 +185,18 @@ def _dist(X, Y=None, metric=_euclidian):
         if X.comm != Y.comm:
             raise NotImplementedError("Differing communicators not supported")
 
+        if Y.dtype != core.float32:
+            raise NotImplementedError("Currently only float32 is supported as input datatype")
+
         if X.split is None:
             if Y.split is None:
                 split = None
+            elif Y.split == 0:
+                split = 1
             else:
-                split = Y.split
+                raise NotImplementedError(
+                    "Splittings other than 0 or None currently not supported."
+                )
         else:
             # ToDo Für split implementation >1: split = min(X,split, Y.split)
             split = X.split
diff --git a/heat/spatial/tests/test_distances.py b/heat/spatial/tests/test_distances.py
index a440696483..57efcae9a2 100644
--- a/heat/spatial/tests/test_distances.py
+++ b/heat/spatial/tests/test_distances.py
@@ -5,6 +5,7 @@
 
 import heat as ht
 import numpy as np
+import math
 
 if os.environ.get("DEVICE") == "gpu" and torch.cuda.is_available():
     ht.use_device("gpu")
@@ -21,43 +22,142 @@
 
 class TestDistances(unittest.TestCase):
     def test_cdist(self):
-        split = None
-        X = ht.ones((4, 4), dtype=ht.float32, split=split)
+        X = ht.ones((4, 4), dtype=ht.float32, split=None)
         Y = ht.zeros((4, 4), dtype=ht.float32, split=None)
+        res_XX_cdist = ht.zeros((4, 4), dtype=ht.float32, split=None)
+        res_XX_rbf = ht.ones((4, 4), dtype=ht.float32, split=None)
+        res_XY_cdist = ht.ones((4, 4), dtype=ht.float32, split=None) * 2
+        res_XY_rbf = ht.ones((4, 4), dtype=ht.float32, split=None) * math.exp(-1.0)
 
-        res = ht.ones((4, 4), dtype=ht.float32, split=split) * 2
+        # Case 1a: X.split == None, Y == None
+        d = ht.spatial.cdist(X, quadratic_expansion=False)
+        self.assertTrue(ht.equal(d, res_XX_cdist))
+        self.assertEqual(d.split, None)
 
+        d = ht.spatial.cdist(X, quadratic_expansion=True)
+        self.assertTrue(ht.equal(d, res_XX_cdist))
+        self.assertEqual(d.split, None)
+
+        d = ht.spatial.rbf(X, quadratic_expansion=False)
+        self.assertTrue(ht.equal(d, res_XX_rbf))
+        self.assertEqual(d.split, None)
+
+        d = ht.spatial.rbf(X, quadratic_expansion=True)
+        self.assertTrue(ht.equal(d, res_XX_rbf))
+        self.assertEqual(d.split, None)
+
+        # Case 1b: X.split == None, Y != None, Y.split == None
         d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
-        self.assertTrue(ht.equal(d, res))
-        self.assertEqual(d.split, split)
+        self.assertTrue(ht.equal(d, res_XY_cdist))
+        self.assertEqual(d.split, None)
 
         d = ht.spatial.cdist(X, Y, quadratic_expansion=True)
-        self.assertTrue(ht.equal(d, res))
-        self.assertEqual(d.split, split)
+        self.assertTrue(ht.equal(d, res_XY_cdist))
+        self.assertEqual(d.split, None)
+
+        d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=False)
+        self.assertTrue(ht.equal(d, res_XY_rbf))
+        self.assertEqual(d.split, None)
 
-        split = 0
-        X = ht.ones((4, 4), dtype=ht.float32, split=split)
-        res = ht.ones((4, 4), dtype=ht.float32, split=split) * 2
+        d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=True)
+        self.assertTrue(ht.equal(d, res_XY_rbf))
+        self.assertEqual(d.split, None)
+
+        # Case 1c: X.split == None, Y != None, Y.split == 0
+        Y.resplit_(axis=0)
+        res_XX_cdist.resplit_(axis=1)
+        res_XX_rbf.resplit_(axis=1)
+        res_XY_cdist.resplit_(axis=1)
+        res_XY_rbf.resplit_(axis=1)
 
         d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
-        self.assertTrue(ht.equal(d, res))
-        self.assertEqual(d.split, split)
+        self.assertTrue(ht.equal(d, res_XY_cdist))
+        self.assertEqual(d.split, 1)
 
         d = ht.spatial.cdist(X, Y, quadratic_expansion=True)
-        self.assertTrue(ht.equal(d, res))
-        self.assertEqual(d.split, split)
+        self.assertTrue(ht.equal(d, res_XY_cdist))
+        self.assertEqual(d.split, 1)
+
+        d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=False)
+        self.assertTrue(ht.equal(d, res_XY_rbf))
+        self.assertEqual(d.split, 1)
+
+        d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=True)
+        self.assertTrue(ht.equal(d, res_XY_rbf))
+        self.assertEqual(d.split, 1)
+
+        # Case 2a: X.split == 0, Y == None
+        X.resplit_(axis=0)
+        Y.resplit_(axis=None)
+        res_XX_cdist.resplit_(axis=0)
+        res_XX_rbf.resplit_(axis=0)
+        res_XY_cdist.resplit_(axis=0)
+        res_XY_rbf.resplit_(axis=0)
 
-        Y = ht.zeros((4, 4), dtype=ht.float32, split=split)
+        d = ht.spatial.cdist(X, quadratic_expansion=False)
+        self.assertTrue(ht.equal(d, res_XX_cdist))
+        self.assertEqual(d.split, 0)
+
+        d = ht.spatial.cdist(X, quadratic_expansion=True)
+        self.assertTrue(ht.equal(d, res_XX_cdist))
+        self.assertEqual(d.split, 0)
+
+        d = ht.spatial.rbf(X, quadratic_expansion=False)
+        self.assertTrue(ht.equal(d, res_XX_rbf))
+        self.assertEqual(d.split, 0)
+
+        d = ht.spatial.rbf(X, quadratic_expansion=True)
+        self.assertTrue(ht.equal(d, res_XX_rbf))
+        self.assertEqual(d.split, 0)
+
+        # Case 2b: X.split == 0, Y != None, Y.split == None
+        d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
+        self.assertTrue(ht.equal(d, res_XY_cdist))
+        self.assertEqual(d.split, 0)
+
+        d = ht.spatial.cdist(X, Y, quadratic_expansion=True)
+        self.assertTrue(ht.equal(d, res_XY_cdist))
+        self.assertEqual(d.split, 0)
 
+        d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=False)
+        self.assertTrue(ht.equal(d, res_XY_rbf))
+        self.assertEqual(d.split, 0)
+
+        d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=True)
+        self.assertTrue(ht.equal(d, res_XY_rbf))
+        self.assertEqual(d.split, 0)
+
+        # Case 2c: X.split == 0, Y != None, Y.split == 0
+        Y.resplit_(axis=0)
+
+        d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
+        self.assertTrue(ht.equal(d, res_XY_cdist))
+        self.assertEqual(d.split, 0)
+
+        d = ht.spatial.cdist(X, Y, quadratic_expansion=True)
+        self.assertTrue(ht.equal(d, res_XY_cdist))
+        self.assertEqual(d.split, 0)
+
+        d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=False)
+        self.assertTrue(ht.equal(d, res_XY_rbf))
+        self.assertEqual(d.split, 0)
+
+        d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=True)
+        self.assertTrue(ht.equal(d, res_XY_rbf))
+        self.assertEqual(d.split, 0)
+
+        # Case 3 X.split == 1
+        X.resplit_(axis=1)
         with self.assertRaises(NotImplementedError):
-            d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
+            d = ht.spatial.cdist(X)
         with self.assertRaises(NotImplementedError):
-            d = ht.spatial.cdist(X, Y, quadratic_expansion=True)
-
-        X = ht.ones((4, 4), dtype=ht.float32, split=1)
-        Y = ht.zeros((4, 4), dtype=ht.float32, split=None)
+            d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
 
+        X.resplit_(axis=0)
+        Y = ht.ones((4, 4), dtype=ht.float64, split=None)
         with self.assertRaises(NotImplementedError):
             d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
+
+        X = ht.ones((4, 4), dtype=ht.float64, split=None)
         with self.assertRaises(NotImplementedError):
-            d = ht.spatial.cdist(X, Y, quadratic_expansion=True)
+            d = ht.spatial.cdist(X)

From b32e8529f724b15c954a7bca9cb40839a6e4e740 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Thu, 6 Feb 2020 13:57:06 +0100
Subject: [PATCH 13/49] Bugfixes after first full testrun

---
 heat/cluster/__init__.py |  1 +
 heat/cluster/spectral.py | 50 ++++++++++++++++++----------------------
 heat/core/linalg.py      |  4 ++--
 heat/spatial/distance.py | 12 ++++++++--
 4 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/heat/cluster/__init__.py b/heat/cluster/__init__.py
index 13d73a0cae..4f3b4db275 100644
--- a/heat/cluster/__init__.py
+++ b/heat/cluster/__init__.py
@@ -1 +1,2 @@
 from .kmeans import *
+from .spectral import *
diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 9c0fbee3fa..648e2f1dc0 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -1,14 +1,7 @@
 import torch
+import math
 import numpy as np
-
-from .. import factories
-from .. import manipulations
-from .. import types
-from .. import operations
-from ..spatial import distance
-from .. import linalg
-from .. import random
-from . import KMeans
+import heat as ht
 
 
 def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
@@ -19,9 +12,9 @@ def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
             )
 
         if upper is not None:
-            A = types.int(S < upper) - factories.eye(S.shape, dtype=types.int, split=S.split)
+            A = ht.int(S < upper) - ht.eye(S.shape, dtype=ht.int, split=S.split)
         elif lower is not None:
-            A = types.int(S > upper) - factories.eye(S.shape, dtype=types.int, split=S.split)
+            A = ht.int(S > upper) - ht.eye(S.shape, dtype=ht.int, split=S.split)
     elif mode == "fc":
         A = S
     else:
@@ -29,21 +22,21 @@ def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
             "Only eNeighborhood and fully-connected graphs supported at the moment."
         )
     if norm:
-        degree = operations.sqrt(1.0 / operations.sum(A, axis=1))
+        degree = ht.sqrt(1.0 / ht.sum(A, axis=1))
     else:
         degree = sum(A, axis=1)
 
-    D = manipulations.diag(degree, dtype=S.dtype, split=S.split, device=S.device, comm=S.comm)
+    D = ht.diag(degree)
 
     if norm:
-        L = factories.eye(A.shape, split=A.split) - linalg.matmul(D, linalg.matmul(S, D))
+        L = ht.eye(A.shape, split=A.split) - ht.matmul(D, ht.matmul(S, D))
     else:
         L = D - A
 
     return L
 
 
-class spectral:
+class Spectral:
     def __init__(
         self,
         n_clusters=None,
@@ -54,6 +47,7 @@ def __init__(
         boundary="upper",
         normalize=True,
         n_lanczos=300,
+        assign_labels="kmeans",
     ):
         """
         Spectral clustering
@@ -83,6 +77,9 @@ def __init__(
             normalized vs. unnormalized Laplacian
         n_lanczos : int
             number of Lanczos iterations for Eigenvalue decomposition
+        assign_labels: str, default = 'kmeans'
+             The strategy to use to assign labels in the embedding space.
+             'kmeans'
 
         """
         self.n_clusters = n_clusters
@@ -101,15 +98,16 @@ def __init__(
 
     def fit(self, X):
         # 1. Calculation of Adjacency Matrix
-        if self.kernel == "rbf":
-            if self.sigma is None:
+        if self.metric == "rbf":
+            if self.gamma is None:
                 raise ValueError(
                     "For usage of RBF kernel please specify the scaling factor sigma in class instantiation"
                 )
-            self._similarity = distance.rbf(X, self.sigma)
+            sig = math.sqrt(1 / (2 * self.gamma))
+            self._similarity = ht.spatial.rbf(X, sigma=sig)
 
-        elif self.kernel == "euclidean":
-            self._similarity = distance.cdist(X)
+        elif self.metric == "euclidean":
+            self._similarity = ht.spatial.cdist(X)
         else:
             raise NotImplementedError("Other kernels currently not implemented")
 
@@ -134,14 +132,14 @@ def fit(self, X):
             raise NotImplementedError("Other approaches currently not implemented")
 
         # 3. Eigenvalue and -vector Calculation
-        vr = random.rand(L.shape[0], split=L.split, dtype=types.float64)
-        v0 = vr / linalg.norm(vr)
-        Vg_norm, Tg_norm = linalg.lanczos(L, self.n_lanczos, v0)
+        vr = ht.random.rand(L.shape[0], split=L.split, dtype=ht.float32)
+        v0 = vr / ht.norm(vr)
+        Vg_norm, Tg_norm = ht.lanczos(L, self.n_lanczos, v0)
 
         # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
         eval, evec = torch.eig(Tg_norm._DNDarray__array, eigenvectors=True)
         # If x is an Eigenvector of T, then y = V@x is the corresponding Eigenvector of L
-        self._eigenvectors = linalg.matmul(Vg_norm, factories.array(evec))
+        self._eigenvectors = ht.matmul(Vg_norm, ht.array(evec))
         self._eigenvalues, idx = torch.sort(eval[:, 0], dim=0)
         self._eigenvalues = self._eigenvectors[:, idx]
 
@@ -151,9 +149,7 @@ def fit(self, X):
 
         components = self._eigenvectors[:, : self.n_clusters].copy()
 
-        kmeans = KMeans(n_clusters=self.n_clusters, init="kmeans++")
+        kmeans = ht.cluster.KMeans(n_clusters=self.n_clusters, init="kmeans++")
         kmeans.fit(components)
-        # cluster = kmeans.predict(self.n_clusters)
-        # centroids = kmeans.cluster_centers_
 
         return
diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index 4fe9f90c5d..9808422095 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -174,9 +174,9 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         raise TypeError("Input Matrix A needs to be symmetric.")
     T = factories.zeros((m, m))
     if A.split == 0:
-        V = factories.zeros((n, m), split=A.split, dtype=types.float64)
+        V = factories.zeros((n, m), split=A.split, dtype=types.float32)
     else:
-        V = factories.zeros((n, m), dtype=types.float64)
+        V = factories.zeros((n, m), dtype=types.float32)
 
     if v0 is None:
         vr = random.rand(n)
diff --git a/heat/spatial/distance.py b/heat/spatial/distance.py
index 586a25eaa5..1bddc126ab 100644
--- a/heat/spatial/distance.py
+++ b/heat/spatial/distance.py
@@ -55,7 +55,11 @@ def _dist(X, Y=None, metric=_euclidian):
         raise NotImplementedError("Only 2D data matrices are currently supported")
 
     if X.dtype != core.float32:
-        raise NotImplementedError("Currently only float32 is supported as input datatype")
+        raise NotImplementedError(
+            "Datatype needs to be float32, but was {}. This is currently not supported as input".format(
+                X.dtype
+            )
+        )
 
     if Y is None:
         d = core.factories.zeros(
@@ -186,7 +190,11 @@ def _dist(X, Y=None, metric=_euclidian):
             raise NotImplementedError("Differing communicators not supported")
 
         if Y.dtype != core.float32:
-            raise NotImplementedError("Currently only float32 is supported as input datatype")
+            raise NotImplementedError(
+                "Datatype needs to be float32, but was {}. This is currently not supported as input".format(
+                    Y.dtype
+                )
+            )
 
         if X.split is None:
             if Y.split is None:

From 2caf243489451b46b74197f3e0d6f2afe75d977f Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 7 Feb 2020 09:35:02 +0100
Subject: [PATCH 14/49] Bugfix

---
 heat/spatial/distance.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/heat/spatial/distance.py b/heat/spatial/distance.py
index 1bddc126ab..0eaa48f107 100644
--- a/heat/spatial/distance.py
+++ b/heat/spatial/distance.py
@@ -62,8 +62,12 @@ def _dist(X, Y=None, metric=_euclidian):
         )
 
     if Y is None:
-        d = core.factories.zeros(
-            (X.shape[0], X.shape[0]), dtype=X.dtype, split=X.split, device=X.device, comm=X.comm
+        d = core.zeros(
+            (X.shape[0], X.shape[0]),
+            dtype=core.float32,
+            split=X.split,
+            device=X.device,
+            comm=X.comm,
         )
         if X.split is None:
             d._DNDarray__array = metric(X._DNDarray__array, X._DNDarray__array)
@@ -79,6 +83,7 @@ def _dist(X, Y=None, metric=_euclidian):
 
             stationary = X._DNDarray__array
             rows = (displ[rank], displ[rank + 1] if (rank + 1) != size else K)
+
             # 0th iteration, calculate diagonal
             d_ij = metric(stationary, stationary)
             d[rows[0] : rows[1], rows[0] : rows[1]] = d_ij
@@ -124,9 +129,8 @@ def _dist(X, Y=None, metric=_euclidian):
                     scol2 = K
                 scolumns = (scol1, scol2)
                 symmetric = torch.zeros(
-                    scolumns[1] - scolumns[0], (rows[1] - rows[0]), dtype=torch.float64
+                    scolumns[1] - scolumns[0], (rows[1] - rows[0]), dtype=torch.float32
                 )
-                # Receive calculated tile
                 if (rank // iter) != 0:
                     comm.Recv(symmetric, source=receiver, tag=iter)
 
@@ -230,7 +234,6 @@ def _dist(X, Y=None, metric=_euclidian):
                 if X.shape[1] != Y.shape[1]:
                     raise ValueError("Inputs must have same shape[1]")
 
-                # print("Rank {}\n X._DNDarray__array  = {}, \nY._DNDarray__array = {}".format(X.comm.rank,X._DNDarray__array, Y._DNDarray__array))
                 comm = X.comm
                 rank = comm.Get_rank()
                 size = comm.Get_size()

From ffe60b6bf76b7cd084cf5af6f01b060b37161bba Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 7 Feb 2020 10:28:25 +0100
Subject: [PATCH 15/49] Add support of float64 and typecasting to distance
 calculation

---
 heat/spatial/distance.py | 86 +++++++++++++++++++++++++---------------
 1 file changed, 55 insertions(+), 31 deletions(-)

diff --git a/heat/spatial/distance.py b/heat/spatial/distance.py
index 0eaa48f107..354b6d738a 100644
--- a/heat/spatial/distance.py
+++ b/heat/spatial/distance.py
@@ -54,20 +54,27 @@ def _dist(X, Y=None, metric=_euclidian):
     if len(X.shape) > 2:
         raise NotImplementedError("Only 2D data matrices are currently supported")
 
-    if X.dtype != core.float32:
-        raise NotImplementedError(
-            "Datatype needs to be float32, but was {}. This is currently not supported as input".format(
-                X.dtype
-            )
-        )
+    # if X.dtype != core.float32:
+    #    raise NotImplementedError(
+    #        "Datatype needs to be float32, but was {}. This is currently not supported as input".format(
+    #            X.dtype
+    #        )
+    #    )
 
     if Y is None:
+        if X.dtype == core.float32:
+            torch_type = torch.float32
+            mpi_type = MPI.FLOAT
+        elif X.dtype == core.float64:
+            torch_type = torch.float64
+            mpi_type = MPI.DOUBLE
+        else:
+            raise NotImplementedError(
+                "Datatype {} currently not supported as input".format(X.dtype)
+            )
+
         d = core.zeros(
-            (X.shape[0], X.shape[0]),
-            dtype=core.float32,
-            split=X.split,
-            device=X.device,
-            comm=X.comm,
+            (X.shape[0], X.shape[0]), dtype=X.dtype, split=X.split, device=X.device, comm=X.comm
         )
         if X.split is None:
             d._DNDarray__array = metric(X._DNDarray__array, X._DNDarray__array)
@@ -86,6 +93,7 @@ def _dist(X, Y=None, metric=_euclidian):
 
             # 0th iteration, calculate diagonal
             d_ij = metric(stationary, stationary)
+            # print("Step 0, rank {}, d_ji = {}".format(rank,d_ij))
             d[rows[0] : rows[1], rows[0] : rows[1]] = d_ij
             for iter in range(1, num_iter):
                 # Send rank's part of the matrix to the next process in a circular fashion
@@ -103,10 +111,9 @@ def _dist(X, Y=None, metric=_euclidian):
                 if (rank // iter) != 0:
                     stat = MPI.Status()
                     comm.handle.Probe(source=sender, tag=iter, status=stat)
-                    count = int(stat.Get_count(MPI.FLOAT) / f)
-                    moving = torch.zeros((count, f), dtype=torch.float32)
+                    count = int(stat.Get_count(mpi_type) / f)
+                    moving = torch.zeros((count, f), dtype=torch_type)
                     comm.Recv(moving, source=sender, tag=iter)
-
                 # Sending to next Process
                 comm.Send(stationary, dest=receiver, tag=iter)
 
@@ -114,11 +121,13 @@ def _dist(X, Y=None, metric=_euclidian):
                 if (rank // iter) == 0:
                     stat = MPI.Status()
                     comm.handle.Probe(source=sender, tag=iter, status=stat)
-                    count = int(stat.Get_count(MPI.FLOAT) / f)
-                    moving = torch.zeros((count, f), dtype=torch.float32)
+                    count = int(stat.Get_count(mpi_type) / f)
+                    moving = torch.zeros((count, f), dtype=torch_type)
                     comm.Recv(moving, source=sender, tag=iter)
 
+                # print("Step {}, rank {} received moving = {} from rank {}".format(iter, rank, moving, sender))
                 d_ij = metric(stationary, moving)
+                # print("Step {}, rank {} d_ji = {}".format(iter,rank, d_ij))
                 d[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
 
                 # Receive calculated tile
@@ -129,7 +138,7 @@ def _dist(X, Y=None, metric=_euclidian):
                     scol2 = K
                 scolumns = (scol1, scol2)
                 symmetric = torch.zeros(
-                    scolumns[1] - scolumns[0], (rows[1] - rows[0]), dtype=torch.float32
+                    scolumns[1] - scolumns[0], (rows[1] - rows[0]), dtype=torch_type
                 )
                 if (rank // iter) != 0:
                     comm.Recv(symmetric, source=receiver, tag=iter)
@@ -138,6 +147,7 @@ def _dist(X, Y=None, metric=_euclidian):
                 comm.Send(d_ij, dest=sender, tag=iter)
                 if (rank // iter) == 0:
                     comm.Recv(symmetric, source=receiver, tag=iter)
+                # print("Step {}, rank {} received symmetric = {} from rank {}".format(iter, rank, symmetric, receiver))
                 d[rows[0] : rows[1], scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
 
             if (size + 1) % 2 != 0:  # we need one mor iteration for the first n/2 processes
@@ -148,8 +158,8 @@ def _dist(X, Y=None, metric=_euclidian):
                 if rank < (size // 2):
                     stat = MPI.Status()
                     comm.handle.Probe(source=sender, tag=num_iter, status=stat)
-                    count = int(stat.Get_count(MPI.FLOAT) / f)
-                    moving = torch.zeros((count, f), dtype=torch.float32)
+                    count = int(stat.Get_count(mpi_type) / f)
+                    moving = torch.zeros((count, f), dtype=torch_type)
                     comm.Recv(moving, source=sender, tag=num_iter)
 
                     col1 = displ[sender]
@@ -177,7 +187,7 @@ def _dist(X, Y=None, metric=_euclidian):
                         scol2 = K
                     scolumns = (scol1, scol2)
                     symmetric = torch.zeros(
-                        (scolumns[1] - scolumns[0], rows[1] - rows[0]), dtype=torch.float32
+                        (scolumns[1] - scolumns[0], rows[1] - rows[0]), dtype=torch_type
                     )
                     comm.Recv(symmetric, source=receiver, tag=num_iter)
                     d[rows[0] : rows[1], scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
@@ -193,12 +203,12 @@ def _dist(X, Y=None, metric=_euclidian):
         if X.comm != Y.comm:
             raise NotImplementedError("Differing communicators not supported")
 
-        if Y.dtype != core.float32:
-            raise NotImplementedError(
-                "Datatype needs to be float32, but was {}. This is currently not supported as input".format(
-                    Y.dtype
-                )
-            )
+        # if Y.dtype != core.float32:
+        #    raise NotImplementedError(
+        #        "Datatype needs to be float32, but was {}. This is currently not supported as input".format(
+        #            Y.dtype
+        #        )
+        #    )
 
         if X.split is None:
             if Y.split is None:
@@ -213,8 +223,22 @@ def _dist(X, Y=None, metric=_euclidian):
             # ToDo Für split implementation >1: split = min(X,split, Y.split)
             split = X.split
 
+        promoted_type = core.promote_types(X.dtype, Y.dtype)
+        X = X.astype(promoted_type)
+        Y = Y.astype(promoted_type)
+        if promoted_type == core.float32:
+            torch_type = torch.float32
+            mpi_type = MPI.FLOAT
+        elif promoted_type == core.float64:
+            torch_type = torch.float64
+            mpi_type = MPI.DOUBLE
+        else:
+            raise NotImplementedError(
+                "Datatype {} currently not supported as input".format(X.dtype)
+            )
+
         d = core.factories.zeros(
-            (X.shape[0], Y.shape[0]), dtype=X.dtype, split=split, device=X.device, comm=X.comm
+            (X.shape[0], Y.shape[0]), dtype=promoted_type, split=split, device=X.device, comm=X.comm
         )
 
         if X.split is None:
@@ -270,8 +294,8 @@ def _dist(X, Y=None, metric=_euclidian):
                     if (rank // iter) != 0:
                         stat = MPI.Status()
                         Y.comm.handle.Probe(source=sender, tag=iter, status=stat)
-                        count = int(stat.Get_count(MPI.FLOAT) / f)
-                        moving = torch.zeros((count, f), dtype=torch.float32)
+                        count = int(stat.Get_count(mpi_type) / f)
+                        moving = torch.zeros((count, f), dtype=torch_type)
                         Y.comm.Recv(moving, source=sender, tag=iter)
 
                     # Sending to next Process
@@ -281,8 +305,8 @@ def _dist(X, Y=None, metric=_euclidian):
                     if (rank // iter) == 0:
                         stat = MPI.Status()
                         Y.comm.handle.Probe(source=sender, tag=iter, status=stat)
-                        count = int(stat.Get_count(MPI.FLOAT) / f)
-                        moving = torch.zeros((count, f), dtype=torch.float32)
+                        count = int(stat.Get_count(mpi_type) / f)
+                        moving = torch.zeros((count, f), dtype=torch_type)
                         Y.comm.Recv(moving, source=sender, tag=iter)
 
                     d_ij = metric(x_, moving)

From 0d3762b867a8a1b8d55efc714e98ffacefb0bb60 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 7 Feb 2020 10:38:44 +0100
Subject: [PATCH 16/49] Add support of float64 and typecasting to distance
 calculation

---
 heat/spatial/distance.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/heat/spatial/distance.py b/heat/spatial/distance.py
index 354b6d738a..61dd4a5de7 100644
--- a/heat/spatial/distance.py
+++ b/heat/spatial/distance.py
@@ -69,9 +69,18 @@ def _dist(X, Y=None, metric=_euclidian):
             torch_type = torch.float64
             mpi_type = MPI.DOUBLE
         else:
-            raise NotImplementedError(
-                "Datatype {} currently not supported as input".format(X.dtype)
-            )
+            promoted_type = core.promote_types(X.dtype, core.float32)
+            X = X.astype(promoted_type)
+            if promoted_type == core.float32:
+                torch_type = torch.float32
+                mpi_type = MPI.FLOAT
+            elif promoted_type == core.float64:
+                torch_type = torch.float64
+                mpi_type = MPI.DOUBLE
+            else:
+                raise NotImplementedError(
+                    "Datatype {} currently not supported as input".format(X.dtype)
+                )
 
         d = core.zeros(
             (X.shape[0], X.shape[0]), dtype=X.dtype, split=X.split, device=X.device, comm=X.comm

From c7c8c2edc5f1f78bbd729d18fc051afbf45d8b8a Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 7 Feb 2020 10:43:31 +0100
Subject: [PATCH 17/49] Support of int types and typecasting

---
 heat/spatial/distance.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/heat/spatial/distance.py b/heat/spatial/distance.py
index 61dd4a5de7..d377122867 100644
--- a/heat/spatial/distance.py
+++ b/heat/spatial/distance.py
@@ -233,6 +233,9 @@ def _dist(X, Y=None, metric=_euclidian):
             split = X.split
 
         promoted_type = core.promote_types(X.dtype, Y.dtype)
+        promoted_type = core.promote_types(promoted_type, core.float32)
+        print("Promoted type: ", promoted_type)
+
         X = X.astype(promoted_type)
         Y = Y.astype(promoted_type)
         if promoted_type == core.float32:

From cb5bdc6841a5fcebe10274bdb2078734b810d471 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 7 Feb 2020 14:34:34 +0100
Subject: [PATCH 18/49] Cosmetics and initial unit tests

---
 heat/spatial/distance.py             | 61 ++++++++++------------------
 heat/spatial/tests/test_distances.py | 21 ++++++----
 2 files changed, 34 insertions(+), 48 deletions(-)

diff --git a/heat/spatial/distance.py b/heat/spatial/distance.py
index d377122867..9b71fa0237 100644
--- a/heat/spatial/distance.py
+++ b/heat/spatial/distance.py
@@ -2,7 +2,8 @@
 import numpy as np
 from mpi4py import MPI
 
-from .. import core
+from ..core import factories
+from ..core import types
 
 __all__ = ["cdist", "rbf"]
 
@@ -54,27 +55,20 @@ def _dist(X, Y=None, metric=_euclidian):
     if len(X.shape) > 2:
         raise NotImplementedError("Only 2D data matrices are currently supported")
 
-    # if X.dtype != core.float32:
-    #    raise NotImplementedError(
-    #        "Datatype needs to be float32, but was {}. This is currently not supported as input".format(
-    #            X.dtype
-    #        )
-    #    )
-
     if Y is None:
-        if X.dtype == core.float32:
+        if X.dtype == types.float32:
             torch_type = torch.float32
             mpi_type = MPI.FLOAT
-        elif X.dtype == core.float64:
+        elif X.dtype == types.float64:
             torch_type = torch.float64
             mpi_type = MPI.DOUBLE
         else:
-            promoted_type = core.promote_types(X.dtype, core.float32)
+            promoted_type = types.promote_types(X.dtype, types.float32)
             X = X.astype(promoted_type)
-            if promoted_type == core.float32:
+            if promoted_type == types.float32:
                 torch_type = torch.float32
                 mpi_type = MPI.FLOAT
-            elif promoted_type == core.float64:
+            elif promoted_type == types.float64:
                 torch_type = torch.float64
                 mpi_type = MPI.DOUBLE
             else:
@@ -82,7 +76,7 @@ def _dist(X, Y=None, metric=_euclidian):
                     "Datatype {} currently not supported as input".format(X.dtype)
                 )
 
-        d = core.zeros(
+        d = factories.zeros(
             (X.shape[0], X.shape[0]), dtype=X.dtype, split=X.split, device=X.device, comm=X.comm
         )
         if X.split is None:
@@ -102,8 +96,7 @@ def _dist(X, Y=None, metric=_euclidian):
 
             # 0th iteration, calculate diagonal
             d_ij = metric(stationary, stationary)
-            # print("Step 0, rank {}, d_ji = {}".format(rank,d_ij))
-            d[rows[0] : rows[1], rows[0] : rows[1]] = d_ij
+            d._DNDarray__array[:, rows[0] : rows[1]] = d_ij
             for iter in range(1, num_iter):
                 # Send rank's part of the matrix to the next process in a circular fashion
                 receiver = (rank + iter) % size
@@ -134,10 +127,8 @@ def _dist(X, Y=None, metric=_euclidian):
                     moving = torch.zeros((count, f), dtype=torch_type)
                     comm.Recv(moving, source=sender, tag=iter)
 
-                # print("Step {}, rank {} received moving = {} from rank {}".format(iter, rank, moving, sender))
                 d_ij = metric(stationary, moving)
-                # print("Step {}, rank {} d_ji = {}".format(iter,rank, d_ij))
-                d[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
+                d._DNDarray__array[:, columns[0] : columns[1]] = d_ij
 
                 # Receive calculated tile
                 scol1 = displ[receiver]
@@ -156,8 +147,7 @@ def _dist(X, Y=None, metric=_euclidian):
                 comm.Send(d_ij, dest=sender, tag=iter)
                 if (rank // iter) == 0:
                     comm.Recv(symmetric, source=receiver, tag=iter)
-                # print("Step {}, rank {} received symmetric = {} from rank {}".format(iter, rank, symmetric, receiver))
-                d[rows[0] : rows[1], scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
+                d._DNDarray__array[:, scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
 
             if (size + 1) % 2 != 0:  # we need one mor iteration for the first n/2 processes
                 receiver = (rank + num_iter) % size
@@ -179,7 +169,7 @@ def _dist(X, Y=None, metric=_euclidian):
                     columns = (col1, col2)
 
                     d_ij = metric(stationary, moving)
-                    d[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
+                    d._DNDarray__array[:, columns[0] : columns[1]] = d_ij
 
                     # sending result back to sender of moving matrix (for symmetry)
                     comm.Send(d_ij, dest=sender, tag=num_iter)
@@ -199,7 +189,7 @@ def _dist(X, Y=None, metric=_euclidian):
                         (scolumns[1] - scolumns[0], rows[1] - rows[0]), dtype=torch_type
                     )
                     comm.Recv(symmetric, source=receiver, tag=num_iter)
-                    d[rows[0] : rows[1], scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
+                    d._DNDarray__array[:, scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
 
         else:
             raise NotImplementedError("Splittings other than 0 or None currently not supported.")
@@ -212,13 +202,6 @@ def _dist(X, Y=None, metric=_euclidian):
         if X.comm != Y.comm:
             raise NotImplementedError("Differing communicators not supported")
 
-        # if Y.dtype != core.float32:
-        #    raise NotImplementedError(
-        #        "Datatype needs to be float32, but was {}. This is currently not supported as input".format(
-        #            Y.dtype
-        #        )
-        #    )
-
         if X.split is None:
             if Y.split is None:
                 split = None
@@ -232,16 +215,14 @@ def _dist(X, Y=None, metric=_euclidian):
             # ToDo Für split implementation >1: split = min(X,split, Y.split)
             split = X.split
 
-        promoted_type = core.promote_types(X.dtype, Y.dtype)
-        promoted_type = core.promote_types(promoted_type, core.float32)
-        print("Promoted type: ", promoted_type)
-
+        promoted_type = types.promote_types(X.dtype, Y.dtype)
+        promoted_type = types.promote_types(promoted_type, types.float32)
         X = X.astype(promoted_type)
         Y = Y.astype(promoted_type)
-        if promoted_type == core.float32:
+        if promoted_type == types.float32:
             torch_type = torch.float32
             mpi_type = MPI.FLOAT
-        elif promoted_type == core.float64:
+        elif promoted_type == types.float64:
             torch_type = torch.float64
             mpi_type = MPI.DOUBLE
         else:
@@ -249,7 +230,7 @@ def _dist(X, Y=None, metric=_euclidian):
                 "Datatype {} currently not supported as input".format(X.dtype)
             )
 
-        d = core.factories.zeros(
+        d = factories.zeros(
             (X.shape[0], Y.shape[0]), dtype=promoted_type, split=split, device=X.device, comm=X.comm
         )
 
@@ -283,12 +264,12 @@ def _dist(X, Y=None, metric=_euclidian):
 
                 x_ = X._DNDarray__array
                 stationary = Y._DNDarray__array
-                rows = (xdispl[rank], xdispl[rank + 1] if (rank + 1) != size else m)
+                # rows = (xdispl[rank], xdispl[rank + 1] if (rank + 1) != size else m)
                 cols = (ydispl[rank], ydispl[rank + 1] if (rank + 1) != size else n)
 
                 # 0th iteration, calculate diagonal
                 d_ij = metric(x_, stationary)
-                d[rows[0] : rows[1], cols[0] : cols[1]] = d_ij
+                d._DNDarray__array[:, cols[0] : cols[1]] = d_ij
 
                 for iter in range(1, num_iter):
                     # Send rank's part of the matrix to the next process in a circular fashion
@@ -322,7 +303,7 @@ def _dist(X, Y=None, metric=_euclidian):
                         Y.comm.Recv(moving, source=sender, tag=iter)
 
                     d_ij = metric(x_, moving)
-                    d[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
+                    d._DNDarray__array[:, columns[0] : columns[1]] = d_ij
 
             else:
                 raise NotImplementedError(
diff --git a/heat/spatial/tests/test_distances.py b/heat/spatial/tests/test_distances.py
index 57efcae9a2..0b2bc4b5bc 100644
--- a/heat/spatial/tests/test_distances.py
+++ b/heat/spatial/tests/test_distances.py
@@ -153,11 +153,16 @@ def test_cdist(self):
         with self.assertRaises(NotImplementedError):
             d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
 
-        X.resplit_(axis=0)
-        Y = ht.ones((4, 4), dtype=ht.float64, split=None)
-        with self.assertRaises(NotImplementedError):
-            d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
-
-        X = ht.ones((4, 4), dtype=ht.float64, split=None)
-        with self.assertRaises(NotImplementedError):
-            d = ht.spatial.cdist(X)
+        n = ht.communication.MPI_WORLD.size
+        A = ht.ones((n * 2, 6), dtype=ht.float32, split=None)
+        for i in range(n):
+            A[2 * i, :] = A[2 * i, :] * (2 * i)
+            A[2 * i + 1, :] = A[2 * i + 1, :] * (2 * i + 1)
+        res = torch.cdist(A._DNDarray__array, A._DNDarray__array)
+
+        A.resplit_(axis=0)
+        B = A.astype(ht.int32)
+
+        d = ht.spatial.cdist(A, B, quadratic_expansion=False)
+        result = ht.array(res, dtype=ht.float64, split=0)
+        self.assertTrue(ht.equal(d, result))

From d360ebe7bdc989eeb016aa502964d03d7f4a2115 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 7 Feb 2020 14:44:20 +0100
Subject: [PATCH 19/49] Fixed typecasting in ht.array, and adjusted Unit tests

---
 heat/core/factories.py               | 4 ++++
 heat/spatial/tests/test_distances.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/heat/core/factories.py b/heat/core/factories.py
index 7e8ee248cb..24600e004b 100644
--- a/heat/core/factories.py
+++ b/heat/core/factories.py
@@ -303,6 +303,10 @@ def array(
     # infer dtype from obj if not explicitly given
     if dtype is None:
         dtype = types.canonical_heat_type(obj.dtype)
+    else:
+        torch_dtype = dtype.torch_type()
+        if obj.dtype != torch_dtype:
+            obj = obj.type(torch_dtype)
 
     # sanitize minimum number of dimensions
     if not isinstance(ndmin, int):
diff --git a/heat/spatial/tests/test_distances.py b/heat/spatial/tests/test_distances.py
index 0b2bc4b5bc..2800049ccb 100644
--- a/heat/spatial/tests/test_distances.py
+++ b/heat/spatial/tests/test_distances.py
@@ -165,4 +165,4 @@ def test_cdist(self):
 
         d = ht.spatial.cdist(A, B, quadratic_expansion=False)
         result = ht.array(res, dtype=ht.float64, split=0)
-        self.assertTrue(ht.equal(d, result))
+        self.assertTrue(ht.allclose(d, result, atol=1e-8))

From c9530d52a5444fa3b3404038f246e2ce12e119ba Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 7 Feb 2020 15:02:05 +0100
Subject: [PATCH 20/49] Added testing for different types in cdist

---
 heat/spatial/tests/test_distances.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/heat/spatial/tests/test_distances.py b/heat/spatial/tests/test_distances.py
index 2800049ccb..fe84df392b 100644
--- a/heat/spatial/tests/test_distances.py
+++ b/heat/spatial/tests/test_distances.py
@@ -166,3 +166,27 @@ def test_cdist(self):
         d = ht.spatial.cdist(A, B, quadratic_expansion=False)
         result = ht.array(res, dtype=ht.float64, split=0)
         self.assertTrue(ht.allclose(d, result, atol=1e-8))
+
+        n = ht.communication.MPI_WORLD.size
+        A = ht.ones((n * 2, 6), dtype=ht.float32, split=None)
+        for i in range(n):
+            A[2 * i, :] = A[2 * i, :] * (2 * i)
+            A[2 * i + 1, :] = A[2 * i + 1, :] * (2 * i + 1)
+        res = torch.cdist(A._DNDarray__array, A._DNDarray__array)
+
+        A.resplit_(axis=0)
+        B = A.astype(ht.int32)
+
+        d = ht.spatial.cdist(A, B, quadratic_expansion=False)
+        result = ht.array(res, dtype=ht.float64, split=0)
+        self.assertTrue(ht.allclose(d, result, atol=1e-8))
+
+        B = A.astype(ht.float64)
+        d = ht.spatial.cdist(A, B, quadratic_expansion=False)
+        result = ht.array(res, dtype=ht.float64, split=0)
+        self.assertTrue(ht.allclose(d, result, atol=1e-8))
+
+        B = A.astype(ht.int16)
+        d = ht.spatial.cdist(A, B, quadratic_expansion=False)
+        result = ht.array(res, dtype=ht.float32, split=0)
+        self.assertTrue(ht.allclose(d, result, atol=1e-8))

From 9286d44f3e54dc9489e84e78734c0b282d1c2ec6 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 7 Feb 2020 15:03:51 +0100
Subject: [PATCH 21/49] Removed old spatial module

---
 heat/core/spatial/__init__.py |   1 -
 heat/core/spatial/distance.py | 237 ----------------------------------
 2 files changed, 238 deletions(-)
 delete mode 100644 heat/core/spatial/__init__.py
 delete mode 100644 heat/core/spatial/distance.py

diff --git a/heat/core/spatial/__init__.py b/heat/core/spatial/__init__.py
deleted file mode 100644
index c3dbe9d6cd..0000000000
--- a/heat/core/spatial/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .distance import *
diff --git a/heat/core/spatial/distance.py b/heat/core/spatial/distance.py
deleted file mode 100644
index 943df36e89..0000000000
--- a/heat/core/spatial/distance.py
+++ /dev/null
@@ -1,237 +0,0 @@
-import torch
-
-from .. import communication
-from .. import factories
-from .. import types
-
-
-def _euclidian(x, y):
-    # X and Y are torch tensors
-    # k1, f1 = x.shape
-    # k2, f2 = y.shape
-    # if f1 != f2:
-    #    raise RuntimeError(
-    #        "X and Y have differing feature dimensions (dim = 1), should be equal, but are {} and {}".format(
-    #            f1, f2
-    #        )
-    #    )
-    # xd = x.unsqueeze(dim=1)
-    # yd = y.unsqueeze(dim=0)
-    # result = torch.zeros((k1, k2), dtype=torch.float64)
-
-    # for i in range(xd.shape[0]):
-    #    result[i, :] = ((yd - xd[i, :, :]) ** 2).sum(dim=-1).sqrt()
-    result = torch.cdist(x, y)
-    return result
-
-
-def _gaussian(x, y, sigma=1.0):
-    # X and Y are torch tensors
-    # k1, f1 = x.shape
-    # k2, f2 = y.shape
-    # if f1 != f2:
-    #    raise RuntimeError(
-    #        "X and Y have differing feature dimensions (dim = 1), should be equal, but are {} and {}".format(
-    #            f1, f2
-    #        )
-    #    )
-    # xd = x.unsqueeze(dim=1)
-    # yd = y.unsqueeze(dim=0)
-    # result = torch.zeros((k1, k2), dtype=torch.float64)
-    # for i in range(xd.shape[0]):
-    #   result[i, :] = torch.exp(
-    #      -((yd - yd[i, :, :]) ** 2).sum(dim=-1) / (2 * sigma * sigma)
-    # )
-    d = torch.cdist(x, y)
-    result = torch.exp(-d ** 2 / (2 * sigma * sigma))
-    return result
-
-
-def similarity(X, metric=_euclidian):
-    if (X.split is not None) and (X.split != 0):
-        raise NotImplementedError("Feature Splitting is not supported")
-    if len(X.shape) > 2:
-        raise NotImplementedError("Only 2D data matrices are supported")
-
-    comm = X.comm
-    rank = comm.Get_rank()
-    size = comm.Get_size()
-
-    K, f = X.shape
-
-    S = factories.zeros((K, K), dtype=types.float64, split=0)
-
-    counts, displ, _ = comm.counts_displs_shape(X.shape, X.split)
-    num_iter = (size + 1) // 2
-
-    stationary = X._DNDarray__array
-    rows = (displ[rank], displ[rank + 1] if (rank + 1) != size else K)
-
-    # 0th iteration, calculate diagonal
-    d_ij = metric(stationary, stationary)
-    S[rows[0] : rows[1], rows[0] : rows[1]] = d_ij
-
-    for iter in range(1, num_iter):
-        # Send rank's part of the matrix to the next process in a circular fashion
-        receiver = (rank + iter) % size
-        sender = (rank - iter) % size
-
-        col1 = displ[sender]
-        if sender != size - 1:
-            col2 = displ[sender + 1]
-        else:
-            col2 = K
-        columns = (col1, col2)
-
-        # All but the first iter processes are receiving, then sending
-        if (rank // iter) != 0:
-            stat = communication.Status()
-            comm.handle.Probe(source=sender, tag=iter, status=stat)
-            count = int(stat.Get_count(communication.FLOAT) / f)
-            moving = torch.zeros((count, f), dtype=torch.float32)
-            comm.Recv(moving, source=sender, tag=iter)
-
-        # Sending to next Process
-        comm.Send(stationary, dest=receiver, tag=iter)
-
-        # The first iter processes can now receive after sending
-        if (rank // iter) == 0:
-            stat = communication.Status()
-            comm.handle.Probe(source=sender, tag=iter, status=stat)
-            count = int(stat.Get_count(communication.FLOAT) / f)
-            moving = torch.zeros((count, f), dtype=torch.float32)
-            comm.Recv(moving, source=sender, tag=iter)
-
-        d_ij = metric(stationary, moving)
-        S[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
-
-        # Receive calculated tile
-        scol1 = displ[receiver]
-        if receiver != size - 1:
-            scol2 = displ[receiver + 1]
-        else:
-            scol2 = K
-        scolumns = (scol1, scol2)
-        symmetric = torch.zeros(scolumns[1] - scolumns[0], (rows[1] - rows[0]), dtype=torch.float64)
-        # Receive calculated tile
-        if (rank // iter) != 0:
-            comm.Recv(symmetric, source=receiver, tag=iter)
-
-        # sending result back to sender of moving matrix (for symmetry)
-        comm.Send(d_ij, dest=sender, tag=iter)
-        if (rank // iter) == 0:
-            comm.Recv(symmetric, source=receiver, tag=iter)
-        S[rows[0] : rows[1], scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
-
-    if (size + 1) % 2 != 0:  # we need one mor iteration for the first n/2 processes
-        receiver = (rank + num_iter) % size
-        sender = (rank - num_iter) % size
-
-        # Case 1: only receiving
-        if rank < (size // 2):
-            stat = communication.Status()
-            comm.handle.Probe(source=sender, tag=num_iter, status=stat)
-            count = int(stat.Get_count(communication.FLOAT) / f)
-            moving = torch.zeros((count, f), dtype=torch.float32)
-            comm.Recv(moving, source=sender, tag=num_iter)
-
-            col1 = displ[sender]
-            if sender != size - 1:
-                col2 = displ[sender + 1]
-            else:
-                col2 = K
-            columns = (col1, col2)
-
-            d_ij = metric(stationary, moving)
-            S[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
-            # sending result back to sender of moving matrix (for symmetry)
-            comm.Send(d_ij, dest=sender, tag=num_iter)
-
-        # Case 2 : only sending processes
-        else:
-            comm.Send(stationary, dest=receiver, tag=num_iter)
-
-            # Receiving back result
-            scol1 = displ[receiver]
-            if receiver != size - 1:
-                scol2 = displ[receiver + 1]
-            else:
-                scol2 = K
-            scolumns = (scol1, scol2)
-            symmetric = torch.zeros(
-                (scolumns[1] - scolumns[0], rows[1] - rows[0]), dtype=torch.float64
-            )
-            comm.Recv(symmetric, source=receiver, tag=num_iter)
-            S[rows[0] : rows[1], scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
-
-    return S
-
-
-def pairwise(X, Y, metric=_euclidian):
-    if ((X.split is not None) and (X.split != 0)) or ((X.split is not None) and (X.split != 0)):
-        raise NotImplementedError("Feature Splitting is not supported")
-    if len(X.shape) > 2 or len(Y.shape) > 2:
-        raise NotImplementedError("Only 2D data matrices are supported")
-    if X.comm != Y.comm:
-        raise NotImplementedError("Differing communicators not supported")
-
-    if X.shape[1] != Y.shape[1]:
-        raise ValueError("inputs must have same number of features")
-
-    comm = X.comm
-    rank = comm.Get_rank()
-    size = comm.Get_size()
-
-    m, f = X.shape
-    n = Y.shape[0]
-
-    S = factories.zeros((m, n), dtype=types.float64, split=0)
-
-    xcounts, xdispl, _ = X.comm.counts_displs_shape(X.shape, X.split)
-    ycounts, ydispl, _ = Y.comm.counts_displs_shape(Y.shape, Y.split)
-    num_iter = size
-
-    x_ = X._DNDarray__array
-    stationary = Y._DNDarray__array
-    rows = (xdispl[rank], xdispl[rank + 1] if (rank + 1) != size else m)
-    cols = (ydispl[rank], ydispl[rank + 1] if (rank + 1) != size else n)
-
-    # 0th iteration, calculate diagonal
-    d_ij = metric(x_, stationary)
-    S[rows[0] : rows[1], cols[0] : cols[1]] = d_ij
-
-    for iter in range(1, num_iter):
-        # Send rank's part of the matrix to the next process in a circular fashion
-        receiver = (rank + iter) % size
-        sender = (rank - iter) % size
-
-        col1 = ydispl[sender]
-        if sender != size - 1:
-            col2 = ydispl[sender + 1]
-        else:
-            col2 = n
-        columns = (col1, col2)
-
-        # All but the first iter processes are receiving, then sending
-        if (rank // iter) != 0:
-            stat = communication.Status()
-            Y.comm.handle.Probe(source=sender, tag=iter, status=stat)
-            count = int(stat.Get_count(communication.FLOAT) / f)
-            moving = torch.zeros((count, f), dtype=torch.float32)
-            Y.comm.Recv(moving, source=sender, tag=iter)
-
-        # Sending to next Process
-        Y.comm.Send(stationary, dest=receiver, tag=iter)
-
-        # The first iter processes can now receive after sending
-        if (rank // iter) == 0:
-            stat = communication.Status()
-            Y.comm.handle.Probe(source=sender, tag=iter, status=stat)
-            count = int(stat.Get_count(communication.FLOAT) / f)
-            moving = torch.zeros((count, f), dtype=torch.float32)
-            Y.comm.Recv(moving, source=sender, tag=iter)
-
-        d_ij = metric(stationary, moving)
-        S[rows[0] : rows[1], columns[0] : columns[1]] = d_ij
-
-    return S

From a3312c3e677aac4baaadd4cf3a4edc1da11e957e Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 7 Feb 2020 16:22:09 +0100
Subject: [PATCH 22/49] Cleanup

---
 heat/ml/cluster/kmeans.py.orig | 385 ---------------------------------
 1 file changed, 385 deletions(-)
 delete mode 100644 heat/ml/cluster/kmeans.py.orig

diff --git a/heat/ml/cluster/kmeans.py.orig b/heat/ml/cluster/kmeans.py.orig
deleted file mode 100644
index e6157e38de..0000000000
--- a/heat/ml/cluster/kmeans.py.orig
+++ /dev/null
@@ -1,385 +0,0 @@
-import sys
-
-import heat as ht
-
-
-class KMeans:
-    def __init__(self, n_clusters=8, init="random", max_iter=300, tol=1e-4, random_state=None):
-        """
-        K-Means clustering algorithm. An implementation of Lloyd's algorithm [1].
-
-        Parameters
-        ----------
-        n_clusters : int, optional, default: 8
-            The number of clusters to form as well as the number of centroids to generate.
-        init : {‘random’ or an ndarray}
-            Method for initialization, defaults to ‘random’:
-            ‘k-means++’ : selects initial cluster centers for the clustering in a smart way to speed up convergence [2].
-            ‘random’: choose k observations (rows) at random from data for the initial centroids.
-            If an ht.DNDarray is passed, it should be of shape (n_clusters, n_features) and gives the initial centers.
-        max_iter : int, default: 300
-            Maximum number of iterations of the k-means algorithm for a single run.
-        tol : float, default: 1e-4
-            Relative tolerance with regards to inertia to declare convergence.
-        random_state : int
-            Determines random number generation for centroid initialization.
-
-        Notes
-        -----
-        The average complexity is given by O(k*n*T), were n is the number of samples and T is the number of iterations.
-
-        In practice, the k-means algorithm is very fast, but it may fall into local minima. That is why it can be useful
-        to restart it several times. If the algorithm stops before fully converging (because of tol or max_iter),
-        labels_ and cluster_centers_ will not be consistent, i.e. the cluster_centers_ will not be the means of the
-        points in each cluster. Also, the estimator will reassign labels_ after the last iteration to make labels_
-        consistent with predict on the training set.
-
-        References
-        ----------
-        [1] Lloyd, Stuart P., "Least squares quantization in PCM", IEEE Transactions on Information Theory, 28 (2), pp.
-            129–137, 1982.
-        [2] Arthur, D., Vassilvitskii, S., "k-means++: The Advantages of Careful Seeding", Proceedings of the Eighteenth
-            Annual ACM-SIAM Symposium on Discrete Algorithms, Society for Industrial and Applied Mathematics
-            Philadelphia, PA, USA. pp. 1027–1035, 2007.
-        """
-        self.init = init
-        self.max_iter = max_iter
-        self.n_clusters = n_clusters
-        self.random_state = random_state
-        self.tol = tol
-
-        # in-place properties
-        self._cluster_centers = None
-        self._labels = None
-        self._inertia = None
-        self._n_iter = None
-
-    @property
-    def cluster_centers_(self):
-        """
-        Returns
-        -------
-        ht.DNDarray, shape=(n_clusters, n_features):
-            Coordinates of cluster centers. If the algorithm stops before fully converging (see tol and max_iter),
-            these will not be consistent with labels_.
-        """
-        return self._cluster_centers.squeeze(axis=0).T
-
-    @property
-    def labels_(self):
-        """
-        Returns
-        -------
-        ht.DNDarray, shape=(n_points):
-            Labels of each point.
-        """
-        return self._labels
-
-    @property
-    def inertia_(self):
-        """
-        Returns
-        -------
-        float:
-            Sum of squared distances of samples to their closest cluster center.
-        """
-        return self._inertia
-
-    @property
-    def n_iter_(self):
-        """
-        Returns
-        -------
-        int:
-            Number of iterations run.
-        """
-        return self._n_iter
-
-    def _initialize_cluster_centers(self, X):
-        """
-        Initializes the K-Means centroids.
-
-        Parameters
-        ----------
-        X : ht.DNDarray, shape=(n_point, n_features)
-            The data to initialize the clusters for.
-        """
-        # always initialize the random state
-        if self.random_state is not None:
-            ht.random.seed(self.random_state)
-
-        # initialize the centroids by randomly picking some of the points
-        if self.init == "random":
-            # Samples will be equally distributed drawn from all involved processes
-            _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
-            centroids = ht.empty(
-                (X.shape[1], self.n_clusters), split=None, device=X.device, comm=X.comm
-            )
-
-            if (X.split is None) or (X.split == 0):
-                for i in range(self.n_clusters):
-                    samplerange = (
-                        X.gshape[0] // self.n_clusters * i,
-                        X.gshape[0] // self.n_clusters * (i + 1),
-                    )
-                    sample = ht.random.randint(samplerange[0], samplerange[1]).item()
-                    proc = 0
-                    for p in range(X.comm.size):
-                        if displ[p] > sample:
-                            break
-                        proc = p
-                    xi = ht.zeros(X.shape[1], dtype=X.dtype)
-                    if X.comm.rank == proc:
-                        idx = sample - displ[proc]
-                        xi = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
-                    xi.comm.Bcast(xi, root=proc)
-                    centroids[:, i] = xi
-
-            else:
-                raise NotImplementedError("Not implemented for other splitting-axes")
-
-<<<<<<< HEAD
-            #self._cluster_centers = centroids.expand_dims(axis=0)
-            self._cluster_centers = centroids
-=======
-            self._cluster_centers = centroids.expand_dims(axis=0)
->>>>>>> fc71e7317d79438e8650775c2da59333328676b1
-
-        # directly passed centroids
-        elif isinstance(self.init, ht.DNDarray):
-            if len(self.init.shape) != 2:
-                raise ValueError(
-                    "passed centroids need to be two-dimensional, but are {}".format(len(self.init))
-                )
-            if self.init.shape[0] != self.n_clusters or self.init.shape[1] != X.shape[1]:
-                raise ValueError("passed centroids do not match cluster count or data shape")
-            self._cluster_centers = self.init.resplit(None).T.expand_dims(axis=0)
-
-        # kmeans++, smart centroid guessing
-        elif self.init == "kmeans++":
-            if (X.split is None) or (X.split == 0):
-                # X = X.expand_dims(axis=2)
-                centroids = ht.empty(
-                    (1, X.shape[1], self.n_clusters), split=None, device=X.device, comm=X.comm
-                )
-                sample = ht.random.randint(0, X.shape[0] - 1).item()
-                _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
-                proc = 0
-                for p in range(X.comm.size):
-                    if displ[p] > sample:
-                        break
-                    proc = p
-                x0 = ht.zeros(X.shape[1], dtype=X.dtype, device=X.device, comm=X.comm)
-                if X.comm.rank == proc:
-                    idx = sample - displ[proc]
-                    x0 = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
-                x0.comm.Bcast(x0, root=proc)
-                centroids[:, :, 0] = x0
-
-                for i in range(1, self.n_clusters):
-                    distances = ht.zeros(
-                        (X.shape[0], i), dtype=X.dtype, split=X.split, device=X.device, comm=X.comm
-                    )
-                    for k in range(i):
-                        distances[:, k] = ((X - centroids[:, :, k]) ** 2).sum(axis=1, keepdim=False)
-                    D2 = distances.min(axis=1)
-                    D2.resplit_(axis=None)
-                    D2 = D2.squeeze()
-                    prob = D2 / D2.sum()
-                    x = ht.random.rand().item()
-                    sample = 0
-                    sum = 0
-                    for j in range(len(prob)):
-                        if sum > x:
-                            break
-                        sum += prob[j].item()
-                        sample = j
-                    proc = 0
-                    for p in range(X.comm.size):
-                        if displ[p] > sample:
-                            break
-                        proc = p
-                    xi = ht.zeros(X.shape[1], dtype=X.dtype)
-                    if X.comm.rank == proc:
-                        idx = sample - displ[proc]
-                        xi = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
-                    xi.comm.Bcast(xi, root=proc)
-                    centroids[:, :, i] = xi
-            else:
-                raise NotImplementedError("Not implemented for other splitting-axes")
-
-            self._cluster_centers = centroids
-
-        else:
-            raise ValueError(
-                'init needs to be one of "random", ht.DNDarray or "kmeans++", but was {}'.format(
-                    self.init
-                )
-            )
-
-    def _fit_to_cluster(self, X):
-        """
-        Assigns the passed data points to
-
-        Parameters
-        ----------
-        X : ht.DNDarray, shape=(n_samples, n_features)
-            Training instances to cluster.
-        """
-        # calculate the distance matrix and determine the closest centroid
-        distances = ht.zeros(
-            (X.shape[0], self.n_clusters),
-            dtype=X.dtype,
-            split=X.split,
-            device=X.device,
-            comm=X.comm,
-        )
-        for k in range(self._cluster_centers.shape[2]):
-            distances[:, k] = ((X - self._cluster_centers[:, :, k]) ** 2).sum(axis=1, keepdim=False)
-        matching_centroids = distances.argmin(axis=1, keepdim=True)
-
-        return matching_centroids
-
-    def fit(self, X):
-        """
-        Computes the centroid of a k-means clustering.
-
-        Parameters
-        ----------
-        X : ht.DNDarray, shape=(n_samples, n_features)
-            Training instances to cluster.
-        """
-        # input sanitation
-        if not isinstance(X, ht.DNDarray):
-            raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
-
-        # initialize the clustering
-        self._initialize_cluster_centers(X)
-        self._n_iter = 0
-        matching_centroids = ht.zeros((X.shape[0]), split=X.split, device=X.device, comm=X.comm)
-
-        # X = X.expand_dims(axis=2)
-        new_cluster_centers = self._cluster_centers.copy()
-
-        # iteratively fit the points to the centroids
-        for epoch in range(self.max_iter):
-            # increment the iteration count
-            self._n_iter += 1
-            # determine the centroids
-            matching_centroids = self._fit_to_cluster(X)
-
-            # update the centroids
-            for i in range(self.n_clusters):
-                # points in current cluster
-                selection = (matching_centroids == i).astype(ht.int64)
-
-                # accumulate points and total number of points in cluster
-                assigned_points = (X * selection).sum(axis=0, keepdim=True)
-                points_in_cluster = selection.sum(axis=0, keepdim=True).clip(
-                    1.0, ht.iinfo(ht.int64).max
-                )
-
-                # compute the new centroids
-                new_cluster_centers[:, :, i] = assigned_points / points_in_cluster
-
-            # check whether centroid movement has converged
-            self._inertia = ((self._cluster_centers - new_cluster_centers) ** 2).sum()
-            self._cluster_centers = new_cluster_centers.copy()
-            if self.tol is not None and self._inertia <= self.tol:
-                break
-
-        self._labels = matching_centroids.squeeze()
-
-        return self
-
-    def fit_predict(self, X):
-        """
-        Compute cluster centers and predict cluster index for each sample.
-
-        Convenience method; equivalent to calling fit(X) followed by predict(X).
-
-        Parameters
-        ----------
-        X : ht.DNDarray, shape = [n_samples, n_features]
-            Input data to be clustered.
-
-        Returns
-        -------
-        labels : ht.DNDarray, shape [n_samples,]
-            Index of the cluster each sample belongs to.
-        """
-        self.fit(X)
-        return self.predict(X)
-
-    def get_params(self, deep=True):
-        """
-        Get parameters for this estimator.
-
-        Parameters
-        ----------
-        deep : boolean, optional
-            If True, will return the parameters for this estimator and contained sub-objects that are estimators.
-            Defaults to true.
-
-        Returns
-        -------
-        params : dict of string to any
-            Parameter names mapped to their values.
-        """
-        # unused
-        _ = deep
-
-        return {
-            "init": self.init,
-            "max_iter": self.max_iter,
-            "n_clusters": self.n_clusters,
-            "random_state": self.random_state,
-            "tol": self.tol,
-        }
-
-    def predict(self, X):
-        """
-        Predict the closest cluster each sample in X belongs to.
-
-        In the vector quantization literature, cluster_centers_ is called the code book and each value returned by
-        predict is the index of the closest code in the code book.
-
-        Parameters
-        ----------
-        X : ht.DNDarray, shape = [n_samples, n_features]
-            New data to predict.
-
-        Returns
-        -------
-        labels : ht.DNDarray, shape = [n_samples,]
-            Index of the cluster each sample belongs to.
-        """
-        # input sanitation
-        if not isinstance(X, ht.DNDarray):
-            raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
-
-        # determine the centroids
-        # return self._fit_to_cluster(X.expand_dims(axis=2)).squeeze()
-        return self._fit_to_cluster(X).squeeze()
-
-    def set_params(self, **params):
-        """
-        Set the parameters of this estimator.
-
-        Parameters
-        ----------
-        params : dict
-            The parameters of the estimator to be modified.
-
-        Returns
-        -------
-        self : ht.ml.KMeans
-            This estimator instance for chaining.
-        """
-        self.init = params.get(params["init"], self.init)
-        self.max_iter = params.get(params["max_iter"], self.max_iter)
-        self.n_clusters = params.get(params["n_clusters"], self.n_clusters)
-        self.random_state = params.get(params["random_state"], self.random_state)
-        self.tol = params.get(params["tol"], self.tol)
-
-        return self

From 338401b3bfc149cdae97e01e8b1c8ef0fe1d3cda Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 14 Feb 2020 11:06:50 +0100
Subject: [PATCH 23/49] Finalize spectral clustering API

---
 heat/cluster/spectral.py | 217 +++++++++++++++++++++++++++++++++++----
 1 file changed, 197 insertions(+), 20 deletions(-)

diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 648e2f1dc0..7c5d09f2f8 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -5,6 +5,31 @@
 
 
 def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
+    """
+    Calculate the graph Laplacian from a similarity matrix
+
+    Parameters
+    ----------
+    S : ht.DNDarray
+        quadrdatic, positive semidefinite similarity matrix, encoding similarity metrices s_ij between data samples i and j
+    norm : bool
+        Whether to calculate the normalized graph Laplacian
+        The unnormalized graph Laplacian is defined as L = D - A, where where D is the diagonal degree matrix and A the adjacency matrix
+        The normalized graph Laplacian is defined as L_norm = D^(-1/2) L D^(-1/2) = I - D^(-1/2) A D^(-1/2)
+    mode : "fc", "eNeighbour"
+        How to calculate adjacency from the similarity matrix
+        "fc" is fully-connected, so A = S
+        "eNeighbour" is the epsilon neighbourhood, with A_ji = 1 if S_ij >/< lower/upper else 0; for eNeighbour an upper or lower boundary needs to be set
+    upper : float
+        upper boundary for adjacency calculation, using A_ji = 1 if S_ij < upper else 0
+    lower : float
+        upper boundary for adjacency calculation, using A_ji = 1 if S_ij > lower else 0
+    Returns
+    -------
+    L : ht.DNDarray
+
+    """
+
     if mode == "eNeighbour":
         if (upper is not None) and (lower is not None):
             raise ValueError(
@@ -89,6 +114,12 @@ def __init__(
         self.normalize = normalize
         self.laplacian = laplacian
         self.epsilon = (threshold, boundary)
+        if assign_labels == "kmeans":
+            self._cluster = ht.cluster.Kmeans(init="kmeans++")
+        else:
+            raise NotImplementedError(
+                "Other Label Assignment Algorithms are currently not available"
+            )
 
         # in-place properties
         self._labels = None
@@ -96,42 +127,109 @@ def __init__(
         self._eigenvectors = None
         self._eigenvalues = None
 
-    def fit(self, X):
-        # 1. Calculation of Adjacency Matrix
+    @property
+    def eigenvectors_(self):
+        """
+        Returns
+        -------
+        ht.DNDarray, shape = (n_samples, n_lanzcos):
+            Eigenvectors of the graph Laplacian
+        """
+        return self._eigenvectors
+
+    @property
+    def eigenvalues_(self):
+        """
+        Returns
+        -------
+        ht.DNDarray, shape = (n_lanzcos):
+            Eigenvalues of the graph Laplacian
+        """
+        return self._eigenvalues
+
+    @property
+    def similarity_(self):
+        """
+        Returns
+        -------
+        ht.DNDarray:
+            The similarity matrix of the fitted dataset
+        """
+        return self._similarity
+
+    def _calc_adjacency(self, X):
+        """
+        Internal utility function for calculating of the similarity matrix according to the configuration set in __init__
+
+        Parameters
+        ----------
+            X :  ht.DNDarray, shape=(n_samples, n_features)
+
+        Returns
+        -------
+            S : ht.DNDarray, shape = (n_samples, n_samples)
+            The similarity matrix
+
+        """
         if self.metric == "rbf":
             if self.gamma is None:
                 raise ValueError(
                     "For usage of RBF kernel please specify the scaling factor sigma in class instantiation"
                 )
             sig = math.sqrt(1 / (2 * self.gamma))
-            self._similarity = ht.spatial.rbf(X, sigma=sig)
+            s = ht.spatial.rbf(X, sigma=sig)
 
         elif self.metric == "euclidean":
-            self._similarity = ht.spatial.cdist(X)
+            s = ht.spatial.cdist(X)
         else:
             raise NotImplementedError("Other kernels currently not implemented")
+        return s
+
+    def _calc_laplacian(self, S):
+        """
+        Internal utility function for calculating of the graph Laplacian according to the configuration set in __init__
+
+        Parameters
+        ----------
+        S : ht.DNDarray
+            The similarity matrix
+
+        Returns
+        -------
+        ht.DNDarray
+
+        """
 
-        # 2. Calculation of Laplacian
         if self.laplacian == "eNeighborhood":
             if self.epsilon[1] == "upper":
-                L = laplacian(
-                    self._similarity, norm=self.normalize, mode="eNeighbour", upper=self.epsilon[0]
-                )
+                return laplacian(S, norm=self.normalize, mode="eNeighbour", upper=self.epsilon[0])
             elif self.epsilon[1] == "lower":
-                L = laplacian(
-                    self._similarity, norm=self.normalize, mode="eNeighbour", lower=self.epsilon[0]
-                )
+                return laplacian(S, norm=self.normalize, mode="eNeighbour", lower=self.epsilon[0])
             else:
                 raise ValueError(
                     "Boundary needs to be 'upper' or 'lower' and threshold needs to be set, if laplacian = eNeighborhood"
                 )
 
         elif self.laplacian == "fully_connected":
-            L = laplacian(self._similarity, norm=self.normalize, mode="fc")
+            return laplacian(self._similarity, norm=self.normalize, mode="fc")
         else:
             raise NotImplementedError("Other approaches currently not implemented")
 
-        # 3. Eigenvalue and -vector Calculation
+    def _calculate_eigendecomposition(self, L):
+        """
+        Internal utility function for calculating eigenvalues and eigenvectors of the graph Laplacian using the Lanczos algorithm
+
+        Parameters
+        ----------
+        L : ht.DNDarray
+            The graph Laplacian matrix
+
+        Returns
+        -------
+        eigenvalues : ht.DNDarray, shape = (n_lanczos,)
+        eigenvectors: ht.DNDarray, shape = (n_samples, n_lanczos)
+
+        """
         vr = ht.random.rand(L.shape[0], split=L.split, dtype=ht.float32)
         v0 = vr / ht.norm(vr)
         Vg_norm, Tg_norm = ht.lanczos(L, self.n_lanczos, v0)
@@ -139,17 +237,96 @@ def fit(self, X):
         # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
         eval, evec = torch.eig(Tg_norm._DNDarray__array, eigenvectors=True)
         # If x is an Eigenvector of T, then y = V@x is the corresponding Eigenvector of L
-        self._eigenvectors = ht.matmul(Vg_norm, ht.array(evec))
-        self._eigenvalues, idx = torch.sort(eval[:, 0], dim=0)
-        self._eigenvalues = self._eigenvectors[:, idx]
+        eigenvalues, idx = torch.sort(eval[:, 0], dim=0)
+        eigenvalues = ht.array(eigenvalues)
+        eigenvectors = ht.matmul(Vg_norm, ht.array(evec))[:, idx]
+
+        return eigenvalues, eigenvectors
+
+    def fit(self, X):
+        """
+        Computes the low-dim representation by calculation of eigenspectrum (eigenvalues and eigenvectors) of the graph laplacian from the similarity matrix and fits the eigenvectors that correspond to the k lowest eigenvalues with a seperate clustering algorithm (currently only kemans is supported)
+        Similarity metrics for adjacency calculations are supported via spatial.distance. The eigenvalues and eigenvectors are computed by reducing the Laplacian via lanczos iterations and using the torch eigenvalue solver on this smaller matrix. If other eigenvalue decompostion methods are supported, this will be expanded.
+
+
+        Parameters
+        ----------
+        X : ht.DNDarray, shape=(n_samples, n_features)
+            Training instances to cluster.
+        """
+        # input sanitation
+        if not isinstance(X, ht.DNDarray):
+            raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
+
+        # 1. Calculation of Adjacency Matrix
+        self._similarity = self._calc_adjacency(X)
+
+        # 2. Calculation of Laplacian
+        L = self._calc_laplacian()
 
+        # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
+        self._eigenvalues, self._eigenvectors = self._calculate_eigendecomposition(L)
+
+        # 5. Find the spectral gap, if number of clusters is not defined from the outside
         if self.n_clusters is None:
             temp = np.diff(self._eigenvalues.numpy())
             self.n_clusters = np.where(temp == temp.max())[0][0] + 1
-
         components = self._eigenvectors[:, : self.n_clusters].copy()
 
-        kmeans = ht.cluster.KMeans(n_clusters=self.n_clusters, init="kmeans++")
-        kmeans.fit(components)
+        self._cluster.set_params({"n_clusters": self.n_clusters})
+        # kmeans = ht.cluster.KMeans(n_clusters=self.n_clusters, init="kmeans++")
+        self._cluster.fit(components)
+        self._labels = self._cluster.labels_
+
+        return self
+
+    def predict(self, X):
+        """
+        Predict the closest cluster each sample in X belongs to.
 
-        return
+        X is transformed to the low-dim representation by calculation of eigenspectrum (eigenvalues and eigenvectors) of the graph laplacian from the similarity matrix.
+        Inference of lables is done by extraction of the closest centroid of the n_clusters eigenvectors from the previously fitted clustering algorithm (kmeans)
+        Caution: Calculation of the low-dim representation requires some time!
+
+        Parameters
+        ----------
+        X : ht.DNDarray, shape = [n_samples, n_features]
+            New data to predict.
+
+        Returns
+        -------
+        labels : ht.DNDarray, shape = [n_samples,]
+            Index of the cluster each sample belongs to.
+        """
+        if not isinstance(X, ht.DNDarray):
+            raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
+
+        # 1. Calculation of Adjacency Matrix
+        S = self._calc_adjacency(X)
+
+        # 2. Calculation of Laplacian
+        L = self._calc_laplacian(S)
+
+        # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
+        eval, evec = self._calculate_eigendecomposition(L)
+
+        return self._cluster.predict(evec[:, : self.n_clusters].copy())
+
+    def fit_predict(self, X):
+        """
+        Compute cluster centers and predict cluster index for each sample.
+
+        This method should be preferred to to calling fit(X) followed by predict(X), since predict(X) requires recomputation of the low-dim eigenspectrum representation of X
+
+        Parameters
+        ----------
+        X : ht.DNDarray, shape = [n_samples, n_features]
+            Input data to be clustered.
+
+        Returns
+        -------
+        labels : ht.DNDarray, shape [n_samples,]
+            Index of the cluster each sample belongs to.
+        """
+        self.fit(X)
+        return self._labels

From 67f45f5862762df6cf2c682a723937b45f2920e6 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Fri, 14 Feb 2020 14:57:49 +0100
Subject: [PATCH 24/49] Bugfixes and Unit tests

---
 heat/cluster/spectral.py            |  45 +++++++----
 heat/cluster/tests/test_spectral.py | 117 ++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+), 15 deletions(-)
 create mode 100644 heat/cluster/tests/test_spectral.py

diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 7c5d09f2f8..66b954df30 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -35,11 +35,15 @@ def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
             raise ValueError(
                 "Epsilon neighborhood with either upper or lower threshold, both is not supported"
             )
-
+        if (upper is None) and (lower is None):
+            raise ValueError("Epsilon neighborhood requires upper or lower threshold to be defined")
         if upper is not None:
-            A = ht.int(S < upper) - ht.eye(S.shape, dtype=ht.int, split=S.split)
+            A = ht.int(S < upper)
+            A = A - ht.eye(S.shape, dtype=ht.int, split=S.split, device=S.device, comm=S.comm)
         elif lower is not None:
-            A = ht.int(S > upper) - ht.eye(S.shape, dtype=ht.int, split=S.split)
+            print("Here")
+            A = ht.int(S > lower)
+            A = A - ht.eye(S.shape, dtype=ht.int, split=S.split, device=S.device, comm=S.comm)
     elif mode == "fc":
         A = S
     else:
@@ -49,12 +53,14 @@ def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
     if norm:
         degree = ht.sqrt(1.0 / ht.sum(A, axis=1))
     else:
-        degree = sum(A, axis=1)
+        degree = ht.sum(A, axis=1)
 
     D = ht.diag(degree)
 
     if norm:
-        L = ht.eye(A.shape, split=A.split) - ht.matmul(D, ht.matmul(S, D))
+        L = ht.eye(A.shape, split=A.split, device=A.device, comm=A.comm) - ht.matmul(
+            D, ht.matmul(S, D)
+        )
     else:
         L = D - A
 
@@ -115,17 +121,17 @@ def __init__(
         self.laplacian = laplacian
         self.epsilon = (threshold, boundary)
         if assign_labels == "kmeans":
-            self._cluster = ht.cluster.Kmeans(init="kmeans++")
+            self._cluster = ht.cluster.KMeans(init="kmeans++")
         else:
             raise NotImplementedError(
                 "Other Label Assignment Algorithms are currently not available"
             )
 
         # in-place properties
-        self._labels = None
-        self._similarity = None
         self._eigenvectors = None
         self._eigenvalues = None
+        self._labels = None
+        self._similarity = None
 
     @property
     def eigenvectors_(self):
@@ -147,6 +153,16 @@ def eigenvalues_(self):
         """
         return self._eigenvalues
 
+    @property
+    def labels_(self):
+        """
+        Returns
+        -------
+        ht.DNDarray, shape=(n_points):
+            Labels of each point.
+        """
+        return self._labels
+
     @property
     def similarity_(self):
         """
@@ -172,10 +188,6 @@ def _calc_adjacency(self, X):
 
         """
         if self.metric == "rbf":
-            if self.gamma is None:
-                raise ValueError(
-                    "For usage of RBF kernel please specify the scaling factor sigma in class instantiation"
-                )
             sig = math.sqrt(1 / (2 * self.gamma))
             s = ht.spatial.rbf(X, sigma=sig)
 
@@ -257,12 +269,14 @@ def fit(self, X):
         # input sanitation
         if not isinstance(X, ht.DNDarray):
             raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
+        if X.split is not None and X.split != 0:
+            raise NotImplementedError("Not implemented for other splitting-axes")
 
         # 1. Calculation of Adjacency Matrix
         self._similarity = self._calc_adjacency(X)
 
         # 2. Calculation of Laplacian
-        L = self._calc_laplacian()
+        L = self._calc_laplacian(self._similarity)
 
         # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
         self._eigenvalues, self._eigenvectors = self._calculate_eigendecomposition(L)
@@ -273,8 +287,9 @@ def fit(self, X):
             self.n_clusters = np.where(temp == temp.max())[0][0] + 1
         components = self._eigenvectors[:, : self.n_clusters].copy()
 
-        self._cluster.set_params({"n_clusters": self.n_clusters})
-        # kmeans = ht.cluster.KMeans(n_clusters=self.n_clusters, init="kmeans++")
+        params = self._cluster.get_params()
+        params["n_clusters"] = self.n_clusters
+        self._cluster.set_params(**params)
         self._cluster.fit(components)
         self._labels = self._cluster.labels_
 
diff --git a/heat/cluster/tests/test_spectral.py b/heat/cluster/tests/test_spectral.py
new file mode 100644
index 0000000000..00deff5032
--- /dev/null
+++ b/heat/cluster/tests/test_spectral.py
@@ -0,0 +1,117 @@
+import os
+import unittest
+
+import heat as ht
+
+if os.environ.get("DEVICE") == "gpu" and ht.torch.cuda.is_available():
+    ht.use_device("gpu")
+    ht.torch.cuda.set_device(ht.torch.device(ht.get_device().torch_device))
+else:
+    ht.use_device("cpu")
+device = ht.get_device().torch_device
+ht_device = None
+if os.environ.get("DEVICE") == "lgpu" and ht.torch.cuda.is_available():
+    device = ht.gpu.torch_device
+    ht_device = ht.gpu
+    ht.torch.cuda.set_device(device)
+
+
+class TestKMeans(unittest.TestCase):
+    def test_laplacian(self):
+        S = ht.array(
+            [
+                [0, 0.6, 0.8, 0.2, 0.2, 0.1, 0.4, 0.2, 0.6, 0.7],
+                [0.6, 0, 0.6, 0.3, 0.2, 0.1, 0.1, 0.2, 0.1, 0.2],
+                [0.8, 0.6, 0, 0.2, 0.1, 0.3, 0.4, 0.3, 0.2, 0.2],
+                [0.2, 0.3, 0.2, 0, 0.9, 0.9, 0.4, 0.3, 0.3, 0.4],
+                [0.2, 0.2, 0.1, 0.9, 0, 0.8, 0.1, 0.1, 0.1, 0.1],
+                [0.1, 0.1, 0.3, 0.9, 0.8, 0, 0.7, 0.7, 0.2, 0.2],
+                [0.4, 0.1, 0.4, 0.4, 0.1, 0.7, 0, 0.7, 0.3, 0.1],
+                [0.2, 0.2, 0.3, 0.3, 0.1, 0.7, 0.7, 0, 0.4, 0.3],
+                [0.6, 0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.4, 0, 0.8],
+                [0.7, 0.2, 0.2, 0.4, 0.1, 0.2, 0.1, 0.3, 0.8, 0],
+            ],
+            split=0,
+            device=ht_device,
+        )
+
+        L = ht.cluster.spectral.laplacian(S, norm=True, mode="fc")
+        self.assertIsInstance(L, ht.DNDarray)
+        self.assertEqual(L.split, S.split)
+
+        L = ht.cluster.spectral.laplacian(S, norm=True, mode="eNeighbour", upper=0.5)
+        self.assertIsInstance(L, ht.DNDarray)
+        self.assertEqual(L.split, S.split)
+
+        L = ht.cluster.spectral.laplacian(S, norm=False, mode="eNeighbour", lower=0.5)
+        self.assertIsInstance(L, ht.DNDarray)
+        self.assertEqual(L.split, S.split)
+        L_true = ht.array(
+            [
+                [4, -1, -1, 0, 0, 0, 0, 0, -1, -1],
+                [-1, 2, -1, 0, 0, 0, 0, 0, 0, 0],
+                [-1, -1, 2, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 2, -1, -1, 0, 0, 0, 0],
+                [0, 0, 0, -1, 2, -1, 0, 0, 0, 0],
+                [0, 0, 0, -1, -1, 4, -1, -1, 0, 0],
+                [0, 0, 0, 0, 0, -1, 2, -1, 0, 0],
+                [0, 0, 0, 0, 0, -1, -1, 2, 0, 0],
+                [-1, 0, 0, 0, 0, 0, 0, 0, 2, -1],
+                [-1, 0, 0, 0, 0, 0, 0, 0, -1, 2],
+            ],
+            dtype=ht.float32,
+            split=0,
+            device=ht_device,
+        )
+        self.assertTrue(ht.equal(L, L_true))
+
+        with self.assertRaises(ValueError):
+            L = ht.cluster.spectral.laplacian(S, norm=True, mode="eNeighbour")
+        with self.assertRaises(ValueError):
+            L = ht.cluster.spectral.laplacian(S, norm=True, mode="eNeighbour", upper=0.3, lower=0.7)
+        with self.assertRaises(NotImplementedError):
+            L = ht.cluster.spectral.laplacian(S, norm=True, mode="knearest")
+
+    def test_fit_iris(self):
+        # get some test data
+        iris = ht.load("heat/datasets/data/iris.csv", sep=";")
+
+        # fit the clusters
+        m = 20
+        spectral = ht.cluster.Spectral(
+            gamma=1.0, metric="rbf", laplacian="fully_connected", normalize=True, n_lanczos=m
+        )
+        spectral.fit(iris)
+
+        self.assertIsInstance(spectral.labels_, ht.DNDarray)
+        self.assertIsInstance(spectral.similarity_, ht.DNDarray)
+        self.assertEqual(spectral.similarity_.shape, (iris.shape[0], iris.shape[0]))
+        self.assertIsInstance(spectral.eigenvalues_, ht.DNDarray)
+        self.assertEqual(spectral.eigenvalues_.shape, (m,))
+        self.assertIsInstance(spectral.eigenvectors_, ht.DNDarray)
+        self.assertEqual(spectral.eigenvectors_.shape, (iris.shape[0], m))
+
+        with self.assertRaises(NotImplementedError):
+            spectral = ht.cluster.Spectral(
+                gamma=1.0,
+                metric="rbf",
+                laplacian="fully_connected",
+                normalize=True,
+                n_lanczos=20,
+                assign_labels="discretize",
+            )
+        with self.assertRaises(NotImplementedError):
+            spectral = ht.cluster.Spectral(
+                gamma=1.0,
+                metric="nearest_neighbors",
+                laplacian="fully_connected",
+                normalize=True,
+                n_lanczos=m,
+            )
+            spectral.fit(iris)
+
+        iris_split = ht.load("heat/datasets/data/iris.csv", sep=";", split=1)
+        spectral = ht.cluster.Spectral(n_lanczos=20)
+
+        with self.assertRaises(NotImplementedError):
+            spectral.fit(iris_split)

From dd042796d62161b4e3e057376f3e4fad26c8edcd Mon Sep 17 00:00:00 2001
From: Charlotte Debus <charlotte.debus@dlr.de>
Date: Mon, 17 Feb 2020 16:39:06 +0100
Subject: [PATCH 25/49] Fixes in Lanczos Algorithm: Reorthogonalization to make
 Eigenvalues correct; Fixes in kmeans

---
 heat/cluster/kmeans.py   | 14 ++++++------
 heat/cluster/spectral.py | 46 +++++++++++++++++++++++-----------------
 heat/core/linalg.py      | 18 ++++++++++------
 3 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/heat/cluster/kmeans.py b/heat/cluster/kmeans.py
index e480dd7fdd..c1ca273d6a 100644
--- a/heat/cluster/kmeans.py
+++ b/heat/cluster/kmeans.py
@@ -267,7 +267,7 @@ def fit(self, X):
             if self.tol is not None and self._inertia <= self.tol:
                 break
 
-        self._labels = matching_centroids.squeeze()
+        self._labels = matching_centroids
 
         return self
 
@@ -338,7 +338,7 @@ def predict(self, X):
             raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
 
         # determine the centroids
-        return self._fit_to_cluster(X.expand_dims(axis=2)).squeeze()
+        return self._fit_to_cluster(X)
 
     def set_params(self, **params):
         """
@@ -354,10 +354,10 @@ def set_params(self, **params):
         self : ht.ml.KMeans
             This estimator instance for chaining.
         """
-        self.init = params.get(params["init"], self.init)
-        self.max_iter = params.get(params["max_iter"], self.max_iter)
-        self.n_clusters = params.get(params["n_clusters"], self.n_clusters)
-        self.random_state = params.get(params["random_state"], self.random_state)
-        self.tol = params.get(params["tol"], self.tol)
+        self.init = params.get("init", self.init)
+        self.max_iter = params.get("max_iter", self.max_iter)
+        self.n_clusters = params.get("n_clusters", self.n_clusters)
+        self.random_state = params.get("random_state", self.random_state)
+        self.tol = params.get("tol", self.tol)
 
         return self
diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 66b954df30..a827799bc9 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -41,28 +41,24 @@ def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
             A = ht.int(S < upper)
             A = A - ht.eye(S.shape, dtype=ht.int, split=S.split, device=S.device, comm=S.comm)
         elif lower is not None:
-            print("Here")
             A = ht.int(S > lower)
             A = A - ht.eye(S.shape, dtype=ht.int, split=S.split, device=S.device, comm=S.comm)
     elif mode == "fc":
-        A = S
+        A = S - ht.eye(S.shape, dtype=S.dtype, split=S.split, device=S.device, comm=S.comm)
     else:
         raise NotImplementedError(
             "Only eNeighborhood and fully-connected graphs supported at the moment."
         )
-    if norm:
-        degree = ht.sqrt(1.0 / ht.sum(A, axis=1))
-    else:
-        degree = ht.sum(A, axis=1)
 
-    D = ht.diag(degree)
+    degree = ht.sum(A, axis=1)
 
+    D = ht.diag(degree)
+    L = D - A
     if norm:
-        L = ht.eye(A.shape, split=A.split, device=A.device, comm=A.comm) - ht.matmul(
-            D, ht.matmul(S, D)
-        )
-    else:
-        L = D - A
+        deg = ht.sqrt(1.0 / ht.sum(A, axis=1))
+        D2 = ht.diag(deg)
+
+        L = ht.matmul(D2, ht.matmul(L, D2))
 
     return L
 
@@ -131,6 +127,7 @@ def __init__(
         self._eigenvectors = None
         self._eigenvalues = None
         self._labels = None
+        self._laplacian = None
         self._similarity = None
 
     @property
@@ -163,6 +160,16 @@ def labels_(self):
         """
         return self._labels
 
+    @property
+    def laplacian_(self):
+        """
+        Returns
+        -------
+        ht.DNDarray:
+            Graph Laplcacian of the fitted dataset
+        """
+        return self._laplacian
+
     @property
     def similarity_(self):
         """
@@ -242,16 +249,17 @@ def _calculate_eigendecomposition(self, L):
         eigenvectors: ht.DNDarray, shape = (n_samples, n_lanczos)
 
         """
-        vr = ht.random.rand(L.shape[0], split=L.split, dtype=ht.float32)
-        v0 = vr / ht.norm(vr)
-        Vg_norm, Tg_norm = ht.lanczos(L, self.n_lanczos, v0)
+        # vr = ht.random.rand(L.shape[0], split=L.split, dtype=ht.float64)
+        # v0 = vr / ht.norm(vr)
+        v0 = ht.ones((L.shape[0],), dtype=L.dtype, split=L.split) / math.sqrt(L.shape[0])
 
+        V, T = ht.lanczos(L, self.n_lanczos, v0)
         # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
-        eval, evec = torch.eig(Tg_norm._DNDarray__array, eigenvectors=True)
+        eval, evec = torch.eig(T._DNDarray__array, eigenvectors=True)
         # If x is an Eigenvector of T, then y = V@x is the corresponding Eigenvector of L
         eigenvalues, idx = torch.sort(eval[:, 0], dim=0)
         eigenvalues = ht.array(eigenvalues)
-        eigenvectors = ht.matmul(Vg_norm, ht.array(evec))[:, idx]
+        eigenvectors = ht.matmul(V, ht.array(evec))[:, idx]
 
         return eigenvalues, eigenvectors
 
@@ -276,10 +284,10 @@ def fit(self, X):
         self._similarity = self._calc_adjacency(X)
 
         # 2. Calculation of Laplacian
-        L = self._calc_laplacian(self._similarity)
+        self._laplacian = self._calc_laplacian(self._similarity)
 
         # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
-        self._eigenvalues, self._eigenvectors = self._calculate_eigendecomposition(L)
+        self._eigenvalues, self._eigenvectors = self._calculate_eigendecomposition(self._laplacian)
 
         # 5. Find the spectral gap, if number of clusters is not defined from the outside
         if self.n_clusters is None:
diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index 7e0dfcdf43..2792124704 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -178,9 +178,9 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         raise TypeError("Input Matrix A needs to be symmetric.")
     T = factories.zeros((m, m))
     if A.split == 0:
-        V = factories.zeros((n, m), split=A.split, dtype=types.float32)
+        V = factories.zeros((n, m), split=A.split, dtype=A.dtype)
     else:
-        V = factories.zeros((n, m), dtype=types.float32)
+        V = factories.zeros((n, m), dtype=A.dtype)
 
     if v0 is None:
         vr = random.rand(n)
@@ -193,23 +193,28 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
     w = w - alpha * v0
     T[0, 0] = alpha
     V[:, 0] = v0
-
     for i in range(1, int(m)):
         beta = norm(w)
-        if beta == 0.0:
+        if abs(beta) < 1e-10:
+            print("Lanczos breakdown in iteration {}".format(i))
             # Lanczos Breakdown, pick a random vector to continue
             vr = random.rand(n)
             # orthogonalize v_r with respect to all vectors v[i]
             for j in range(i):
-                vr = vr - projection(V[:, j], vr)
+                vr = vr - projection(vr, V[:, j])
             # normalize v_r to Euclidian norm 1 and set as ith vector v
             vi = vr / norm(vr)
         else:
-            vi = w / beta
+            vr = w
+            # reorthogonalization
+            for j in range(i):
+                vr = vr - projection(vr, V[:, j])
+            vi = vr / norm(vr)
 
         w = matmul(A, vi)
         alpha = dot(w, vi)
         w = w - alpha * vi - beta * V[:, i - 1]
+
         T[i - 1, i] = beta
         T[i, i - 1] = beta
         T[i, i] = alpha
@@ -933,6 +938,7 @@ def norm(a):
         raise TypeError("a must be of type ht.DNDarray, but was {}".format(type(a)))
 
     d = a ** 2
+
     for i in range(len(a.shape) - 1, -1, -1):
         d = arithmetics.sum(d, axis=i)
 

From 6adf52eb2ebac3eb36d35736d8361a4a52bf9717 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Wed, 19 Feb 2020 14:25:44 +0100
Subject: [PATCH 26/49] Testing on HDFML

---
 heat/cluster/spectral.py | 10 ++++++----
 heat/core/linalg.py      |  7 +++++++
 heat/spatial/distance.py |  9 +++++++++
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index a827799bc9..57660d7482 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -279,16 +279,18 @@ def fit(self, X):
             raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
         if X.split is not None and X.split != 0:
             raise NotImplementedError("Not implemented for other splitting-axes")
-
         # 1. Calculation of Adjacency Matrix
         self._similarity = self._calc_adjacency(X)
-
+        if X.comm.rank == 0:
+            print("Similarity Calculated")
         # 2. Calculation of Laplacian
         self._laplacian = self._calc_laplacian(self._similarity)
-
+        if X.comm.rank == 0:
+            print("Laplacian Calculated")
         # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
         self._eigenvalues, self._eigenvectors = self._calculate_eigendecomposition(self._laplacian)
-
+        if X.comm.rank == 0:
+            print("Eigenvalues Calculated")
         # 5. Find the spectral gap, if number of clusters is not defined from the outside
         if self.n_clusters is None:
             temp = np.diff(self._eigenvalues.numpy())
diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index 2792124704..fedf5fdc34 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -1,6 +1,7 @@
 import itertools
 import math
 import torch
+import time
 
 from .communication import MPI
 from . import arithmetics
@@ -194,6 +195,8 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
     T[0, 0] = alpha
     V[:, 0] = v0
     for i in range(1, int(m)):
+        #if A.comm.rank == 0:
+        #    start = time.perf_counter()
         beta = norm(w)
         if abs(beta) < 1e-10:
             print("Lanczos breakdown in iteration {}".format(i))
@@ -220,6 +223,10 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         T[i, i] = alpha
         V[:, i] = vi
 
+        #if A.comm.rank == 0:
+        #    stop = time.perf_counter()
+        #    print("Iteration {} of Lanczos algorithm: {}s".format(i, stop - start))
+
     if V.split is not None:
         V.resplit_(axis=None)
 
diff --git a/heat/spatial/distance.py b/heat/spatial/distance.py
index 0b7fc3b0c7..5dafa252f6 100644
--- a/heat/spatial/distance.py
+++ b/heat/spatial/distance.py
@@ -1,6 +1,7 @@
 import torch
 import numpy as np
 from mpi4py import MPI
+import time
 
 from ..core import factories
 from ..core import types
@@ -199,6 +200,8 @@ def _dist(X, Y=None, metric=_euclidian):
             rank = comm.Get_rank()
             size = comm.Get_size()
             K, f = X.shape
+            #if rank == 0:
+            #    print("Starting distance calculation")
 
             counts, displ, _ = comm.counts_displs_shape(X.shape, X.split)
             num_iter = (size + 1) // 2
@@ -210,6 +213,8 @@ def _dist(X, Y=None, metric=_euclidian):
             d_ij = metric(stationary, stationary)
             d._DNDarray__array[:, rows[0] : rows[1]] = d_ij
             for iter in range(1, num_iter):
+                #if rank == 0:
+                #    start = time.perf_counter()
                 # Send rank's part of the matrix to the next process in a circular fashion
                 receiver = (rank + iter) % size
                 sender = (rank - iter) % size
@@ -257,6 +262,10 @@ def _dist(X, Y=None, metric=_euclidian):
                 if (rank // iter) == 0:
                     comm.Recv(symmetric, source=receiver, tag=iter)
                 d._DNDarray__array[:, scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
+                #if rank == 0:
+                #    stop = time.perf_counter()
+                #    print("Iteration {} of distance calculation: {}s".format(iter, stop-start))
+
 
             if (size + 1) % 2 != 0:  # we need one mor iteration for the first n/2 processes
                 receiver = (rank + num_iter) % size

From b413b5f9681dfc351cb11c2ecb0befa39a74f0f2 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Wed, 19 Feb 2020 16:30:22 +0100
Subject: [PATCH 27/49] Benchmarking Spectral Clustering. Cleanup needed

---
 heat/cluster/spectral.py | 18 +++++++++++++-----
 heat/core/linalg.py      | 11 ++---------
 heat/spatial/distance.py | 10 +---------
 3 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 57660d7482..78a85d6467 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -2,6 +2,7 @@
 import math
 import numpy as np
 import heat as ht
+import time
 
 
 def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
@@ -196,7 +197,7 @@ def _calc_adjacency(self, X):
         """
         if self.metric == "rbf":
             sig = math.sqrt(1 / (2 * self.gamma))
-            s = ht.spatial.rbf(X, sigma=sig)
+            s = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=False)
 
         elif self.metric == "euclidean":
             s = ht.spatial.cdist(X)
@@ -251,7 +252,7 @@ def _calculate_eigendecomposition(self, L):
         """
         # vr = ht.random.rand(L.shape[0], split=L.split, dtype=ht.float64)
         # v0 = vr / ht.norm(vr)
-        v0 = ht.ones((L.shape[0],), dtype=L.dtype, split=L.split) / math.sqrt(L.shape[0])
+        v0 = ht.ones((L.shape[0],), dtype=L.dtype, split=L.split, device=L.device) / math.sqrt(L.shape[0])
 
         V, T = ht.lanczos(L, self.n_lanczos, v0)
         # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
@@ -280,17 +281,24 @@ def fit(self, X):
         if X.split is not None and X.split != 0:
             raise NotImplementedError("Not implemented for other splitting-axes")
         # 1. Calculation of Adjacency Matrix
+        if X.comm.rank == 0:
+            start = time.perf_counter()
         self._similarity = self._calc_adjacency(X)
         if X.comm.rank == 0:
-            print("Similarity Calculated")
+            stop = time.perf_counter()
+            print("Similarity Calculated in : {}s".format(stop - start))
+            start = time.perf_counter()
         # 2. Calculation of Laplacian
         self._laplacian = self._calc_laplacian(self._similarity)
         if X.comm.rank == 0:
-            print("Laplacian Calculated")
+            stop = time.perf_counter()
+            print("Laplacian Calculated in : {}s".format(stop - start))
+            start = time.perf_counter()
         # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
         self._eigenvalues, self._eigenvectors = self._calculate_eigendecomposition(self._laplacian)
         if X.comm.rank == 0:
-            print("Eigenvalues Calculated")
+            stop = time.perf_counter()
+            print("Eigenvalues Calculated in : {}s".format(stop - start))
         # 5. Find the spectral gap, if number of clusters is not defined from the outside
         if self.n_clusters is None:
             temp = np.diff(self._eigenvalues.numpy())
diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index fedf5fdc34..e3572fe628 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -1,7 +1,6 @@
 import itertools
 import math
 import torch
-import time
 
 from .communication import MPI
 from . import arithmetics
@@ -179,9 +178,9 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         raise TypeError("Input Matrix A needs to be symmetric.")
     T = factories.zeros((m, m))
     if A.split == 0:
-        V = factories.zeros((n, m), split=A.split, dtype=A.dtype)
+        V = factories.zeros((n, m), split=A.split, dtype=A.dtype, device=A.device)
     else:
-        V = factories.zeros((n, m), dtype=A.dtype)
+        V = factories.zeros((n, m), dtype=A.dtype, device=A.device)
 
     if v0 is None:
         vr = random.rand(n)
@@ -195,8 +194,6 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
     T[0, 0] = alpha
     V[:, 0] = v0
     for i in range(1, int(m)):
-        #if A.comm.rank == 0:
-        #    start = time.perf_counter()
         beta = norm(w)
         if abs(beta) < 1e-10:
             print("Lanczos breakdown in iteration {}".format(i))
@@ -223,10 +220,6 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         T[i, i] = alpha
         V[:, i] = vi
 
-        #if A.comm.rank == 0:
-        #    stop = time.perf_counter()
-        #    print("Iteration {} of Lanczos algorithm: {}s".format(i, stop - start))
-
     if V.split is not None:
         V.resplit_(axis=None)
 
diff --git a/heat/spatial/distance.py b/heat/spatial/distance.py
index 54a3c3f290..7c452fe845 100644
--- a/heat/spatial/distance.py
+++ b/heat/spatial/distance.py
@@ -1,7 +1,6 @@
 import torch
 import numpy as np
 from mpi4py import MPI
-import time
 
 from ..core import factories
 from ..core import types
@@ -200,8 +199,6 @@ def _dist(X, Y=None, metric=_euclidian):
             rank = comm.Get_rank()
             size = comm.Get_size()
             K, f = X.shape
-            #if rank == 0:
-            #    print("Starting distance calculation")
 
             counts, displ, _ = comm.counts_displs_shape(X.shape, X.split)
             num_iter = (size + 1) // 2
@@ -213,8 +210,6 @@ def _dist(X, Y=None, metric=_euclidian):
             d_ij = metric(stationary, stationary)
             d._DNDarray__array[:, rows[0] : rows[1]] = d_ij
             for iter in range(1, num_iter):
-                #if rank == 0:
-                #    start = time.perf_counter()
                 # Send rank's part of the matrix to the next process in a circular fashion
                 receiver = (rank + iter) % size
                 sender = (rank - iter) % size
@@ -262,9 +257,6 @@ def _dist(X, Y=None, metric=_euclidian):
                 if (rank // iter) == 0:
                     comm.Recv(symmetric, source=receiver, tag=iter)
                 d._DNDarray__array[:, scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
-                #if rank == 0:
-                #    stop = time.perf_counter()
-                #    print("Iteration {} of distance calculation: {}s".format(iter, stop-start))
 
 
             if (size + 1) % 2 != 0:  # we need one mor iteration for the first n/2 processes
@@ -298,7 +290,7 @@ def _dist(X, Y=None, metric=_euclidian):
                     scol2 = displ[receiver + 1] if receiver != size - 1 else K
                     scolumns = (scol1, scol2)
                     symmetric = torch.zeros(
-                        (scolumns[1] - scolumns[0], rows[1] - rows[0]), dtype=torch_type
+                        (scolumns[1] - scolumns[0], rows[1] - rows[0]), dtype=torch_type, device=X.device.torch_device
                     )
                     comm.Recv(symmetric, source=receiver, tag=num_iter)
                     d._DNDarray__array[:, scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)

From 1b2e7a7c7d7d6a0bc78ff2968eb14830c12b6ab5 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Wed, 4 Mar 2020 10:53:42 +0100
Subject: [PATCH 28/49] Enhanced Lanczos-algorithm by speeding up
 reorthogonalization

---
 heat/core/linalg.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index e3572fe628..f73d713a42 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -178,9 +178,11 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         raise TypeError("Input Matrix A needs to be symmetric.")
     T = factories.zeros((m, m))
     if A.split == 0:
-        V = factories.zeros((n, m), split=A.split, dtype=A.dtype, device=A.device)
+        #V = factories.ones((n, m), split=0, dtype=A.dtype, device=A.device)
+        V = factories.ones((m, n), split=1, dtype=A.dtype, device=A.device)
     else:
-        V = factories.zeros((n, m), dtype=A.dtype, device=A.device)
+        #V = factories.ones((n, m), dtype=A.dtype, device=A.device)
+        V = factories.ones((m, n), split=None, dtype=A.dtype, device=A.device)
 
     if v0 is None:
         vr = random.rand(n)
@@ -192,37 +194,45 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
     alpha = dot(w, v0)
     w = w - alpha * v0
     T[0, 0] = alpha
-    V[:, 0] = v0
+    V[0, :] = v0
     for i in range(1, int(m)):
         beta = norm(w)
         if abs(beta) < 1e-10:
             print("Lanczos breakdown in iteration {}".format(i))
             # Lanczos Breakdown, pick a random vector to continue
-            vr = random.rand(n)
+            vr = random.rand(n, dtype=A.dtype)
             # orthogonalize v_r with respect to all vectors v[i]
             for j in range(i):
-                vr = vr - projection(vr, V[:, j])
+                vr = vr - projection(vr, V[j, :])
             # normalize v_r to Euclidian norm 1 and set as ith vector v
             vi = vr / norm(vr)
         else:
             vr = w
+
             # reorthogonalization
             for j in range(i):
-                vr = vr - projection(vr, V[:, j])
+                vi_loc = V._DNDarray__array[j, :]
+                a = torch.dot(vr._DNDarray__array, vi_loc)
+                b = torch.dot(vi_loc, vi_loc)
+                A.comm.Allreduce(MPI.IN_PLACE, a, MPI.SUM)
+                A.comm.Allreduce(MPI.IN_PLACE, b, MPI.SUM)
+                vr._DNDarray__array = vr._DNDarray__array - a/b*vi_loc
+
             vi = vr / norm(vr)
 
+
         w = matmul(A, vi)
         alpha = dot(w, vi)
-        w = w - alpha * vi - beta * V[:, i - 1]
+        w = w - alpha * vi - beta * V[i - 1, :]
 
         T[i - 1, i] = beta
         T[i, i - 1] = beta
         T[i, i] = alpha
-        V[:, i] = vi
+        V[i, :] = vi
 
     if V.split is not None:
         V.resplit_(axis=None)
-
+    V = V.transpose()
     if T_out is not None:
         T_out = T
         if V_out is not None:

From 4454bb9994ef316887cfa23a0363a5bcb2bd1808 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Wed, 4 Mar 2020 14:10:00 +0100
Subject: [PATCH 29/49] Tagged #494 for rethinking Reorthogonalization

---
 heat/core/linalg.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index f73d713a42..6d3709eeb2 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -209,7 +209,9 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         else:
             vr = w
 
-            # reorthogonalization
+            # Reorthogonalization
+            # ToDo: Rethink this; mask torch calls, See issue #494
+            # This is the fast solution, using item access on the ht.dndarray level is way slower
             for j in range(i):
                 vi_loc = V._DNDarray__array[j, :]
                 a = torch.dot(vr._DNDarray__array, vi_loc)

From 8f5163e614e7dec890d59f593188d5671ce4e9e5 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Wed, 1 Apr 2020 14:05:32 +0200
Subject: [PATCH 30/49] Finale changes from benchmarks

---
 heat/cluster/spectral.py | 222 +++++++++++++++------------------------
 heat/core/linalg.py      |  10 +-
 heat/spatial/distance.py |   3 -
 3 files changed, 90 insertions(+), 145 deletions(-)

diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 78a85d6467..8e38ade182 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -39,28 +39,31 @@ def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
         if (upper is None) and (lower is None):
             raise ValueError("Epsilon neighborhood requires upper or lower threshold to be defined")
         if upper is not None:
-            A = ht.int(S < upper)
-            A = A - ht.eye(S.shape, dtype=ht.int, split=S.split, device=S.device, comm=S.comm)
+            S = ht.int(S < upper)
         elif lower is not None:
-            A = ht.int(S > lower)
-            A = A - ht.eye(S.shape, dtype=ht.int, split=S.split, device=S.device, comm=S.comm)
+            S = ht.int(S > lower)
     elif mode == "fc":
-        A = S - ht.eye(S.shape, dtype=S.dtype, split=S.split, device=S.device, comm=S.comm)
+        pass 
     else:
         raise NotImplementedError(
             "Only eNeighborhood and fully-connected graphs supported at the moment."
         )
 
-    degree = ht.sum(A, axis=1)
+    # Subtract 1 for the self-connection of each vertex (more memory-efficient than subtracting Identity matrix from the Adjacency matrix to make diagonal = 0)
+    degree = ht.sum(S, axis=1)-1
 
-    D = ht.diag(degree)
-    L = D - A
     if norm:
-        deg = ht.sqrt(1.0 / ht.sum(A, axis=1))
-        D2 = ht.diag(deg)
+        degree.resplit_(axis=None)
+        temp = torch.ones(degree.shape, dtype=degree._DNDarray__array.dtype)
+        degree._DNDarray__array = torch.where(degree._DNDarray__array==0, temp, degree._DNDarray__array)
+        w = S/ht.sqrt(ht.expand_dims(degree, axis=1))
+        w = w / ht.sqrt(ht.expand_dims(degree, axis=0))
+        L = ht.eye(S.shape, dtype=S.dtype, split=S.split, device=S.device, comm=S.comm) - w
 
-        L = ht.matmul(D2, ht.matmul(L, D2))
+    else:
 
+        L = ht.diag(degree) - S
+  
     return L
 
 
@@ -118,38 +121,27 @@ def __init__(
         self.laplacian = laplacian
         self.epsilon = (threshold, boundary)
         if assign_labels == "kmeans":
-            self._cluster = ht.cluster.KMeans(init="kmeans++")
+            self._cluster = ht.cluster.KMeans(init='random', max_iter=30, tol=-1.0)
         else:
             raise NotImplementedError(
                 "Other Label Assignment Algorithms are currently not available"
             )
 
         # in-place properties
-        self._eigenvectors = None
-        self._eigenvalues = None
+        self._components = None
+        self._cluster_centers = None
         self._labels = None
-        self._laplacian = None
-        self._similarity = None
+
 
     @property
-    def eigenvectors_(self):
+    def components_(self):
         """
         Returns
         -------
         ht.DNDarray, shape = (n_samples, n_lanzcos):
             Eigenvectors of the graph Laplacian
         """
-        return self._eigenvectors
-
-    @property
-    def eigenvalues_(self):
-        """
-        Returns
-        -------
-        ht.DNDarray, shape = (n_lanzcos):
-            Eigenvalues of the graph Laplacian
-        """
-        return self._eigenvalues
+        return self._components
 
     @property
     def labels_(self):
@@ -161,155 +153,74 @@ def labels_(self):
         """
         return self._labels
 
-    @property
-    def laplacian_(self):
-        """
-        Returns
-        -------
-        ht.DNDarray:
-            Graph Laplcacian of the fitted dataset
-        """
-        return self._laplacian
 
-    @property
-    def similarity_(self):
-        """
-        Returns
-        -------
-        ht.DNDarray:
-            The similarity matrix of the fitted dataset
-        """
-        return self._similarity
 
-    def _calc_adjacency(self, X):
+    def fit(self, X):
         """
-        Internal utility function for calculating of the similarity matrix according to the configuration set in __init__
+        Computes the low-dim representation by calculation of eigenspectrum (eigenvalues and eigenvectors) of the graph laplacian from the similarity matrix and fits the eigenvectors that correspond to the k lowest eigenvalues with a seperate clustering algorithm (currently only kemans is supported)
+        Similarity metrics for adjacency calculations are supported via spatial.distance. The eigenvalues and eigenvectors are computed by reducing the Laplacian via lanczos iterations and using the torch eigenvalue solver on this smaller matrix. If other eigenvalue decompostion methods are supported, this will be expanded.
+
 
         Parameters
         ----------
-            X :  ht.DNDarray, shape=(n_samples, n_features)
-
-        Returns
-        -------
-            S : ht.DNDarray, shape = (n_samples, n_samples)
-            The similarity matrix
-
+        X : ht.DNDarray, shape=(n_samples, n_features)
+            Training instances to cluster.
         """
+        # input sanitation
+        if not isinstance(X, ht.DNDarray):
+            raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
+        if X.split is not None and X.split != 0:
+            raise NotImplementedError("Not implemented for other splitting-axes")
+        # 1. Calculation of Adjacency Matrix
         if self.metric == "rbf":
             sig = math.sqrt(1 / (2 * self.gamma))
-            s = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=False)
+            S  = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=True)
 
         elif self.metric == "euclidean":
-            s = ht.spatial.cdist(X)
+            S = ht.spatial.cdist(X)
         else:
             raise NotImplementedError("Other kernels currently not implemented")
-        return s
-
-    def _calc_laplacian(self, S):
-        """
-        Internal utility function for calculating of the graph Laplacian according to the configuration set in __init__
-
-        Parameters
-        ----------
-        S : ht.DNDarray
-            The similarity matrix
-
-        Returns
-        -------
-        ht.DNDarray
-
-        """
 
+        # 2. Calculation of Laplacian
         if self.laplacian == "eNeighborhood":
             if self.epsilon[1] == "upper":
-                return laplacian(S, norm=self.normalize, mode="eNeighbour", upper=self.epsilon[0])
+                L = laplacian(S, norm=self.normalize, mode="eNeighbour", upper=self.epsilon[0])
             elif self.epsilon[1] == "lower":
-                return laplacian(S, norm=self.normalize, mode="eNeighbour", lower=self.epsilon[0])
+                L = laplacian(S, norm=self.normalize, mode="eNeighbour", lower=self.epsilon[0])
             else:
                 raise ValueError(
                     "Boundary needs to be 'upper' or 'lower' and threshold needs to be set, if laplacian = eNeighborhood"
                 )
 
         elif self.laplacian == "fully_connected":
-            return laplacian(self._similarity, norm=self.normalize, mode="fc")
+            L = laplacian(S, norm=self.normalize, mode="fc")
         else:
             raise NotImplementedError("Other approaches currently not implemented")
 
-    def _calculate_eigendecomposition(self, L):
-        """
-        Internal utility function for calculating eigenvalues and eigenvectors of the graph Laplacian using the Lanczos algorithm
-
-        Parameters
-        ----------
-        L : ht.DNDarray
-            The graph Laplacian matrix
-
-        Returns
-        -------
-        eigenvalues : ht.DNDarray, shape = (n_lanczos,)
-        eigenvectors: ht.DNDarray, shape = (n_samples, n_lanczos)
+        # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
+        v0 = ht.ones(( L.shape[0],), dtype=L.dtype, split=L.split, device=L.device) / math.sqrt(L.shape[0])
 
-        """
-        # vr = ht.random.rand(L.shape[0], split=L.split, dtype=ht.float64)
-        # v0 = vr / ht.norm(vr)
-        v0 = ht.ones((L.shape[0],), dtype=L.dtype, split=L.split, device=L.device) / math.sqrt(L.shape[0])
 
         V, T = ht.lanczos(L, self.n_lanczos, v0)
         # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
         eval, evec = torch.eig(T._DNDarray__array, eigenvectors=True)
         # If x is an Eigenvector of T, then y = V@x is the corresponding Eigenvector of L
-        eigenvalues, idx = torch.sort(eval[:, 0], dim=0)
-        eigenvalues = ht.array(eigenvalues)
+        eval, idx = torch.sort(eval[:, 0], dim=0)
+        eigenvalues = ht.array(eval)
         eigenvectors = ht.matmul(V, ht.array(evec))[:, idx]
 
-        return eigenvalues, eigenvectors
-
-    def fit(self, X):
-        """
-        Computes the low-dim representation by calculation of eigenspectrum (eigenvalues and eigenvectors) of the graph laplacian from the similarity matrix and fits the eigenvectors that correspond to the k lowest eigenvalues with a seperate clustering algorithm (currently only kemans is supported)
-        Similarity metrics for adjacency calculations are supported via spatial.distance. The eigenvalues and eigenvectors are computed by reducing the Laplacian via lanczos iterations and using the torch eigenvalue solver on this smaller matrix. If other eigenvalue decompostion methods are supported, this will be expanded.
-
-
-        Parameters
-        ----------
-        X : ht.DNDarray, shape=(n_samples, n_features)
-            Training instances to cluster.
-        """
-        # input sanitation
-        if not isinstance(X, ht.DNDarray):
-            raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
-        if X.split is not None and X.split != 0:
-            raise NotImplementedError("Not implemented for other splitting-axes")
-        # 1. Calculation of Adjacency Matrix
-        if X.comm.rank == 0:
-            start = time.perf_counter()
-        self._similarity = self._calc_adjacency(X)
-        if X.comm.rank == 0:
-            stop = time.perf_counter()
-            print("Similarity Calculated in : {}s".format(stop - start))
-            start = time.perf_counter()
-        # 2. Calculation of Laplacian
-        self._laplacian = self._calc_laplacian(self._similarity)
-        if X.comm.rank == 0:
-            stop = time.perf_counter()
-            print("Laplacian Calculated in : {}s".format(stop - start))
-            start = time.perf_counter()
-        # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
-        self._eigenvalues, self._eigenvectors = self._calculate_eigendecomposition(self._laplacian)
-        if X.comm.rank == 0:
-            stop = time.perf_counter()
-            print("Eigenvalues Calculated in : {}s".format(stop - start))
         # 5. Find the spectral gap, if number of clusters is not defined from the outside
         if self.n_clusters is None:
-            temp = np.diff(self._eigenvalues.numpy())
+            temp = np.diff(eigenvalues.numpy())
             self.n_clusters = np.where(temp == temp.max())[0][0] + 1
-        components = self._eigenvectors[:, : self.n_clusters].copy()
+        components = eigenvectors[:, : self.n_clusters].copy()
 
         params = self._cluster.get_params()
         params["n_clusters"] = self.n_clusters
         self._cluster.set_params(**params)
         self._cluster.fit(components)
         self._labels = self._cluster.labels_
+        self._cluster_centers = self._cluster.cluster_centers_
 
         return self
 
@@ -331,19 +242,52 @@ def predict(self, X):
         labels : ht.DNDarray, shape = [n_samples,]
             Index of the cluster each sample belongs to.
         """
+        # input sanitation
         if not isinstance(X, ht.DNDarray):
             raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
-
+        if X.split is not None and X.split != 0:
+            raise NotImplementedError("Not implemented for other splitting-axes")
         # 1. Calculation of Adjacency Matrix
-        S = self._calc_adjacency(X)
+        if self.metric == "rbf":
+            sig = math.sqrt(1 / (2 * self.gamma))
+            S  = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=True)
+
+        elif self.metric == "euclidean":
+            S = ht.spatial.cdist(X)
+        else:
+            raise NotImplementedError("Other kernels currently not implemented")
 
         # 2. Calculation of Laplacian
-        L = self._calc_laplacian(S)
+        if self.laplacian == "eNeighborhood":
+            if self.epsilon[1] == "upper":
+                L = laplacian(S, norm=self.normalize, mode="eNeighbour", upper=self.epsilon[0])
+            elif self.epsilon[1] == "lower":
+                L = laplacian(S, norm=self.normalize, mode="eNeighbour", lower=self.epsilon[0])
+            else:
+                raise ValueError(
+                    "Boundary needs to be 'upper' or 'lower' and threshold needs to be set, if laplacian = eNeighborhood"
+                )
+
+        elif self.laplacian == "fully_connected":
+            L = laplacian(self._similarity, norm=self.normalize, mode="fc")
+        else:
+            raise NotImplementedError("Other approaches currently not implemented")
 
         # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
-        eval, evec = self._calculate_eigendecomposition(L)
+        v0 = ht.ones(( L.shape[0],), dtype=L.dtype, split=L.split, device=L.device) / math.sqrt(L.shape[0])
+
+
+        V, T = ht.lanczos(L, self.n_lanczos, v0)
+        # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
+        eval, evec = torch.eig(T._DNDarray__array, eigenvectors=True)
+        # If x is an Eigenvector of T, then y = V@x is the corresponding Eigenvector of L
+        eval, idx = torch.sort(eval[:, 0], dim=0)
+        eigenvalues = ht.array(eval)
+        eigenvectors = ht.matmul(V, ht.array(evec))[:, idx]
+
+        components = eigenvectors[:, : self.n_clusters].copy()
 
-        return self._cluster.predict(evec[:, : self.n_clusters].copy())
+        return self._cluster.predict(components)
 
     def fit_predict(self, X):
         """
diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index 6d3709eeb2..171e21427b 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -178,10 +178,9 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         raise TypeError("Input Matrix A needs to be symmetric.")
     T = factories.zeros((m, m))
     if A.split == 0:
-        #V = factories.ones((n, m), split=0, dtype=A.dtype, device=A.device)
+        #This is done for better memory access in the reorthogonalization Gram-Schmidt algorithm
         V = factories.ones((m, n), split=1, dtype=A.dtype, device=A.device)
     else:
-        #V = factories.ones((n, m), dtype=A.dtype, device=A.device)
         V = factories.ones((m, n), split=None, dtype=A.dtype, device=A.device)
 
     if v0 is None:
@@ -203,7 +202,12 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
             vr = random.rand(n, dtype=A.dtype)
             # orthogonalize v_r with respect to all vectors v[i]
             for j in range(i):
-                vr = vr - projection(vr, V[j, :])
+                vi_loc = V._DNDarray__array[j, :]
+                a = torch.dot(vr._DNDarray__array, vi_loc)
+                b = torch.dot(vi_loc, vi_loc)
+                A.comm.Allreduce(MPI.IN_PLACE, a, MPI.SUM)
+                A.comm.Allreduce(MPI.IN_PLACE, b, MPI.SUM)
+                vr._DNDarray__array = vr._DNDarray__array - a/b*vi_loc
             # normalize v_r to Euclidian norm 1 and set as ith vector v
             vi = vr / norm(vr)
         else:
diff --git a/heat/spatial/distance.py b/heat/spatial/distance.py
index 7c452fe845..ad6ed89066 100644
--- a/heat/spatial/distance.py
+++ b/heat/spatial/distance.py
@@ -217,7 +217,6 @@ def _dist(X, Y=None, metric=_euclidian):
                 col1 = displ[sender]
                 col2 = displ[sender + 1] if sender != size - 1 else K
                 columns = (col1, col2)
-
                 # All but the first iter processes are receiving, then sending
                 if (rank // iter) != 0:
                     stat = MPI.Status()
@@ -227,7 +226,6 @@ def _dist(X, Y=None, metric=_euclidian):
                     comm.Recv(moving, source=sender, tag=iter)
                 # Sending to next Process
                 comm.Send(stationary, dest=receiver, tag=iter)
-
                 # The first iter processes can now receive after sending
                 if (rank // iter) == 0:
                     stat = MPI.Status()
@@ -262,7 +260,6 @@ def _dist(X, Y=None, metric=_euclidian):
             if (size + 1) % 2 != 0:  # we need one mor iteration for the first n/2 processes
                 receiver = (rank + num_iter) % size
                 sender = (rank - num_iter) % size
-
                 # Case 1: only receiving
                 if rank < (size // 2):
                     stat = MPI.Status()

From 0dec50d36654f10d480d86d2871e21f73ca84365 Mon Sep 17 00:00:00 2001
From: coquelin77 <daniel.coquelin@gmail.com>
Date: Wed, 1 Apr 2020 16:35:24 +0200
Subject: [PATCH 31/49] prep for master merge

---
 heat/core/linalg.py | 242 +-------------------------------------------
 1 file changed, 1 insertion(+), 241 deletions(-)

diff --git a/heat/core/linalg.py b/heat/core/linalg.py
index 86b1b5ebb8..21f2e694ae 100644
--- a/heat/core/linalg.py
+++ b/heat/core/linalg.py
@@ -12,72 +12,7 @@
 from . import tiling
 from . import types
 
-__all__ = ["cg", "dot", "lanczos", "matmul", "norm", "projection", "transpose", "tril", "triu"]
-
-
-def cg(A, b, x0, out=None):
-    """
-    Conjugate gradients method for solving a system of linear equations Ax = b
-
-    Parameters
-    ----------
-    A : ht.DNDarray
-        2D symmetric, positive definite Matrix
-    b : ht.DNDarray
-        1D vector
-    x0 : ht.DNDarray
-        Arbitrary 1D starting vector
-
-    Returns
-    -------
-    ht.DNDarray
-        Returns the solution x of the system of linear equations. If out is given, it is returned
-    """
-
-    if (
-        not isinstance(A, dndarray.DNDarray)
-        or not isinstance(b, dndarray.DNDarray)
-        or not isinstance(x0, dndarray.DNDarray)
-    ):
-        raise TypeError(
-            "A, b and x0 need to be of type ht.dndarra, but were {}, {}, {}".format(
-                type(A), type(b), type(x0)
-            )
-        )
-
-    if not (A.numdims == 2):
-        raise RuntimeError("A needs to be a 2D matrix")
-    if not (b.numdims == 1):
-        raise RuntimeError("b needs to be a 1D vector")
-    if not (x0.numdims == 1):
-        raise RuntimeError("c needs to be a 1D vector")
-
-    r = b - matmul(A, x0)
-    p = r
-    print(A.shape, p.shape)
-    rsold = matmul(r, r)
-    x = x0
-
-    for i in range(len(b)):
-        Ap = matmul(A, p)
-        alpha = rsold / matmul(p, Ap)
-        x = x + (alpha * p)
-        r = r - (alpha * Ap)
-        rsnew = matmul(r, r)
-        if math.sqrt(rsnew) < 1e-10:
-            print("Residual r = {} reaches tolerance in it = {}".format(math.sqrt(rsnew), i))
-            if out is not None:
-                out = x
-                return out
-            return x
-
-        p = r + ((rsnew / rsold) * p)
-        rsold = rsnew
-
-    if out is not None:
-        out = x
-        return out
-    return x
+__all__ = ["dot", "matmul", "transpose", "tril", "triu"]
 
 
 def dot(a, b, out=None):
@@ -147,113 +82,6 @@ def dot(a, b, out=None):
         raise NotImplementedError("ht.dot not implemented for N-D dot M-D arrays")
 
 
-def lanczos(A, m, v0=None, V_out=None, T_out=None):
-    """
-    Lanczos algorithm for iterative approximation of the solution to the eigenvalue problem,  an adaptation of power methods to find the m "most useful" (tending towards extreme highest/lowest) eigenvalues and eigenvectors of an n×n Hermitian matrix, where often m<<n
-    Parameters
-    ----------
-    A : ht.DNDarray
-        2D symmetric, positive definite Matrix
-    m : int
-        number of Lanczos iterations
-    v0 : ht.DNDarray (optional)
-        1D starting vector of euclidian norm 1
-
-    Returns
-    -------
-    V ht.DNDarray
-        Matrix of size nxm, with orthonormal columns, that span the Krylow subspace. If out is given, it is returned
-    T ht.DNDarray
-        Tridiagonal matrix of size mxm, with coefficients alpha_1,...alpha_n on the diagonal and coefficients beta_1,...,beta_n-1 on the side-diagonals. If out is given, it is returned
-
-    """
-    if not isinstance(A, dndarray.DNDarray):
-        raise TypeError("A needs to be of type ht.dndarra, but was {}".format(type(A)))
-
-    if not (A.numdims == 2):
-        raise RuntimeError("A needs to be a 2D matrix")
-    if not isinstance(m, (int, float)):
-        raise TypeError("m must be eiter int or float, but was {}".format(type(m)))
-
-    n, column = A.shape
-    if n != column:
-        raise TypeError("Input Matrix A needs to be symmetric.")
-    T = factories.zeros((m, m))
-    if A.split == 0:
-        #This is done for better memory access in the reorthogonalization Gram-Schmidt algorithm
-        V = factories.ones((m, n), split=1, dtype=A.dtype, device=A.device)
-    else:
-        V = factories.ones((m, n), split=None, dtype=A.dtype, device=A.device)
-
-    if v0 is None:
-        vr = random.rand(n)
-        v0 = vr / norm(vr)
-
-    # # 0th iteration
-    # # vector v0 has euklidian norm = 1
-    w = matmul(A, v0)
-    alpha = dot(w, v0)
-    w = w - alpha * v0
-    T[0, 0] = alpha
-    V[0, :] = v0
-    for i in range(1, int(m)):
-        beta = norm(w)
-        if abs(beta) < 1e-10:
-            print("Lanczos breakdown in iteration {}".format(i))
-            # Lanczos Breakdown, pick a random vector to continue
-            vr = random.rand(n, dtype=A.dtype)
-            # orthogonalize v_r with respect to all vectors v[i]
-            for j in range(i):
-                vi_loc = V._DNDarray__array[j, :]
-                a = torch.dot(vr._DNDarray__array, vi_loc)
-                b = torch.dot(vi_loc, vi_loc)
-                A.comm.Allreduce(MPI.IN_PLACE, a, MPI.SUM)
-                A.comm.Allreduce(MPI.IN_PLACE, b, MPI.SUM)
-                vr._DNDarray__array = vr._DNDarray__array - a/b*vi_loc
-            # normalize v_r to Euclidian norm 1 and set as ith vector v
-            vi = vr / norm(vr)
-        else:
-            vr = w
-
-            # Reorthogonalization
-            # ToDo: Rethink this; mask torch calls, See issue #494
-            # This is the fast solution, using item access on the ht.dndarray level is way slower
-            for j in range(i):
-                vi_loc = V._DNDarray__array[j, :]
-                a = torch.dot(vr._DNDarray__array, vi_loc)
-                b = torch.dot(vi_loc, vi_loc)
-                A.comm.Allreduce(MPI.IN_PLACE, a, MPI.SUM)
-                A.comm.Allreduce(MPI.IN_PLACE, b, MPI.SUM)
-                vr._DNDarray__array = vr._DNDarray__array - a/b*vi_loc
-
-            vi = vr / norm(vr)
-
-
-        w = matmul(A, vi)
-        alpha = dot(w, vi)
-        w = w - alpha * vi - beta * V[i - 1, :]
-
-        T[i - 1, i] = beta
-        T[i, i - 1] = beta
-        T[i, i] = alpha
-        V[i, :] = vi
-
-    if V.split is not None:
-        V.resplit_(axis=None)
-    V = V.transpose()
-    if T_out is not None:
-        T_out = T
-        if V_out is not None:
-            V_out = V
-            return V_out, T_out
-        return V, T_out
-    elif V_out is not None:
-        V_out = V
-        return V_out, T
-
-    return V, T
-
-
 def matmul(a, b, allow_resplit=False):
     """
     Matrix multiplication of two DNDarrays
@@ -758,22 +586,6 @@ def matmul(a, b, allow_resplit=False):
                         c._DNDarray__array[: a_node_rem_s0.shape[0]] += a_node_rem_s0 @ b_rem
 
                     del b_lp_data[pr]
-<<<<<<< HEAD
-            c = (
-                c
-                if not vector_flag
-                else factories.array(c._DNDarray__array.squeeze(), is_split=0, device=a.device)
-            )
-=======
-
-            if vector_flag:
-                c_loc = c._DNDarray__array.squeeze()
-                if c_loc.nelement() == 1:
-                    c = torch.tensor(c_loc, device=c._DNDarray__array.device)
-                c = factories.array(c_loc, is_split=0, device=a.device)
-
->>>>>>> master
-            return c
 
         elif split_1_flag:
             # for this case, a is sent to b
@@ -969,57 +781,6 @@ def matmul(a, b, allow_resplit=False):
             return c
 
 
-<<<<<<< HEAD
-def norm(a):
-    """
-    Frobenius norm of vector a
-
-    Parameters
-    ----------
-    a : ht.DNDarray
-
-    Returns
-    -------
-    float
-        Returns the vector norm (lenght) of a
-    """
-    if not isinstance(a, dndarray.DNDarray):
-        raise TypeError("a must be of type ht.DNDarray, but was {}".format(type(a)))
-
-    d = a ** 2
-
-    for i in range(len(a.shape) - 1, -1, -1):
-        d = arithmetics.sum(d, axis=i)
-
-    return math.sqrt(d)
-
-
-def projection(a, b):
-    """
-    Projection of vector a onto vector b
-
-    Parameters
-    ----------
-    a : ht.DNDarray (1D)
-    b : ht.DNDarray (1D)
-
-    Returns
-    -------
-    ht.DNDarray
-        Returns the vector projection of b in the direction of a
-    """
-    if not isinstance(a, dndarray.DNDarray) or not isinstance(b, dndarray.DNDarray):
-        raise TypeError(
-            "a, b must be of type ht.DNDarray, but were {}, {}".format(type(a), type(b))
-        )
-
-    if len(a.shape) != 1 or len(b.shape) != 1:
-        raise RuntimeError(
-            "a, b must be vectors of length 1, but were {}, {}".format(len(a.shape), len(b.shape))
-        )
-
-    return (dot(a, b) / dot(b, b)) * b
-=======
 @torch.jit.script
 def __mm_c_block_setter(
     b_proc, a_proc, a_data, b_data, b_block_map, a_block_map, b_split, a_split, mB, kB, nB, c
@@ -2063,7 +1824,6 @@ def __qr_split1_loop(dcol, a, q0, calc_q):
                 q0.tiles.local_set(key=(qrow, dcol), value=torch.matmul(q0_row, qloop_col_left))
                 q0.tiles.local_set(key=(qrow, row), value=torch.matmul(q0_row, qloop_col_right))
         del ql
->>>>>>> master
 
 
 def transpose(a, axes=None):

From 9ce62189d40689f0ca6c23f8e79da8b0cdb7418b Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Wed, 1 Apr 2020 17:16:37 +0200
Subject: [PATCH 32/49] Running hooks

---
 heat/cluster/spectral.py | 32 ++++++++++++++++----------------
 heat/spatial/distance.py |  5 +++--
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 8e38ade182..81c1c7ec85 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -43,27 +43,29 @@ def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
         elif lower is not None:
             S = ht.int(S > lower)
     elif mode == "fc":
-        pass 
+        pass
     else:
         raise NotImplementedError(
             "Only eNeighborhood and fully-connected graphs supported at the moment."
         )
 
     # Subtract 1 for the self-connection of each vertex (more memory-efficient than subtracting Identity matrix from the Adjacency matrix to make diagonal = 0)
-    degree = ht.sum(S, axis=1)-1
+    degree = ht.sum(S, axis=1) - 1
 
     if norm:
         degree.resplit_(axis=None)
         temp = torch.ones(degree.shape, dtype=degree._DNDarray__array.dtype)
-        degree._DNDarray__array = torch.where(degree._DNDarray__array==0, temp, degree._DNDarray__array)
-        w = S/ht.sqrt(ht.expand_dims(degree, axis=1))
+        degree._DNDarray__array = torch.where(
+            degree._DNDarray__array == 0, temp, degree._DNDarray__array
+        )
+        w = S / ht.sqrt(ht.expand_dims(degree, axis=1))
         w = w / ht.sqrt(ht.expand_dims(degree, axis=0))
         L = ht.eye(S.shape, dtype=S.dtype, split=S.split, device=S.device, comm=S.comm) - w
 
     else:
 
         L = ht.diag(degree) - S
-  
+
     return L
 
 
@@ -121,7 +123,7 @@ def __init__(
         self.laplacian = laplacian
         self.epsilon = (threshold, boundary)
         if assign_labels == "kmeans":
-            self._cluster = ht.cluster.KMeans(init='random', max_iter=30, tol=-1.0)
+            self._cluster = ht.cluster.KMeans(init="random", max_iter=30, tol=-1.0)
         else:
             raise NotImplementedError(
                 "Other Label Assignment Algorithms are currently not available"
@@ -132,7 +134,6 @@ def __init__(
         self._cluster_centers = None
         self._labels = None
 
-
     @property
     def components_(self):
         """
@@ -153,8 +154,6 @@ def labels_(self):
         """
         return self._labels
 
-
-
     def fit(self, X):
         """
         Computes the low-dim representation by calculation of eigenspectrum (eigenvalues and eigenvectors) of the graph laplacian from the similarity matrix and fits the eigenvectors that correspond to the k lowest eigenvalues with a seperate clustering algorithm (currently only kemans is supported)
@@ -174,7 +173,7 @@ def fit(self, X):
         # 1. Calculation of Adjacency Matrix
         if self.metric == "rbf":
             sig = math.sqrt(1 / (2 * self.gamma))
-            S  = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=True)
+            S = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=True)
 
         elif self.metric == "euclidean":
             S = ht.spatial.cdist(X)
@@ -198,8 +197,9 @@ def fit(self, X):
             raise NotImplementedError("Other approaches currently not implemented")
 
         # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
-        v0 = ht.ones(( L.shape[0],), dtype=L.dtype, split=L.split, device=L.device) / math.sqrt(L.shape[0])
-
+        v0 = ht.ones((L.shape[0],), dtype=L.dtype, split=L.split, device=L.device) / math.sqrt(
+            L.shape[0]
+        )
 
         V, T = ht.lanczos(L, self.n_lanczos, v0)
         # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
@@ -250,7 +250,7 @@ def predict(self, X):
         # 1. Calculation of Adjacency Matrix
         if self.metric == "rbf":
             sig = math.sqrt(1 / (2 * self.gamma))
-            S  = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=True)
+            S = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=True)
 
         elif self.metric == "euclidean":
             S = ht.spatial.cdist(X)
@@ -274,15 +274,15 @@ def predict(self, X):
             raise NotImplementedError("Other approaches currently not implemented")
 
         # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
-        v0 = ht.ones(( L.shape[0],), dtype=L.dtype, split=L.split, device=L.device) / math.sqrt(L.shape[0])
-
+        v0 = ht.ones((L.shape[0],), dtype=L.dtype, split=L.split, device=L.device) / math.sqrt(
+            L.shape[0]
+        )
 
         V, T = ht.lanczos(L, self.n_lanczos, v0)
         # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
         eval, evec = torch.eig(T._DNDarray__array, eigenvectors=True)
         # If x is an Eigenvector of T, then y = V@x is the corresponding Eigenvector of L
         eval, idx = torch.sort(eval[:, 0], dim=0)
-        eigenvalues = ht.array(eval)
         eigenvectors = ht.matmul(V, ht.array(evec))[:, idx]
 
         components = eigenvectors[:, : self.n_clusters].copy()
diff --git a/heat/spatial/distance.py b/heat/spatial/distance.py
index ad6ed89066..25de1705ca 100644
--- a/heat/spatial/distance.py
+++ b/heat/spatial/distance.py
@@ -256,7 +256,6 @@ def _dist(X, Y=None, metric=_euclidian):
                     comm.Recv(symmetric, source=receiver, tag=iter)
                 d._DNDarray__array[:, scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)
 
-
             if (size + 1) % 2 != 0:  # we need one mor iteration for the first n/2 processes
                 receiver = (rank + num_iter) % size
                 sender = (rank - num_iter) % size
@@ -287,7 +286,9 @@ def _dist(X, Y=None, metric=_euclidian):
                     scol2 = displ[receiver + 1] if receiver != size - 1 else K
                     scolumns = (scol1, scol2)
                     symmetric = torch.zeros(
-                        (scolumns[1] - scolumns[0], rows[1] - rows[0]), dtype=torch_type, device=X.device.torch_device
+                        (scolumns[1] - scolumns[0], rows[1] - rows[0]),
+                        dtype=torch_type,
+                        device=X.device.torch_device,
                     )
                     comm.Recv(symmetric, source=receiver, tag=num_iter)
                     d._DNDarray__array[:, scolumns[0] : scolumns[1]] = symmetric.transpose(0, 1)

From e1b44cd7460567263a67759b4d99b4c4a1cb7ba6 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Wed, 1 Apr 2020 19:06:27 +0200
Subject: [PATCH 33/49] Adjusted Tests to new Spectral Design

---
 heat/cluster/tests/test_spectral.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/heat/cluster/tests/test_spectral.py b/heat/cluster/tests/test_spectral.py
index 00deff5032..80ef1ba8f6 100644
--- a/heat/cluster/tests/test_spectral.py
+++ b/heat/cluster/tests/test_spectral.py
@@ -84,12 +84,7 @@ def test_fit_iris(self):
         spectral.fit(iris)
 
         self.assertIsInstance(spectral.labels_, ht.DNDarray)
-        self.assertIsInstance(spectral.similarity_, ht.DNDarray)
-        self.assertEqual(spectral.similarity_.shape, (iris.shape[0], iris.shape[0]))
-        self.assertIsInstance(spectral.eigenvalues_, ht.DNDarray)
-        self.assertEqual(spectral.eigenvalues_.shape, (m,))
-        self.assertIsInstance(spectral.eigenvectors_, ht.DNDarray)
-        self.assertEqual(spectral.eigenvectors_.shape, (iris.shape[0], m))
+        self.assertIsInstance(spectral._components, ht.DNDarray)
 
         with self.assertRaises(NotImplementedError):
             spectral = ht.cluster.Spectral(

From c69da434e5e8b6fe56e870f9f03b7b5d0f37e0b5 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Thu, 2 Apr 2020 12:19:09 +0200
Subject: [PATCH 34/49] Re-design Laplacian API for intrinsic similarity matrix
 calculation

---
 heat/cluster/spectral.py            | 109 +++++++++++++++-------------
 heat/cluster/tests/test_spectral.py |   1 -
 2 files changed, 58 insertions(+), 52 deletions(-)

diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 81c1c7ec85..07c2a72862 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -5,14 +5,18 @@
 import time
 
 
-def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
+def laplacian(X, similarity, gamma=1.0, norm=True, mode="fc", upper=None, lower=None):
     """
-    Calculate the graph Laplacian from a similarity matrix
+    Construct the graph Laplacian from a dataset
 
     Parameters
     ----------
-    S : ht.DNDarray
-        quadrdatic, positive semidefinite similarity matrix, encoding similarity metrices s_ij between data samples i and j
+    X : ht.DNDarray
+        Dataset of dimensions (samples x features)
+    similarity:
+        Similarity metrices s_ij between data samples i and j. Can be (currently) 'rbf' or 'euclidean'. For 'rbf', the kernel parameter gamma has to be set
+    gamma:
+        Similarity kernel Parameter. Ignored for Similarity="euclidean"
     norm : bool
         Whether to calculate the normalized graph Laplacian
         The unnormalized graph Laplacian is defined as L = D - A, where where D is the diagonal degree matrix and A the adjacency matrix
@@ -30,6 +34,13 @@ def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
     L : ht.DNDarray
 
     """
+    if similarity == "rbf":
+        sig = math.sqrt(1 / (2 * gamma))
+        S = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=True)
+    elif similarity == "euclidean":
+        S = ht.spatial.cdist(X, quadratic_expansion=True)
+    else:
+        raise NotImplementedError("Other kernels currently not implemented")
 
     if mode == "eNeighbour":
         if (upper is not None) and (lower is not None):
@@ -43,14 +54,12 @@ def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
         elif lower is not None:
             S = ht.int(S > lower)
     elif mode == "fc":
-        pass
+        S = S - ht.eye(S.shape)
     else:
         raise NotImplementedError(
             "Only eNeighborhood and fully-connected graphs supported at the moment."
         )
-
-    # Subtract 1 for the self-connection of each vertex (more memory-efficient than subtracting Identity matrix from the Adjacency matrix to make diagonal = 0)
-    degree = ht.sum(S, axis=1) - 1
+    degree = ht.sum(S, axis=1)
 
     if norm:
         degree.resplit_(axis=None)
@@ -63,7 +72,6 @@ def laplacian(S, norm=True, mode="fc", upper=None, lower=None):
         L = ht.eye(S.shape, dtype=S.dtype, split=S.split, device=S.device, comm=S.comm) - w
 
     else:
-
         L = ht.diag(degree) - S
 
     return L
@@ -130,20 +138,8 @@ def __init__(
             )
 
         # in-place properties
-        self._components = None
-        self._cluster_centers = None
         self._labels = None
 
-    @property
-    def components_(self):
-        """
-        Returns
-        -------
-        ht.DNDarray, shape = (n_samples, n_lanzcos):
-            Eigenvectors of the graph Laplacian
-        """
-        return self._components
-
     @property
     def labels_(self):
         """
@@ -159,40 +155,45 @@ def fit(self, X):
         Computes the low-dim representation by calculation of eigenspectrum (eigenvalues and eigenvectors) of the graph laplacian from the similarity matrix and fits the eigenvectors that correspond to the k lowest eigenvalues with a seperate clustering algorithm (currently only kemans is supported)
         Similarity metrics for adjacency calculations are supported via spatial.distance. The eigenvalues and eigenvectors are computed by reducing the Laplacian via lanczos iterations and using the torch eigenvalue solver on this smaller matrix. If other eigenvalue decompostion methods are supported, this will be expanded.
 
-
         Parameters
         ----------
         X : ht.DNDarray, shape=(n_samples, n_features)
             Training instances to cluster.
         """
-        # input sanitation
+        # 1. input sanitation
         if not isinstance(X, ht.DNDarray):
             raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
         if X.split is not None and X.split != 0:
             raise NotImplementedError("Not implemented for other splitting-axes")
-        # 1. Calculation of Adjacency Matrix
-        if self.metric == "rbf":
-            sig = math.sqrt(1 / (2 * self.gamma))
-            S = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=True)
-
-        elif self.metric == "euclidean":
-            S = ht.spatial.cdist(X)
-        else:
-            raise NotImplementedError("Other kernels currently not implemented")
-
-        # 2. Calculation of Laplacian
+        # 2. Construct Laplacian
         if self.laplacian == "eNeighborhood":
             if self.epsilon[1] == "upper":
-                L = laplacian(S, norm=self.normalize, mode="eNeighbour", upper=self.epsilon[0])
+                L = laplacian(
+                    X,
+                    similarity=self.metric,
+                    gamma=self.gamma,
+                    norm=self.normalize,
+                    mode="eNeighbour",
+                    upper=self.epsilon[0],
+                )
             elif self.epsilon[1] == "lower":
-                L = laplacian(S, norm=self.normalize, mode="eNeighbour", lower=self.epsilon[0])
+                L = laplacian(
+                    X,
+                    similarity=self.metric,
+                    gamma=self.gamma,
+                    norm=self.normalize,
+                    mode="eNeighbour",
+                    lower=self.epsilon[0],
+                )
             else:
                 raise ValueError(
                     "Boundary needs to be 'upper' or 'lower' and threshold needs to be set, if laplacian = eNeighborhood"
                 )
 
         elif self.laplacian == "fully_connected":
-            L = laplacian(S, norm=self.normalize, mode="fc")
+            L = laplacian(
+                X, similarity=self.metric, gamma=self.gamma, norm=self.normalize, mode="fc"
+            )
         else:
             raise NotImplementedError("Other approaches currently not implemented")
 
@@ -247,29 +248,35 @@ def predict(self, X):
             raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
         if X.split is not None and X.split != 0:
             raise NotImplementedError("Not implemented for other splitting-axes")
-        # 1. Calculation of Adjacency Matrix
-        if self.metric == "rbf":
-            sig = math.sqrt(1 / (2 * self.gamma))
-            S = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=True)
-
-        elif self.metric == "euclidean":
-            S = ht.spatial.cdist(X)
-        else:
-            raise NotImplementedError("Other kernels currently not implemented")
-
-        # 2. Calculation of Laplacian
+        # 2. Construct Laplacian
         if self.laplacian == "eNeighborhood":
             if self.epsilon[1] == "upper":
-                L = laplacian(S, norm=self.normalize, mode="eNeighbour", upper=self.epsilon[0])
+                L = laplacian(
+                    X,
+                    similarity=self.metric,
+                    gamma=self.gamma,
+                    norm=self.normalize,
+                    mode="eNeighbour",
+                    upper=self.epsilon[0],
+                )
             elif self.epsilon[1] == "lower":
-                L = laplacian(S, norm=self.normalize, mode="eNeighbour", lower=self.epsilon[0])
+                L = laplacian(
+                    X,
+                    similarity=self.metric,
+                    gamma=self.gamma,
+                    norm=self.normalize,
+                    mode="eNeighbour",
+                    lower=self.epsilon[0],
+                )
             else:
                 raise ValueError(
                     "Boundary needs to be 'upper' or 'lower' and threshold needs to be set, if laplacian = eNeighborhood"
                 )
 
         elif self.laplacian == "fully_connected":
-            L = laplacian(self._similarity, norm=self.normalize, mode="fc")
+            L = laplacian(
+                X, similarity=self.metric, gamma=self.gamma, norm=self.normalize, mode="fc"
+            )
         else:
             raise NotImplementedError("Other approaches currently not implemented")
 
diff --git a/heat/cluster/tests/test_spectral.py b/heat/cluster/tests/test_spectral.py
index 80ef1ba8f6..2d56ba9e33 100644
--- a/heat/cluster/tests/test_spectral.py
+++ b/heat/cluster/tests/test_spectral.py
@@ -84,7 +84,6 @@ def test_fit_iris(self):
         spectral.fit(iris)
 
         self.assertIsInstance(spectral.labels_, ht.DNDarray)
-        self.assertIsInstance(spectral._components, ht.DNDarray)
 
         with self.assertRaises(NotImplementedError):
             spectral = ht.cluster.Spectral(

From cdb82aade93d6b82846e27645e4bcc79b0e612ba Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Thu, 2 Apr 2020 15:54:02 +0200
Subject: [PATCH 35/49] Implemented 'fill_diagonal' function for dndarrays

---
 heat/cluster/spectral.py         |  7 ++-
 heat/core/dndarray.py            | 47 ++++++++++++++++++-
 heat/core/tests/test_dndarray.py | 80 ++++++++++++++++++++++++++++++++
 3 files changed, 128 insertions(+), 6 deletions(-)

diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 07c2a72862..921db6ebe0 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -100,9 +100,8 @@ def __init__(
             Kernel coefficient sigma for 'rbf', ignored for affinity='euclidean'
         metric : string
             How to construct the similarity matrix.
-            ‘rbf’ : construct the similarity matrix using a radial basis function (RBF) kernel.
+            'rbf' : construct the similarity matrix using a radial basis function (RBF) kernel.
             'euclidean' : construct the similarity matrix as only euclidean distance
-            ‘precomputed’ : interpret X as a precomputed similarity matrix.
         laplacian : string
             How to calculate the graph laplacian (affinity)
             'fully_connected'
@@ -212,8 +211,8 @@ def fit(self, X):
 
         # 5. Find the spectral gap, if number of clusters is not defined from the outside
         if self.n_clusters is None:
-            temp = np.diff(eigenvalues.numpy())
-            self.n_clusters = np.where(temp == temp.max())[0][0] + 1
+            diff = eigenvalues[1:] - eigenvalues[:-1]
+            self.n_clusters = np.where(diff == diff.max())[0][0] + 1
         components = eigenvectors[:, : self.n_clusters].copy()
 
         params = self._cluster.get_params()
diff --git a/heat/core/dndarray.py b/heat/core/dndarray.py
index 2b1cbc97ff..96311193fa 100644
--- a/heat/core/dndarray.py
+++ b/heat/core/dndarray.py
@@ -1131,6 +1131,51 @@ def fabs(self, out=None):
         """
         return rounding.fabs(self, out)
 
+    def fill_diagonal(self, value):
+        """
+        Fill the main diagonal of a 2D dndarray . This function modifies the input tensor in-place, and returns the input tensor.
+
+        Parameters
+        ----------
+        value : float
+            The value to be placed in the dndarrays main diagonal
+
+        Returns
+        -------
+        out : ht.DNDarray
+            The modified input tensor with value along the diagonal
+
+        """
+        # Todo: make this 3D/nD
+        if len(self.shape) != 2:
+            raise ValueError("Only 2D tensors supported at the moment")
+
+        if self.split is not None and self.comm.is_distributed:
+            counts, displ, _ = self.comm.counts_displs_shape(self.shape, self.split)
+            k = min(self.shape[0], self.shape[1])
+            for p in range(self.comm.size):
+                if displ[p] > k:
+                    break
+                proc = p
+            if self.comm.rank <= proc:
+                indices = (
+                    displ[self.comm.rank],
+                    displ[self.comm.rank + 1] if (self.comm.rank + 1) != self.comm.size else k,
+                )
+                if self.split == 0:
+                    self._DNDarray__array[:, indices[0] : indices[1]] = self._DNDarray__array[
+                        :, indices[0] : indices[1]
+                    ].fill_diagonal_(value)
+                elif self.split == 1:
+                    self._DNDarray__array[indices[0] : indices[1], :] = self._DNDarray__array[
+                        indices[0] : indices[1], :
+                    ].fill_diagonal_(value)
+
+        else:
+            self._DNDarray__array = self._DNDarray__array.fill_diagonal_(value)
+
+        return self
+
     def __ge__(self, other):
         """
         Element-wise rich comparison of relation "greater than or equal" with values from second operand (scalar or
@@ -2508,8 +2553,6 @@ def resplit_(self, axis=None):
             self.__array = redistributed
             self.__split = axis
 
-        return self
-
     def __rfloordiv__(self, other):
         """
         Element-wise floor division (i.e. result is rounded int (floor))
diff --git a/heat/core/tests/test_dndarray.py b/heat/core/tests/test_dndarray.py
index 8a89359a7f..f14776c390 100644
--- a/heat/core/tests/test_dndarray.py
+++ b/heat/core/tests/test_dndarray.py
@@ -164,6 +164,86 @@ def test_complex_cast(self):
             with self.assertRaises(TypeError):
                 complex(ht.full((ht.MPI_WORLD.size,), 2, split=0, device=ht_device))
 
+    def test_fill_diagonal(self):
+        ref = ht.zeros(
+            (ht.MPI_WORLD.size * 2, ht.MPI_WORLD.size * 2),
+            dtype=ht.float32,
+            split=0,
+            device=ht_device,
+        )
+        a = ht.eye(ht.MPI_WORLD.size * 2, dtype=ht.float32, split=0, device=ht_device)
+        a.fill_diagonal(0)
+        self.assertTrue(ht.equal(a, ref))
+
+        ref = ht.zeros(
+            (ht.MPI_WORLD.size * 2, ht.MPI_WORLD.size * 2),
+            dtype=ht.int32,
+            split=0,
+            device=ht_device,
+        )
+        a = ht.eye(ht.MPI_WORLD.size * 2, dtype=ht.int32, split=0, device=ht_device)
+        a.fill_diagonal(0)
+        self.assertTrue(ht.equal(a, ref))
+
+        ref = ht.zeros(
+            (ht.MPI_WORLD.size * 2, ht.MPI_WORLD.size * 2),
+            dtype=ht.float32,
+            split=1,
+            device=ht_device,
+        )
+        a = ht.eye(ht.MPI_WORLD.size * 2, dtype=ht.float32, split=1, device=ht_device)
+        a.fill_diagonal(0)
+        self.assertTrue(ht.equal(a, ref))
+
+        ref = ht.zeros(
+            (ht.MPI_WORLD.size * 2, ht.MPI_WORLD.size * 3),
+            dtype=ht.float32,
+            split=0,
+            device=ht_device,
+        )
+        a = ht.eye(
+            (ht.MPI_WORLD.size * 2, ht.MPI_WORLD.size * 3),
+            dtype=ht.float32,
+            split=0,
+            device=ht_device,
+        )
+        a.fill_diagonal(0)
+        self.assertTrue(ht.equal(a, ref))
+
+        ref = ht.zeros(
+            (ht.MPI_WORLD.size * 2, ht.MPI_WORLD.size * 3),
+            dtype=ht.float32,
+            split=1,
+            device=ht_device,
+        )
+        a = ht.eye(
+            (ht.MPI_WORLD.size * 2, ht.MPI_WORLD.size * 3),
+            dtype=ht.float32,
+            split=1,
+            device=ht_device,
+        )
+        a.fill_diagonal(0)
+        self.assertTrue(ht.equal(a, ref))
+
+        ref = ht.zeros(
+            (ht.MPI_WORLD.size * 4, ht.MPI_WORLD.size * 2),
+            dtype=ht.float32,
+            split=0,
+            device=ht_device,
+        )
+        a = ht.eye(
+            (ht.MPI_WORLD.size * 4, ht.MPI_WORLD.size * 2),
+            dtype=ht.float32,
+            split=0,
+            device=ht_device,
+        )
+        a.fill_diagonal(0)
+        self.assertTrue(ht.equal(a, ref))
+
+        a = ht.ones((ht.MPI_WORLD.size * 2,), dtype=ht.float32, split=0, device=ht_device)
+        with self.assertRaises(ValueError):
+            a.fill_diagonal(0)
+
     def test_float_cast(self):
         # simple scalar tensor
         a = ht.ones(1, device=ht_device)

From 6de377e383846cbf7874a02e20d7542eedeac867 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Fri, 3 Apr 2020 19:06:14 +0200
Subject: [PATCH 36/49] Refactorization of linalg.solver and implementation of
 graph.laplacian module

---
 heat/__init__.py                           |   1 +
 heat/cluster/spectral.py                   | 220 +++++----------------
 heat/cluster/tests/test_spectral.py        |  57 +-----
 heat/core/linalg/__init__.py               |   2 +-
 heat/core/linalg/basics.py                 |  54 ++++-
 heat/core/linalg/{lanczos.py => solver.py} | 140 +++++--------
 heat/core/linalg/tests/test_basics.py      |  46 +++++
 heat/core/linalg/tests/test_lanczos.py     |  87 --------
 heat/core/linalg/tests/test_solver.py      |  41 ++++
 heat/graph/__init__.py                     |   1 +
 heat/graph/laplacian.py                    | 115 +++++++++++
 heat/graph/tests/test_laplacian.py         |  85 ++++++++
 12 files changed, 442 insertions(+), 407 deletions(-)
 rename heat/core/linalg/{lanczos.py => solver.py} (59%)
 delete mode 100644 heat/core/linalg/tests/test_lanczos.py
 create mode 100644 heat/core/linalg/tests/test_solver.py
 create mode 100644 heat/graph/__init__.py
 create mode 100644 heat/graph/laplacian.py
 create mode 100644 heat/graph/tests/test_laplacian.py

diff --git a/heat/__init__.py b/heat/__init__.py
index d6bb67c4ab..54769a61bc 100644
--- a/heat/__init__.py
+++ b/heat/__init__.py
@@ -1,5 +1,6 @@
 from . import core
 from . import cluster
+from . import graph
 from . import naive_bayes
 from . import regression
 from . import spatial
diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 921db6ebe0..550bb5862e 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -5,78 +5,6 @@
 import time
 
 
-def laplacian(X, similarity, gamma=1.0, norm=True, mode="fc", upper=None, lower=None):
-    """
-    Construct the graph Laplacian from a dataset
-
-    Parameters
-    ----------
-    X : ht.DNDarray
-        Dataset of dimensions (samples x features)
-    similarity:
-        Similarity metrices s_ij between data samples i and j. Can be (currently) 'rbf' or 'euclidean'. For 'rbf', the kernel parameter gamma has to be set
-    gamma:
-        Similarity kernel Parameter. Ignored for Similarity="euclidean"
-    norm : bool
-        Whether to calculate the normalized graph Laplacian
-        The unnormalized graph Laplacian is defined as L = D - A, where where D is the diagonal degree matrix and A the adjacency matrix
-        The normalized graph Laplacian is defined as L_norm = D^(-1/2) L D^(-1/2) = I - D^(-1/2) A D^(-1/2)
-    mode : "fc", "eNeighbour"
-        How to calculate adjacency from the similarity matrix
-        "fc" is fully-connected, so A = S
-        "eNeighbour" is the epsilon neighbourhood, with A_ji = 1 if S_ij >/< lower/upper else 0; for eNeighbour an upper or lower boundary needs to be set
-    upper : float
-        upper boundary for adjacency calculation, using A_ji = 1 if S_ij < upper else 0
-    lower : float
-        upper boundary for adjacency calculation, using A_ji = 1 if S_ij > lower else 0
-    Returns
-    -------
-    L : ht.DNDarray
-
-    """
-    if similarity == "rbf":
-        sig = math.sqrt(1 / (2 * gamma))
-        S = ht.spatial.rbf(X, sigma=sig, quadratic_expansion=True)
-    elif similarity == "euclidean":
-        S = ht.spatial.cdist(X, quadratic_expansion=True)
-    else:
-        raise NotImplementedError("Other kernels currently not implemented")
-
-    if mode == "eNeighbour":
-        if (upper is not None) and (lower is not None):
-            raise ValueError(
-                "Epsilon neighborhood with either upper or lower threshold, both is not supported"
-            )
-        if (upper is None) and (lower is None):
-            raise ValueError("Epsilon neighborhood requires upper or lower threshold to be defined")
-        if upper is not None:
-            S = ht.int(S < upper)
-        elif lower is not None:
-            S = ht.int(S > lower)
-    elif mode == "fc":
-        S = S - ht.eye(S.shape)
-    else:
-        raise NotImplementedError(
-            "Only eNeighborhood and fully-connected graphs supported at the moment."
-        )
-    degree = ht.sum(S, axis=1)
-
-    if norm:
-        degree.resplit_(axis=None)
-        temp = torch.ones(degree.shape, dtype=degree._DNDarray__array.dtype)
-        degree._DNDarray__array = torch.where(
-            degree._DNDarray__array == 0, temp, degree._DNDarray__array
-        )
-        w = S / ht.sqrt(ht.expand_dims(degree, axis=1))
-        w = w / ht.sqrt(ht.expand_dims(degree, axis=0))
-        L = ht.eye(S.shape, dtype=S.dtype, split=S.split, device=S.device, comm=S.comm) - w
-
-    else:
-        L = ht.diag(degree) - S
-
-    return L
-
-
 class Spectral:
     def __init__(
         self,
@@ -124,11 +52,27 @@ def __init__(
         """
         self.n_clusters = n_clusters
         self.n_lanczos = n_lanczos
-        self.metric = metric
-        self.gamma = gamma
-        self.normalize = normalize
-        self.laplacian = laplacian
-        self.epsilon = (threshold, boundary)
+        if metric == "rbf":
+            sig = math.sqrt(1 / (2 * gamma))
+            self.laplacian = ht.graph.Laplacian(
+                lambda x: ht.spatial.rbf(x, sigma=sig, quadratic_expansion=True),
+                definition="normalizes symmetric",
+                mode=laplacian,
+                threshold_key=boundary,
+                threshold_value=threshold,
+            )
+
+        elif metric == "euclidean":
+            self.laplacian = ht.graph.Laplacian(
+                lambda x: ht.spatial.cdist(x),
+                definition="normalizes symmetric",
+                mode=laplacian,
+                threshold_key=boundary,
+                threshold_value=threshold,
+            )
+        else:
+            raise NotImplementedError("Other kernels currently not supported")
+
         if assign_labels == "kmeans":
             self._cluster = ht.cluster.KMeans(init="random", max_iter=30, tol=-1.0)
         else:
@@ -149,11 +93,38 @@ def labels_(self):
         """
         return self._labels
 
+    def _spectral_embedding(self, X):
+        """
+        Helper function to embed the dataset X into the eigenvectors of the graph Laplacian matrix
+        Returns
+        -------
+        ht.DNDarray, shape=(m_lanczos):
+            Eigenvalues of the graph's Laplacian matrix.
+        ht.DNDarray, shape=(n, m_lanczos):
+            Eigenvectors of the graph's Laplacian matrix.
+        """
+        L = self.laplacian.construct(X)
+
+        # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
+        v0 = (
+            ht.ones((L.shape[0],), dtype=L.dtype, split=L.split, device=L.device)
+            / ht.sqrt(L.shape[0]).get_item()
+        )
+
+        V, T = ht.lanczos(L, self.n_lanczos, v0)
+        # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
+        eval, evec = torch.eig(T._DNDarray__array, eigenvectors=True)
+        # If x is an Eigenvector of T, then y = V@x is the corresponding Eigenvector of L
+        eigenvalues = ht.array(eval[:, 0])
+        eval, idx = ht.sort(eigenvalues)
+        eigenvectors = ht.matmul(V, ht.array(evec))[:, idx]
+
+        return eigenvalues, eigenvectors
+
     def fit(self, X):
         """
         Computes the low-dim representation by calculation of eigenspectrum (eigenvalues and eigenvectors) of the graph laplacian from the similarity matrix and fits the eigenvectors that correspond to the k lowest eigenvalues with a seperate clustering algorithm (currently only kemans is supported)
         Similarity metrics for adjacency calculations are supported via spatial.distance. The eigenvalues and eigenvectors are computed by reducing the Laplacian via lanczos iterations and using the torch eigenvalue solver on this smaller matrix. If other eigenvalue decompostion methods are supported, this will be expanded.
-
         Parameters
         ----------
         X : ht.DNDarray, shape=(n_samples, n_features)
@@ -164,55 +135,13 @@ def fit(self, X):
             raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
         if X.split is not None and X.split != 0:
             raise NotImplementedError("Not implemented for other splitting-axes")
-        # 2. Construct Laplacian
-        if self.laplacian == "eNeighborhood":
-            if self.epsilon[1] == "upper":
-                L = laplacian(
-                    X,
-                    similarity=self.metric,
-                    gamma=self.gamma,
-                    norm=self.normalize,
-                    mode="eNeighbour",
-                    upper=self.epsilon[0],
-                )
-            elif self.epsilon[1] == "lower":
-                L = laplacian(
-                    X,
-                    similarity=self.metric,
-                    gamma=self.gamma,
-                    norm=self.normalize,
-                    mode="eNeighbour",
-                    lower=self.epsilon[0],
-                )
-            else:
-                raise ValueError(
-                    "Boundary needs to be 'upper' or 'lower' and threshold needs to be set, if laplacian = eNeighborhood"
-                )
-
-        elif self.laplacian == "fully_connected":
-            L = laplacian(
-                X, similarity=self.metric, gamma=self.gamma, norm=self.normalize, mode="fc"
-            )
-        else:
-            raise NotImplementedError("Other approaches currently not implemented")
+        # 2. Embed Dataset into lower-dimensional Eigenvector space
+        eigenvalues, eigenvectors = self._spectral_embedding(X)
 
-        # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
-        v0 = ht.ones((L.shape[0],), dtype=L.dtype, split=L.split, device=L.device) / math.sqrt(
-            L.shape[0]
-        )
-
-        V, T = ht.lanczos(L, self.n_lanczos, v0)
-        # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
-        eval, evec = torch.eig(T._DNDarray__array, eigenvectors=True)
-        # If x is an Eigenvector of T, then y = V@x is the corresponding Eigenvector of L
-        eval, idx = torch.sort(eval[:, 0], dim=0)
-        eigenvalues = ht.array(eval)
-        eigenvectors = ht.matmul(V, ht.array(evec))[:, idx]
-
-        # 5. Find the spectral gap, if number of clusters is not defined from the outside
+        # 3. Find the spectral gap, if number of clusters is not defined from the outside
         if self.n_clusters is None:
             diff = eigenvalues[1:] - eigenvalues[:-1]
-            self.n_clusters = np.where(diff == diff.max())[0][0] + 1
+            self.n_clusters = ht.where(diff == diff.max())[0, 0] + 1
         components = eigenvectors[:, : self.n_clusters].copy()
 
         params = self._cluster.get_params()
@@ -247,49 +176,8 @@ def predict(self, X):
             raise ValueError("input needs to be a ht.DNDarray, but was {}".format(type(X)))
         if X.split is not None and X.split != 0:
             raise NotImplementedError("Not implemented for other splitting-axes")
-        # 2. Construct Laplacian
-        if self.laplacian == "eNeighborhood":
-            if self.epsilon[1] == "upper":
-                L = laplacian(
-                    X,
-                    similarity=self.metric,
-                    gamma=self.gamma,
-                    norm=self.normalize,
-                    mode="eNeighbour",
-                    upper=self.epsilon[0],
-                )
-            elif self.epsilon[1] == "lower":
-                L = laplacian(
-                    X,
-                    similarity=self.metric,
-                    gamma=self.gamma,
-                    norm=self.normalize,
-                    mode="eNeighbour",
-                    lower=self.epsilon[0],
-                )
-            else:
-                raise ValueError(
-                    "Boundary needs to be 'upper' or 'lower' and threshold needs to be set, if laplacian = eNeighborhood"
-                )
 
-        elif self.laplacian == "fully_connected":
-            L = laplacian(
-                X, similarity=self.metric, gamma=self.gamma, norm=self.normalize, mode="fc"
-            )
-        else:
-            raise NotImplementedError("Other approaches currently not implemented")
-
-        # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
-        v0 = ht.ones((L.shape[0],), dtype=L.dtype, split=L.split, device=L.device) / math.sqrt(
-            L.shape[0]
-        )
-
-        V, T = ht.lanczos(L, self.n_lanczos, v0)
-        # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
-        eval, evec = torch.eig(T._DNDarray__array, eigenvectors=True)
-        # If x is an Eigenvector of T, then y = V@x is the corresponding Eigenvector of L
-        eval, idx = torch.sort(eval[:, 0], dim=0)
-        eigenvectors = ht.matmul(V, ht.array(evec))[:, idx]
+        _, eigenvectors = self._spectral_embedding(X)
 
         components = eigenvectors[:, : self.n_clusters].copy()
 
diff --git a/heat/cluster/tests/test_spectral.py b/heat/cluster/tests/test_spectral.py
index 2d56ba9e33..14e4202287 100644
--- a/heat/cluster/tests/test_spectral.py
+++ b/heat/cluster/tests/test_spectral.py
@@ -16,62 +16,7 @@
     ht.torch.cuda.set_device(device)
 
 
-class TestKMeans(unittest.TestCase):
-    def test_laplacian(self):
-        S = ht.array(
-            [
-                [0, 0.6, 0.8, 0.2, 0.2, 0.1, 0.4, 0.2, 0.6, 0.7],
-                [0.6, 0, 0.6, 0.3, 0.2, 0.1, 0.1, 0.2, 0.1, 0.2],
-                [0.8, 0.6, 0, 0.2, 0.1, 0.3, 0.4, 0.3, 0.2, 0.2],
-                [0.2, 0.3, 0.2, 0, 0.9, 0.9, 0.4, 0.3, 0.3, 0.4],
-                [0.2, 0.2, 0.1, 0.9, 0, 0.8, 0.1, 0.1, 0.1, 0.1],
-                [0.1, 0.1, 0.3, 0.9, 0.8, 0, 0.7, 0.7, 0.2, 0.2],
-                [0.4, 0.1, 0.4, 0.4, 0.1, 0.7, 0, 0.7, 0.3, 0.1],
-                [0.2, 0.2, 0.3, 0.3, 0.1, 0.7, 0.7, 0, 0.4, 0.3],
-                [0.6, 0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.4, 0, 0.8],
-                [0.7, 0.2, 0.2, 0.4, 0.1, 0.2, 0.1, 0.3, 0.8, 0],
-            ],
-            split=0,
-            device=ht_device,
-        )
-
-        L = ht.cluster.spectral.laplacian(S, norm=True, mode="fc")
-        self.assertIsInstance(L, ht.DNDarray)
-        self.assertEqual(L.split, S.split)
-
-        L = ht.cluster.spectral.laplacian(S, norm=True, mode="eNeighbour", upper=0.5)
-        self.assertIsInstance(L, ht.DNDarray)
-        self.assertEqual(L.split, S.split)
-
-        L = ht.cluster.spectral.laplacian(S, norm=False, mode="eNeighbour", lower=0.5)
-        self.assertIsInstance(L, ht.DNDarray)
-        self.assertEqual(L.split, S.split)
-        L_true = ht.array(
-            [
-                [4, -1, -1, 0, 0, 0, 0, 0, -1, -1],
-                [-1, 2, -1, 0, 0, 0, 0, 0, 0, 0],
-                [-1, -1, 2, 0, 0, 0, 0, 0, 0, 0],
-                [0, 0, 0, 2, -1, -1, 0, 0, 0, 0],
-                [0, 0, 0, -1, 2, -1, 0, 0, 0, 0],
-                [0, 0, 0, -1, -1, 4, -1, -1, 0, 0],
-                [0, 0, 0, 0, 0, -1, 2, -1, 0, 0],
-                [0, 0, 0, 0, 0, -1, -1, 2, 0, 0],
-                [-1, 0, 0, 0, 0, 0, 0, 0, 2, -1],
-                [-1, 0, 0, 0, 0, 0, 0, 0, -1, 2],
-            ],
-            dtype=ht.float32,
-            split=0,
-            device=ht_device,
-        )
-        self.assertTrue(ht.equal(L, L_true))
-
-        with self.assertRaises(ValueError):
-            L = ht.cluster.spectral.laplacian(S, norm=True, mode="eNeighbour")
-        with self.assertRaises(ValueError):
-            L = ht.cluster.spectral.laplacian(S, norm=True, mode="eNeighbour", upper=0.3, lower=0.7)
-        with self.assertRaises(NotImplementedError):
-            L = ht.cluster.spectral.laplacian(S, norm=True, mode="knearest")
-
+class TestSpectral(unittest.TestCase):
     def test_fit_iris(self):
         # get some test data
         iris = ht.load("heat/datasets/data/iris.csv", sep=";")
diff --git a/heat/core/linalg/__init__.py b/heat/core/linalg/__init__.py
index 59c1d8a49a..d4939e6633 100644
--- a/heat/core/linalg/__init__.py
+++ b/heat/core/linalg/__init__.py
@@ -1,3 +1,3 @@
 from .basics import *
-from .lanczos import *
+from .solver import *
 from .qr import *
diff --git a/heat/core/linalg/basics.py b/heat/core/linalg/basics.py
index 94e951efae..6c76fd19db 100644
--- a/heat/core/linalg/basics.py
+++ b/heat/core/linalg/basics.py
@@ -2,12 +2,13 @@
 import torch
 
 from ..communication import MPI
+from .. import arithmetics
 from .. import dndarray
 from .. import factories
 from .. import manipulations
 from .. import types
 
-__all__ = ["dot", "matmul", "transpose", "tril", "triu"]
+__all__ = ["dot", "matmul", "norm", "projection", "transpose", "tril", "triu"]
 
 
 def dot(a, b, out=None):
@@ -758,6 +759,57 @@ def matmul(a, b, allow_resplit=False):
         return c
 
 
+def norm(a):
+    """
+    Frobenius norm of vector a
+
+    Parameters
+    ----------
+    a : ht.DNDarray
+
+    Returns
+    -------
+    float
+        Returns the vector norm (lenght) of a
+    """
+    if not isinstance(a, dndarray.DNDarray):
+        raise TypeError("a must be of type ht.DNDarray, but was {}".format(type(a)))
+
+    d = a ** 2
+
+    for i in range(len(a.shape) - 1, -1, -1):
+        d = arithmetics.sum(d, axis=i)
+
+    return arithmetics.sqrt(d).get_item()
+
+
+def projection(a, b):
+    """
+    Projection of vector a onto vector b
+
+    Parameters
+    ----------
+    a : ht.DNDarray (1D)
+    b : ht.DNDarray (1D)
+
+    Returns
+    -------
+    ht.DNDarray
+        Returns the vector projection of b in the direction of a
+    """
+    if not isinstance(a, dndarray.DNDarray) or not isinstance(b, dndarray.DNDarray):
+        raise TypeError(
+            "a, b must be of type ht.DNDarray, but were {}, {}".format(type(a), type(b))
+        )
+
+    if len(a.shape) != 1 or len(b.shape) != 1:
+        raise RuntimeError(
+            "a, b must be vectors of length 1, but were {}, {}".format(len(a.shape), len(b.shape))
+        )
+
+    return (dot(a, b) / dot(b, b)) * b
+
+
 @torch.jit.script
 def __mm_c_block_setter(
     b_proc, a_proc, a_data, b_data, b_block_map, a_block_map, b_split, a_split, mB, kB, nB, c
diff --git a/heat/core/linalg/lanczos.py b/heat/core/linalg/solver.py
similarity index 59%
rename from heat/core/linalg/lanczos.py
rename to heat/core/linalg/solver.py
index 161fc6bd11..dc57e12bfb 100644
--- a/heat/core/linalg/lanczos.py
+++ b/heat/core/linalg/solver.py
@@ -1,15 +1,7 @@
-import math
+import heat as ht
 import torch
 
-from ..communication import MPI
-from .. import arithmetics
-from .. import dndarray
-from .. import factories
-from .. import random
-from .basics import matmul
-from .basics import dot
-
-__all__ = ["cg", "lanczos", "norm", "projection"]
+__all__ = ["cg", "lanczos"]
 
 
 def cg(A, b, x0, out=None):
@@ -32,9 +24,9 @@ def cg(A, b, x0, out=None):
     """
 
     if (
-        not isinstance(A, dndarray.DNDarray)
-        or not isinstance(b, dndarray.DNDarray)
-        or not isinstance(x0, dndarray.DNDarray)
+        not isinstance(A, ht.dndarray.DNDarray)
+        or not isinstance(b, ht.dndarray.DNDarray)
+        or not isinstance(x0, ht.dndarray.DNDarray)
     ):
         raise TypeError(
             "A, b and x0 need to be of type ht.dndarra, but were {}, {}, {}".format(
@@ -49,20 +41,22 @@ def cg(A, b, x0, out=None):
     if not (x0.numdims == 1):
         raise RuntimeError("c needs to be a 1D vector")
 
-    r = b - matmul(A, x0)
+    r = b - ht.matmul(A, x0)
     p = r
     print(A.shape, p.shape)
-    rsold = matmul(r, r)
+    rsold = ht.matmul(r, r)
     x = x0
 
     for i in range(len(b)):
-        Ap = matmul(A, p)
-        alpha = rsold / matmul(p, Ap)
+        Ap = ht.matmul(A, p)
+        alpha = rsold / ht.matmul(p, Ap)
         x = x + (alpha * p)
         r = r - (alpha * Ap)
-        rsnew = matmul(r, r)
-        if math.sqrt(rsnew) < 1e-10:
-            print("Residual r = {} reaches tolerance in it = {}".format(math.sqrt(rsnew), i))
+        rsnew = ht.matmul(r, r)
+        if ht.sqrt(rsnew).get_item() < 1e-10:
+            print(
+                "Residual r = {} reaches tolerance in it = {}".format(ht.sqrt(rsnew).get_item(), i)
+            )
             if out is not None:
                 out = x
                 return out
@@ -79,25 +73,30 @@ def cg(A, b, x0, out=None):
 
 def lanczos(A, m, v0=None, V_out=None, T_out=None):
     """
-    Lanczos algorithm for iterative approximation of the solution to the eigenvalue problem,  an adaptation of power methods to find the m "most useful" (tending towards extreme highest/lowest) eigenvalues and eigenvectors of an n×n Hermitian matrix, where often m<<n
+    Lanczos algorithm for iterative approximation of the solution to the eigenvalue problem,  an adaptation of power methods to find the m "most useful" (tending towards extreme highest/lowest) eigenvalues and eigenvectors of an n x n Hermitian matrix, where often m<<n
     Parameters
     ----------
     A : ht.DNDarray
         2D symmetric, positive definite Matrix
     m : int
         number of Lanczos iterations
-    v0 : ht.DNDarray (optional)
-        1D starting vector of euclidian norm 1
+    v0 : ht.DNDarray, optiona
+        1D starting vector of euclidian norm 1. If not provided, a random vector will be used to start the algorithm
+    V_out ht.DNDarray, optional
+        Output Matrix of size (n, m) for the Krylow vectors
+    T_out ht.DNDarray, optional
+        Output Matrix of size (m, m) for the Tridiagonal matrix
+
 
     Returns
     -------
     V ht.DNDarray
-        Matrix of size nxm, with orthonormal columns, that span the Krylow subspace. If out is given, it is returned
+        Matrix of size nxm, with orthonormal columns, that span the Krylow subspace. If V_out is given, it is returned
     T ht.DNDarray
-        Tridiagonal matrix of size mxm, with coefficients alpha_1,...alpha_n on the diagonal and coefficients beta_1,...,beta_n-1 on the side-diagonals. If out is given, it is returned
+        Tridiagonal matrix of size mxm, with coefficients alpha_1,...alpha_n on the diagonal and coefficients beta_1,...,beta_n-1 on the side-diagonals. If T_out is given, it is returned
 
     """
-    if not isinstance(A, dndarray.DNDarray):
+    if not isinstance(A, ht.dndarray.DNDarray):
         raise TypeError("A needs to be of type ht.dndarra, but was {}".format(type(A)))
 
     if not (A.numdims == 2):
@@ -108,40 +107,40 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
     n, column = A.shape
     if n != column:
         raise TypeError("Input Matrix A needs to be symmetric.")
-    T = factories.zeros((m, m))
+    T = ht.zeros((m, m))
     if A.split == 0:
         # This is done for better memory access in the reorthogonalization Gram-Schmidt algorithm
-        V = factories.ones((m, n), split=1, dtype=A.dtype, device=A.device)
+        V = ht.ones((m, n), split=1, dtype=A.dtype, device=A.device)
     else:
-        V = factories.ones((m, n), split=None, dtype=A.dtype, device=A.device)
+        V = ht.ones((m, n), split=None, dtype=A.dtype, device=A.device)
 
     if v0 is None:
-        vr = random.rand(n)
-        v0 = vr / norm(vr)
+        vr = ht.random.rand(n)
+        v0 = vr / ht.norm(vr)
 
     # # 0th iteration
     # # vector v0 has euklidian norm = 1
-    w = matmul(A, v0)
-    alpha = dot(w, v0)
+    w = ht.matmul(A, v0)
+    alpha = ht.dot(w, v0)
     w = w - alpha * v0
     T[0, 0] = alpha
     V[0, :] = v0
     for i in range(1, int(m)):
-        beta = norm(w)
+        beta = ht.norm(w)
         if abs(beta) < 1e-10:
             print("Lanczos breakdown in iteration {}".format(i))
             # Lanczos Breakdown, pick a random vector to continue
-            vr = random.rand(n, dtype=A.dtype)
+            vr = ht.random.rand(n, dtype=A.dtype)
             # orthogonalize v_r with respect to all vectors v[i]
             for j in range(i):
                 vi_loc = V._DNDarray__array[j, :]
                 a = torch.dot(vr._DNDarray__array, vi_loc)
                 b = torch.dot(vi_loc, vi_loc)
-                A.comm.Allreduce(MPI.IN_PLACE, a, MPI.SUM)
-                A.comm.Allreduce(MPI.IN_PLACE, b, MPI.SUM)
+                A.comm.Allreduce(ht.comm.MPI.IN_PLACE, a, ht.comm.MPI.SUM)
+                A.comm.Allreduce(ht.comm.MPI.IN_PLACE, b, ht.comm.MPI.SUM)
                 vr._DNDarray__array = vr._DNDarray__array - a / b * vi_loc
             # normalize v_r to Euclidian norm 1 and set as ith vector v
-            vi = vr / norm(vr)
+            vi = vr / ht.norm(vr)
         else:
             vr = w
 
@@ -152,14 +151,14 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
                 vi_loc = V._DNDarray__array[j, :]
                 a = torch.dot(vr._DNDarray__array, vi_loc)
                 b = torch.dot(vi_loc, vi_loc)
-                A.comm.Allreduce(MPI.IN_PLACE, a, MPI.SUM)
-                A.comm.Allreduce(MPI.IN_PLACE, b, MPI.SUM)
+                A.comm.Allreduce(ht.comm.MPI.IN_PLACE, a, ht.comm.MPI.SUM)
+                A.comm.Allreduce(ht.comm.MPI.IN_PLACE, b, ht.comm.MPI.SUM)
                 vr._DNDarray__array = vr._DNDarray__array - a / b * vi_loc
 
-            vi = vr / norm(vr)
+            vi = vr / ht.norm(vr)
 
-        w = matmul(A, vi)
-        alpha = dot(w, vi)
+        w = ht.matmul(A, vi)
+        alpha = ht.dot(w, vi)
         w = w - alpha * vi - beta * V[i - 1, :]
 
         T[i - 1, i] = beta
@@ -171,9 +170,9 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         V.resplit_(axis=None)
     V = V.transpose()
     if T_out is not None:
-        T_out = T
+        T_out = T.copy()
         if V_out is not None:
-            V_out = V
+            V_out = V.copy()
             return V_out, T_out
         return V, T_out
     elif V_out is not None:
@@ -181,54 +180,3 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         return V_out, T
 
     return V, T
-
-
-def norm(a):
-    """
-    Frobenius norm of vector a
-
-    Parameters
-    ----------
-    a : ht.DNDarray
-
-    Returns
-    -------
-    float
-        Returns the vector norm (lenght) of a
-    """
-    if not isinstance(a, dndarray.DNDarray):
-        raise TypeError("a must be of type ht.DNDarray, but was {}".format(type(a)))
-
-    d = a ** 2
-
-    for i in range(len(a.shape) - 1, -1, -1):
-        d = arithmetics.sum(d, axis=i)
-
-    return math.sqrt(d)
-
-
-def projection(a, b):
-    """
-    Projection of vector a onto vector b
-
-    Parameters
-    ----------
-    a : ht.DNDarray (1D)
-    b : ht.DNDarray (1D)
-
-    Returns
-    -------
-    ht.DNDarray
-        Returns the vector projection of b in the direction of a
-    """
-    if not isinstance(a, dndarray.DNDarray) or not isinstance(b, dndarray.DNDarray):
-        raise TypeError(
-            "a, b must be of type ht.DNDarray, but were {}, {}".format(type(a), type(b))
-        )
-
-    if len(a.shape) != 1 or len(b.shape) != 1:
-        raise RuntimeError(
-            "a, b must be vectors of length 1, but were {}, {}".format(len(a.shape), len(b.shape))
-        )
-
-    return (dot(a, b) / dot(b, b)) * b
diff --git a/heat/core/linalg/tests/test_basics.py b/heat/core/linalg/tests/test_basics.py
index f082299059..52a23a665e 100644
--- a/heat/core/linalg/tests/test_basics.py
+++ b/heat/core/linalg/tests/test_basics.py
@@ -466,6 +466,52 @@ def test_matmul(self):
                 b = a.copy()
                 a @ b
 
+    def test_norm(self):
+        a = ht.arange(9, dtype=ht.float32, split=0) - 4
+        self.assertTrue(
+            ht.allclose(ht.linalg.norm(a), ht.float32(np.linalg.norm(a.numpy())).item(), atol=1e-5)
+        )
+        a.resplit_(axis=None)
+        self.assertTrue(
+            ht.allclose(ht.linalg.norm(a), ht.float32(np.linalg.norm(a.numpy())).item(), atol=1e-5)
+        )
+
+        b = ht.array([[-4.0, -3.0, -2.0], [-1.0, 0.0, 1.0], [2.0, 3.0, 4.0]], split=0)
+        self.assertTrue(
+            ht.allclose(ht.linalg.norm(b), ht.float32(np.linalg.norm(b.numpy())).item(), atol=1e-5)
+        )
+        b.resplit_(axis=1)
+        self.assertTrue(
+            ht.allclose(ht.linalg.norm(b), ht.float32(np.linalg.norm(b.numpy())).item(), atol=1e-5)
+        )
+
+        with self.assertRaises(TypeError):
+            c = np.arange(9) - 4
+            ht.linalg.norm(c)
+
+    def test_projection(self):
+        a = ht.arange(1, 4, dtype=ht.float32, split=None)
+        e1 = ht.array([1, 0, 0], dtype=ht.float32, split=None)
+        self.assertTrue(ht.equal(ht.linalg.projection(a, e1), e1))
+
+        a.resplit_(axis=0)
+        self.assertTrue(ht.equal(ht.linalg.projection(a, e1), e1))
+
+        e2 = ht.array([0, 1, 0], dtype=ht.float32, split=0)
+        self.assertTrue(ht.equal(ht.linalg.projection(a, e2), e2 * 2))
+
+        a = ht.arange(1, 4, dtype=ht.float32, split=None)
+        e3 = ht.array([0, 0, 1], dtype=ht.float32, split=0)
+        self.assertTrue(ht.equal(ht.linalg.projection(a, e3), e3 * 3))
+
+        a = np.arange(1, 4)
+        with self.assertRaises(TypeError):
+            ht.linalg.projection(a, e1)
+
+        a = ht.array([[1], [2], [3]], dtype=ht.float32, split=None)
+        with self.assertRaises(RuntimeError):
+            ht.linalg.projection(a, e1)
+
     def test_transpose(self):
         # vector transpose, not distributed
         vector = ht.arange(10, device=ht_device)
diff --git a/heat/core/linalg/tests/test_lanczos.py b/heat/core/linalg/tests/test_lanczos.py
deleted file mode 100644
index cc2838c74e..0000000000
--- a/heat/core/linalg/tests/test_lanczos.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import torch
-import os
-import unittest
-import heat as ht
-import numpy as np
-
-if os.environ.get("DEVICE") == "gpu" and torch.cuda.is_available():
-    ht.use_device("gpu")
-    torch.cuda.set_device(torch.device(ht.get_device().torch_device))
-else:
-    ht.use_device("cpu")
-device = ht.get_device().torch_device
-ht_device = None
-if os.environ.get("DEVICE") == "lgpu" and torch.cuda.is_available():
-    device = ht.gpu.torch_device
-    ht_device = ht.gpu
-    torch.cuda.set_device(device)
-
-
-class TestLinalgLanczos(unittest.TestCase):
-    def test_cg(self):
-        size = ht.communication.MPI_WORLD.size * 3
-        b = ht.arange(1, size + 1, dtype=ht.float32, split=0)
-        A = ht.manipulations.diag(b)
-        x0 = ht.random.rand(size, dtype=b.dtype, split=b.split)
-
-        x = ht.ones(b.shape, dtype=b.dtype, split=b.split)
-
-        res = ht.linalg.cg(A, b, x0)
-        self.assertTrue(ht.allclose(x, res, atol=1e-3))
-
-        b_np = np.arange(1, size + 1)
-        with self.assertRaises(TypeError):
-            ht.linalg.cg(A, b_np, x0)
-
-        with self.assertRaises(RuntimeError):
-            ht.linalg.cg(A, A, x0)
-        with self.assertRaises(RuntimeError):
-            ht.linalg.cg(b, b, x0)
-        with self.assertRaises(RuntimeError):
-            ht.linalg.cg(A, b, A)
-
-    def test_norm(self):
-        a = ht.arange(9, dtype=ht.float32, split=0) - 4
-        self.assertTrue(
-            ht.allclose(ht.linalg.norm(a), ht.float32(np.linalg.norm(a.numpy())).item(), atol=1e-5)
-        )
-        a.resplit_(axis=None)
-        self.assertTrue(
-            ht.allclose(ht.linalg.norm(a), ht.float32(np.linalg.norm(a.numpy())).item(), atol=1e-5)
-        )
-
-        b = ht.array([[-4.0, -3.0, -2.0], [-1.0, 0.0, 1.0], [2.0, 3.0, 4.0]], split=0)
-        self.assertTrue(
-            ht.allclose(ht.linalg.norm(b), ht.float32(np.linalg.norm(b.numpy())).item(), atol=1e-5)
-        )
-        b.resplit_(axis=1)
-        self.assertTrue(
-            ht.allclose(ht.linalg.norm(b), ht.float32(np.linalg.norm(b.numpy())).item(), atol=1e-5)
-        )
-
-        with self.assertRaises(TypeError):
-            c = np.arange(9) - 4
-            ht.linalg.norm(c)
-
-    def test_projection(self):
-        a = ht.arange(1, 4, dtype=ht.float32, split=None)
-        e1 = ht.array([1, 0, 0], dtype=ht.float32, split=None)
-        self.assertTrue(ht.equal(ht.linalg.projection(a, e1), e1))
-
-        a.resplit_(axis=0)
-        self.assertTrue(ht.equal(ht.linalg.projection(a, e1), e1))
-
-        e2 = ht.array([0, 1, 0], dtype=ht.float32, split=0)
-        self.assertTrue(ht.equal(ht.linalg.projection(a, e2), e2 * 2))
-
-        a = ht.arange(1, 4, dtype=ht.float32, split=None)
-        e3 = ht.array([0, 0, 1], dtype=ht.float32, split=0)
-        self.assertTrue(ht.equal(ht.linalg.projection(a, e3), e3 * 3))
-
-        a = np.arange(1, 4)
-        with self.assertRaises(TypeError):
-            ht.linalg.projection(a, e1)
-
-        a = ht.array([[1], [2], [3]], dtype=ht.float32, split=None)
-        with self.assertRaises(RuntimeError):
-            ht.linalg.projection(a, e1)
diff --git a/heat/core/linalg/tests/test_solver.py b/heat/core/linalg/tests/test_solver.py
new file mode 100644
index 0000000000..4760243cba
--- /dev/null
+++ b/heat/core/linalg/tests/test_solver.py
@@ -0,0 +1,41 @@
+import torch
+import os
+import unittest
+import heat as ht
+import numpy as np
+
+if os.environ.get("DEVICE") == "gpu" and torch.cuda.is_available():
+    ht.use_device("gpu")
+    torch.cuda.set_device(torch.device(ht.get_device().torch_device))
+else:
+    ht.use_device("cpu")
+device = ht.get_device().torch_device
+ht_device = None
+if os.environ.get("DEVICE") == "lgpu" and torch.cuda.is_available():
+    device = ht.gpu.torch_device
+    ht_device = ht.gpu
+    torch.cuda.set_device(device)
+
+
+class TestSolver(unittest.TestCase):
+    def test_cg(self):
+        size = ht.communication.MPI_WORLD.size * 3
+        b = ht.arange(1, size + 1, dtype=ht.float32, split=0)
+        A = ht.manipulations.diag(b)
+        x0 = ht.random.rand(size, dtype=b.dtype, split=b.split)
+
+        x = ht.ones(b.shape, dtype=b.dtype, split=b.split)
+
+        res = ht.linalg.cg(A, b, x0)
+        self.assertTrue(ht.allclose(x, res, atol=1e-3))
+
+        b_np = np.arange(1, size + 1)
+        with self.assertRaises(TypeError):
+            ht.linalg.cg(A, b_np, x0)
+
+        with self.assertRaises(RuntimeError):
+            ht.linalg.cg(A, A, x0)
+        with self.assertRaises(RuntimeError):
+            ht.linalg.cg(b, b, x0)
+        with self.assertRaises(RuntimeError):
+            ht.linalg.cg(A, b, A)
diff --git a/heat/graph/__init__.py b/heat/graph/__init__.py
new file mode 100644
index 0000000000..675321d031
--- /dev/null
+++ b/heat/graph/__init__.py
@@ -0,0 +1 @@
+from .laplacian import *
diff --git a/heat/graph/laplacian.py b/heat/graph/laplacian.py
new file mode 100644
index 0000000000..604f44f490
--- /dev/null
+++ b/heat/graph/laplacian.py
@@ -0,0 +1,115 @@
+import torch
+import heat as ht
+
+
+class Laplacian:
+    def __init__(
+        self,
+        similarity,
+        weighted=True,
+        definition="norm_sym",
+        mode="fully_connected",
+        threshold_key="upper",
+        threshold_value=1.0,
+        neighbours=10,
+    ):
+        """
+        Graph Laplacians from a dataset
+
+        Parameters
+        ----------
+        similarity : function f(X) --> similarity matrix
+            Metric function that defines similarity between vertices. Should accept a data matrix (n,f) as input and return an (n,n) similarity matrix.
+            Additional required parameters can be passed via a lambda function.
+        definition : string
+            Type of Laplacian
+            'simple': Laplacian matrix for simple graphs L = D - A
+            'norm_sym': Symmetric normalized Laplacian L^sym = D^{-1/2} L D^{-1/2} = I - D^{-1/2} A D^{-1/2}
+            'norm_rw': L^rw = D^{-1} L = I - D^{-1} A
+        mode : "fc", "eNeighbour"
+            How to calculate adjacency from the similarity matrix
+            "fully_connected" is fully-connected, so A = S
+            "eNeighbour" is the epsilon neighbourhood, with A_ji = 0 if S_ij </> lower/upper; for eNeighbour an upper or lower boundary needs to be set
+        threshold_key : string
+            "upper" or "lower", defining the type of threshold for the epsilon-neighrborhood
+        threshold_value : float
+            Boundary value for the epsilon-neighrborhood
+        neighbours : int
+            Number of neirest neighbors to be considered for adjacency definition. Currently not implemented
+        Returns
+        -------
+        L : ht.DNDarray
+
+        """
+        self.similarity_metric = similarity
+        self.weighted = weighted
+        if definition not in ["simple", "norm_sym"]:
+            raise NotImplementedError(
+                "Currently only simple and normalized symmetric graph laplacians are supported"
+            )
+        else:
+            self.definition = definition
+        if mode not in ["eNeighbour", "fully_connected"]:
+            raise NotImplementedError(
+                "Only eNeighborhood and fully-connected graphs supported at the moment."
+            )
+        else:
+            self.mode = mode
+
+        if threshold_key not in ["upper", "lower"]:
+            raise ValueError(
+                "Only 'upper' and 'lower' threshold types supported for eNeighbouhood graph construction"
+            )
+        else:
+            self.epsilon = (threshold_key, threshold_value)
+
+        self.neighbours = neighbours
+
+    def _normalized_symmetric_L(self, A):
+        degree = ht.sum(A, axis=1)
+        degree.resplit_(axis=None)
+        # Find stand-alone vertices with no connections
+        temp = torch.ones(
+            degree.shape, dtype=degree._DNDarray__array.dtype, device=degree.device.torch_device
+        )
+        degree._DNDarray__array = torch.where(
+            degree._DNDarray__array == 0, temp, degree._DNDarray__array
+        )
+        L = A / ht.sqrt(ht.expand_dims(degree, axis=1))
+        L = L / ht.sqrt(ht.expand_dims(degree, axis=0))
+        L = L * (-1.0)
+        L.fill_diagonal(1.0)
+        return L
+
+    def _simple_L(self, A):
+        degree = ht.sum(A, axis=1)
+        L = ht.diag(degree) - A
+        return L
+
+    def construct(self, X):
+        S = self.similarity_metric(X)
+        S.fill_diagonal(0.0)
+
+        if self.mode == "eNeighbour":
+            if self.epsilon[0] == "upper":
+                if self.weighted:
+                    S = ht.where(S < self.epsilon[1], S, 0)
+                else:
+                    S = ht.int(S < self.epsilon[1])
+            else:
+                if self.weighted:
+                    S = ht.where(S > self.epsilon[1], S, 0)
+                else:
+                    S = ht.int(S > self.epsilon[1])
+
+        # Add other construction methods here
+        # elif self.mode == 'kNN':
+        #   ...
+        # self.mode == 'fully_connected' needs no further steps
+
+        if self.definition == "simple":
+            L = self._simple_L(S)
+        elif self.definition == "norm_sym":
+            L = self._normalized_symmetric_L(S)
+
+        return L
diff --git a/heat/graph/tests/test_laplacian.py b/heat/graph/tests/test_laplacian.py
new file mode 100644
index 0000000000..a593b74a14
--- /dev/null
+++ b/heat/graph/tests/test_laplacian.py
@@ -0,0 +1,85 @@
+import os
+import unittest
+
+import heat as ht
+
+if os.environ.get("DEVICE") == "gpu" and ht.torch.cuda.is_available():
+    ht.use_device("gpu")
+    ht.torch.cuda.set_device(ht.torch.device(ht.get_device().torch_device))
+else:
+    ht.use_device("cpu")
+device = ht.get_device().torch_device
+ht_device = None
+if os.environ.get("DEVICE") == "lgpu" and ht.torch.cuda.is_available():
+    device = ht.gpu.torch_device
+    ht_device = ht.gpu
+    ht.torch.cuda.set_device(device)
+
+
+class TestLaplacian(unittest.TestCase):
+    def test_laplacian(self):
+        size = ht.communication.MPI_WORLD.size
+        rank = ht.communication.MPI_WORLD.rank
+        X = ht.ones((size * 2, 4), split=0)
+        X._DNDarray__array[0, :] *= rank
+        X._DNDarray__array[1, :] *= rank + 0.5
+
+        L = ht.graph.Laplacian(lambda x: ht.spatial.cdist(x, quadratic_expansion=True))
+        res = L.construct(X)
+        self.assertIsInstance(res, ht.DNDarray)
+        self.assertEqual(res.shape, (size * 2, size * 2))
+        self.assertEqual(res.split, 0)
+
+        L = ht.graph.Laplacian(lambda x: ht.spatial.rbf(x, sigma=1.0, quadratic_expansion=True))
+        res = L.construct(X)
+        self.assertIsInstance(res, ht.DNDarray)
+        self.assertEqual(res.shape, (size * 2, size * 2))
+        self.assertEqual(res.split, 0)
+
+        L = ht.graph.Laplacian(
+            lambda x: ht.spatial.cdist(x, quadratic_expansion=True), weighted=False
+        )
+        res = L.construct(X)
+        self.assertIsInstance(res, ht.DNDarray)
+        self.assertEqual(res.shape, (size * 2, size * 2))
+        self.assertEqual(res.split, 0)
+
+        L = ht.graph.Laplacian(
+            lambda x: ht.spatial.cdist(x, quadratic_expansion=True), definition="simple"
+        )
+        res = L.construct(X)
+        self.assertIsInstance(res, ht.DNDarray)
+        self.assertEqual(res.shape, (size * 2, size * 2))
+        self.assertEqual(res.split, 0)
+
+        L = ht.graph.Laplacian(
+            lambda x: ht.spatial.cdist(x, quadratic_expansion=True), mode="eNeighbour"
+        )
+        res = L.construct(X)
+        self.assertIsInstance(res, ht.DNDarray)
+        self.assertEqual(res.shape, (size * 2, size * 2))
+        self.assertEqual(res.split, 0)
+
+        L = ht.graph.Laplacian(
+            lambda x: ht.spatial.cdist(x, quadratic_expansion=True),
+            mode="eNeighbour",
+            threshold_key="lower",
+            threshold_value=3.0,
+        )
+        res = L.construct(X)
+        self.assertIsInstance(res, ht.DNDarray)
+        self.assertEqual(res.shape, (size * 2, size * 2))
+        self.assertEqual(res.split, 0)
+
+        with self.assertRaises(ValueError):
+            L = ht.graph.Laplacian(
+                lambda x: ht.spatial.cdist(x, quadratic_expansion=True), threshold_key="both"
+            )
+        with self.assertRaises(NotImplementedError):
+            L = ht.graph.Laplacian(
+                lambda x: ht.spatial.cdist(x, quadratic_expansion=True), mode="kNN"
+            )
+        with self.assertRaises(NotImplementedError):
+            L = ht.graph.Laplacian(
+                lambda x: ht.spatial.cdist(x, quadratic_expansion=True), definition="norm_rw"
+            )

From 17635c609368fe68f7d4ed562873896ca9ab13f4 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Fri, 3 Apr 2020 22:45:04 +0200
Subject: [PATCH 37/49] Final Changes from PR Review and Bugfixes

---
 heat/cluster/spectral.py            | 18 +++++-------
 heat/cluster/tests/test_spectral.py |  2 +-
 heat/core/linalg/basics.py          |  3 +-
 heat/core/linalg/solver.py          | 43 +++++++++++++++--------------
 4 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 550bb5862e..23bc4e3a3c 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -56,7 +56,7 @@ def __init__(
             sig = math.sqrt(1 / (2 * gamma))
             self.laplacian = ht.graph.Laplacian(
                 lambda x: ht.spatial.rbf(x, sigma=sig, quadratic_expansion=True),
-                definition="normalizes symmetric",
+                definition="norm_sym",
                 mode=laplacian,
                 threshold_key=boundary,
                 threshold_value=threshold,
@@ -65,7 +65,7 @@ def __init__(
         elif metric == "euclidean":
             self.laplacian = ht.graph.Laplacian(
                 lambda x: ht.spatial.cdist(x),
-                definition="normalizes symmetric",
+                definition="norm_sym",
                 mode=laplacian,
                 threshold_key=boundary,
                 threshold_value=threshold,
@@ -104,19 +104,15 @@ def _spectral_embedding(self, X):
             Eigenvectors of the graph's Laplacian matrix.
         """
         L = self.laplacian.construct(X)
-
         # 3. Eigenvalue and -vector calculation via Lanczos Algorithm
-        v0 = (
-            ht.ones((L.shape[0],), dtype=L.dtype, split=L.split, device=L.device)
-            / ht.sqrt(L.shape[0]).get_item()
-        )
-
+        v0 = ht.ones((L.shape[0],), dtype=L.dtype, split=0, device=L.device) / math.sqrt(L.shape[0])
         V, T = ht.lanczos(L, self.n_lanczos, v0)
+
         # 4. Calculate and Sort Eigenvalues and Eigenvectors of tridiagonal matrix T
         eval, evec = torch.eig(T._DNDarray__array, eigenvectors=True)
         # If x is an Eigenvector of T, then y = V@x is the corresponding Eigenvector of L
-        eigenvalues = ht.array(eval[:, 0])
-        eval, idx = ht.sort(eigenvalues)
+        eval, idx = torch.sort(eval[:, 0], dim=0)
+        eigenvalues = ht.array(eval)
         eigenvectors = ht.matmul(V, ht.array(evec))[:, idx]
 
         return eigenvalues, eigenvectors
@@ -141,7 +137,7 @@ def fit(self, X):
         # 3. Find the spectral gap, if number of clusters is not defined from the outside
         if self.n_clusters is None:
             diff = eigenvalues[1:] - eigenvalues[:-1]
-            self.n_clusters = ht.where(diff == diff.max())[0, 0] + 1
+            self.n_clusters = ht.where(diff == diff.max()).item() + 1
         components = eigenvectors[:, : self.n_clusters].copy()
 
         params = self._cluster.get_params()
diff --git a/heat/cluster/tests/test_spectral.py b/heat/cluster/tests/test_spectral.py
index 14e4202287..0cbef20723 100644
--- a/heat/cluster/tests/test_spectral.py
+++ b/heat/cluster/tests/test_spectral.py
@@ -22,7 +22,7 @@ def test_fit_iris(self):
         iris = ht.load("heat/datasets/data/iris.csv", sep=";")
 
         # fit the clusters
-        m = 20
+        m = 4
         spectral = ht.cluster.Spectral(
             gamma=1.0, metric="rbf", laplacian="fully_connected", normalize=True, n_lanczos=m
         )
diff --git a/heat/core/linalg/basics.py b/heat/core/linalg/basics.py
index 6c76fd19db..6bca68d3e1 100644
--- a/heat/core/linalg/basics.py
+++ b/heat/core/linalg/basics.py
@@ -3,6 +3,7 @@
 
 from ..communication import MPI
 from .. import arithmetics
+from .. import exponential
 from .. import dndarray
 from .. import factories
 from .. import manipulations
@@ -780,7 +781,7 @@ def norm(a):
     for i in range(len(a.shape) - 1, -1, -1):
         d = arithmetics.sum(d, axis=i)
 
-    return arithmetics.sqrt(d).get_item()
+    return exponential.sqrt(d).item()
 
 
 def projection(a, b):
diff --git a/heat/core/linalg/solver.py b/heat/core/linalg/solver.py
index dc57e12bfb..299c532a15 100644
--- a/heat/core/linalg/solver.py
+++ b/heat/core/linalg/solver.py
@@ -1,4 +1,5 @@
 import heat as ht
+
 import torch
 
 __all__ = ["cg", "lanczos"]
@@ -24,9 +25,9 @@ def cg(A, b, x0, out=None):
     """
 
     if (
-        not isinstance(A, ht.dndarray.DNDarray)
-        or not isinstance(b, ht.dndarray.DNDarray)
-        or not isinstance(x0, ht.dndarray.DNDarray)
+        not isinstance(A, ht.DNDarray)
+        or not isinstance(b, ht.DNDarray)
+        or not isinstance(x0, ht.DNDarray)
     ):
         raise TypeError(
             "A, b and x0 need to be of type ht.dndarra, but were {}, {}, {}".format(
@@ -96,7 +97,7 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         Tridiagonal matrix of size mxm, with coefficients alpha_1,...alpha_n on the diagonal and coefficients beta_1,...,beta_n-1 on the side-diagonals. If T_out is given, it is returned
 
     """
-    if not isinstance(A, ht.dndarray.DNDarray):
+    if not isinstance(A, ht.DNDarray):
         raise TypeError("A needs to be of type ht.dndarra, but was {}".format(type(A)))
 
     if not (A.numdims == 2):
@@ -110,34 +111,36 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
     T = ht.zeros((m, m))
     if A.split == 0:
         # This is done for better memory access in the reorthogonalization Gram-Schmidt algorithm
-        V = ht.ones((m, n), split=1, dtype=A.dtype, device=A.device)
+        V = ht.ones((n, m), split=0, dtype=A.dtype, device=A.device)
     else:
-        V = ht.ones((m, n), split=None, dtype=A.dtype, device=A.device)
+        V = ht.ones((n, m), split=None, dtype=A.dtype, device=A.device)
 
     if v0 is None:
-        vr = ht.random.rand(n)
+        vr = ht.rand(n, split=V.split)
         v0 = vr / ht.norm(vr)
-
+    else:
+        if v0.split != V.split:
+            v0.resplit_(axis=V.split)
     # # 0th iteration
     # # vector v0 has euklidian norm = 1
     w = ht.matmul(A, v0)
     alpha = ht.dot(w, v0)
     w = w - alpha * v0
     T[0, 0] = alpha
-    V[0, :] = v0
+    V[:, 0] = v0
     for i in range(1, int(m)):
         beta = ht.norm(w)
         if abs(beta) < 1e-10:
             print("Lanczos breakdown in iteration {}".format(i))
             # Lanczos Breakdown, pick a random vector to continue
-            vr = ht.random.rand(n, dtype=A.dtype)
+            vr = ht.rand(n, dtype=A.dtype)
             # orthogonalize v_r with respect to all vectors v[i]
             for j in range(i):
-                vi_loc = V._DNDarray__array[j, :]
+                vi_loc = V._DNDarray__array[:, j]
                 a = torch.dot(vr._DNDarray__array, vi_loc)
                 b = torch.dot(vi_loc, vi_loc)
-                A.comm.Allreduce(ht.comm.MPI.IN_PLACE, a, ht.comm.MPI.SUM)
-                A.comm.Allreduce(ht.comm.MPI.IN_PLACE, b, ht.comm.MPI.SUM)
+                A.comm.Allreduce(ht.communication.MPI.IN_PLACE, a, ht.communication.MPI.SUM)
+                A.comm.Allreduce(ht.communication.MPI.IN_PLACE, b, ht.communication.MPI.SUM)
                 vr._DNDarray__array = vr._DNDarray__array - a / b * vi_loc
             # normalize v_r to Euclidian norm 1 and set as ith vector v
             vi = vr / ht.norm(vr)
@@ -148,27 +151,27 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
             # ToDo: Rethink this; mask torch calls, See issue #494
             # This is the fast solution, using item access on the ht.dndarray level is way slower
             for j in range(i):
-                vi_loc = V._DNDarray__array[j, :]
+                vi_loc = V._DNDarray__array[:, j]
                 a = torch.dot(vr._DNDarray__array, vi_loc)
                 b = torch.dot(vi_loc, vi_loc)
-                A.comm.Allreduce(ht.comm.MPI.IN_PLACE, a, ht.comm.MPI.SUM)
-                A.comm.Allreduce(ht.comm.MPI.IN_PLACE, b, ht.comm.MPI.SUM)
+                A.comm.Allreduce(ht.communication.MPI.IN_PLACE, a, ht.communication.MPI.SUM)
+                A.comm.Allreduce(ht.communication.MPI.IN_PLACE, b, ht.communication.MPI.SUM)
                 vr._DNDarray__array = vr._DNDarray__array - a / b * vi_loc
 
             vi = vr / ht.norm(vr)
 
         w = ht.matmul(A, vi)
         alpha = ht.dot(w, vi)
-        w = w - alpha * vi - beta * V[i - 1, :]
+        w = w - alpha * vi - beta * V[:, i - 1]
 
         T[i - 1, i] = beta
         T[i, i - 1] = beta
         T[i, i] = alpha
-        V[i, :] = vi
+        V[:, i] = vi
 
     if V.split is not None:
         V.resplit_(axis=None)
-    V = V.transpose()
+
     if T_out is not None:
         T_out = T.copy()
         if V_out is not None:
@@ -176,7 +179,7 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
             return V_out, T_out
         return V, T_out
     elif V_out is not None:
-        V_out = V
+        V_out = V.copy()
         return V_out, T
 
     return V, T

From d10a43262cf3090e94761fa71107e3b8086cb8cf Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Mon, 6 Apr 2020 10:05:43 +0200
Subject: [PATCH 38/49] Bugfixes and testing for spectral clustering

---
 heat/cluster/spectral.py            | 79 ++++++++++++++++++++++++-----
 heat/cluster/tests/test_spectral.py | 51 +++++++++++--------
 heat/core/linalg/solver.py          |  6 +--
 3 files changed, 98 insertions(+), 38 deletions(-)

diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 23bc4e3a3c..072ce9f64e 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -14,9 +14,9 @@ def __init__(
         laplacian="fully_connected",
         threshold=1.0,
         boundary="upper",
-        normalize=True,
         n_lanczos=300,
         assign_labels="kmeans",
+        **params
     ):
         """
         Spectral clustering
@@ -32,23 +32,20 @@ def __init__(
             'euclidean' : construct the similarity matrix as only euclidean distance
         laplacian : string
             How to calculate the graph laplacian (affinity)
-            'fully_connected'
-            'eNeighborhood'
+            Currently supported : 'fully_connected', 'eNeighbour'
         theshold : float
-            Threshold for affinity matrix if laplacian='eNeighborhood'
+            Threshold for affinity matrix if laplacian='eNeighbour'
             Ignorded for laplacian='fully_connected'
         boundary : string
-            How to interpret threshold
-            'upper'
-            'lower'
-        normalize : bool, default = True
-            normalized vs. unnormalized Laplacian
+            How to interpret threshold: 'upper', 'lower'
+            Ignorded for laplacian='fully_connected'
         n_lanczos : int
             number of Lanczos iterations for Eigenvalue decomposition
         assign_labels: str, default = 'kmeans'
              The strategy to use to assign labels in the embedding space.
              'kmeans'
-
+        **params: dict
+              Parameter dictionary for the assign_labels estimator
         """
         self.n_clusters = n_clusters
         self.n_lanczos = n_lanczos
@@ -64,7 +61,7 @@ def __init__(
 
         elif metric == "euclidean":
             self.laplacian = ht.graph.Laplacian(
-                lambda x: ht.spatial.cdist(x),
+                lambda x: ht.spatial.cdist(x, quadratic_expansion=True),
                 definition="norm_sym",
                 mode=laplacian,
                 threshold_key=boundary,
@@ -74,7 +71,7 @@ def __init__(
             raise NotImplementedError("Other kernels currently not supported")
 
         if assign_labels == "kmeans":
-            self._cluster = ht.cluster.KMeans(init="random", max_iter=30, tol=-1.0)
+            self._cluster = ht.cluster.KMeans(params)
         else:
             raise NotImplementedError(
                 "Other Label Assignment Algorithms are currently not available"
@@ -137,7 +134,9 @@ def fit(self, X):
         # 3. Find the spectral gap, if number of clusters is not defined from the outside
         if self.n_clusters is None:
             diff = eigenvalues[1:] - eigenvalues[:-1]
-            self.n_clusters = ht.where(diff == diff.max()).item() + 1
+            tmp = ht.where(diff == diff.max()).item()
+            self.n_clusters = tmp + 1
+            print("Number of suggested Clusters: ", self.n_clusters)
         components = eigenvectors[:, : self.n_clusters].copy()
 
         params = self._cluster.get_params()
@@ -197,3 +196,57 @@ def fit_predict(self, X):
         """
         self.fit(X)
         return self._labels
+
+    def get_params(self, deep=True):
+        """
+        Get parameters for this estimator.
+
+        Parameters
+        ----------
+        deep : boolean, optional
+            If True, will return the parameters for this estimator and contained sub-objects that are estimators.
+            Defaults to true.
+
+        Returns
+        -------
+        params : dict of string to any
+            Parameter names mapped to their values.
+        """
+        # unused
+        _ = deep
+
+        return {
+            "n_clusters": self.n_clusters,
+            "gamma": self.gamma,
+            "metric": self.metric,
+            "laplacian": self.laplacian,
+            "threshold": self.threshold,
+            "boundary": self.boundary,
+            "n_lanczos": self.n_lanczos,
+            "assign_labels": self.assign_labels,
+        }
+
+    def set_params(self, **params):
+        """
+        Set the parameters of this estimator.
+
+        Parameters
+        ----------
+        params : dict
+            The parameters of the estimator to be modified.
+
+        Returns
+        -------
+        self : ht.ml.KMeans
+            This estimator instance for chaining.
+        """
+        self.n_clusters = params.get("n_clusters", self.n_clusters)
+        self.gamma = params.get("gamma", self.gamma)
+        self.metric = params.get("metric", self.metric)
+        self.laplacian = params.get("laplacian", self.laplacian)
+        self.threshold = params.get("thresholdtol", self.threshold)
+        self.boundary = params.get("boundary", self.boundary)
+        self.n_lanczos = params.get("n_lanczos", self.n_lanczos)
+        self.assign_labels = params.get("assign_labels", self.assign_labels)
+
+        return self
diff --git a/heat/cluster/tests/test_spectral.py b/heat/cluster/tests/test_spectral.py
index 0cbef20723..e9f07ebf6b 100644
--- a/heat/cluster/tests/test_spectral.py
+++ b/heat/cluster/tests/test_spectral.py
@@ -19,38 +19,45 @@
 class TestSpectral(unittest.TestCase):
     def test_fit_iris(self):
         # get some test data
-        iris = ht.load("heat/datasets/data/iris.csv", sep=";")
+        iris = ht.load("heat/datasets/data/iris.csv", sep=";", split=0)
+        m = 10
 
         # fit the clusters
-        m = 4
         spectral = ht.cluster.Spectral(
-            gamma=1.0, metric="rbf", laplacian="fully_connected", normalize=True, n_lanczos=m
+            n_clusters=3, gamma=1.0, metric="rbf", laplacian="fully_connected", n_lanczos=m
         )
         spectral.fit(iris)
-
         self.assertIsInstance(spectral.labels_, ht.DNDarray)
 
+        spectral = ht.cluster.Spectral(
+            metric="euclidean", laplacian="eNeighbour", theshold=0.5, boundary="upper", n_lanczos=m
+        )
+        labels = spectral.fit_predict(iris)
+        self.assertIsInstance(labels, ht.DNDarray)
+
+        spectral = ht.cluster.Spectral(
+            gamma=0.1,
+            metric="rbf",
+            laplacian="eNeighbour",
+            theshold=0.5,
+            boundary="upper",
+            n_lanczos=m,
+        )
+        labels = spectral.fit_predict(iris)
+        self.assertIsInstance(labels, ht.DNDarray)
+
+        kmeans = {"kmeans++": "kmeans++", "max_iter": 30, "tol": -1}
+        spectral = ht.cluster.Spectral(
+            n_clusters=3, gamma=1.0, normalize=True, n_lanczos=m, params=kmeans
+        )
+        labels = spectral.fit_predict(iris)
+        self.assertIsInstance(labels, ht.DNDarray)
+
+        # Errors
         with self.assertRaises(NotImplementedError):
-            spectral = ht.cluster.Spectral(
-                gamma=1.0,
-                metric="rbf",
-                laplacian="fully_connected",
-                normalize=True,
-                n_lanczos=20,
-                assign_labels="discretize",
-            )
-        with self.assertRaises(NotImplementedError):
-            spectral = ht.cluster.Spectral(
-                gamma=1.0,
-                metric="nearest_neighbors",
-                laplacian="fully_connected",
-                normalize=True,
-                n_lanczos=m,
-            )
-            spectral.fit(iris)
+            spectral = ht.cluster.Spectral(metric="ahalanobis", n_lanczos=m)
 
         iris_split = ht.load("heat/datasets/data/iris.csv", sep=";", split=1)
         spectral = ht.cluster.Spectral(n_lanczos=20)
-
         with self.assertRaises(NotImplementedError):
             spectral.fit(iris_split)
diff --git a/heat/core/linalg/solver.py b/heat/core/linalg/solver.py
index 299c532a15..d79f16b25c 100644
--- a/heat/core/linalg/solver.py
+++ b/heat/core/linalg/solver.py
@@ -116,7 +116,7 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
         V = ht.ones((n, m), split=None, dtype=A.dtype, device=A.device)
 
     if v0 is None:
-        vr = ht.rand(n, split=V.split)
+        vr = ht.random.rand(n, split=V.split)
         v0 = vr / ht.norm(vr)
     else:
         if v0.split != V.split:
@@ -131,9 +131,9 @@ def lanczos(A, m, v0=None, V_out=None, T_out=None):
     for i in range(1, int(m)):
         beta = ht.norm(w)
         if abs(beta) < 1e-10:
-            print("Lanczos breakdown in iteration {}".format(i))
+            # print("Lanczos breakdown in iteration {}".format(i))
             # Lanczos Breakdown, pick a random vector to continue
-            vr = ht.rand(n, dtype=A.dtype)
+            vr = ht.random.rand(n, dtype=A.dtype, split=V.split)
             # orthogonalize v_r with respect to all vectors v[i]
             for j in range(i):
                 vi_loc = V._DNDarray__array[:, j]

From bad9a8f70dc4b3905ec6f1072cdd155ba66c4b0e Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Mon, 6 Apr 2020 10:10:50 +0200
Subject: [PATCH 39/49] bugfix in CG algorithm

---
 heat/core/linalg/solver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heat/core/linalg/solver.py b/heat/core/linalg/solver.py
index d79f16b25c..94d7f3c405 100644
--- a/heat/core/linalg/solver.py
+++ b/heat/core/linalg/solver.py
@@ -54,7 +54,7 @@ def cg(A, b, x0, out=None):
         x = x + (alpha * p)
         r = r - (alpha * Ap)
         rsnew = ht.matmul(r, r)
-        if ht.sqrt(rsnew).get_item() < 1e-10:
+        if ht.sqrt(rsnew).item() < 1e-10:
             print(
                 "Residual r = {} reaches tolerance in it = {}".format(ht.sqrt(rsnew).get_item(), i)
             )

From 13279212d4eb3453e6e49cc21f7d72d5791572aa Mon Sep 17 00:00:00 2001
From: coquelin77 <daniel.coquelin@gmail.com>
Date: Mon, 6 Apr 2020 10:25:58 +0200
Subject: [PATCH 40/49] added  to resplit_, removed need for this fix in mm
 split=none

---
 heat/core/dndarray.py      | 1 +
 heat/core/linalg/basics.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/heat/core/dndarray.py b/heat/core/dndarray.py
index 96311193fa..72afa8fa72 100644
--- a/heat/core/dndarray.py
+++ b/heat/core/dndarray.py
@@ -2552,6 +2552,7 @@ def resplit_(self, axis=None):
 
             self.__array = redistributed
             self.__split = axis
+        return self
 
     def __rfloordiv__(self, other):
         """
diff --git a/heat/core/linalg/basics.py b/heat/core/linalg/basics.py
index 6bca68d3e1..784e96c856 100644
--- a/heat/core/linalg/basics.py
+++ b/heat/core/linalg/basics.py
@@ -163,7 +163,7 @@ def matmul(a, b, allow_resplit=False):
                 torch.matmul(a._DNDarray__array, b._DNDarray__array), device=a.device
             )
         else:
-            a = a.resplit_(0)
+            a.resplit_(0)
             slice_0 = a.comm.chunk(a.shape, a.split)[2][0]
             hold = a._DNDarray__array @ b._DNDarray__array
 

From 20deabad2e1982400ac8ec905518da6494c0146a Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Mon, 6 Apr 2020 11:17:59 +0200
Subject: [PATCH 41/49] Temporary Fixe for dndarray fill_diagonal

---
 heat/core/tests/test_dndarray.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/heat/core/tests/test_dndarray.py b/heat/core/tests/test_dndarray.py
index f14776c390..0944f7cbe8 100644
--- a/heat/core/tests/test_dndarray.py
+++ b/heat/core/tests/test_dndarray.py
@@ -210,14 +210,15 @@ def test_fill_diagonal(self):
         a.fill_diagonal(0)
         self.assertTrue(ht.equal(a, ref))
 
+        # ToDo: uneven tensor dimensions x and y when bug in factories.eye is fixed
         ref = ht.zeros(
-            (ht.MPI_WORLD.size * 2, ht.MPI_WORLD.size * 3),
+            (ht.MPI_WORLD.size * 3, ht.MPI_WORLD.size * 3),
             dtype=ht.float32,
             split=1,
             device=ht_device,
         )
         a = ht.eye(
-            (ht.MPI_WORLD.size * 2, ht.MPI_WORLD.size * 3),
+            (ht.MPI_WORLD.size * 3, ht.MPI_WORLD.size * 3),
             dtype=ht.float32,
             split=1,
             device=ht_device,
@@ -225,14 +226,15 @@ def test_fill_diagonal(self):
         a.fill_diagonal(0)
         self.assertTrue(ht.equal(a, ref))
 
+        # ToDo: uneven tensor dimensions x and y when bug in factories.eye is fixed
         ref = ht.zeros(
-            (ht.MPI_WORLD.size * 4, ht.MPI_WORLD.size * 2),
+            (ht.MPI_WORLD.size * 4, ht.MPI_WORLD.size * 4),
             dtype=ht.float32,
             split=0,
             device=ht_device,
         )
         a = ht.eye(
-            (ht.MPI_WORLD.size * 4, ht.MPI_WORLD.size * 2),
+            (ht.MPI_WORLD.size * 4, ht.MPI_WORLD.size * 4),
             dtype=ht.float32,
             split=0,
             device=ht_device,

From f5c84a590fc6dfa43a928e5a0446913e41f1ad55 Mon Sep 17 00:00:00 2001
From: coquelin77 <daniel.coquelin@gmail.com>
Date: Mon, 6 Apr 2020 11:39:11 +0200
Subject: [PATCH 42/49] minor comment change to restart tests

---
 heat/core/linalg/tests/test_basics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/heat/core/linalg/tests/test_basics.py b/heat/core/linalg/tests/test_basics.py
index 52a23a665e..e62d284ff6 100644
--- a/heat/core/linalg/tests/test_basics.py
+++ b/heat/core/linalg/tests/test_basics.py
@@ -59,13 +59,13 @@ def test_dot(self):
 
         const1 = 5
         const2 = 6
-        # a is const,
+        # a is const
         res = ht.dot(const1, b2d) - ht.array(np.dot(const1, data2d), device=ht_device)
         ret = 0
         ht.dot(const1, b2d, out=ret)
         self.assertEqual(ht.equal(res, ht.zeros(res.shape, device=ht_device)), 1)
 
-        # b is const,
+        # b is const
         res = ht.dot(a2d, const2) - ht.array(np.dot(data2d, const2), device=ht_device)
         self.assertEqual(ht.equal(res, ht.zeros(res.shape, device=ht_device)), 1)
         # a and b and const

From 29ea72e6c9cef78c0fa5233b83130383a3273629 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Mon, 6 Apr 2020 11:50:15 +0200
Subject: [PATCH 43/49] updated Changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 07d9d25f4b..ba80f5c147 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,5 @@
 # Pending Additions
-
+- [#518](https://github.com/helmholtz-analytics/heat/pull/518) Implemenation of Spectral Clustering
 - [#429](https://github.com/helmholtz-analytics/heat/pull/429) Create submodule for Linear Algebra functions
 - [#429](https://github.com/helmholtz-analytics/heat/pull/429) Implementated QR
 - [#429](https://github.com/helmholtz-analytics/heat/pull/429) Implementated a tiling class to create Square tiles along the diagonal of a 2D matrix

From f609302ac2e8105409a2204380242883e23fcdde Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Mon, 6 Apr 2020 14:56:32 +0200
Subject: [PATCH 44/49] Appeasing Markus about the order of changelog

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ba80f5c147..78cba2dd67 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,4 @@
 # Pending Additions
-- [#518](https://github.com/helmholtz-analytics/heat/pull/518) Implemenation of Spectral Clustering
 - [#429](https://github.com/helmholtz-analytics/heat/pull/429) Create submodule for Linear Algebra functions
 - [#429](https://github.com/helmholtz-analytics/heat/pull/429) Implementated QR
 - [#429](https://github.com/helmholtz-analytics/heat/pull/429) Implementated a tiling class to create Square tiles along the diagonal of a 2D matrix
@@ -11,6 +10,8 @@
 - [#498](https://github.com/helmholtz-analytics/heat/pull/498) Feature: flip()
 - [#499](https://github.com/helmholtz-analytics/heat/pull/499) Bugfix: MPI datatype mapping: `torch.int16` now maps to `MPI.SHORT` instead of `MPI.SHORT_INT`
 - [#515] (https://github.com/helmholtz-analytics/heat/pull/515) ht.var() now returns the unadjusted sample variance by default, Bessel's correction can be applied by setting ddof=1.
+- [#518](https://github.com/helmholtz-analytics/heat/pull/518) Implemenation of Spectral Clustering
+
 
 # v0.3.0
 

From f5d401218ad93bd59ebbca0e133b984d5d7ca450 Mon Sep 17 00:00:00 2001
From: Markus Goetz <murxman@gmail.com>
Date: Mon, 6 Apr 2020 15:03:24 +0200
Subject: [PATCH 45/49] Update CHANGELOG.md

---
 CHANGELOG.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b94c2d9732..646455f9ae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,12 +14,11 @@
 
 - [#507](https://github.com/helmholtz-analytics/heat/pull/507) Bugfix: sanitize_axis changes axis of 0-dim scalars to None
 - [#515](https://github.com/helmholtz-analytics/heat/pull/515) ht.var() now returns the unadjusted sample variance by default, Bessel's correction can be applied by setting ddof=1.
+- [#518](https://github.com/helmholtz-analytics/heat/pull/518) Implemenation of Spectral Clustering.
 - [#519](https://github.com/helmholtz-analytics/heat/pull/519) Bugfix: distributed slicing with empty list or scalar as input; distributed nonzero() of empty (local) tensor.
 - [#521](https://github.com/helmholtz-analytics/heat/pull/521) Add documentation for the generic reduce_op in Heat's core
 - [#522](https://github.com/helmholtz-analytics/heat/pull/522) Added CUDA-aware MPI detection for MVAPICH, MPICH and ParaStation.
 - [#526](https://github.com/helmholtz-analytics/heat/pull/526) float32 is now consistent default dtype for factories.
-- [#515] (https://github.com/helmholtz-analytics/heat/pull/515) ht.var() now returns the unadjusted sample variance by default, Bessel's correction can be applied by setting ddof=1.
-- [#518](https://github.com/helmholtz-analytics/heat/pull/518) Implemenation of Spectral Clustering
 
 # v0.3.0
 

From 28b4f388dd81ac51ea36bc543963b255550bec1e Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Mon, 6 Apr 2020 15:09:51 +0200
Subject: [PATCH 46/49] PR review changes

---
 CHANGELOG.md               |  6 +-----
 heat/core/linalg/solver.py | 11 +++++------
 heat/graph/laplacian.py    |  5 -----
 3 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b94c2d9732..249b9dea7b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,17 +9,13 @@
 - [#496](https://github.com/helmholtz-analytics/heat/pull/496) New feature: flipud()
 - [#498](https://github.com/helmholtz-analytics/heat/pull/498) Feature: flip()
 - [#499](https://github.com/helmholtz-analytics/heat/pull/499) Bugfix: MPI datatype mapping: `torch.int16` now maps to `MPI.SHORT` instead of `MPI.SHORT_INT`
-- [#515] (https://github.com/helmholtz-analytics/heat/pull/515) ht.var() now returns the unadjusted sample variance by default, Bessel's correction can be applied by setting ddof=1.
-- [#518](https://github.com/helmholtz-analytics/heat/pull/518) Implemenation of Spectral Clustering
-
 - [#507](https://github.com/helmholtz-analytics/heat/pull/507) Bugfix: sanitize_axis changes axis of 0-dim scalars to None
 - [#515](https://github.com/helmholtz-analytics/heat/pull/515) ht.var() now returns the unadjusted sample variance by default, Bessel's correction can be applied by setting ddof=1.
+- [#518](https://github.com/helmholtz-analytics/heat/pull/518) Implemenation of Spectral Clustering
 - [#519](https://github.com/helmholtz-analytics/heat/pull/519) Bugfix: distributed slicing with empty list or scalar as input; distributed nonzero() of empty (local) tensor.
 - [#521](https://github.com/helmholtz-analytics/heat/pull/521) Add documentation for the generic reduce_op in Heat's core
 - [#522](https://github.com/helmholtz-analytics/heat/pull/522) Added CUDA-aware MPI detection for MVAPICH, MPICH and ParaStation.
 - [#526](https://github.com/helmholtz-analytics/heat/pull/526) float32 is now consistent default dtype for factories.
-- [#515] (https://github.com/helmholtz-analytics/heat/pull/515) ht.var() now returns the unadjusted sample variance by default, Bessel's correction can be applied by setting ddof=1.
-- [#518](https://github.com/helmholtz-analytics/heat/pull/518) Implemenation of Spectral Clustering
 
 # v0.3.0
 
diff --git a/heat/core/linalg/solver.py b/heat/core/linalg/solver.py
index 94d7f3c405..ec8fb206dd 100644
--- a/heat/core/linalg/solver.py
+++ b/heat/core/linalg/solver.py
@@ -35,24 +35,23 @@ def cg(A, b, x0, out=None):
             )
         )
 
-    if not (A.numdims == 2):
+    if not A.numdims == 2:
         raise RuntimeError("A needs to be a 2D matrix")
-    if not (b.numdims == 1):
+    if not b.numdims == 1:
         raise RuntimeError("b needs to be a 1D vector")
-    if not (x0.numdims == 1):
+    if not x0.numdims == 1:
         raise RuntimeError("c needs to be a 1D vector")
 
     r = b - ht.matmul(A, x0)
     p = r
-    print(A.shape, p.shape)
     rsold = ht.matmul(r, r)
     x = x0
 
     for i in range(len(b)):
         Ap = ht.matmul(A, p)
         alpha = rsold / ht.matmul(p, Ap)
-        x = x + (alpha * p)
-        r = r - (alpha * Ap)
+        x = x + alpha * p
+        r = r - alpha * Ap
         rsnew = ht.matmul(r, r)
         if ht.sqrt(rsnew).item() < 1e-10:
             print(
diff --git a/heat/graph/laplacian.py b/heat/graph/laplacian.py
index 604f44f490..6656f3b1c6 100644
--- a/heat/graph/laplacian.py
+++ b/heat/graph/laplacian.py
@@ -102,11 +102,6 @@ def construct(self, X):
                 else:
                     S = ht.int(S > self.epsilon[1])
 
-        # Add other construction methods here
-        # elif self.mode == 'kNN':
-        #   ...
-        # self.mode == 'fully_connected' needs no further steps
-
         if self.definition == "simple":
             L = self._simple_L(S)
         elif self.definition == "norm_sym":

From 4829c037606d2795a52627c35558cac1d6881554 Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Mon, 6 Apr 2020 15:22:00 +0200
Subject: [PATCH 47/49] Documentation

---
 heat/core/linalg/solver.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/heat/core/linalg/solver.py b/heat/core/linalg/solver.py
index ec8fb206dd..e192014e41 100644
--- a/heat/core/linalg/solver.py
+++ b/heat/core/linalg/solver.py
@@ -17,6 +17,9 @@ def cg(A, b, x0, out=None):
         1D vector
     x0 : ht.DNDarray
         Arbitrary 1D starting vector
+    out : ht.DNDarray, optional
+        Output Vector
+
 
     Returns
     -------

From 77ea8853ca348f7a5896a6cc76fd30d39fb0551a Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Tue, 7 Apr 2020 07:30:23 +0200
Subject: [PATCH 48/49] Cosmetics

---
 heat/cluster/spectral.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/heat/cluster/spectral.py b/heat/cluster/spectral.py
index 072ce9f64e..f46f182ac0 100644
--- a/heat/cluster/spectral.py
+++ b/heat/cluster/spectral.py
@@ -1,8 +1,6 @@
 import torch
 import math
-import numpy as np
 import heat as ht
-import time
 
 
 class Spectral:
@@ -136,7 +134,7 @@ def fit(self, X):
             diff = eigenvalues[1:] - eigenvalues[:-1]
             tmp = ht.where(diff == diff.max()).item()
             self.n_clusters = tmp + 1
-            print("Number of suggested Clusters: ", self.n_clusters)
+
         components = eigenvectors[:, : self.n_clusters].copy()
 
         params = self._cluster.get_params()

From a8621989d93c6ca8d55f812f8cae30c44d1a923d Mon Sep 17 00:00:00 2001
From: Charlotte Debus <debus1@hdfmll01.hdfml>
Date: Tue, 7 Apr 2020 08:38:33 +0200
Subject: [PATCH 49/49] Bugfix tests cdist

---
 heat/spatial/tests/test_distances.py | 42 +++++++++++++++++-----------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/heat/spatial/tests/test_distances.py b/heat/spatial/tests/test_distances.py
index 81cd17b7ad..cb208cbb6c 100644
--- a/heat/spatial/tests/test_distances.py
+++ b/heat/spatial/tests/test_distances.py
@@ -67,11 +67,13 @@ def test_cdist(self):
         self.assertEqual(d.split, None)
 
         # Case 1c: X.split == None, Y != None, Y.split == 0
-        Y.resplit_(axis=0)
-        res_XX_cdist.resplit_(axis=1)
-        res_XX_rbf.resplit_(axis=1)
-        res_XY_cdist.resplit_(axis=1)
-        res_XY_rbf.resplit_(axis=1)
+        Y = ht.zeros((n * 2, 4), dtype=ht.float32, split=0, device=ht_device)
+        res_XX_cdist = ht.zeros((n * 2, n * 2), dtype=ht.float32, split=1, device=ht_device)
+        res_XX_rbf = ht.ones((n * 2, n * 2), dtype=ht.float32, split=1, device=ht_device)
+        res_XY_cdist = ht.ones((n * 2, n * 2), dtype=ht.float32, split=1, device=ht_device) * 2
+        res_XY_rbf = ht.ones(
+            (n * 2, n * 2), dtype=ht.float32, split=1, device=ht_device
+        ) * math.exp(-1.0)
 
         d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
         self.assertTrue(ht.equal(d, res_XY_cdist))
@@ -90,12 +92,14 @@ def test_cdist(self):
         self.assertEqual(d.split, 1)
 
         # Case 2a: X.split == 0, Y == None
-        X.resplit_(axis=0)
-        Y.resplit_(axis=None)
-        res_XX_cdist.resplit_(axis=0)
-        res_XX_rbf.resplit_(axis=0)
-        res_XY_cdist.resplit_(axis=0)
-        res_XY_rbf.resplit_(axis=0)
+        X = ht.ones((n * 2, 4), dtype=ht.float32, split=0, device=ht_device)
+        Y = ht.zeros((n * 2, 4), dtype=ht.float32, split=None, device=ht_device)
+        res_XX_cdist = ht.zeros((n * 2, n * 2), dtype=ht.float32, split=0, device=ht_device)
+        res_XX_rbf = ht.ones((n * 2, n * 2), dtype=ht.float32, split=0, device=ht_device)
+        res_XY_cdist = ht.ones((n * 2, n * 2), dtype=ht.float32, split=0, device=ht_device) * 2
+        res_XY_rbf = ht.ones(
+            (n * 2, n * 2), dtype=ht.float32, split=0, device=ht_device
+        ) * math.exp(-1.0)
 
         d = ht.spatial.cdist(X, quadratic_expansion=False)
         self.assertTrue(ht.equal(d, res_XX_cdist))
@@ -131,7 +135,7 @@ def test_cdist(self):
         self.assertEqual(d.split, 0)
 
         # Case 2c: X.split == 0, Y != None, Y.split == 0
-        Y.resplit_(axis=0)
+        Y = ht.zeros((n * 2, 4), dtype=ht.float32, split=0, device=ht_device)
 
         d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
         self.assertTrue(ht.equal(d, res_XY_cdist))
@@ -150,7 +154,7 @@ def test_cdist(self):
         self.assertEqual(d.split, 0)
 
         # Case 3 X.split == 1
-        X.resplit_(axis=1)
+        X = ht.ones((n * 2, 4), dtype=ht.float32, split=1, device=ht_device)
         with self.assertRaises(NotImplementedError):
             ht.spatial.cdist(X)
         with self.assertRaises(NotImplementedError):
@@ -173,12 +177,15 @@ def test_cdist(self):
             A[2 * i + 1, :] = A[2 * i + 1, :] * (2 * i + 1)
         res = torch.cdist(A._DNDarray__array, A._DNDarray__array)
 
-        A.resplit_(axis=0)
+        A = ht.ones((n * 2, 6), dtype=ht.float32, split=0, device=ht_device)
+        for i in range(n):
+            A[2 * i, :] = A[2 * i, :] * (2 * i)
+            A[2 * i + 1, :] = A[2 * i + 1, :] * (2 * i + 1)
         B = A.astype(ht.int32)
 
         d = ht.spatial.cdist(A, B, quadratic_expansion=False)
         result = ht.array(res, dtype=ht.float64, split=0, device=ht_device)
-        self.assertTrue(ht.allclose(d, result, atol=1e-8))
+        self.assertTrue(ht.allclose(d, result, atol=1e-5))
 
         n = ht.communication.MPI_WORLD.size
         A = ht.ones((n * 2, 6), dtype=ht.float32, split=None, device=ht_device)
@@ -187,7 +194,10 @@ def test_cdist(self):
             A[2 * i + 1, :] = A[2 * i + 1, :] * (2 * i + 1)
         res = torch.cdist(A._DNDarray__array, A._DNDarray__array)
 
-        A.resplit_(axis=0)
+        A = ht.ones((n * 2, 6), dtype=ht.float32, split=0, device=ht_device)
+        for i in range(n):
+            A[2 * i, :] = A[2 * i, :] * (2 * i)
+            A[2 * i + 1, :] = A[2 * i + 1, :] * (2 * i + 1)
         B = A.astype(ht.int32)
 
         d = ht.spatial.cdist(A, B, quadratic_expansion=False)