diff --git a/heat/core/arithmetics.py b/heat/core/arithmetics.py
index 6cde3e8fd6..474279a223 100644
--- a/heat/core/arithmetics.py
+++ b/heat/core/arithmetics.py
@@ -1,12 +1,13 @@
 import torch
 
 from .communication import MPI
-from . import operations
 from . import dndarray
-
+from . import operations
+from . import stride_tricks
 
 __all__ = [
     'add',
+    'diff',
     'div',
     'divide',
     'floordiv',
@@ -63,6 +64,83 @@ def add(t1, t2):
     return operations.__binary_op(torch.add, t1, t2)
 
 
+def diff(a, n=1, axis=-1):
+    """
+    Calculate the n-th discrete difference along the given axis.
+    The first difference is given by out[i] = a[i+1] - a[i] along the given axis, higher differences are calculated by using diff recursively.
+
+    Parameters
+    ----------
+    a : DNDarray
+        Input array
+    n : int, optional
+        The number of times values are differenced. If zero, the input is returned as-is. Default value is 1; n=2 is equivalent to ht.diff(ht.diff(a))
+    axis : int, optional
+        The axis along which the difference is taken, default is the last axis.
+
+    Returns
+    -------
+    diff : DNDarray
+        The n-th differences. The shape of the output is the same as a except along axis where the dimension is smaller by n.
+        The type of the output is the same as the type of the difference between any two elements of a.
+        The split does not change. In the distributed case the output array is balanced before it is returned.
+    """
+    if n == 0:  # nothing to do: the input object itself is handed back (this happens before the type check below)
+        return a
+    if n < 0:
+        raise ValueError('diff requires that n be a positive number, got {}'.format(n))
+    if not isinstance(a, dndarray.DNDarray):
+        raise TypeError('\'a\' must be a DNDarray')
+
+    axis = stride_tricks.sanitize_axis(a.gshape, axis)  # presumably validates axis and maps negative values to positive ones - confirm in stride_tricks
+
+    if not a.is_distributed():  # local case: no communication between processes in this branch
+        ret = a.copy()
+        for _ in range(n):  # one pass per difference order, each pass starts from the previous result
+            axis_slice = [slice(None)] * len(ret.shape)
+            axis_slice[axis] = slice(1, None, None)  # everything but the first entry along axis
+            axis_slice_end = [slice(None)] * len(ret.shape)
+            axis_slice_end[axis] = slice(None, -1, None)  # everything but the last entry along axis
+            ret = ret[axis_slice] - ret[axis_slice_end]  # a[i+1] - a[i]; ret shrinks by one along axis
+        return ret
+
+    size = a.comm.size
+    rank = a.comm.rank
+    ret = a.copy()  # work on a copy, the local data of ret is overwritten in place below
+    for _ in range(n):  # one pass per difference order, each pass starts from the values written by the previous pass
+        axis_slice = [slice(None)] * len(ret.shape)
+        axis_slice[axis] = slice(1, None, None)  # everything but the first local entry along axis
+        axis_slice_end = [slice(None)] * len(ret.shape)
+        axis_slice_end[axis] = slice(None, -1, None)  # everything but the last local entry along axis
+
+        arb_slice = [slice(None)] * len(a.shape)
+        arb_slice[axis] = 0  # integer index selecting the first local entry along axis
+        if rank > 0:  # NOTE(review): the neighbour exchange is not conditioned on axis == a.split - confirm this is intended
+            snd = ret.comm.Isend(ret.lloc[arb_slice].clone(), dest=rank - 1, tag=rank)  # non-blocking send of a clone of the first local entry to rank - 1
+
+        dif = ret.lloc[axis_slice] - ret.lloc[axis_slice_end]  # local differences: entry i+1 minus entry i along axis
+        diff_slice = [slice(x) for x in dif.shape]  # leading region of the local data that has the shape of dif
+        ret.lloc[diff_slice] = dif  # store the local differences in place, the last local entry along axis is not touched here
+
+        if rank > 0:
+            snd.wait()  # wait for the send to finish
+        if rank < size - 1:
+            cr_slice = [slice(None)] * len(a.shape)
+            cr_slice[axis] = 1  # integer index used only to get the shape of one cross-section; NOTE(review): needs at least 2 local entries along axis
+            recv_data = torch.ones(ret.lloc[cr_slice].shape, dtype=ret.dtype.torch_type())  # receive buffer, its contents are overwritten by Irecv
+            rec = ret.comm.Irecv(recv_data, source=rank + 1, tag=rank + 1)  # tag matches the tag used by the sender (its own rank)
+            axis_slice_end = [slice(None)] * len(a.shape)
+            axis_slice_end[axis] = slice(-1, None)  # the last local entry along axis, kept as a slice of length 1
+            rec.wait()
+            ret.lloc[axis_slice_end] = recv_data.reshape(ret.lloc[axis_slice_end].shape) - ret.lloc[axis_slice_end]  # boundary difference: first entry of rank + 1 minus the last local entry
+
+    axis_slice_end = [slice(None)] * len(a.shape)
+    axis_slice_end[axis] = slice(None, -1 * n, None)
+    ret = ret[axis_slice_end]  # cut off the trailing n entries along axis (nonsense data)
+    ret.balance_()  # balance the array before returning
+    return ret
+
+
 def div(t1, t2):
     """
     Element-wise true division of values of operand t1 by values of operands t2 (i.e t1 / t2), not commutative.
diff --git a/heat/core/dndarray.py b/heat/core/dndarray.py
index b99bcd7aa3..58fcc312e3 100644
--- a/heat/core/dndarray.py
+++ b/heat/core/dndarray.py
@@ -1378,8 +1378,11 @@ def __getitem__(self, key):
 
                 if isinstance(key[self.split], slice):  # if a slice is given in the split direction
                     # below allows for the split given to contain Nones
+                    key_stop = key[self.split].stop
+                    if key_stop is not None and key_stop < 0:
+                        key_stop = self.gshape[self.split] + key[self.split].stop
                     key_set = set(range(key[self.split].start if key[self.split].start is not None else 0,
-                                        key[self.split].stop if key[self.split].stop is not None else self.gshape[self.split],
+                                        key_stop if key_stop is not None else self.gshape[self.split],
                                         key[self.split].step if key[self.split].step else 1))
                     key = list(key)
                     overlap = list(key_set & chunk_set)
diff --git a/heat/core/tests/test_arithmetics.py b/heat/core/tests/test_arithmetics.py
index bf2c43d194..e42e8e2ab5 100644
--- a/heat/core/tests/test_arithmetics.py
+++ b/heat/core/tests/test_arithmetics.py
@@ -4,6 +4,7 @@
 import unittest
 
 import heat as ht
+import numpy as np
 
 
 class TestArithmetics(unittest.TestCase):
@@ -48,6 +49,46 @@ def test_add(self):
         with self.assertRaises(TypeError):
             ht.add('T', 's')
 
+    def test_diff(self):
+        ht_array = ht.random.rand(20, 20, 20, split=None)  # single source of random data for the loop below
+        arb_slice = [0] * 3  # start with integer index 0 in all three dimensions
+        for dim in range(3):  # loop over 3 dimensions
+            arb_slice[dim] = slice(None)  # open one more dimension per iteration, so the sliced array is 1-D, then 2-D, then 3-D
+            for ax in range(dim + 1):  # loop over the possible axis values
+                for sp in range(dim + 1):  # loop over the possible split values
+                    for nl in range(1, 4):  # loop to 3 for the number of times to do the diff
+                        lp_array = ht_array[arb_slice].resplit(sp)  # the random data is generated only once: slice it, then resplit the slice along sp
+                        np_array = ht_array[arb_slice].numpy()  # same data as a numpy array for the reference result
+
+                        ht_diff = ht.diff(lp_array, n=nl, axis=ax)
+                        np_diff = ht.array(np.diff(np_array, n=nl, axis=ax))  # reference result computed by numpy
+                        self.assertTrue(ht.equal(ht_diff, np_diff))
+                        self.assertEqual(ht_diff.split, sp)  # the split must not change
+                        self.assertEqual(ht_diff.dtype, lp_array.dtype)  # the dtype must not change
+
+        np_array = ht_array.numpy()  # full 3-D array with split=None, default axis
+        ht_diff = ht.diff(ht_array, n=2)
+        np_diff = ht.array(np.diff(np_array, n=2))
+        self.assertTrue(ht.equal(ht_diff, np_diff))
+        self.assertEqual(ht_diff.split, None)
+        self.assertEqual(ht_diff.dtype, ht_array.dtype)
+
+        ht_array = ht.random.rand(20, 20, 20, split=1, dtype=ht.float64)  # float64 data split along axis 1, default axis
+        np_array = ht_array.copy().numpy()
+        ht_diff = ht.diff(ht_array, n=2)
+        np_diff = ht.array(np.diff(np_array, n=2))
+        self.assertTrue(ht.equal(ht_diff, np_diff))
+        self.assertEqual(ht_diff.split, 1)
+        self.assertEqual(ht_diff.dtype, ht_array.dtype)
+
+        # invalid arguments must raise
+        with self.assertRaises(ValueError):
+            ht.diff(ht_array, n=-2)  # negative n
+        with self.assertRaises(TypeError):
+            ht.diff(ht_array, axis='string')  # axis is not an int
+        with self.assertRaises(TypeError):
+            ht.diff('string', axis=2)  # input is not a DNDarray
+
     def test_div(self):
         result = ht.array([
             [0.5, 1.0],
diff --git a/heat/core/tests/test_statistics.py b/heat/core/tests/test_statistics.py
index 4db5ff47e8..e9eef03a8c 100644
--- a/heat/core/tests/test_statistics.py
+++ b/heat/core/tests/test_statistics.py
@@ -299,7 +299,6 @@ def test_average(self):
         # check average over all float elements of split 3d tensor, tuple axis
         random_volume = ht.random.randn(3, 3, 3, split=0)
         avg_volume = ht.average(random_volume, axis=(1, 2))
-        alt_avg_volume = ht.average(random_volume, axis=(2, 1))
 
         self.assertIsInstance(avg_volume, ht.DNDarray)
         self.assertEqual(avg_volume.shape, (3,))