diff --git a/heat/core/arithmetics.py b/heat/core/arithmetics.py
index 6cde3e8fd6..474279a223 100644
--- a/heat/core/arithmetics.py
+++ b/heat/core/arithmetics.py
@@ -1,12 +1,13 @@
 import torch
 
 from .communication import MPI
-from . import operations
 from . import dndarray
-
+from . import operations
+from . import stride_tricks
 
 __all__ = [
     'add',
+    'diff',
     'div',
     'divide',
     'floordiv',
@@ -63,6 +64,92 @@ def add(t1, t2):
     return operations.__binary_op(torch.add, t1, t2)
 
 
+def diff(a, n=1, axis=-1):
+    """
+    Calculate the n-th discrete difference along the given axis.
+    The first difference is given by out[i] = a[i+1] - a[i] along the given axis, higher differences are calculated by using diff recursively.
+
+    Parameters
+    ----------
+    a : DNDarray
+        Input array
+    n : int, optional
+        The number of times values are differenced. If zero, the input is returned as-is.
+        Default value is 1
+        n=2 is equivalent to ht.diff(ht.diff(a))
+    axis : int, optional
+        The axis along which the difference is taken, default is the last axis.
+
+    Returns
+    -------
+    diff : DNDarray
+        The n-th differences. The shape of the output is the same as a except along axis where the dimension is smaller by n.
+        The type of the output is the same as the type of the difference between any two elements of a.
+        The split does not change. The output array is balanced.
+
+    Raises
+    ------
+    TypeError
+        If a is not a DNDarray or axis is not an integer.
+    ValueError
+        If n is negative.
+    """
+    # check the type first, otherwise the n == 0 shortcut hands any non-DNDarray input straight back to the caller
+    if not isinstance(a, dndarray.DNDarray):
+        raise TypeError('\'a\' must be a DNDarray')
+    if n < 0:
+        raise ValueError('diff requires that n be a positive number, got {}'.format(n))
+    if n == 0:
+        return a
+
+    axis = stride_tricks.sanitize_axis(a.gshape, axis)
+    ndim = len(a.shape)
+
+    def along_axis(index):
+        # build a fresh key on every call: index along axis, everything along all other dimensions
+        key = [slice(None)] * ndim
+        key[axis] = index
+        return key
+
+    if not a.is_distributed():
+        ret = a.copy()
+        for _ in range(n):
+            ret = ret[along_axis(slice(1, None))] - ret[along_axis(slice(None, -1))]
+        return ret
+
+    size = a.comm.size
+    rank = a.comm.rank
+    # only along the split axis does the successor of the last local element live on the next rank; along every other
+    # axis the difference is purely local and the boundary exchange would mix unrelated, possibly differently shaped chunks
+    exchange = axis == a.split
+    ret = a.copy()
+    for _ in range(n):  # work loop, runs n times. using the result at the end of the loop as the starting values for each loop
+        if exchange and rank > 0:
+            # send the first element along axis to rank - 1, cloned because ret.lloc is overwritten in place below
+            snd = ret.comm.Isend(ret.lloc[along_axis(0)].clone(), dest=rank - 1, tag=rank)
+
+        # standard logic for the diff with the next element
+        dif = ret.lloc[along_axis(slice(1, None))] - ret.lloc[along_axis(slice(None, -1))]
+        diff_slice = [slice(x) for x in dif.shape]  # need to slice out to select the proper elements of out
+        ret.lloc[diff_slice] = dif
+
+        if exchange and rank > 0:
+            snd.wait()  # wait for the send to finish
+        if exchange and rank < size - 1:
+            # buffer shaped like one element along axis; index 0 (not 1) so a single local element is enough
+            recv_data = torch.ones(ret.lloc[along_axis(0)].shape, dtype=ret.dtype.torch_type())
+            rec = ret.comm.Irecv(recv_data, source=rank + 1, tag=rank + 1)
+            last = along_axis(slice(-1, None))  # select the last elements in the selected axis
+            rec.wait()
+            # diff logic across the rank boundary
+            ret.lloc[last] = recv_data.reshape(ret.lloc[last].shape) - ret.lloc[last]
+
+    # the last n entries along axis never saw a valid successor (nonsense data), cut them off
+    ret = ret[along_axis(slice(None, -1 * n, None))]
+    ret.balance_()  # balance the array before returning
+    return ret
+
+
 def div(t1, t2):
     """
     Element-wise true division of values of operand t1 by values of operands t2 (i.e t1 / t2), not commutative.
diff --git a/heat/core/dndarray.py b/heat/core/dndarray.py
index b99bcd7aa3..58fcc312e3 100644
--- a/heat/core/dndarray.py
+++ b/heat/core/dndarray.py
@@ -1378,8 +1378,11 @@ def __getitem__(self, key):
 
             if isinstance(key[self.split], slice):  # if a slice is given in the split direction
                 # below allows for the split given to contain Nones
+                key_stop = key[self.split].stop
+                if key_stop is not None and key_stop < 0:
+                    key_stop = self.gshape[self.split] + key[self.split].stop
                 key_set = set(range(key[self.split].start if key[self.split].start is not None else 0,
-                                    key[self.split].stop if key[self.split].stop is not None else self.gshape[self.split],
+                                    key_stop if key_stop is not None else self.gshape[self.split],
                                     key[self.split].step if key[self.split].step else 1))
                 key = list(key)
                 overlap = list(key_set & chunk_set)
diff --git a/heat/core/tests/test_arithmetics.py b/heat/core/tests/test_arithmetics.py
index bf2c43d194..e42e8e2ab5 100644
--- a/heat/core/tests/test_arithmetics.py
+++ b/heat/core/tests/test_arithmetics.py
@@ -4,6 +4,7 @@
 import unittest
 
 import heat as ht
+import numpy as np
 
 
 class TestArithmetics(unittest.TestCase):
@@ -48,6 +49,51 @@ def test_add(self):
         with self.assertRaises(TypeError):
             ht.add('T', 's')
 
+    def test_diff(self):
+        ht_array = ht.random.rand(20, 20, 20, split=None)
+        arb_slice = [0] * 3
+        for dim in range(3):  # loop over 3 dimensions
+            arb_slice[dim] = slice(None)
+            for ax in range(dim + 1):  # loop over the possible axis values
+                for sp in range(dim + 1):  # loop over the possible split values
+                    for nl in range(1, 4):  # loop to 3 for the number of times to do the diff
+                        lp_array = ht_array[arb_slice].resplit(sp)  # the random numbers are generated once above and only sliced and resplit here
+                        np_array = ht_array[arb_slice].numpy()
+
+                        ht_diff = ht.diff(lp_array, n=nl, axis=ax)
+                        np_diff = ht.array(np.diff(np_array, n=nl, axis=ax))
+                        self.assertTrue(ht.equal(ht_diff, np_diff))
+                        self.assertEqual(ht_diff.split, sp)
+                        self.assertEqual(ht_diff.dtype, lp_array.dtype)
+
+        np_array = ht_array.numpy()
+        ht_diff = ht.diff(ht_array, n=2)
+        np_diff = ht.array(np.diff(np_array, n=2))
+        self.assertTrue(ht.equal(ht_diff, np_diff))
+        self.assertEqual(ht_diff.split, None)
+        self.assertEqual(ht_diff.dtype, ht_array.dtype)
+
+        ht_array = ht.random.rand(20, 20, 20, split=1, dtype=ht.float64)
+        np_array = ht_array.copy().numpy()
+        ht_diff = ht.diff(ht_array, n=2)
+        np_diff = ht.array(np.diff(np_array, n=2))
+        self.assertTrue(ht.equal(ht_diff, np_diff))
+        self.assertEqual(ht_diff.split, 1)
+        self.assertEqual(ht_diff.dtype, ht_array.dtype)
+
+        # n=0 returns the input as-is
+        self.assertTrue(ht.equal(ht.diff(ht_array, n=0), ht_array))
+
+        # raises
+        with self.assertRaises(ValueError):
+            ht.diff(ht_array, n=-2)
+        with self.assertRaises(TypeError):
+            ht.diff(ht_array, axis='string')
+        with self.assertRaises(TypeError):
+            ht.diff('string', axis=2)
+        with self.assertRaises(TypeError):
+            ht.diff('string', n=0)
+
     def test_div(self):
         result = ht.array([
             [0.5, 1.0],
diff --git a/heat/core/tests/test_statistics.py b/heat/core/tests/test_statistics.py
index 4db5ff47e8..e9eef03a8c 100644
--- a/heat/core/tests/test_statistics.py
+++ b/heat/core/tests/test_statistics.py
@@ -299,7 +299,6 @@ def test_average(self):
         # check average over all float elements of split 3d tensor, tuple axis
         random_volume = ht.random.randn(3, 3, 3, split=0)
         avg_volume = ht.average(random_volume, axis=(1, 2))
-        alt_avg_volume = ht.average(random_volume, axis=(2, 1))
 
         self.assertIsInstance(avg_volume, ht.DNDarray)
         self.assertEqual(avg_volume.shape, (3,))