Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Features/32 diff #388

Merged
merged 12 commits into from
Sep 25, 2019
82 changes: 80 additions & 2 deletions heat/core/arithmetics.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import torch

from .communication import MPI
from . import operations
from . import dndarray

from . import operations
from . import stride_tricks

__all__ = [
'add',
'diff',
'div',
'divide',
'floordiv',
Expand Down Expand Up @@ -63,6 +64,83 @@ def add(t1, t2):
return operations.__binary_op(torch.add, t1, t2)


def diff(a, n=1, axis=-1):
    """
    Calculate the n-th discrete difference along the given axis.

    The first difference is given by out[i] = a[i+1] - a[i] along the given axis, higher differences are
    calculated by using diff recursively.

    Parameters
    ----------
    a : DNDarray
        Input array
    n : int, optional
        The number of times values are differenced. If zero, the input is returned as-is.
        Default value is 1
        n=2 is equivalent to ht.diff(ht.diff(a))
    axis : int, optional
        The axis along which the difference is taken, default is the last axis.

    Returns
    -------
    diff : DNDarray
        The n-th differences. The shape of the output is the same as a except along axis where the
        dimension is smaller by n.
        The type of the output is the same as the type of the difference between any two elements of a.
        The split does not change. The output array is balanced.

    Raises
    ------
    ValueError
        If n is negative.
    TypeError
        If a is not a DNDarray.
    """
    if n == 0:
        return a
    if n < 0:
        raise ValueError('diff requires that n be a positive number, got {}'.format(n))
    if not isinstance(a, dndarray.DNDarray):
        raise TypeError('\'a\' must be a DNDarray')

    axis = stride_tricks.sanitize_axis(a.gshape, axis)
    ndim = len(a.shape)

    def along_axis(index):
        # full-range key in every dimension except `axis`, which gets `index`;
        # returned as a tuple so it is always interpreted as a multi-dimensional key
        key = [slice(None)] * ndim
        key[axis] = index
        return tuple(key)

    # the number of dimensions never changes, so the keys can be built once
    upper = along_axis(slice(1, None, None))  # elements 1 .. end along axis
    lower = along_axis(slice(None, -1, None))  # elements 0 .. end - 1 along axis

    if not a.is_distributed():
        ret = a.copy()
        for _ in range(n):
            ret = ret[upper] - ret[lower]
        return ret

    size = a.comm.size
    rank = a.comm.rank
    ret = a.copy()

    first = along_axis(0)  # first local element along axis (this dimension is dropped)
    last = along_axis(slice(-1, None))  # last local element along axis (dimension kept)

    # work loop, runs n times. the result at the end of one pass is the starting value of the next pass
    for _ in range(n):
        # send the first local element to rank - 1 *before* it is overwritten below
        if rank > 0:
            snd = ret.comm.Isend(ret.lloc[first].clone(), dest=rank - 1, tag=rank)

        # standard logic for the diff with the next element, written back in place;
        # the last local position along axis is filled in after the receive
        dif = ret.lloc[upper] - ret.lloc[lower]
        ret.lloc[lower] = dif

        if rank > 0:
            snd.wait()  # wait for the send to finish
        if rank < size - 1:
            # the receive buffer has the shape of one element along axis; index 0 is used as the
            # shape probe because it is valid even if this rank holds only one element along axis
            recv_data = torch.ones(ret.lloc[first].shape, dtype=ret.dtype.torch_type())
            rec = ret.comm.Irecv(recv_data, source=rank + 1, tag=rank + 1)
            rec.wait()
            # diff logic across the process boundary
            ret.lloc[last] = recv_data.reshape(ret.lloc[last].shape) - ret.lloc[last]

    # slice off the last n elements along axis (nonsense data left over from the in-place passes)
    ret = ret[along_axis(slice(None, -1 * n, None))]
    ret.balance_()  # balance the array before returning
    return ret


def div(t1, t2):
"""
Element-wise true division of values of operand t1 by values of operands t2 (i.e t1 / t2), not commutative.
Expand Down
5 changes: 4 additions & 1 deletion heat/core/dndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1378,8 +1378,11 @@ def __getitem__(self, key):

if isinstance(key[self.split], slice): # if a slice is given in the split direction
# below allows for the split given to contain Nones
key_stop = key[self.split].stop
if key_stop is not None and key_stop < 0:
key_stop = self.gshape[self.split] + key[self.split].stop
key_set = set(range(key[self.split].start if key[self.split].start is not None else 0,
key[self.split].stop if key[self.split].stop is not None else self.gshape[self.split],
key_stop if key_stop is not None else self.gshape[self.split],
key[self.split].step if key[self.split].step else 1))
key = list(key)
overlap = list(key_set & chunk_set)
Expand Down
41 changes: 41 additions & 0 deletions heat/core/tests/test_arithmetics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import unittest

import heat as ht
import numpy as np


class TestArithmetics(unittest.TestCase):
Expand Down Expand Up @@ -48,6 +49,46 @@ def test_add(self):
with self.assertRaises(TypeError):
ht.add('T', 's')

def test_diff(self):
    """Check ht.diff against np.diff for 1-D, 2-D and 3-D inputs over all axis/split/order combinations."""
    source = ht.random.rand(20, 20, 20, split=None)
    # start by selecting a single element; every pass of the outer loop opens one more dimension
    selector = [0] * 3
    for opened in range(3):
        selector[opened] = slice(None)
        for axis in range(opened + 1):  # every valid axis of the current sub-array
            for split in range(opened + 1):  # every valid split of the current sub-array
                for order in range(1, 4):  # difference orders 1, 2 and 3
                    # the random data is generated once; each case works on a view of it
                    distributed = source[selector].resplit(split)
                    reference = source[selector].numpy()

                    result = ht.diff(distributed, n=order, axis=axis)
                    expected = ht.array(np.diff(reference, n=order, axis=axis))
                    self.assertTrue(ht.equal(result, expected))
                    self.assertEqual(result.split, split)
                    self.assertEqual(result.dtype, distributed.dtype)

    # default axis on the full, non-split array
    reference = source.numpy()
    result = ht.diff(source, n=2)
    expected = ht.array(np.diff(reference, n=2))
    self.assertTrue(ht.equal(result, expected))
    self.assertIsNone(result.split)
    self.assertEqual(result.dtype, source.dtype)

    # default axis on a float64 array split along axis 1
    source = ht.random.rand(20, 20, 20, split=1, dtype=ht.float64)
    reference = source.copy().numpy()
    result = ht.diff(source, n=2)
    expected = ht.array(np.diff(reference, n=2))
    self.assertTrue(ht.equal(result, expected))
    self.assertEqual(result.split, 1)
    self.assertEqual(result.dtype, source.dtype)

    # invalid arguments
    with self.assertRaises(ValueError):
        ht.diff(source, n=-2)
    with self.assertRaises(TypeError):
        ht.diff(source, axis='string')
    with self.assertRaises(TypeError):
        ht.diff('string', axis=2)

def test_div(self):
result = ht.array([
[0.5, 1.0],
Expand Down
1 change: 0 additions & 1 deletion heat/core/tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,6 @@ def test_average(self):
# check average over all float elements of split 3d tensor, tuple axis
random_volume = ht.random.randn(3, 3, 3, split=0)
avg_volume = ht.average(random_volume, axis=(1, 2))
alt_avg_volume = ht.average(random_volume, axis=(2, 1))

self.assertIsInstance(avg_volume, ht.DNDarray)
self.assertEqual(avg_volume.shape, (3,))
Expand Down