Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8/roll 1d 2d #38

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions performance/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from performance.reference.util import dtype_from_element as dtype_from_element_ref
from performance.reference.util import array_deepcopy as array_deepcopy_ref
from performance.reference.util import isna_element as isna_element_ref
from performance.reference.util import roll_1d as roll_1d_ref

from performance.reference.array_go import ArrayGO as ArrayGOREF

Expand All @@ -32,6 +33,7 @@
from arraykit import dtype_from_element as dtype_from_element_ak
from arraykit import array_deepcopy as array_deepcopy_ak
from arraykit import isna_element as isna_element_ak
from arraykit import roll_1d as roll_1d_ak

from arraykit import ArrayGO as ArrayGOAK

Expand Down Expand Up @@ -359,6 +361,89 @@ class IsNaElementPerfREF(IsNaElementPerf):
entry = staticmethod(isna_element_ref)


#-------------------------------------------------------------------------------

storage = []
def build_subclassses(klass, ak_meth, ref_meth):
    '''
    Derive two benchmark classes from ``klass`` and append them to ``storage``.

    The first is named ``<klass name>AK`` and exposes ``ak_meth`` as its static ``entry``; the second is named ``<klass name>REF`` and exposes ``ref_meth``. The AK class is always appended before the REF class.
    '''
    for suffix, meth in (('AK', ak_meth), ('REF', ref_meth)):
        # staticmethod keeps `self.entry(...)` from binding the instance as the first argument
        namespace = {'entry': staticmethod(meth)}
        storage.append(type(f'{klass.__name__}{suffix}', (klass,), namespace))


#-------------------------------------------------------------------------------
class Roll1d20kInt(Perf):
    '''
    Benchmark: roll a 20,000-element integer array once for every shift from -20_001 through 20_000, so shifts just past the array length in both directions are included.
    '''
    # NOTE(review): NUMBER is presumably the repetition count read by the Perf runner -- confirm against Perf
    NUMBER = 10
    SIZE = 20_000

    def __init__(self):
        # default integer dtype produced by np.arange
        self.array = np.arange(self.SIZE)

    def main(self):
        roll, values = self.entry, self.array
        for shift in range(-20_001, 20_001):
            roll(values, shift)

class Roll1d20kFloat(Perf):
    '''
    Benchmark: roll a 20,000-element float array once for every shift from -20_001 through 20_000.
    '''
    # NOTE(review): NUMBER is presumably the repetition count read by the Perf runner -- confirm against Perf
    NUMBER = 10
    SIZE = 20_000

    def __init__(self):
        integers = np.arange(self.SIZE)
        self.array = integers.astype(float)

    def main(self):
        roll, values = self.entry, self.array
        for shift in range(-20_001, 20_001):
            roll(values, shift)

class Roll1d20kObject(Perf):
    '''
    Benchmark: roll a 20,000-element object-dtype array once for every shift from -20_001 through 20_000.
    '''
    # Lower than the sibling 20k benchmarks, which use NUMBER = 10.
    # NOTE(review): NUMBER is presumably the repetition count read by the Perf runner -- confirm against Perf
    NUMBER = 2
    SIZE = 20_000

    def __init__(self):
        integers = np.arange(self.SIZE)
        self.array = integers.astype(object)

    def main(self):
        roll, values = self.entry, self.array
        for shift in range(-20_001, 20_001):
            roll(values, shift)

class Roll1d1kInt(Perf):
    '''
    Benchmark: roll a 1,000-element integer array once for every shift from -20_000 through 19_999; most shifts are many multiples of the array length.
    '''
    # NOTE(review): NUMBER is presumably the repetition count read by the Perf runner -- confirm against Perf
    NUMBER = 10
    SIZE = 1_000

    def __init__(self):
        # default integer dtype produced by np.arange
        self.array = np.arange(self.SIZE)

    def main(self):
        roll, values = self.entry, self.array
        for shift in range(-20_000, 20_000):
            roll(values, shift)

class Roll1d1kFloat(Perf):
    '''
    Benchmark: roll a 1,000-element float array once for every shift from -20_000 through 19_999; most shifts are many multiples of the array length.
    '''
    # NOTE(review): NUMBER is presumably the repetition count read by the Perf runner -- confirm against Perf
    NUMBER = 10
    SIZE = 1_000

    def __init__(self):
        integers = np.arange(self.SIZE)
        self.array = integers.astype(float)

    def main(self):
        roll, values = self.entry, self.array
        for shift in range(-20_000, 20_000):
            roll(values, shift)

class Roll1d1kObject(Perf):
    '''
    Benchmark: roll a 1,000-element object-dtype array once for every shift from -20_000 through 19_999; most shifts are many multiples of the array length.
    '''
    # NOTE(review): NUMBER is presumably the repetition count read by the Perf runner -- confirm against Perf
    NUMBER = 10
    SIZE = 1_000

    def __init__(self):
        integers = np.arange(self.SIZE)
        self.array = integers.astype(object)

    def main(self):
        roll, values = self.entry, self.array
        for shift in range(-20_000, 20_000):
            roll(values, shift)


# Generate the AK / REF benchmark pair for every roll_1d scenario defined above.
# Each call appends two classes to `storage`: first the one whose `entry` is the
# arraykit implementation, then the one whose `entry` is the reference implementation.
build_subclassses(Roll1d20kInt, roll_1d_ak, roll_1d_ref)
build_subclassses(Roll1d20kFloat, roll_1d_ak, roll_1d_ref)
build_subclassses(Roll1d20kObject, roll_1d_ak, roll_1d_ref)
build_subclassses(Roll1d1kInt, roll_1d_ak, roll_1d_ref)
build_subclassses(Roll1d1kFloat, roll_1d_ak, roll_1d_ref)
build_subclassses(Roll1d1kObject, roll_1d_ak, roll_1d_ref)

#-------------------------------------------------------------------------------

def get_arg_parser():
Expand Down
20 changes: 20 additions & 0 deletions performance/reference/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,23 @@ def dtype_from_element(value: tp.Optional[tp.Hashable]) -> np.dtype:
# NOTE: calling array and getting dtype on np.nan is faster than combining isinstance, isnan calls
return np.array(value).dtype


def roll_1d(array: np.ndarray, shift: int) -> np.ndarray:
    '''
    Return a new array holding the elements of the 1D ``array`` rotated by ``shift`` positions.

    A positive ``shift`` moves elements toward higher indices, wrapping the tail around to the front; a negative ``shift`` rotates the other way. Shifts of any magnitude are reduced modulo the array length. The input is never modified and a fresh array is always returned, even when nothing moves.

    Specialized for the 1D case; the original author notes it is at least four times faster than np.roll.
    '''
    size = len(array)

    # With zero or one element there is nothing to rotate. For a positive size
    # Python's modulo is never negative, so `offset` lands in [0, size).
    offset = shift % size if size > 1 else 0
    if not offset:
        return array.copy()

    rolled = np.empty(size, dtype=array.dtype)
    split = size - offset

    # The last `offset` elements wrap to the front; everything before them slides right.
    rolled[:offset] = array[split:]
    rolled[offset:] = array[:split]
    return rolled

1 change: 1 addition & 0 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
from ._arraykit import resolve_dtype_iter as resolve_dtype_iter
from ._arraykit import isna_element as isna_element
from ._arraykit import dtype_from_element as dtype_from_element
from ._arraykit import roll_1d as roll_1d
1 change: 1 addition & 0 deletions src/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,5 @@ def resolve_dtype(__d1: np.dtype, __d2: np.dtype) -> np.dtype: ...
def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ...
def isna_element(__value: tp.Any) -> bool: ...
def dtype_from_element(__value: tp.Optional[tp.Hashable]) -> np.dtype: ...
# Roll the elements of an array by `__shift` positions (the shift is reduced modulo the
# array size) and return the result as a new array; the input array is not modified.
def roll_1d(__array: np.ndarray, __shift: int) -> np.ndarray: ...

125 changes: 123 additions & 2 deletions src/_arraykit.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,9 @@ shape_filter(PyObject *Py_UNUSED(m), PyObject *a)
AK_CHECK_NUMPY_ARRAY_1D_2D(a);
PyArrayObject *array = (PyArrayObject *)a;

int size0 = PyArray_DIM(array, 0);
int size0 = (int)PyArray_DIM(array, 0);
// If 1D array, set size for axis 1 at 1, else use 2D array to get the size of axis 1
int size1 = PyArray_NDIM(array) == 1 ? 1 : PyArray_DIM(array, 1);
int size1 = (int)(PyArray_NDIM(array) == 1 ? 1 : PyArray_DIM(array, 1));
return Py_BuildValue("ii", size0, size1);
}

Expand Down Expand Up @@ -490,6 +490,126 @@ isna_element(PyObject *Py_UNUSED(m), PyObject *arg)
Py_RETURN_FALSE;
}

//------------------------------------------------------------------------------
// rolling

// Roll the elements of `array` by `shift` positions into a newly allocated array
// of the same dtype and return a new reference to it (NULL with an exception set
// on failure). A positive shift moves elements toward higher indices; a negative
// shift moves them toward lower indices.
//
// The copy is driven by an NpyIter with an external inner loop. The inner loop is
// NOT guaranteed to be contiguous: a strided or reversed view reports a stride
// different from the item size. The fast two-memcpy path is therefore only taken
// when both operands are packed; otherwise elements are moved one at a time using
// the strides the iterator reports.
static PyObject *
_roll_1d(PyArrayObject *array, int shift)
{
    // Operand 0 is the caller's array (read only). Operand 1 is NULL so that the
    // iterator allocates the output; its dtype then matches that of the input.
    PyArrayObject *arrays[2] = {array, NULL};
    npy_uint32 arrays_flags[2] = {
            NPY_ITER_READONLY,
            NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE,
            };

    // EXTERNAL_LOOP: the inner loop is handled here rather than by the iterator.
    // REFS_OK: object references are permitted (ref counts are fixed up below).
    // DONT_NEGATE_STRIDES: keep logical element order for reversed views, so the
    // roll direction does not silently flip when the input has a negative stride.
    npy_uint32 iter_flags = NPY_ITER_EXTERNAL_LOOP
            | NPY_ITER_REFS_OK
            | NPY_ITER_DONT_NEGATE_STRIDES;

    NpyIter *iter = NpyIter_MultiNew(
            2,              // number of arrays
            arrays,
            iter_flags,
            NPY_KEEPORDER,  // maintain existing order for `array`
            NPY_NO_CASTING, // both operands share a dtype; casting is neither needed nor allowed
            arrays_flags,
            NULL);          // dtypes are taken from `array`
    if (iter == NULL) {
        return NULL;
    }

    NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
    if (!iternext) {
        NpyIter_Deallocate(iter);
        return NULL;
    }

    char **dataptr = NpyIter_GetDataPtrArray(iter);
    npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
    npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter);
    npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize;
    int is_object = PyDataType_ISOBJECT(PyArray_DESCR(array));

    // Release the GIL only when the iteration does not need the Python API;
    // when it does (e.g. object references), ref counts are adjusted with the GIL held.
    NPY_BEGIN_THREADS_DEF;
    if (!NpyIter_IterationNeedsAPI(iter)) {
        NPY_BEGIN_THREADS;
    }

    do {
        char *src_data = dataptr[0];
        char *dst_data = dataptr[1];
        npy_intp size = *sizeptr;
        npy_intp src_stride = strideptr[0];
        npy_intp dst_stride = strideptr[1];
        npy_intp i;

        if (size > 0) {
            // Normalise against the size of THIS inner loop so that `k` is always
            // in [0, size); C's % keeps the sign of the dividend, hence the fix-up.
            npy_intp k = shift % size;
            if (k < 0) {
                k += size;
            }
            // Source elements [split, size) wrap to the front of the destination.
            npy_intp split = size - k;

            if (src_stride == itemsize && dst_stride == itemsize) {
                // Both operands are packed: two bulk copies suffice.
                npy_intp tail_bytes = k * itemsize;
                npy_intp head_bytes = split * itemsize;
                memcpy(dst_data, src_data + head_bytes, tail_bytes);
                memcpy(dst_data + tail_bytes, src_data, head_bytes);
            }
            else {
                // Strided operand(s): place source element i at destination (i + k) mod size.
                for (i = 0; i < size; ++i) {
                    npy_intp j = i + k;
                    if (j >= size) {
                        j -= size;
                    }
                    memcpy(dst_data + j * dst_stride,
                            src_data + i * src_stride,
                            itemsize);
                }
            }

            // The raw byte copy duplicated object pointers without owning them:
            // take one new reference per destination slot. Py_XINCREF tolerates
            // NULL slots, which an object array may contain.
            if (is_object) {
                char *slot = dst_data;
                for (i = 0; i < size; ++i) {
                    Py_XINCREF(*(PyObject **)slot);
                    slot += dst_stride;
                }
            }
        }
    } while (iternext(iter));

    NPY_END_THREADS;

    // The allocated output is owned by the iterator; take our own reference
    // before the iterator is torn down.
    PyArrayObject *ret = NpyIter_GetOperandArray(iter)[1];
    if (!ret) {
        NpyIter_Deallocate(iter);
        return NULL;
    }
    Py_INCREF(ret);

    if (NpyIter_Deallocate(iter) != NPY_SUCCEED) {
        Py_DECREF(ret);
        return NULL;
    }

    return (PyObject *)ret;
}

// Python entry point: roll_1d(array, shift) -> ndarray.
//
// Parses a NumPy array and a C int shift, reduces the shift modulo the number of
// elements, and returns a new array. When the array is empty or the reduced shift
// is zero the result is a plain copy of the input; otherwise the work is delegated
// to _roll_1d. Returns NULL with an exception set on failure.
static PyObject *
roll_1d(PyObject *Py_UNUSED(m), PyObject *args)
{
    PyArrayObject *array;
    int shift;

    if (!PyArg_ParseTuple(args, "O!i:roll_1d", &PyArray_Type, &array, &shift)) {
        return NULL;
    }

    // Keep the element count in npy_intp: narrowing it to int would truncate for
    // arrays with more than INT_MAX elements and corrupt the modulo below.
    // npy_intp is signed, which the modulo needs for negative shift values.
    npy_intp size = PyArray_SIZE(array);

    // Guard the modulo against division by zero for empty arrays. The remainder
    // never exceeds `shift` in magnitude, so it always fits back into an int.
    npy_intp reduced = (size == 0) ? 0 : ((npy_intp)shift % size);

    if (reduced == 0) {
        // Nothing moves: return an independent copy (NULL propagates on failure).
        return PyArray_Copy(array);
    }

    return _roll_1d(array, (int)reduced);
}


//------------------------------------------------------------------------------
// ArrayGO
//------------------------------------------------------------------------------
Expand Down Expand Up @@ -772,6 +892,7 @@ static PyMethodDef arraykit_methods[] = {
{"resolve_dtype_iter", resolve_dtype_iter, METH_O, NULL},
{"isna_element", isna_element, METH_O, NULL},
{"dtype_from_element", dtype_from_element, METH_O, NULL},
{"roll_1d", roll_1d, METH_VARARGS, NULL},
{NULL},
};

Expand Down
20 changes: 20 additions & 0 deletions test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from arraykit import array_deepcopy
from arraykit import isna_element
from arraykit import dtype_from_element
from arraykit import roll_1d

from performance.reference.util import mloc as mloc_ref

Expand Down Expand Up @@ -368,6 +369,25 @@ def test_dtype_from_element_str_and_bytes_dtypes(self) -> None:
self.assertEqual(np.dtype(f'|S{size}'), dtype_from_element(bytes(size)))
self.assertEqual(np.dtype(f'<U{size}'), dtype_from_element('x' * size))

def test_roll_1d_a(self) -> None:
    # roll_1d must agree with np.roll for every shift magnitude from 0 through
    # len(array), in both directions, on a float array.
    source = np.arange(12, dtype=float)

    for magnitude in range(len(source) + 1):
        for shift in (magnitude, -magnitude):
            self.assertEqual(
                    roll_1d(source, shift).tolist(),
                    np.roll(source, shift).tolist(),
                    )

def test_roll_1d_b(self) -> None:
    # Rolling an empty array by a non-zero shift yields an empty array.
    empty = np.array([])
    rolled = roll_1d(empty, -4)
    self.assertEqual([], rolled.tolist())

def test_roll_1d_c(self) -> None:
    # Spot-check one step in each direction against hand-written expectations.
    source = np.array([3, 4, 5, 6])
    cases = (
            (1, [6, 3, 4, 5]),
            (-1, [4, 5, 6, 3]),
            )
    for shift, expected in cases:
        self.assertEqual(roll_1d(source, shift).tolist(), expected)


# Run the unittest runner when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()