diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 40c8cabe3cb9a..cac9c5ccc7a6d 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -2018,7 +2018,7 @@ def group_median(ndarray[float64_t, ndim=2] out, data = np.empty((K, N), dtype=np.float64) ptr = data.data - take_2d_axis1_float64(values.T, indexer, out=data) + take_2d_axis1_float64_float64(values.T, indexer, out=data) for i in range(K): # exclude NA group diff --git a/pandas/core/common.py b/pandas/core/common.py index c99fd87f7a643..f83b218a1ae98 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1,12 +1,14 @@ """ Misc tools for implementing data structures """ +# XXX: HACK for NumPy 1.5.1 to suppress warnings try: import cPickle as pickle except ImportError: # pragma: no cover import pickle import itertools +from datetime import datetime from numpy.lib.format import read_array, write_array import numpy as np @@ -244,231 +246,333 @@ def _unpickle_array(bytes): return arr -def _view_wrapper(f, wrap_dtype, na_override=None): +def _view_wrapper(f, arr_dtype, out_dtype, fill_wrap=None): def wrapper(arr, indexer, out, fill_value=np.nan): - if na_override is not None and np.isnan(fill_value): - fill_value = na_override - view = arr.view(wrap_dtype) - outview = out.view(wrap_dtype) - f(view, indexer, outview, fill_value=fill_value) + if arr_dtype is not None: + arr = arr.view(arr_dtype) + if out_dtype is not None: + out = out.view(out_dtype) + if fill_wrap is not None: + fill_value = fill_wrap(fill_value) + f(arr, indexer, out, fill_value=fill_value) return wrapper -_take1d_dict = { - 'float64': algos.take_1d_float64, - 'float32': algos.take_1d_float32, - 'int8': algos.take_1d_int8, - 'int16': algos.take_1d_int16, - 'int32': algos.take_1d_int32, - 'int64': algos.take_1d_int64, - 'object': algos.take_1d_object, - 'bool': _view_wrapper(algos.take_1d_bool, np.uint8), - 'datetime64[ns]': _view_wrapper(algos.take_1d_int64, np.int64, - na_override=tslib.iNaT), -} +def _datetime64_fill_wrap(fill_value): + if isnull(fill_value): + return tslib.iNaT + try: + return lib.Timestamp(fill_value).value + except: + # the proper thing to do here would probably be to upcast to object + # (but numpy 1.6.1 doesn't do this properly) + return tslib.iNaT + -_take2d_axis0_dict = { - 'float64': algos.take_2d_axis0_float64, - 'float32': algos.take_2d_axis0_float32, - 'int8': algos.take_2d_axis0_int8, - 'int16': algos.take_2d_axis0_int16, - 'int32': algos.take_2d_axis0_int32, - 'int64': algos.take_2d_axis0_int64, - 'object': algos.take_2d_axis0_object, - 'bool': _view_wrapper(algos.take_2d_axis0_bool, np.uint8), - 'datetime64[ns]': _view_wrapper(algos.take_2d_axis0_int64, np.int64, - na_override=tslib.iNaT), +def _convert_wrapper(f, conv_dtype): + def wrapper(arr, indexer, out, fill_value=np.nan): + arr = arr.astype(conv_dtype) + f(arr, indexer, out, fill_value=fill_value) + return wrapper + + +def _take_2d_multi_generic(arr, indexer, out, fill_value=np.nan): + # this is not ideal, performance-wise, but it's better than + # raising an exception + if arr.shape[0] == 0 or arr.shape[1] == 0: + return + row_idx, col_idx = indexer + row_mask = row_idx == -1 + col_mask = col_idx == -1 + if fill_value is not None: + if row_mask.any(): + out[row_mask, :] = fill_value + if col_mask.any(): + out[:, col_mask] = fill_value + for i in range(len(row_idx)): + u = row_idx[i] + for j in range(len(col_idx)): + v = col_idx[j] + out[i, j] = arr[u, v] + + +def _take_nd_generic(arr, indexer, out, axis=0, fill_value=np.nan): + if arr.shape[axis] == 0: + return + mask = 
indexer == -1 + needs_masking = mask.any() + if arr.dtype != out.dtype: + arr = arr.astype(out.dtype) + ndtake(arr, indexer, axis=axis, out=out) + if needs_masking: + outindexer = [slice(None)] * arr.ndim + outindexer[axis] = mask + out[tuple(outindexer)] = fill_value + + +_take_1d_dict = { + ('int8', 'int8'): algos.take_1d_int8_int8, + ('int8', 'int32'): algos.take_1d_int8_int32, + ('int8', 'int64'): algos.take_1d_int8_int64, + ('int8', 'float64'): algos.take_1d_int8_float64, + ('int16', 'int16'): algos.take_1d_int16_int16, + ('int16', 'int32'): algos.take_1d_int16_int32, + ('int16', 'int64'): algos.take_1d_int16_int64, + ('int16', 'float64'): algos.take_1d_int16_float64, + ('int32', 'int32'): algos.take_1d_int32_int32, + ('int32', 'int64'): algos.take_1d_int32_int64, + ('int32', 'float64'): algos.take_1d_int32_float64, + ('int64', 'int64'): algos.take_1d_int64_int64, + ('int64', 'float64'): algos.take_1d_int64_float64, + ('float32', 'float32'): algos.take_1d_float32_float32, + ('float32', 'float64'): algos.take_1d_float32_float64, + ('float64', 'float64'): algos.take_1d_float64_float64, + ('object', 'object'): algos.take_1d_object_object, + ('bool', 'bool'): + _view_wrapper(algos.take_1d_bool_bool, np.uint8, np.uint8), + ('bool', 'object'): + _view_wrapper(algos.take_1d_bool_object, np.uint8, None), + ('datetime64[ns]','datetime64[ns]'): + _view_wrapper(algos.take_1d_int64_int64, np.int64, np.int64, + fill_wrap=_datetime64_fill_wrap) } -_take2d_axis1_dict = { - 'float64': algos.take_2d_axis1_float64, - 'float32': algos.take_2d_axis1_float32, - 'int8': algos.take_2d_axis1_int8, - 'int16': algos.take_2d_axis1_int16, - 'int32': algos.take_2d_axis1_int32, - 'int64': algos.take_2d_axis1_int64, - 'object': algos.take_2d_axis1_object, - 'bool': _view_wrapper(algos.take_2d_axis1_bool, np.uint8), - 'datetime64[ns]': _view_wrapper(algos.take_2d_axis1_int64, np.int64, - na_override=tslib.iNaT), + +_take_2d_axis0_dict = { + ('int8', 'int8'): algos.take_2d_axis0_int8_int8, + ('int8', 'int32'): algos.take_2d_axis0_int8_int32, + ('int8', 'int64'): algos.take_2d_axis0_int8_int64, + ('int8', 'float64'): algos.take_2d_axis0_int8_float64, + ('int16', 'int16'): algos.take_2d_axis0_int16_int16, + ('int16', 'int32'): algos.take_2d_axis0_int16_int32, + ('int16', 'int64'): algos.take_2d_axis0_int16_int64, + ('int16', 'float64'): algos.take_2d_axis0_int16_float64, + ('int32', 'int32'): algos.take_2d_axis0_int32_int32, + ('int32', 'int64'): algos.take_2d_axis0_int32_int64, + ('int32', 'float64'): algos.take_2d_axis0_int32_float64, + ('int64', 'int64'): algos.take_2d_axis0_int64_int64, + ('int64', 'float64'): algos.take_2d_axis0_int64_float64, + ('float32', 'float32'): algos.take_2d_axis0_float32_float32, + ('float32', 'float64'): algos.take_2d_axis0_float32_float64, + ('float64', 'float64'): algos.take_2d_axis0_float64_float64, + ('object', 'object'): algos.take_2d_axis0_object_object, + ('bool', 'bool'): + _view_wrapper(algos.take_2d_axis0_bool_bool, np.uint8, np.uint8), + ('bool', 'object'): + _view_wrapper(algos.take_2d_axis0_bool_object, np.uint8, None), + ('datetime64[ns]','datetime64[ns]'): + _view_wrapper(algos.take_2d_axis0_int64_int64, np.int64, np.int64, + fill_wrap=_datetime64_fill_wrap) } -_take2d_multi_dict = { - 'float64': algos.take_2d_multi_float64, - 'float32': algos.take_2d_multi_float32, - 'int8': algos.take_2d_multi_int8, - 'int16': algos.take_2d_multi_int16, - 'int32': algos.take_2d_multi_int32, - 'int64': algos.take_2d_multi_int64, - 'object': algos.take_2d_multi_object, - 'bool': 
_view_wrapper(algos.take_2d_multi_bool, np.uint8), - 'datetime64[ns]': _view_wrapper(algos.take_2d_multi_int64, np.int64, - na_override=tslib.iNaT), + +_take_2d_axis1_dict = { + ('int8', 'int8'): algos.take_2d_axis1_int8_int8, + ('int8', 'int32'): algos.take_2d_axis1_int8_int32, + ('int8', 'int64'): algos.take_2d_axis1_int8_int64, + ('int8', 'float64'): algos.take_2d_axis1_int8_float64, + ('int16', 'int16'): algos.take_2d_axis1_int16_int16, + ('int16', 'int32'): algos.take_2d_axis1_int16_int32, + ('int16', 'int64'): algos.take_2d_axis1_int16_int64, + ('int16', 'float64'): algos.take_2d_axis1_int16_float64, + ('int32', 'int32'): algos.take_2d_axis1_int32_int32, + ('int32', 'int64'): algos.take_2d_axis1_int32_int64, + ('int32', 'float64'): algos.take_2d_axis1_int32_float64, + ('int64', 'int64'): algos.take_2d_axis1_int64_int64, + ('int64', 'float64'): algos.take_2d_axis1_int64_float64, + ('float32', 'float32'): algos.take_2d_axis1_float32_float32, + ('float32', 'float64'): algos.take_2d_axis1_float32_float64, + ('float64', 'float64'): algos.take_2d_axis1_float64_float64, + ('object', 'object'): algos.take_2d_axis1_object_object, + ('bool', 'bool'): + _view_wrapper(algos.take_2d_axis1_bool_bool, np.uint8, np.uint8), + ('bool', 'object'): + _view_wrapper(algos.take_2d_axis1_bool_object, np.uint8, None), + ('datetime64[ns]','datetime64[ns]'): + _view_wrapper(algos.take_2d_axis1_int64_int64, np.int64, np.int64, + fill_wrap=_datetime64_fill_wrap) } -_dtypes_no_na = set(['int8','int16','int32', 'int64', 'bool']) -_dtypes_na = set(['float32', 'float64', 'object', 'datetime64[ns]']) -def _get_take2d_function(dtype_str, axis=0): - if axis == 0: - return _take2d_axis0_dict[dtype_str] - elif axis == 1: - return _take2d_axis1_dict[dtype_str] - elif axis == 'multi': - return _take2d_multi_dict[dtype_str] - else: # pragma: no cover - raise ValueError('bad axis: %s' % axis) +_take_2d_multi_dict = { + ('int8', 'int8'): algos.take_2d_multi_int8_int8, + ('int8', 'int32'): algos.take_2d_multi_int8_int32, + ('int8', 'int64'): algos.take_2d_multi_int8_int64, + ('int8', 'float64'): algos.take_2d_multi_int8_float64, + ('int16', 'int16'): algos.take_2d_multi_int16_int16, + ('int16', 'int32'): algos.take_2d_multi_int16_int32, + ('int16', 'int64'): algos.take_2d_multi_int16_int64, + ('int16', 'float64'): algos.take_2d_multi_int16_float64, + ('int32', 'int32'): algos.take_2d_multi_int32_int32, + ('int32', 'int64'): algos.take_2d_multi_int32_int64, + ('int32', 'float64'): algos.take_2d_multi_int32_float64, + ('int64', 'int64'): algos.take_2d_multi_int64_int64, + ('int64', 'float64'): algos.take_2d_multi_int64_float64, + ('float32', 'float32'): algos.take_2d_multi_float32_float32, + ('float32', 'float64'): algos.take_2d_multi_float32_float64, + ('float64', 'float64'): algos.take_2d_multi_float64_float64, + ('object', 'object'): algos.take_2d_multi_object_object, + ('bool', 'bool'): + _view_wrapper(algos.take_2d_multi_bool_bool, np.uint8, np.uint8), + ('bool', 'object'): + _view_wrapper(algos.take_2d_multi_bool_object, np.uint8, None), + ('datetime64[ns]','datetime64[ns]'): + _view_wrapper(algos.take_2d_multi_int64_int64, np.int64, np.int64, + fill_wrap=_datetime64_fill_wrap) +} -def take_1d(arr, indexer, out=None, fill_value=np.nan): - """ - Specialized Cython take which sets NaN values in one pass - """ - dtype_str = arr.dtype.name +def _get_take_1d_function(dtype, out_dtype): + try: + return _take_1d_dict[dtype.name, out_dtype.name] + except KeyError: + pass - n = len(indexer) + if dtype != out_dtype: + try: + func = 
_take_1d_dict[out_dtype.name, out_dtype.name] + return _convert_wrapper(func, out_dtype) + except KeyError: + pass - indexer = _ensure_int64(indexer) + def wrapper(arr, indexer, out, fill_value=np.nan): + return _take_nd_generic(arr, indexer, out, axis=0, + fill_value=fill_value) + return wrapper - out_passed = out is not None - take_f = _take1d_dict.get(dtype_str) - if dtype_str in _dtypes_no_na: +def _get_take_2d_function(dtype, out_dtype, axis=0): + try: + if axis == 0: + return _take_2d_axis0_dict[dtype.name, out_dtype.name] + elif axis == 1: + return _take_2d_axis1_dict[dtype.name, out_dtype.name] + elif axis == 'multi': + return _take_2d_multi_dict[dtype.name, out_dtype.name] + else: # pragma: no cover + raise ValueError('bad axis: %s' % axis) + except KeyError: + pass + + if dtype != out_dtype: try: - if out is None: - out = np.empty(n, dtype=arr.dtype) - take_f(arr, _ensure_int64(indexer), out=out, fill_value=fill_value) - except ValueError: - mask = indexer == -1 - if len(arr) == 0: - if not out_passed: - out = np.empty(n, dtype=arr.dtype) + if axis == 0: + func = _take_2d_axis0_dict[out_dtype.name, out_dtype.name] + elif axis == 1: + func = _take_2d_axis1_dict[out_dtype.name, out_dtype.name] else: - out = ndtake(arr, indexer, out=out) - if mask.any(): - if out_passed: - raise Exception('out with dtype %s does not support NA' % - out.dtype) - out = _maybe_upcast(out) - np.putmask(out, mask, fill_value) - elif dtype_str in _dtypes_na: - if out is None: - out = np.empty(n, dtype=arr.dtype) - take_f(arr, _ensure_int64(indexer), out=out, fill_value=fill_value) - else: - out = ndtake(arr, indexer, out=out) - mask = indexer == -1 - if mask.any(): - if out_passed: - raise Exception('out with dtype %s does not support NA' % - out.dtype) - out = _maybe_upcast(out) - np.putmask(out, mask, fill_value) + func = _take_2d_multi_dict[out_dtype.name, out_dtype.name] + return _convert_wrapper(func, out_dtype) + except KeyError: + pass - return out + if axis == 'multi': + return _take_2d_multi_generic + def wrapper(arr, indexer, out, fill_value=np.nan): + return _take_nd_generic(arr, indexer, out, axis=axis, + fill_value=fill_value) + return wrapper -def take_2d_multi(arr, row_idx, col_idx, fill_value=np.nan, out=None): - dtype_str = arr.dtype.name +def _get_take_nd_function(ndim, dtype, out_dtype, axis=0): + if ndim == 2: + return _get_take_2d_function(dtype, out_dtype, axis=axis) + elif ndim == 1: + if axis != 0: + raise ValueError('axis must be 0 for one dimensional array') + return _get_take_1d_function(dtype, out_dtype) + elif ndim <= 0: + raise ValueError('ndim must be >= 1') - out_shape = len(row_idx), len(col_idx) + def wrapper(arr, indexer, out, fill_value=np.nan): + return _take_nd_generic(arr, indexer, out, axis=axis, + fill_value=fill_value) + if (dtype.name, out_dtype.name) == ('datetime64[ns]','datetime64[ns]'): + wrapper = _view_wrapper(wrapper, np.int64, np.int64, + fill_wrap=_datetime64_fill_wrap) + return wrapper - if dtype_str in _dtypes_no_na: - row_mask = row_idx == -1 - col_mask = col_idx == -1 - needs_masking = row_mask.any() or col_mask.any() - if needs_masking: - return take_2d_multi(_maybe_upcast(arr), row_idx, col_idx, - fill_value=fill_value, out=out) - else: - if out is None: - out = np.empty(out_shape, dtype=arr.dtype) - take_f = _get_take2d_function(dtype_str, axis='multi') - take_f(arr, _ensure_int64(row_idx), - _ensure_int64(col_idx), out=out, - fill_value=fill_value) - return out - elif dtype_str in _dtypes_na: - if out is None: - out = np.empty(out_shape, 
dtype=arr.dtype) - take_f = _get_take2d_function(dtype_str, axis='multi') - take_f(arr, _ensure_int64(row_idx), _ensure_int64(col_idx), out=out, - fill_value=fill_value) - return out +def take_1d(arr, indexer, out=None, fill_value=np.nan): + """ + Specialized Cython take which sets NaN values in one pass + """ + if indexer is None: + indexer = np.arange(len(arr), dtype=np.int64) + dtype, fill_value = arr.dtype, arr.dtype.type() else: - if out is not None: - raise ValueError('Cannot pass out in this case') + indexer = _ensure_int64(indexer) + dtype = _maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype: + mask = indexer == -1 + needs_masking = mask.any() + if needs_masking: + if out is not None and out.dtype != dtype: + raise Exception('Incompatible type for fill_value') + else: + dtype, fill_value = arr.dtype, arr.dtype.type() - return take_2d(take_2d(arr, row_idx, axis=0, fill_value=fill_value), - col_idx, axis=1, fill_value=fill_value) + if out is None: + out = np.empty(len(indexer), dtype=dtype) + take_f = _get_take_1d_function(arr.dtype, out.dtype) + take_f(arr, indexer, out=out, fill_value=fill_value) + return out -def take_2d(arr, indexer, out=None, mask=None, needs_masking=None, axis=0, - fill_value=np.nan): +def take_nd(arr, indexer, out=None, axis=0, fill_value=np.nan): """ Specialized Cython take which sets NaN values in one pass """ - dtype_str = arr.dtype.name + if indexer is None: + mask = None + needs_masking = False + fill_value = arr.dtype.type() + else: + indexer = _ensure_int64(indexer) + mask = indexer == -1 + needs_masking = mask.any() + if not needs_masking: + fill_value = arr.dtype.type() + return take_fast(arr, indexer, mask, needs_masking, axis, out, fill_value) - out_shape = list(arr.shape) - out_shape[axis] = len(indexer) - out_shape = tuple(out_shape) - if not isinstance(indexer, np.ndarray): - indexer = np.array(indexer, dtype=np.int64) +def take_2d_multi(arr, row_idx, col_idx, fill_value=np.nan, out=None): + """ + Specialized Cython take which sets NaN values in one pass + """ + if row_idx is None: + row_idx = np.arange(arr.shape[0], dtype=np.int64) + else: + row_idx = _ensure_int64(row_idx) - if dtype_str in _dtypes_no_na: - if mask is None: - mask = indexer == -1 - needs_masking = mask.any() + if col_idx is None: + col_idx = np.arange(arr.shape[1], dtype=np.int64) + else: + col_idx = _ensure_int64(col_idx) + dtype = _maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype: + row_mask = row_idx == -1 + col_mask = col_idx == -1 + needs_masking = row_mask.any() or col_mask.any() if needs_masking: - # upcasting may be required - result = ndtake(arr, indexer, axis=axis, out=out) - result = _maybe_mask(result, mask, needs_masking, axis=axis, - out_passed=out is not None, - fill_value=fill_value) - return result + if out is not None and out.dtype != dtype: + raise Exception('Incompatible type for fill_value') else: - if out is None: - out = np.empty(out_shape, dtype=arr.dtype) - take_f = _get_take2d_function(dtype_str, axis=axis) - take_f(arr, _ensure_int64(indexer), out=out, fill_value=fill_value) - return out - elif dtype_str in _dtypes_na: - if out is None: - out = np.empty(out_shape, dtype=arr.dtype) - take_f = _get_take2d_function(dtype_str, axis=axis) - take_f(arr, _ensure_int64(indexer), out=out, fill_value=fill_value) - return out - else: - if mask is None: - mask = indexer == -1 - needs_masking = mask.any() - - # GH #486 - if out is not None and arr.dtype != out.dtype: - arr = arr.astype(out.dtype) - - result = ndtake(arr, indexer, axis=axis, 
out=out) - result = _maybe_mask(result, mask, needs_masking, axis=axis, - out_passed=out is not None, - fill_value=fill_value) - return result + dtype, fill_value = arr.dtype, arr.dtype.type() + if out is None: + out_shape = len(row_idx), len(col_idx) + out = np.empty(out_shape, dtype=dtype) + take_f = _get_take_2d_function(arr.dtype, out.dtype, axis='multi') + take_f(arr, (row_idx, col_idx), out=out, fill_value=fill_value) + return out def ndtake(arr, indexer, axis=0, out=None): return arr.take(_ensure_platform_int(indexer), axis=axis, out=out) -def mask_out_axis(arr, mask, axis, fill_value=np.nan): - indexer = [slice(None)] * arr.ndim - indexer[axis] = mask - - arr[tuple(indexer)] = fill_value - _diff_special = { 'float64': algos.diff_2d_float64, 'float32': algos.diff_2d_float32, @@ -483,7 +587,7 @@ def diff(arr, n, axis=0): n = int(n) dtype = arr.dtype if issubclass(dtype.type, np.integer): - dtype = np.float64 + dtype = np.float_ elif issubclass(dtype.type, np.bool_): dtype = np.object_ @@ -512,43 +616,77 @@ def diff(arr, n, axis=0): def take_fast(arr, indexer, mask, needs_masking, axis=0, out=None, fill_value=np.nan): - if arr.ndim == 2: - return take_2d(arr, indexer, out=out, mask=mask, - needs_masking=needs_masking, - axis=axis, fill_value=fill_value) - indexer = _ensure_platform_int(indexer) - result = ndtake(arr, indexer, axis=axis, out=out) - result = _maybe_mask(result, mask, needs_masking, axis=axis, - out_passed=out is not None, fill_value=fill_value) - return result - + """ + Specialized Cython take which sets NaN values in one pass -def _maybe_mask(result, mask, needs_masking, axis=0, out_passed=False, - fill_value=np.nan): - if needs_masking: - if out_passed and _need_upcast(result): - raise Exception('incompatible type for NAs') + (equivalent to take_nd but requires mask and needs_masking + to be set appropriately already; slightly more efficient) + """ + if indexer is None: + indexer = np.arange(arr.shape[axis], dtype=np.int64) + dtype = arr.dtype + else: + indexer = _ensure_int64(indexer) + if needs_masking: + dtype = _maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype and out is not None and out.dtype != dtype: + raise Exception('Incompatible type for fill_value') else: - # a bit spaghettified - result = _maybe_upcast(result) - mask_out_axis(result, mask, axis, fill_value) - return result + dtype = arr.dtype + + if out is None: + out_shape = list(arr.shape) + out_shape[axis] = len(indexer) + out_shape = tuple(out_shape) + out = np.empty(out_shape, dtype=dtype) + take_f = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis) + take_f(arr, indexer, out=out, fill_value=fill_value) + return out + + +def _maybe_promote(dtype, fill_value=np.nan): + if issubclass(dtype.type, np.datetime64): + # for now: refuse to upcast + # (this is because datetime64 will not implicitly upconvert + # to object correctly as of numpy 1.6.1) + return dtype + elif is_float(fill_value): + if issubclass(dtype.type, np.bool_): + return np.object_ + elif issubclass(dtype.type, np.integer): + return np.float_ + return dtype + elif is_bool(fill_value): + if issubclass(dtype.type, np.bool_): + return dtype + return np.object_ + elif is_integer(fill_value): + if issubclass(dtype.type, np.bool_): + return np.object_ + elif issubclass(dtype.type, np.integer): + # upcast to prevent overflow + arr = np.asarray(fill_value) + if arr != arr.astype(dtype): + return arr.dtype + return dtype + return dtype + elif is_complex(fill_value): + if issubclass(dtype.type, np.bool_): + return 
np.object_ + elif issubclass(dtype.type, (np.integer, np.floating)): + return np.complex_ + return dtype + return np.object_ def _maybe_upcast(values): + # TODO: convert remaining usage of _maybe_upcast to _maybe_promote if issubclass(values.dtype.type, np.integer): - values = values.astype(float) + values = values.astype(np.float_) elif issubclass(values.dtype.type, np.bool_): - values = values.astype(object) - + values = values.astype(np.object_) return values - - -def _need_upcast(values): - if issubclass(values.dtype.type, (np.integer, np.bool_)): - return True - return False - + def _interp_wrapper(f, wrap_dtype, na_override=None): def wrapper(arr, mask, limit=None): @@ -556,6 +694,7 @@ def wrapper(arr, mask, limit=None): f(view, mask, limit=limit) return wrapper + _pad_1d_datetime = _interp_wrapper(algos.pad_inplace_int64, np.int64) _pad_2d_datetime = _interp_wrapper(algos.pad_2d_inplace_int64, np.int64) _backfill_1d_datetime = _interp_wrapper(algos.backfill_inplace_int64, @@ -728,8 +867,10 @@ def _infer_dtype(value): return np.float_ elif isinstance(value, (bool, np.bool_)): return np.bool_ - elif isinstance(value, (int, np.integer)): + elif isinstance(value, (int, long, np.integer)): return np.int_ + elif isinstance(value, (complex, np.complexfloating)): + return np.complex_ else: return np.object_ @@ -1028,6 +1169,10 @@ def _maybe_make_list(obj): return obj +def is_bool(obj): + return isinstance(obj, (bool, np.bool_)) + + def is_integer(obj): return isinstance(obj, (int, long, np.integer)) @@ -1036,13 +1181,17 @@ def is_float(obj): return isinstance(obj, (float, np.floating)) +def is_complex(obj): + return isinstance(obj, (complex, np.complexfloating)) + + def is_iterator(obj): # python 3 generators have __next__ instead of next return hasattr(obj, 'next') or hasattr(obj, '__next__') def is_number(obj): - return isinstance(obj, (np.number, int, long, float)) + return isinstance(obj, (np.number, int, long, float, complex)) def is_integer_dtype(arr_or_dtype): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6c96317a645f7..30d8313acf2fb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2107,10 +2107,6 @@ def __setitem__(self, key, value): def _boolean_set(self, key, value): if key.values.dtype != np.bool_: raise ValueError('Must pass DataFrame with boolean values only') - - if self._is_mixed_type: - raise ValueError('Cannot do boolean setting on mixed-type frame') - self.where(-key, value, inplace=True) def _set_item_multiple(self, keys, value): @@ -2928,7 +2924,7 @@ def take(self, indices, axis=0): new_columns = self.columns.take(indices) return self.reindex(columns=new_columns) else: - new_values = com.take_2d(self.values, + new_values = com.take_nd(self.values, com._ensure_int64(indices), axis=axis) if axis == 0: @@ -5229,16 +5225,19 @@ def where(self, cond, other=NA, inplace=False, try_cast=False, raise_on_error=Tr Parameters ---------- - cond: boolean DataFrame or array - other: scalar or DataFrame - inplace: perform the operation in place on the data - try_cast: try to cast the result back to the input type (if possible), defaults to False - raise_on_error: should I raise on invalid data types (e.g. 
trying to where on strings), - defaults to True + cond : boolean DataFrame or array + other : scalar or DataFrame + inplace : boolean, default False + Whether to perform the operation in place on the data + try_cast : boolean, default False + try to cast the result back to the input type (if possible), + raise_on_error : boolean, default True + Whether to raise on invalid data types (e.g. trying to where on + strings) Returns ------- - wh: DataFrame + wh : DataFrame """ if not hasattr(cond, 'shape'): raise ValueError('where requires an ndarray like object for its ' @@ -5263,18 +5262,16 @@ def where(self, cond, other=NA, inplace=False, try_cast=False, raise_on_error=Tr if isinstance(other, DataFrame): _, other = self.align(other, join='left', fill_value=NA) elif isinstance(other,np.ndarray): - - if other.shape[0] != len(self.index) or other.shape[1] != len(self.columns): - raise ValueError('other must be the same shape as self when an ndarray') - other = DataFrame(other,self.index,self.columns) + if other.shape != self.shape: + raise ValueError('other must be the same shape as self ' + 'when an ndarray') + other = DataFrame(other, self.index, self.columns) if inplace: - # we may have different type blocks come out of putmask, so reconstruct the block manager self._data = self._data.putmask(cond,other,inplace=True) else: - func = lambda values, others, conds: np.where(conds, values, others) new_data = self._data.where(func, other, cond, raise_on_error=raise_on_error, try_cast=try_cast) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 58d193a956491..ee024ce68b5b4 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -125,15 +125,9 @@ def reindex_axis(self, indexer, mask, needs_masking, axis=0, """ Reindex using pre-computed indexer information """ - if self.values.size > 0: - new_values = com.take_fast(self.values, indexer, mask, - needs_masking, axis=axis, - fill_value=fill_value) - else: - shape = list(self.shape) - shape[axis] = len(indexer) - new_values = np.empty(shape) - new_values.fill(fill_value) + new_values = com.take_fast(self.values, indexer, + mask, needs_masking, axis=axis, + fill_value=fill_value) return make_block(new_values, self.items, self.ref_items) def reindex_items_from(self, new_ref_items, copy=True): @@ -155,12 +149,9 @@ def reindex_items_from(self, new_ref_items, copy=True): mask = indexer != -1 masked_idx = indexer[mask] - if self.values.ndim == 2: - new_values = com.take_2d(self.values, masked_idx, axis=0, - needs_masking=False) - else: - new_values = self.values.take(masked_idx, axis=0) - + new_values = com.take_fast(self.values, masked_idx, + mask=None, needs_masking=False, + axis=0) new_items = self.items.take(masked_idx) return make_block(new_values, new_items, new_ref_items) @@ -301,24 +292,23 @@ def putmask(self, mask, new, inplace=False): new_values = self.values if inplace else self.values.copy() # may need to align the new - if hasattr(new,'reindex_axis'): - axis = getattr(new,'_het_axis',0) + if hasattr(new, 'reindex_axis'): + axis = getattr(new, '_het_axis', 0) new = new.reindex_axis(self.items, axis=axis, copy=False).values.T # may need to align the mask - if hasattr(mask,'reindex_axis'): - axis = getattr(mask,'_het_axis',0) + if hasattr(mask, 'reindex_axis'): + axis = getattr(mask, '_het_axis', 0) mask = mask.reindex_axis(self.items, axis=axis, copy=False).values.T if self._can_hold_element(new): new = self._try_cast(new) np.putmask(new_values, mask, new) - # upcast me else: - # type of the new block - if 
isinstance(new,np.ndarray) and issubclass(new.dtype,np.number) or issubclass(type(new),float): + if ((isinstance(new, np.ndarray) and issubclass(new.dtype, np.number)) or + isinstance(new, float)): typ = float else: typ = object @@ -369,9 +359,8 @@ def interpolate(self, method='pad', axis=0, inplace=False, def take(self, indexer, axis=1, fill_value=np.nan): if axis < 1: raise AssertionError('axis must be at least 1, got %d' % axis) - new_values = com.take_fast(self.values, indexer, None, - None, axis=axis, - fill_value=fill_value) + new_values = com.take_fast(self.values, indexer, None, False, + axis=axis, fill_value=fill_value) return make_block(new_values, self.items, self.ref_items) def get_values(self, dtype): @@ -401,22 +390,21 @@ def where(self, func, other, cond = None, raise_on_error = True, try_cast = Fals Parameters ---------- - func : how to combine self,other + func : how to combine self, other other : a ndarray/object cond : the condition to respect, optional - raise_on_error : if True, raise when I can't perform the function, False by default (and just return - the data that we had coming in) + raise_on_error : if True, raise when I can't perform the function, + False by default (and just return the data that we had coming in) Returns ------- a new block, the result of the func """ - values = self.values # see if we can align other - if hasattr(other,'reindex_axis'): - axis = getattr(other,'_het_axis',0) + if hasattr(other, 'reindex_axis'): + axis = getattr(other, '_het_axis', 0) other = other.reindex_axis(self.items, axis=axis, copy=True).values # make sure that we can broadcast @@ -428,17 +416,20 @@ def where(self, func, other, cond = None, raise_on_error = True, try_cast = Fals # see if we can align cond if cond is not None: - if not hasattr(cond,'shape'): - raise ValueError("where must have a condition that is ndarray like") - if hasattr(cond,'reindex_axis'): - axis = getattr(cond,'_het_axis',0) - cond = cond.reindex_axis(self.items, axis=axis, copy=True).values + if not hasattr(cond, 'shape'): + raise ValueError('where must have a condition that is ndarray' + ' like') + if hasattr(cond, 'reindex_axis'): + axis = getattr(cond, '_het_axis', 0) + cond = cond.reindex_axis(self.items, axis=axis, + copy=True).values else: cond = cond.values # may need to undo transpose of values if hasattr(values, 'ndim'): - if values.ndim != cond.ndim or values.shape == cond.shape[::-1]: + if (values.ndim != cond.ndim or + values.shape == cond.shape[::-1]): values = values.T is_transposed = not is_transposed @@ -494,7 +485,7 @@ class FloatBlock(NumericBlock): def _can_hold_element(self, element): if isinstance(element, np.ndarray): - return issubclass(element.dtype.type, (np.floating,np.integer)) + return issubclass(element.dtype.type, (np.floating, np.integer)) return isinstance(element, (float, int)) def _try_cast(self, element): @@ -541,7 +532,8 @@ def _try_cast(self, element): def _try_cast_result(self, result): # this is quite restrictive to convert try: - if isinstance(result, np.ndarray) and issubclass(result.dtype.type, np.floating): + if (isinstance(result, np.ndarray) and + issubclass(result.dtype.type, np.floating)): if com.notnull(result).all(): new_result = result.astype(self.dtype) if (new_result == result).all(): @@ -958,7 +950,8 @@ def _get_clean_block_types(self, type_list): return type_list def get_bool_data(self, copy=False, as_blocks=False): - return self.get_numeric_data(copy=copy, type_list=(BoolBlock,), as_blocks=as_blocks) + return self.get_numeric_data(copy=copy, 
type_list=(BoolBlock,), + as_blocks=as_blocks) def get_slice(self, slobj, axis=0): new_axes = list(self.axes) @@ -1429,7 +1422,7 @@ def take(self, indexer, axis=1): if axis == 0: raise NotImplementedError - indexer = np.asarray(indexer, dtype='i4') + indexer = com._ensure_platform_int(indexer) n = len(self.axes[axis]) if ((indexer == -1) | (indexer >= n)).any(): @@ -1440,8 +1433,8 @@ def take(self, indexer, axis=1): new_axes[axis] = self.axes[axis].take(indexer) new_blocks = [] for blk in self.blocks: - new_values = com.take_fast(blk.values, indexer, - None, False, axis=axis) + new_values = com.take_fast(blk.values, indexer, None, False, + axis=axis) newb = make_block(new_values, blk.items, self.items) new_blocks.append(newb) @@ -1765,8 +1758,6 @@ def _consolidate(blocks, items): return new_blocks -# TODO: this could be much optimized - def _merge_blocks(blocks, items): if len(blocks) == 1: return blocks[0] diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 5ed3af4c34ee7..362215703e1f2 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -93,7 +93,7 @@ def _make_sorted_values_labels(self): indexer = algos.groupsort_indexer(comp_index, ngroups)[0] indexer = _ensure_platform_int(indexer) - self.sorted_values = com.take_2d(self.values, indexer, axis=0) + self.sorted_values = com.take_nd(self.values, indexer, axis=0) self.sorted_labels = [l.take(indexer) for l in to_sort] def _make_selectors(self): @@ -136,7 +136,7 @@ def get_result(self): # rare case, level values not observed if len(obs_ids) < self.full_shape[1]: inds = (value_mask.sum(0) > 0).nonzero()[0] - values = com.take_2d(values, inds, axis=1) + values = com.take_nd(values, inds, axis=1) columns = columns[inds] return DataFrame(values, index=index, columns=columns) diff --git a/pandas/core/series.py b/pandas/core/series.py index c3ae78b1b5e1f..bc54a1b7be0e8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -794,7 +794,9 @@ def convert_objects(self, convert_dates=True, convert_numeric=True): converted : Series """ if self.dtype == np.object_: - return Series(com._possibly_convert_objects(self.values,convert_dates=convert_dates,convert_numeric=convert_numeric), index=self.index, name=self.name) + return Series(com._possibly_convert_objects(self.values, + convert_dates=convert_dates, convert_numeric=convert_numeric), + index=self.index, name=self.name) return self.copy() def repeat(self, reps): diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index 9cc749d23a3a9..c68154b27f7d1 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -52,177 +52,148 @@ take_1d_template = """@cython.wraparound(False) -def take_1d_%(name)s(ndarray[%(c_type)s] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_1d_%(name)s_%(dest)s(ndarray[%(c_type_in)s] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, n, idx - ndarray[%(c_type)s] outbuf - %(c_type)s fv + ndarray[%(c_type_out)s] outbuf = out + %(c_type_out)s fv n = len(indexer) - if out is None: - outbuf = np.empty(n, dtype=values.dtype) - else: - outbuf = out - if %(raise_on_na)s and _checknan(fill_value): - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: raise ValueError('No NA values allowed') else: - outbuf[i] = values[idx] + outbuf[i] = %(preval)svalues[idx]%(postval)s else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: outbuf[i] = fv else: - outbuf[i] = values[idx] + outbuf[i] = 
%(preval)svalues[idx]%(postval)s """ take_2d_axis0_template = """@cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis0_%(name)s(ndarray[%(c_type)s, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_%(name)s_%(dest)s(ndarray[%(c_type_in)s, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[%(c_type)s, ndim=2] outbuf - %(c_type)s fv + ndarray[%(c_type_out)s, ndim=2] outbuf = out + %(c_type_out)s fv n = len(indexer) k = values.shape[1] - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - if %(raise_on_na)s and _checknan(fill_value): - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: for j from 0 <= j < k: raise ValueError('No NA values allowed') else: for j from 0 <= j < k: - outbuf[i, j] = values[idx, j] + outbuf[i, j] = %(preval)svalues[idx, j]%(postval)s else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): - outbuf[i, j] = values[idx, j] + for j from 0 <= j < k: + outbuf[i, j] = %(preval)svalues[idx, j]%(postval)s """ take_2d_axis1_template = """@cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis1_%(name)s(ndarray[%(c_type)s, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis1_%(name)s_%(dest)s(ndarray[%(c_type_in)s, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[%(c_type)s, ndim=2] outbuf - %(c_type)s fv + ndarray[%(c_type_out)s, ndim=2] outbuf = out + %(c_type_out)s fv n = len(values) k = len(indexer) - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - if %(raise_on_na)s and _checknan(fill_value): - for j in range(k): + for j from 0 <= j < k: idx = indexer[j] - if idx == -1: - for i in range(n): + for i from 0 <= i < n: raise ValueError('No NA values allowed') else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for i from 0 <= i < n: + outbuf[i, j] = %(preval)svalues[i, idx]%(postval)s else: fv = fill_value - for j in range(k): + for j from 0 <= j < k: idx = indexer[j] - if idx == -1: - for i in range(n): + for i from 0 <= i < n: outbuf[i, j] = fv else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for i from 0 <= i < n: + outbuf[i, j] = %(preval)svalues[i, idx]%(postval)s """ take_2d_multi_template = """@cython.wraparound(False) @cython.boundscheck(False) -def take_2d_multi_%(name)s(ndarray[%(c_type)s, ndim=2] values, - ndarray[int64_t] idx0, - ndarray[int64_t] idx1, - out=None, fill_value=np.nan): +def take_2d_multi_%(name)s_%(dest)s(ndarray[%(c_type_in)s, ndim=2] values, + indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[%(c_type)s, ndim=2] outbuf - %(c_type)s fv + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[%(c_type_out)s, ndim=2] outbuf = out + %(c_type_out)s fv n = len(idx0) k = len(idx1) - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - - if %(raise_on_na)s and _checknan(fill_value): - for i in range(n): + for i from 0 <= i < n: idx = idx0[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for j in range(k): + for j from 0 <= j < k: if idx1[j] == -1: raise ValueError('No NA values allowed') else: - outbuf[i, j] 
= values[idx, idx1[j]] + outbuf[i, j] = %(preval)svalues[idx, idx1[j]]%(postval)s else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = idx0[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): + for j from 0 <= j < k: if idx1[j] == -1: outbuf[i, j] = fv else: - outbuf[i, j] = values[idx, idx1[j]] + outbuf[i, j] = %(preval)svalues[idx, idx1[j]]%(postval)s """ -def set_na(na ="NaN"): - return "outbuf[i] = %s" % na - -def set_na_2d(na = "NaN"): - return "outbuf[i, j] = %s" % na - -raise_on_na = "raise ValueError('No NA values allowed')" ''' Backfilling logic for generating fill vector @@ -2184,20 +2155,55 @@ def generate_put_template(template, use_ints = True, use_floats = True): output.write(func) return output.getvalue() +def generate_take_template(template, exclude=None): + # name, dest, ctypein, ctypeout, preval, postval, capable of holding NA + function_list = [ + ('bool', 'bool', 'uint8_t', 'uint8_t', '', '', False), + ('bool', 'object', 'uint8_t', 'object', + 'True if ', ' > 0 else False', True), + ('int8', 'int8', 'int8_t', 'int8_t', '', '', False), + ('int8', 'int32', 'int8_t', 'int32_t', '', '', False), + ('int8', 'int64', 'int8_t', 'int64_t', '', '', False), + ('int8', 'float64', 'int8_t', 'float64_t', '', '', True), + ('int16', 'int16', 'int16_t', 'int16_t', '', '', False), + ('int16', 'int32', 'int16_t', 'int32_t', '', '', False), + ('int16', 'int64', 'int16_t', 'int64_t', '', '', False), + ('int16', 'float64', 'int16_t', 'float64_t', '', '', True), + ('int32', 'int32', 'int32_t', 'int32_t', '', '', False), + ('int32', 'int64', 'int32_t', 'int64_t', '', '', False), + ('int32', 'float64', 'int32_t', 'float64_t', '', '', True), + ('int64', 'int64', 'int64_t', 'int64_t', '', '', False), + ('int64', 'float64', 'int64_t', 'float64_t', '', '', True), + ('float32', 'float32', 'float32_t', 'float32_t', '', '', True), + ('float32', 'float64', 'float32_t', 'float64_t', '', '', True), + ('float64', 'float64', 'float64_t', 'float64_t', '', '', True), + ('object', 'object', 'object', 'object', '', '', True) + ] -# name, ctype, capable of holding NA -function_list = [ - ('float64', 'float64_t', 'np.float64', True), - ('float32', 'float32_t', 'np.float32', True), - ('object','object', 'object', True), - ('int8', 'int8_t', 'np.int8', False), - ('int16', 'int16_t', 'np.int16', False), - ('int32', 'int32_t', 'np.int32', False), - ('int64', 'int64_t', 'np.int64', False), - ('bool', 'uint8_t', 'np.bool', False) -] + output = StringIO() + for (name, dest, c_type_in, c_type_out, + preval, postval, can_hold_na) in function_list: + if exclude is not None and name in exclude: + continue + + func = template % {'name': name, 'dest': dest, + 'c_type_in': c_type_in, 'c_type_out': c_type_out, + 'preval': preval, 'postval': postval, + 'raise_on_na': 'False' if can_hold_na else 'True'} + output.write(func) + return output.getvalue() + +def generate_from_template(template, exclude=None): + # name, ctype, capable of holding NA + function_list = [ + ('float64', 'float64_t', 'np.float64', True), + ('float32', 'float32_t', 'np.float32', True), + ('object', 'object', 'object', True), + ('int32', 'int32_t', 'np.int32', False), + ('int64', 'int64_t', 'np.int64', False), + ('bool', 'uint8_t', 'np.bool', False) + ] -def generate_from_template(template, ndim=1, exclude=None): output = StringIO() for name, c_type, dtype, can_hold_na in function_list: if exclude is not None and name in exclude: @@ -2235,7 +2241,6 @@ def 
generate_from_template(template, ndim=1, exclude=None): backfill_1d_template, pad_2d_template, backfill_2d_template, - take_1d_template, is_monotonic_template, groupby_template, arrmap_template] @@ -2245,9 +2250,10 @@ def generate_from_template(template, ndim=1, exclude=None): outer_join_template2, inner_join_template] -templates_2d = [take_2d_axis0_template, - take_2d_axis1_template, - take_2d_multi_template] +take_templates = [take_1d_template, + take_2d_axis0_template, + take_2d_axis1_template, + take_2d_multi_template] def generate_take_cython_file(path='generated.pyx'): with open(path, 'w') as f: @@ -2258,8 +2264,8 @@ def generate_take_cython_file(path='generated.pyx'): for template in templates_1d: print >> f, generate_from_template(template) - for template in templates_2d: - print >> f, generate_from_template(template, ndim=2) + for template in take_templates: + print >> f, generate_take_template(template) for template in put_2d: print >> f, generate_put_template(template) diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx index a20fb5668aec9..1723f2fb8b34c 100644 --- a/pandas/src/generated.pyx +++ b/pandas/src/generated.pyx @@ -183,50 +183,6 @@ cpdef map_indices_object(ndarray[object] index): return result -@cython.wraparound(False) -@cython.boundscheck(False) -cpdef map_indices_int8(ndarray[int8_t] index): - ''' - Produce a dict mapping the values of the input array to their respective - locations. - - Example: - array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} - - Better to do this with Cython because of the enormous speed boost. - ''' - cdef Py_ssize_t i, length - cdef dict result = {} - - length = len(index) - - for i in range(length): - result[index[i]] = i - - return result - -@cython.wraparound(False) -@cython.boundscheck(False) -cpdef map_indices_int16(ndarray[int16_t] index): - ''' - Produce a dict mapping the values of the input array to their respective - locations. - - Example: - array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} - - Better to do this with Cython because of the enormous speed boost. 
- ''' - cdef Py_ssize_t i, length - cdef dict result = {} - - length = len(index) - - for i in range(length): - result[index[i]] = i - - return result - @cython.wraparound(False) @cython.boundscheck(False) cpdef map_indices_int32(ndarray[int32_t] index): @@ -477,128 +433,6 @@ def pad_object(ndarray[object] old, ndarray[object] new, return indexer -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_int8(ndarray[int8_t] old, ndarray[int8_t] new, - limit=None): - cdef Py_ssize_t i, j, nleft, nright - cdef ndarray[int64_t, ndim=1] indexer - cdef int8_t cur, next - cdef int lim, fill_count = 0 - - nleft = len(old) - nright = len(new) - indexer = np.empty(nright, dtype=np.int64) - indexer.fill(-1) - - if limit is None: - lim = nright - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit - - if nleft == 0 or nright == 0 or new[nright - 1] < old[0]: - return indexer - - i = j = 0 - - cur = old[0] - - while j <= nright - 1 and new[j] < cur: - j += 1 - - while True: - if j == nright: - break - - if i == nleft - 1: - while j < nright: - if new[j] == cur: - indexer[j] = i - elif new[j] > cur and fill_count < lim: - indexer[j] = i - fill_count += 1 - j += 1 - break - - next = old[i + 1] - - while j < nright and cur <= new[j] < next: - if new[j] == cur: - indexer[j] = i - elif fill_count < lim: - indexer[j] = i - fill_count += 1 - j += 1 - - fill_count = 0 - i += 1 - cur = next - - return indexer - -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_int16(ndarray[int16_t] old, ndarray[int16_t] new, - limit=None): - cdef Py_ssize_t i, j, nleft, nright - cdef ndarray[int64_t, ndim=1] indexer - cdef int16_t cur, next - cdef int lim, fill_count = 0 - - nleft = len(old) - nright = len(new) - indexer = np.empty(nright, dtype=np.int64) - indexer.fill(-1) - - if limit is None: - lim = nright - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit - - if nleft == 0 or nright == 0 or new[nright - 1] < old[0]: - return indexer - - i = j = 0 - - cur = old[0] - - while j <= nright - 1 and new[j] < cur: - j += 1 - - while True: - if j == nright: - break - - if i == nleft - 1: - while j < nright: - if new[j] == cur: - indexer[j] = i - elif new[j] > cur and fill_count < lim: - indexer[j] = i - fill_count += 1 - j += 1 - break - - next = old[i + 1] - - while j < nright and cur <= new[j] < next: - if new[j] == cur: - indexer[j] = i - elif fill_count < lim: - indexer[j] = i - fill_count += 1 - j += 1 - - fill_count = 0 - i += 1 - cur = next - - return indexer - @cython.boundscheck(False) @cython.wraparound(False) def pad_int32(ndarray[int32_t] old, ndarray[int32_t] new, @@ -971,11 +805,11 @@ def backfill_object(ndarray[object] old, ndarray[object] new, @cython.boundscheck(False) @cython.wraparound(False) -def backfill_int8(ndarray[int8_t] old, ndarray[int8_t] new, +def backfill_int32(ndarray[int32_t] old, ndarray[int32_t] new, limit=None): cdef Py_ssize_t i, j, nleft, nright cdef ndarray[int64_t, ndim=1] indexer - cdef int8_t cur, prev + cdef int32_t cur, prev cdef int lim, fill_count = 0 nleft = len(old) @@ -1033,11 +867,11 @@ def backfill_int8(ndarray[int8_t] old, ndarray[int8_t] new, @cython.boundscheck(False) @cython.wraparound(False) -def backfill_int16(ndarray[int16_t] old, ndarray[int16_t] new, +def backfill_int64(ndarray[int64_t] old, ndarray[int64_t] new, limit=None): cdef Py_ssize_t i, j, nleft, nright cdef ndarray[int64_t, ndim=1] indexer - cdef int16_t cur, prev + cdef int64_t cur, prev cdef int lim, fill_count = 0 nleft = 
len(old) @@ -1095,11 +929,11 @@ def backfill_int16(ndarray[int16_t] old, ndarray[int16_t] new, @cython.boundscheck(False) @cython.wraparound(False) -def backfill_int32(ndarray[int32_t] old, ndarray[int32_t] new, +def backfill_bool(ndarray[uint8_t] old, ndarray[uint8_t] new, limit=None): cdef Py_ssize_t i, j, nleft, nright cdef ndarray[int64_t, ndim=1] indexer - cdef int32_t cur, prev + cdef uint8_t cur, prev cdef int lim, fill_count = 0 nleft = len(old) @@ -1155,139 +989,81 @@ def backfill_int32(ndarray[int32_t] old, ndarray[int32_t] new, return indexer + @cython.boundscheck(False) @cython.wraparound(False) -def backfill_int64(ndarray[int64_t] old, ndarray[int64_t] new, - limit=None): - cdef Py_ssize_t i, j, nleft, nright - cdef ndarray[int64_t, ndim=1] indexer - cdef int64_t cur, prev +def pad_inplace_float64(ndarray[float64_t] values, + ndarray[uint8_t, cast=True] mask, + limit=None): + cdef Py_ssize_t i, N + cdef float64_t val cdef int lim, fill_count = 0 - nleft = len(old) - nright = len(new) - indexer = np.empty(nright, dtype=np.int64) - indexer.fill(-1) + N = len(values) + + # GH 2778 + if N == 0: + return if limit is None: - lim = nright + lim = N else: if limit < 0: raise ValueError('Limit must be non-negative') lim = limit - if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]: - return indexer + val = values[0] + for i in range(N): + if mask[i]: + if fill_count >= lim: + continue + fill_count += 1 + values[i] = val + else: + fill_count = 0 + val = values[i] - i = nleft - 1 - j = nright - 1 +@cython.boundscheck(False) +@cython.wraparound(False) +def pad_inplace_float32(ndarray[float32_t] values, + ndarray[uint8_t, cast=True] mask, + limit=None): + cdef Py_ssize_t i, N + cdef float32_t val + cdef int lim, fill_count = 0 - cur = old[nleft - 1] + N = len(values) - while j >= 0 and new[j] > cur: - j -= 1 + # GH 2778 + if N == 0: + return - while True: - if j < 0: - break + if limit is None: + lim = N + else: + if limit < 0: + raise ValueError('Limit must be non-negative') + lim = limit - if i == 0: - while j >= 0: - if new[j] == cur: - indexer[j] = i - elif new[j] < cur and fill_count < lim: - indexer[j] = i - fill_count += 1 - j -= 1 - break + val = values[0] + for i in range(N): + if mask[i]: + if fill_count >= lim: + continue + fill_count += 1 + values[i] = val + else: + fill_count = 0 + val = values[i] - prev = old[i - 1] - - while j >= 0 and prev < new[j] <= cur: - if new[j] == cur: - indexer[j] = i - elif new[j] < cur and fill_count < lim: - indexer[j] = i - fill_count += 1 - j -= 1 - - fill_count = 0 - i -= 1 - cur = prev - - return indexer - -@cython.boundscheck(False) -@cython.wraparound(False) -def backfill_bool(ndarray[uint8_t] old, ndarray[uint8_t] new, - limit=None): - cdef Py_ssize_t i, j, nleft, nright - cdef ndarray[int64_t, ndim=1] indexer - cdef uint8_t cur, prev - cdef int lim, fill_count = 0 - - nleft = len(old) - nright = len(new) - indexer = np.empty(nright, dtype=np.int64) - indexer.fill(-1) - - if limit is None: - lim = nright - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit - - if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]: - return indexer - - i = nleft - 1 - j = nright - 1 - - cur = old[nleft - 1] - - while j >= 0 and new[j] > cur: - j -= 1 - - while True: - if j < 0: - break - - if i == 0: - while j >= 0: - if new[j] == cur: - indexer[j] = i - elif new[j] < cur and fill_count < lim: - indexer[j] = i - fill_count += 1 - j -= 1 - break - - prev = old[i - 1] - - while j >= 0 and prev < new[j] <= cur: - if 
new[j] == cur: - indexer[j] = i - elif new[j] < cur and fill_count < lim: - indexer[j] = i - fill_count += 1 - j -= 1 - - fill_count = 0 - i -= 1 - cur = prev - - return indexer - - -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_inplace_float64(ndarray[float64_t] values, - ndarray[uint8_t, cast=True] mask, - limit=None): - cdef Py_ssize_t i, N - cdef float64_t val - cdef int lim, fill_count = 0 +@cython.boundscheck(False) +@cython.wraparound(False) +def pad_inplace_object(ndarray[object] values, + ndarray[uint8_t, cast=True] mask, + limit=None): + cdef Py_ssize_t i, N + cdef object val + cdef int lim, fill_count = 0 N = len(values) @@ -1315,11 +1091,11 @@ def pad_inplace_float64(ndarray[float64_t] values, @cython.boundscheck(False) @cython.wraparound(False) -def pad_inplace_float32(ndarray[float32_t] values, +def pad_inplace_int32(ndarray[int32_t] values, ndarray[uint8_t, cast=True] mask, limit=None): cdef Py_ssize_t i, N - cdef float32_t val + cdef int32_t val cdef int lim, fill_count = 0 N = len(values) @@ -1348,11 +1124,11 @@ def pad_inplace_float32(ndarray[float32_t] values, @cython.boundscheck(False) @cython.wraparound(False) -def pad_inplace_object(ndarray[object] values, +def pad_inplace_int64(ndarray[int64_t] values, ndarray[uint8_t, cast=True] mask, limit=None): cdef Py_ssize_t i, N - cdef object val + cdef int64_t val cdef int lim, fill_count = 0 N = len(values) @@ -1381,11 +1157,11 @@ def pad_inplace_object(ndarray[object] values, @cython.boundscheck(False) @cython.wraparound(False) -def pad_inplace_int8(ndarray[int8_t] values, +def pad_inplace_bool(ndarray[uint8_t] values, ndarray[uint8_t, cast=True] mask, limit=None): cdef Py_ssize_t i, N - cdef int8_t val + cdef uint8_t val cdef int lim, fill_count = 0 N = len(values) @@ -1412,46 +1188,14 @@ def pad_inplace_int8(ndarray[int8_t] values, fill_count = 0 val = values[i] -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_inplace_int16(ndarray[int16_t] values, - ndarray[uint8_t, cast=True] mask, - limit=None): - cdef Py_ssize_t i, N - cdef int16_t val - cdef int lim, fill_count = 0 - - N = len(values) - - # GH 2778 - if N == 0: - return - - if limit is None: - lim = N - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit - - val = values[0] - for i in range(N): - if mask[i]: - if fill_count >= lim: - continue - fill_count += 1 - values[i] = val - else: - fill_count = 0 - val = values[i] @cython.boundscheck(False) @cython.wraparound(False) -def pad_inplace_int32(ndarray[int32_t] values, - ndarray[uint8_t, cast=True] mask, - limit=None): +def backfill_inplace_float64(ndarray[float64_t] values, + ndarray[uint8_t, cast=True] mask, + limit=None): cdef Py_ssize_t i, N - cdef int32_t val + cdef float64_t val cdef int lim, fill_count = 0 N = len(values) @@ -1467,8 +1211,8 @@ def pad_inplace_int32(ndarray[int32_t] values, raise ValueError('Limit must be non-negative') lim = limit - val = values[0] - for i in range(N): + val = values[N - 1] + for i in range(N - 1, -1 , -1): if mask[i]: if fill_count >= lim: continue @@ -1477,14 +1221,13 @@ def pad_inplace_int32(ndarray[int32_t] values, else: fill_count = 0 val = values[i] - @cython.boundscheck(False) @cython.wraparound(False) -def pad_inplace_int64(ndarray[int64_t] values, - ndarray[uint8_t, cast=True] mask, - limit=None): +def backfill_inplace_float32(ndarray[float32_t] values, + ndarray[uint8_t, cast=True] mask, + limit=None): cdef Py_ssize_t i, N - cdef int64_t val + cdef float32_t val cdef int lim, fill_count = 0 N = 
len(values) @@ -1500,8 +1243,8 @@ def pad_inplace_int64(ndarray[int64_t] values, raise ValueError('Limit must be non-negative') lim = limit - val = values[0] - for i in range(N): + val = values[N - 1] + for i in range(N - 1, -1 , -1): if mask[i]: if fill_count >= lim: continue @@ -1510,14 +1253,13 @@ def pad_inplace_int64(ndarray[int64_t] values, else: fill_count = 0 val = values[i] - @cython.boundscheck(False) @cython.wraparound(False) -def pad_inplace_bool(ndarray[uint8_t] values, - ndarray[uint8_t, cast=True] mask, - limit=None): +def backfill_inplace_object(ndarray[object] values, + ndarray[uint8_t, cast=True] mask, + limit=None): cdef Py_ssize_t i, N - cdef uint8_t val + cdef object val cdef int lim, fill_count = 0 N = len(values) @@ -1533,8 +1275,8 @@ def pad_inplace_bool(ndarray[uint8_t] values, raise ValueError('Limit must be non-negative') lim = limit - val = values[0] - for i in range(N): + val = values[N - 1] + for i in range(N - 1, -1 , -1): if mask[i]: if fill_count >= lim: continue @@ -1543,15 +1285,13 @@ def pad_inplace_bool(ndarray[uint8_t] values, else: fill_count = 0 val = values[i] - - @cython.boundscheck(False) @cython.wraparound(False) -def backfill_inplace_float64(ndarray[float64_t] values, +def backfill_inplace_int32(ndarray[int32_t] values, ndarray[uint8_t, cast=True] mask, limit=None): cdef Py_ssize_t i, N - cdef float64_t val + cdef int32_t val cdef int lim, fill_count = 0 N = len(values) @@ -1579,11 +1319,11 @@ def backfill_inplace_float64(ndarray[float64_t] values, val = values[i] @cython.boundscheck(False) @cython.wraparound(False) -def backfill_inplace_float32(ndarray[float32_t] values, +def backfill_inplace_int64(ndarray[int64_t] values, ndarray[uint8_t, cast=True] mask, limit=None): cdef Py_ssize_t i, N - cdef float32_t val + cdef int64_t val cdef int lim, fill_count = 0 N = len(values) @@ -1611,11 +1351,11 @@ def backfill_inplace_float32(ndarray[float32_t] values, val = values[i] @cython.boundscheck(False) @cython.wraparound(False) -def backfill_inplace_object(ndarray[object] values, +def backfill_inplace_bool(ndarray[uint8_t] values, ndarray[uint8_t, cast=True] mask, limit=None): cdef Py_ssize_t i, N - cdef object val + cdef uint8_t val cdef int lim, fill_count = 0 N = len(values) @@ -1641,16 +1381,17 @@ def backfill_inplace_object(ndarray[object] values, else: fill_count = 0 val = values[i] + @cython.boundscheck(False) @cython.wraparound(False) -def backfill_inplace_int8(ndarray[int8_t] values, - ndarray[uint8_t, cast=True] mask, - limit=None): - cdef Py_ssize_t i, N - cdef int8_t val +def pad_2d_inplace_float64(ndarray[float64_t, ndim=2] values, + ndarray[uint8_t, ndim=2] mask, + limit=None): + cdef Py_ssize_t i, j, N, K + cdef float64_t val cdef int lim, fill_count = 0 - N = len(values) + K, N = ( values).shape # GH 2778 if N == 0: @@ -1663,26 +1404,28 @@ def backfill_inplace_int8(ndarray[int8_t] values, raise ValueError('Limit must be non-negative') lim = limit - val = values[N - 1] - for i in range(N - 1, -1 , -1): - if mask[i]: - if fill_count >= lim: - continue - fill_count += 1 - values[i] = val - else: - fill_count = 0 - val = values[i] + for j in range(K): + fill_count = 0 + val = values[j, 0] + for i in range(N): + if mask[j, i]: + if fill_count >= lim: + continue + fill_count += 1 + values[j, i] = val + else: + fill_count = 0 + val = values[j, i] @cython.boundscheck(False) @cython.wraparound(False) -def backfill_inplace_int16(ndarray[int16_t] values, - ndarray[uint8_t, cast=True] mask, - limit=None): - cdef Py_ssize_t i, N - cdef int16_t val 
+def pad_2d_inplace_float32(ndarray[float32_t, ndim=2] values, + ndarray[uint8_t, ndim=2] mask, + limit=None): + cdef Py_ssize_t i, j, N, K + cdef float32_t val cdef int lim, fill_count = 0 - N = len(values) + K, N = ( values).shape # GH 2778 if N == 0: @@ -1695,259 +1438,28 @@ def backfill_inplace_int16(ndarray[int16_t] values, raise ValueError('Limit must be non-negative') lim = limit - val = values[N - 1] - for i in range(N - 1, -1 , -1): - if mask[i]: - if fill_count >= lim: - continue - fill_count += 1 - values[i] = val - else: - fill_count = 0 - val = values[i] + for j in range(K): + fill_count = 0 + val = values[j, 0] + for i in range(N): + if mask[j, i]: + if fill_count >= lim: + continue + fill_count += 1 + values[j, i] = val + else: + fill_count = 0 + val = values[j, i] @cython.boundscheck(False) @cython.wraparound(False) -def backfill_inplace_int32(ndarray[int32_t] values, - ndarray[uint8_t, cast=True] mask, - limit=None): - cdef Py_ssize_t i, N - cdef int32_t val +def pad_2d_inplace_object(ndarray[object, ndim=2] values, + ndarray[uint8_t, ndim=2] mask, + limit=None): + cdef Py_ssize_t i, j, N, K + cdef object val cdef int lim, fill_count = 0 - N = len(values) - - # GH 2778 - if N == 0: - return - - if limit is None: - lim = N - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit - - val = values[N - 1] - for i in range(N - 1, -1 , -1): - if mask[i]: - if fill_count >= lim: - continue - fill_count += 1 - values[i] = val - else: - fill_count = 0 - val = values[i] -@cython.boundscheck(False) -@cython.wraparound(False) -def backfill_inplace_int64(ndarray[int64_t] values, - ndarray[uint8_t, cast=True] mask, - limit=None): - cdef Py_ssize_t i, N - cdef int64_t val - cdef int lim, fill_count = 0 - - N = len(values) - - # GH 2778 - if N == 0: - return - - if limit is None: - lim = N - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit - - val = values[N - 1] - for i in range(N - 1, -1 , -1): - if mask[i]: - if fill_count >= lim: - continue - fill_count += 1 - values[i] = val - else: - fill_count = 0 - val = values[i] -@cython.boundscheck(False) -@cython.wraparound(False) -def backfill_inplace_bool(ndarray[uint8_t] values, - ndarray[uint8_t, cast=True] mask, - limit=None): - cdef Py_ssize_t i, N - cdef uint8_t val - cdef int lim, fill_count = 0 - - N = len(values) - - # GH 2778 - if N == 0: - return - - if limit is None: - lim = N - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit - - val = values[N - 1] - for i in range(N - 1, -1 , -1): - if mask[i]: - if fill_count >= lim: - continue - fill_count += 1 - values[i] = val - else: - fill_count = 0 - val = values[i] - -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_2d_inplace_float64(ndarray[float64_t, ndim=2] values, - ndarray[uint8_t, ndim=2] mask, - limit=None): - cdef Py_ssize_t i, j, N, K - cdef float64_t val - cdef int lim, fill_count = 0 - - K, N = ( values).shape - - # GH 2778 - if N == 0: - return - - if limit is None: - lim = N - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit - - for j in range(K): - fill_count = 0 - val = values[j, 0] - for i in range(N): - if mask[j, i]: - if fill_count >= lim: - continue - fill_count += 1 - values[j, i] = val - else: - fill_count = 0 - val = values[j, i] -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_2d_inplace_float32(ndarray[float32_t, ndim=2] values, - ndarray[uint8_t, ndim=2] mask, - limit=None): - cdef Py_ssize_t i, 
j, N, K - cdef float32_t val - cdef int lim, fill_count = 0 - - K, N = ( values).shape - - # GH 2778 - if N == 0: - return - - if limit is None: - lim = N - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit - - for j in range(K): - fill_count = 0 - val = values[j, 0] - for i in range(N): - if mask[j, i]: - if fill_count >= lim: - continue - fill_count += 1 - values[j, i] = val - else: - fill_count = 0 - val = values[j, i] -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_2d_inplace_object(ndarray[object, ndim=2] values, - ndarray[uint8_t, ndim=2] mask, - limit=None): - cdef Py_ssize_t i, j, N, K - cdef object val - cdef int lim, fill_count = 0 - - K, N = ( values).shape - - # GH 2778 - if N == 0: - return - - if limit is None: - lim = N - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit - - for j in range(K): - fill_count = 0 - val = values[j, 0] - for i in range(N): - if mask[j, i]: - if fill_count >= lim: - continue - fill_count += 1 - values[j, i] = val - else: - fill_count = 0 - val = values[j, i] -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_2d_inplace_int8(ndarray[int8_t, ndim=2] values, - ndarray[uint8_t, ndim=2] mask, - limit=None): - cdef Py_ssize_t i, j, N, K - cdef int8_t val - cdef int lim, fill_count = 0 - - K, N = ( values).shape - - # GH 2778 - if N == 0: - return - - if limit is None: - lim = N - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit - - for j in range(K): - fill_count = 0 - val = values[j, 0] - for i in range(N): - if mask[j, i]: - if fill_count >= lim: - continue - fill_count += 1 - values[j, i] = val - else: - fill_count = 0 - val = values[j, i] -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_2d_inplace_int16(ndarray[int16_t, ndim=2] values, - ndarray[uint8_t, ndim=2] mask, - limit=None): - cdef Py_ssize_t i, j, N, K - cdef int16_t val - cdef int lim, fill_count = 0 - - K, N = ( values).shape + K, N = ( values).shape # GH 2778 if N == 0: @@ -2179,11 +1691,11 @@ def backfill_2d_inplace_object(ndarray[object, ndim=2] values, val = values[j, i] @cython.boundscheck(False) @cython.wraparound(False) -def backfill_2d_inplace_int8(ndarray[int8_t, ndim=2] values, +def backfill_2d_inplace_int32(ndarray[int32_t, ndim=2] values, ndarray[uint8_t, ndim=2] mask, limit=None): cdef Py_ssize_t i, j, N, K - cdef int8_t val + cdef int32_t val cdef int lim, fill_count = 0 K, N = ( values).shape @@ -2213,11 +1725,11 @@ def backfill_2d_inplace_int8(ndarray[int8_t, ndim=2] values, val = values[j, i] @cython.boundscheck(False) @cython.wraparound(False) -def backfill_2d_inplace_int16(ndarray[int16_t, ndim=2] values, +def backfill_2d_inplace_int64(ndarray[int64_t, ndim=2] values, ndarray[uint8_t, ndim=2] mask, limit=None): cdef Py_ssize_t i, j, N, K - cdef int16_t val + cdef int64_t val cdef int lim, fill_count = 0 K, N = ( values).shape @@ -2247,11 +1759,11 @@ def backfill_2d_inplace_int16(ndarray[int16_t, ndim=2] values, val = values[j, i] @cython.boundscheck(False) @cython.wraparound(False) -def backfill_2d_inplace_int32(ndarray[int32_t, ndim=2] values, +def backfill_2d_inplace_bool(ndarray[uint8_t, ndim=2] values, ndarray[uint8_t, ndim=2] mask, limit=None): cdef Py_ssize_t i, j, N, K - cdef int32_t val + cdef uint8_t val cdef int lim, fill_count = 0 K, N = ( values).shape @@ -2279,221 +1791,425 @@ def backfill_2d_inplace_int32(ndarray[int32_t, ndim=2] values, else: fill_count = 0 val = values[j, i] + @cython.boundscheck(False) 
@cython.wraparound(False) -def backfill_2d_inplace_int64(ndarray[int64_t, ndim=2] values, - ndarray[uint8_t, ndim=2] mask, - limit=None): - cdef Py_ssize_t i, j, N, K - cdef int64_t val - cdef int lim, fill_count = 0 +def is_monotonic_float64(ndarray[float64_t] arr): + ''' + Returns + ------- + is_monotonic, is_unique + ''' + cdef: + Py_ssize_t i, n + float64_t prev, cur + bint is_unique = 1 - K, N = ( values).shape + n = len(arr) - # GH 2778 - if N == 0: - return - - if limit is None: - lim = N - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit + if n < 2: + return True, True - for j in range(K): - fill_count = 0 - val = values[j, N - 1] - for i in range(N - 1, -1 , -1): - if mask[j, i]: - if fill_count >= lim: - continue - fill_count += 1 - values[j, i] = val - else: - fill_count = 0 - val = values[j, i] + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if cur < prev: + return False, None + elif cur == prev: + is_unique = 0 + prev = cur + return True, is_unique @cython.boundscheck(False) @cython.wraparound(False) -def backfill_2d_inplace_bool(ndarray[uint8_t, ndim=2] values, - ndarray[uint8_t, ndim=2] mask, - limit=None): - cdef Py_ssize_t i, j, N, K - cdef uint8_t val - cdef int lim, fill_count = 0 - - K, N = ( values).shape - - # GH 2778 - if N == 0: - return +def is_monotonic_float32(ndarray[float32_t] arr): + ''' + Returns + ------- + is_monotonic, is_unique + ''' + cdef: + Py_ssize_t i, n + float32_t prev, cur + bint is_unique = 1 - if limit is None: - lim = N - else: - if limit < 0: - raise ValueError('Limit must be non-negative') - lim = limit + n = len(arr) - for j in range(K): - fill_count = 0 - val = values[j, N - 1] - for i in range(N - 1, -1 , -1): - if mask[j, i]: - if fill_count >= lim: - continue - fill_count += 1 - values[j, i] = val - else: - fill_count = 0 - val = values[j, i] + if n < 2: + return True, True + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if cur < prev: + return False, None + elif cur == prev: + is_unique = 0 + prev = cur + return True, is_unique +@cython.boundscheck(False) @cython.wraparound(False) -def take_1d_float64(ndarray[float64_t] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def is_monotonic_object(ndarray[object] arr): + ''' + Returns + ------- + is_monotonic, is_unique + ''' cdef: - Py_ssize_t i, n, idx - ndarray[float64_t] outbuf - float64_t fv - - n = len(indexer) + Py_ssize_t i, n + object prev, cur + bint is_unique = 1 - if out is None: - outbuf = np.empty(n, dtype=values.dtype) - else: - outbuf = out + n = len(arr) - if False and _checknan(fill_value): - for i in range(n): - idx = indexer[i] - if idx == -1: - raise ValueError('No NA values allowed') - else: - outbuf[i] = values[idx] - else: - fv = fill_value - for i in range(n): - idx = indexer[i] - if idx == -1: - outbuf[i] = fv - else: - outbuf[i] = values[idx] + if n < 2: + return True, True + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if cur < prev: + return False, None + elif cur == prev: + is_unique = 0 + prev = cur + return True, is_unique +@cython.boundscheck(False) @cython.wraparound(False) -def take_1d_float32(ndarray[float32_t] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def is_monotonic_int32(ndarray[int32_t] arr): + ''' + Returns + ------- + is_monotonic, is_unique + ''' cdef: - Py_ssize_t i, n, idx - ndarray[float32_t] outbuf - float32_t fv - - n = len(indexer) + Py_ssize_t i, n + int32_t prev, cur + bint is_unique = 1 - if out is None: - outbuf = np.empty(n, 
dtype=values.dtype) - else: - outbuf = out + n = len(arr) - if False and _checknan(fill_value): - for i in range(n): - idx = indexer[i] - if idx == -1: - raise ValueError('No NA values allowed') - else: - outbuf[i] = values[idx] - else: - fv = fill_value - for i in range(n): - idx = indexer[i] - if idx == -1: - outbuf[i] = fv - else: - outbuf[i] = values[idx] + if n < 2: + return True, True + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if cur < prev: + return False, None + elif cur == prev: + is_unique = 0 + prev = cur + return True, is_unique +@cython.boundscheck(False) @cython.wraparound(False) -def take_1d_object(ndarray[object] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def is_monotonic_int64(ndarray[int64_t] arr): + ''' + Returns + ------- + is_monotonic, is_unique + ''' cdef: - Py_ssize_t i, n, idx - ndarray[object] outbuf - object fv - - n = len(indexer) + Py_ssize_t i, n + int64_t prev, cur + bint is_unique = 1 - if out is None: - outbuf = np.empty(n, dtype=values.dtype) - else: - outbuf = out + n = len(arr) - if False and _checknan(fill_value): - for i in range(n): - idx = indexer[i] - if idx == -1: - raise ValueError('No NA values allowed') - else: - outbuf[i] = values[idx] - else: - fv = fill_value - for i in range(n): - idx = indexer[i] - if idx == -1: - outbuf[i] = fv - else: - outbuf[i] = values[idx] + if n < 2: + return True, True + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if cur < prev: + return False, None + elif cur == prev: + is_unique = 0 + prev = cur + return True, is_unique +@cython.boundscheck(False) @cython.wraparound(False) -def take_1d_int8(ndarray[int8_t] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def is_monotonic_bool(ndarray[uint8_t] arr): + ''' + Returns + ------- + is_monotonic, is_unique + ''' cdef: - Py_ssize_t i, n, idx - ndarray[int8_t] outbuf - int8_t fv + Py_ssize_t i, n + uint8_t prev, cur + bint is_unique = 1 - n = len(indexer) + n = len(arr) - if out is None: - outbuf = np.empty(n, dtype=values.dtype) - else: - outbuf = out + if n < 2: + return True, True - if True and _checknan(fill_value): - for i in range(n): - idx = indexer[i] - if idx == -1: - raise ValueError('No NA values allowed') - else: - outbuf[i] = values[idx] - else: - fv = fill_value - for i in range(n): - idx = indexer[i] - if idx == -1: - outbuf[i] = fv - else: - outbuf[i] = values[idx] + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if cur < prev: + return False, None + elif cur == prev: + is_unique = 0 + prev = cur + return True, is_unique @cython.wraparound(False) -def take_1d_int16(ndarray[int16_t] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): - cdef: - Py_ssize_t i, n, idx - ndarray[int16_t] outbuf - int16_t fv - - n = len(indexer) +@cython.boundscheck(False) +def groupby_float64(ndarray[float64_t] index, ndarray labels): + cdef dict result = {} + cdef Py_ssize_t i, length + cdef list members + cdef object idx, key - if out is None: - outbuf = np.empty(n, dtype=values.dtype) - else: - outbuf = out + length = len(index) + + for i in range(length): + key = util.get_value_1d(labels, i) + + if _checknull(key): + continue + + idx = index[i] + if key in result: + members = result[key] + members.append(idx) + else: + result[key] = [idx] + + return result + +@cython.wraparound(False) +@cython.boundscheck(False) +def groupby_float32(ndarray[float32_t] index, ndarray labels): + cdef dict result = {} + cdef Py_ssize_t i, length + cdef list members + cdef object idx, key + + length = 
len(index) + + for i in range(length): + key = util.get_value_1d(labels, i) + + if _checknull(key): + continue + + idx = index[i] + if key in result: + members = result[key] + members.append(idx) + else: + result[key] = [idx] + + return result + +@cython.wraparound(False) +@cython.boundscheck(False) +def groupby_object(ndarray[object] index, ndarray labels): + cdef dict result = {} + cdef Py_ssize_t i, length + cdef list members + cdef object idx, key + + length = len(index) + + for i in range(length): + key = util.get_value_1d(labels, i) + + if _checknull(key): + continue + + idx = index[i] + if key in result: + members = result[key] + members.append(idx) + else: + result[key] = [idx] + + return result + +@cython.wraparound(False) +@cython.boundscheck(False) +def groupby_int32(ndarray[int32_t] index, ndarray labels): + cdef dict result = {} + cdef Py_ssize_t i, length + cdef list members + cdef object idx, key + + length = len(index) + + for i in range(length): + key = util.get_value_1d(labels, i) + + if _checknull(key): + continue + + idx = index[i] + if key in result: + members = result[key] + members.append(idx) + else: + result[key] = [idx] + + return result + +@cython.wraparound(False) +@cython.boundscheck(False) +def groupby_int64(ndarray[int64_t] index, ndarray labels): + cdef dict result = {} + cdef Py_ssize_t i, length + cdef list members + cdef object idx, key + + length = len(index) + + for i in range(length): + key = util.get_value_1d(labels, i) + + if _checknull(key): + continue + + idx = index[i] + if key in result: + members = result[key] + members.append(idx) + else: + result[key] = [idx] + + return result + +@cython.wraparound(False) +@cython.boundscheck(False) +def groupby_bool(ndarray[uint8_t] index, ndarray labels): + cdef dict result = {} + cdef Py_ssize_t i, length + cdef list members + cdef object idx, key + + length = len(index) + + for i in range(length): + key = util.get_value_1d(labels, i) + + if _checknull(key): + continue + + idx = index[i] + if key in result: + members = result[key] + members.append(idx) + else: + result[key] = [idx] + + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +def arrmap_float64(ndarray[float64_t] index, object func): + cdef Py_ssize_t length = index.shape[0] + cdef Py_ssize_t i = 0 + + cdef ndarray[object] result = np.empty(length, dtype=np.object_) + + from pandas.lib import maybe_convert_objects + + for i in range(length): + result[i] = func(index[i]) + + return maybe_convert_objects(result) + +@cython.wraparound(False) +@cython.boundscheck(False) +def arrmap_float32(ndarray[float32_t] index, object func): + cdef Py_ssize_t length = index.shape[0] + cdef Py_ssize_t i = 0 + + cdef ndarray[object] result = np.empty(length, dtype=np.object_) + + from pandas.lib import maybe_convert_objects + + for i in range(length): + result[i] = func(index[i]) + + return maybe_convert_objects(result) + +@cython.wraparound(False) +@cython.boundscheck(False) +def arrmap_object(ndarray[object] index, object func): + cdef Py_ssize_t length = index.shape[0] + cdef Py_ssize_t i = 0 + + cdef ndarray[object] result = np.empty(length, dtype=np.object_) + + from pandas.lib import maybe_convert_objects + + for i in range(length): + result[i] = func(index[i]) + + return maybe_convert_objects(result) + +@cython.wraparound(False) +@cython.boundscheck(False) +def arrmap_int32(ndarray[int32_t] index, object func): + cdef Py_ssize_t length = index.shape[0] + cdef Py_ssize_t i = 0 + + cdef ndarray[object] result = np.empty(length, 
dtype=np.object_) + + from pandas.lib import maybe_convert_objects + + for i in range(length): + result[i] = func(index[i]) + + return maybe_convert_objects(result) + +@cython.wraparound(False) +@cython.boundscheck(False) +def arrmap_int64(ndarray[int64_t] index, object func): + cdef Py_ssize_t length = index.shape[0] + cdef Py_ssize_t i = 0 + + cdef ndarray[object] result = np.empty(length, dtype=np.object_) + + from pandas.lib import maybe_convert_objects + + for i in range(length): + result[i] = func(index[i]) + + return maybe_convert_objects(result) + +@cython.wraparound(False) +@cython.boundscheck(False) +def arrmap_bool(ndarray[uint8_t] index, object func): + cdef Py_ssize_t length = index.shape[0] + cdef Py_ssize_t i = 0 + + cdef ndarray[object] result = np.empty(length, dtype=np.object_) + + from pandas.lib import maybe_convert_objects + + for i in range(length): + result[i] = func(index[i]) + + return maybe_convert_objects(result) + + +@cython.wraparound(False) +def take_1d_bool_bool(ndarray[uint8_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + ndarray[uint8_t] outbuf = out + uint8_t fv + + n = len(indexer) if True and _checknan(fill_value): - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: raise ValueError('No NA values allowed') @@ -2501,7 +2217,7 @@ def take_1d_int16(ndarray[int16_t] values, outbuf[i] = values[idx] else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: outbuf[i] = fv @@ -2509,55 +2225,45 @@ def take_1d_int16(ndarray[int16_t] values, outbuf[i] = values[idx] @cython.wraparound(False) -def take_1d_int32(ndarray[int32_t] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_1d_bool_object(ndarray[uint8_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, n, idx - ndarray[int32_t] outbuf - int32_t fv + ndarray[object] outbuf = out + object fv n = len(indexer) - if out is None: - outbuf = np.empty(n, dtype=values.dtype) - else: - outbuf = out - - if True and _checknan(fill_value): - for i in range(n): + if False and _checknan(fill_value): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: raise ValueError('No NA values allowed') else: - outbuf[i] = values[idx] + outbuf[i] = True if values[idx] > 0 else False else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: outbuf[i] = fv else: - outbuf[i] = values[idx] + outbuf[i] = True if values[idx] > 0 else False @cython.wraparound(False) -def take_1d_int64(ndarray[int64_t] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_1d_int8_int8(ndarray[int8_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, n, idx - ndarray[int64_t] outbuf - int64_t fv + ndarray[int8_t] outbuf = out + int8_t fv n = len(indexer) - if out is None: - outbuf = np.empty(n, dtype=values.dtype) - else: - outbuf = out - if True and _checknan(fill_value): - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: raise ValueError('No NA values allowed') @@ -2565,7 +2271,7 @@ def take_1d_int64(ndarray[int64_t] values, outbuf[i] = values[idx] else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: outbuf[i] = fv @@ -2573,23 +2279,18 @@ def take_1d_int64(ndarray[int64_t] values, outbuf[i] = values[idx] @cython.wraparound(False) -def take_1d_bool(ndarray[uint8_t] values, - ndarray[int64_t] indexer, - out=None, 
fill_value=np.nan): +def take_1d_int8_int32(ndarray[int8_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, n, idx - ndarray[uint8_t] outbuf - uint8_t fv + ndarray[int32_t] outbuf = out + int32_t fv n = len(indexer) - if out is None: - outbuf = np.empty(n, dtype=values.dtype) - else: - outbuf = out - if True and _checknan(fill_value): - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: raise ValueError('No NA values allowed') @@ -2597,573 +2298,434 @@ def take_1d_bool(ndarray[uint8_t] values, outbuf[i] = values[idx] else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: outbuf[i] = fv else: outbuf[i] = values[idx] - -@cython.boundscheck(False) @cython.wraparound(False) -def is_monotonic_float64(ndarray[float64_t] arr): - ''' - Returns - ------- - is_monotonic, is_unique - ''' +def take_1d_int8_int64(ndarray[int8_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, n - float64_t prev, cur - bint is_unique = 1 + Py_ssize_t i, n, idx + ndarray[int64_t] outbuf = out + int64_t fv - n = len(arr) + n = len(indexer) - if n < 2: - return True, True + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] - prev = arr[0] - for i in range(1, n): - cur = arr[i] - if cur < prev: - return False, None - elif cur == prev: - is_unique = 0 - prev = cur - return True, is_unique -@cython.boundscheck(False) @cython.wraparound(False) -def is_monotonic_float32(ndarray[float32_t] arr): - ''' - Returns - ------- - is_monotonic, is_unique - ''' +def take_1d_int8_float64(ndarray[int8_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, n - float32_t prev, cur - bint is_unique = 1 + Py_ssize_t i, n, idx + ndarray[float64_t] outbuf = out + float64_t fv - n = len(arr) + n = len(indexer) - if n < 2: - return True, True + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] - prev = arr[0] - for i in range(1, n): - cur = arr[i] - if cur < prev: - return False, None - elif cur == prev: - is_unique = 0 - prev = cur - return True, is_unique -@cython.boundscheck(False) @cython.wraparound(False) -def is_monotonic_object(ndarray[object] arr): - ''' - Returns - ------- - is_monotonic, is_unique - ''' +def take_1d_int16_int16(ndarray[int16_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, n - object prev, cur - bint is_unique = 1 + Py_ssize_t i, n, idx + ndarray[int16_t] outbuf = out + int16_t fv - n = len(arr) + n = len(indexer) - if n < 2: - return True, True + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] - prev = arr[0] - for i in range(1, n): - cur = arr[i] - if cur < prev: - return False, None - elif cur == prev: - is_unique = 0 - prev = cur - return True, 
is_unique -@cython.boundscheck(False) @cython.wraparound(False) -def is_monotonic_int8(ndarray[int8_t] arr): - ''' - Returns - ------- - is_monotonic, is_unique - ''' +def take_1d_int16_int32(ndarray[int16_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, n - int8_t prev, cur - bint is_unique = 1 + Py_ssize_t i, n, idx + ndarray[int32_t] outbuf = out + int32_t fv - n = len(arr) + n = len(indexer) - if n < 2: - return True, True + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] - prev = arr[0] - for i in range(1, n): - cur = arr[i] - if cur < prev: - return False, None - elif cur == prev: - is_unique = 0 - prev = cur - return True, is_unique -@cython.boundscheck(False) @cython.wraparound(False) -def is_monotonic_int16(ndarray[int16_t] arr): - ''' - Returns - ------- - is_monotonic, is_unique - ''' +def take_1d_int16_int64(ndarray[int16_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, n - int16_t prev, cur - bint is_unique = 1 + Py_ssize_t i, n, idx + ndarray[int64_t] outbuf = out + int64_t fv - n = len(arr) + n = len(indexer) - if n < 2: - return True, True + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] - prev = arr[0] - for i in range(1, n): - cur = arr[i] - if cur < prev: - return False, None - elif cur == prev: - is_unique = 0 - prev = cur - return True, is_unique -@cython.boundscheck(False) @cython.wraparound(False) -def is_monotonic_int32(ndarray[int32_t] arr): - ''' - Returns - ------- - is_monotonic, is_unique - ''' +def take_1d_int16_float64(ndarray[int16_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, n - int32_t prev, cur - bint is_unique = 1 + Py_ssize_t i, n, idx + ndarray[float64_t] outbuf = out + float64_t fv - n = len(arr) + n = len(indexer) - if n < 2: - return True, True + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] - prev = arr[0] - for i in range(1, n): - cur = arr[i] - if cur < prev: - return False, None - elif cur == prev: - is_unique = 0 - prev = cur - return True, is_unique -@cython.boundscheck(False) @cython.wraparound(False) -def is_monotonic_int64(ndarray[int64_t] arr): - ''' - Returns - ------- - is_monotonic, is_unique - ''' +def take_1d_int32_int32(ndarray[int32_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, n - int64_t prev, cur - bint is_unique = 1 + Py_ssize_t i, n, idx + ndarray[int32_t] outbuf = out + int32_t fv - n = len(arr) + n = len(indexer) - if n < 2: - return True, True + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx 
== -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] - prev = arr[0] - for i in range(1, n): - cur = arr[i] - if cur < prev: - return False, None - elif cur == prev: - is_unique = 0 - prev = cur - return True, is_unique -@cython.boundscheck(False) @cython.wraparound(False) -def is_monotonic_bool(ndarray[uint8_t] arr): - ''' - Returns - ------- - is_monotonic, is_unique - ''' +def take_1d_int32_int64(ndarray[int32_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, n - uint8_t prev, cur - bint is_unique = 1 - - n = len(arr) + Py_ssize_t i, n, idx + ndarray[int64_t] outbuf = out + int64_t fv - if n < 2: - return True, True + n = len(indexer) - prev = arr[0] - for i in range(1, n): - cur = arr[i] - if cur < prev: - return False, None - elif cur == prev: - is_unique = 0 - prev = cur - return True, is_unique + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] @cython.wraparound(False) -@cython.boundscheck(False) -def groupby_float64(ndarray[float64_t] index, ndarray labels): - cdef dict result = {} - cdef Py_ssize_t i, length - cdef list members - cdef object idx, key +def take_1d_int32_float64(ndarray[int32_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + ndarray[float64_t] outbuf = out + float64_t fv - length = len(index) + n = len(indexer) - for i in range(length): - key = util.get_value_1d(labels, i) + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] - if _checknull(key): - continue +@cython.wraparound(False) +def take_1d_int64_int64(ndarray[int64_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + ndarray[int64_t] outbuf = out + int64_t fv - idx = index[i] - if key in result: - members = result[key] - members.append(idx) - else: - result[key] = [idx] + n = len(indexer) - return result + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] @cython.wraparound(False) -@cython.boundscheck(False) -def groupby_float32(ndarray[float32_t] index, ndarray labels): - cdef dict result = {} - cdef Py_ssize_t i, length - cdef list members - cdef object idx, key +def take_1d_int64_float64(ndarray[int64_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + ndarray[float64_t] outbuf = out + float64_t fv - length = len(index) + n = len(indexer) - for i in range(length): - key = util.get_value_1d(labels, i) + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] - if _checknull(key): - continue 
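A brief aside on the take_1d_<srctype>_<desttype> naming introduced by this patch: the int64 indexer uses -1 to mark missing positions, `out` is now a required argument supplied by the caller, and the destination dtype may be wider than the source (e.g. int32 values taken into a float64 output) so that a NaN fill value is representable; the integer-to-integer variants instead raise 'No NA values allowed' when handed a NaN fill. A minimal NumPy sketch of the fill semantics (not part of the patch; names are illustrative):

import numpy as np

def take_1d_sketch(values, indexer, out, fill_value=np.nan):
    # Indexer entries of -1 receive fill_value; everything else is a plain take.
    for i, idx in enumerate(indexer):
        out[i] = fill_value if idx == -1 else values[idx]

values = np.array([10, 20, 30], dtype=np.int32)
indexer = np.array([2, -1, 0], dtype=np.int64)
out = np.empty(3, dtype=np.float64)   # wider dtype so NaN can serve as the fill
take_1d_sketch(values, indexer, out)
# out is now [30.0, nan, 10.0]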
+@cython.wraparound(False) +def take_1d_float32_float32(ndarray[float32_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + ndarray[float32_t] outbuf = out + float32_t fv - idx = index[i] - if key in result: - members = result[key] - members.append(idx) - else: - result[key] = [idx] + n = len(indexer) - return result + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] @cython.wraparound(False) -@cython.boundscheck(False) -def groupby_object(ndarray[object] index, ndarray labels): - cdef dict result = {} - cdef Py_ssize_t i, length - cdef list members - cdef object idx, key +def take_1d_float32_float64(ndarray[float32_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + ndarray[float64_t] outbuf = out + float64_t fv - length = len(index) + n = len(indexer) - for i in range(length): - key = util.get_value_1d(labels, i) + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] - if _checknull(key): - continue +@cython.wraparound(False) +def take_1d_float64_float64(ndarray[float64_t] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + ndarray[float64_t] outbuf = out + float64_t fv - idx = index[i] - if key in result: - members = result[key] - members.append(idx) - else: - result[key] = [idx] + n = len(indexer) - return result + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] @cython.wraparound(False) -@cython.boundscheck(False) -def groupby_int8(ndarray[int8_t] index, ndarray labels): - cdef dict result = {} - cdef Py_ssize_t i, length - cdef list members - cdef object idx, key - - length = len(index) - - for i in range(length): - key = util.get_value_1d(labels, i) +def take_1d_object_object(ndarray[object] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + ndarray[object] outbuf = out + object fv - if _checknull(key): - continue + n = len(indexer) - idx = index[i] - if key in result: - members = result[key] - members.append(idx) - else: - result[key] = [idx] + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i] = values[idx] + else: + fv = fill_value + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + outbuf[i] = fv + else: + outbuf[i] = values[idx] - return result @cython.wraparound(False) @cython.boundscheck(False) -def groupby_int16(ndarray[int16_t] index, ndarray labels): - cdef dict result = {} - cdef Py_ssize_t i, length - cdef list members - cdef object idx, key - - length = len(index) - - for i in range(length): - key = util.get_value_1d(labels, i) +def take_2d_axis0_bool_bool(ndarray[uint8_t, ndim=2] values, + ndarray[int64_t] 
indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[uint8_t, ndim=2] outbuf = out + uint8_t fv - if _checknull(key): - continue + n = len(indexer) + k = values.shape[1] - idx = index[i] - if key in result: - members = result[key] - members.append(idx) - else: - result[key] = [idx] - - return result - -@cython.wraparound(False) -@cython.boundscheck(False) -def groupby_int32(ndarray[int32_t] index, ndarray labels): - cdef dict result = {} - cdef Py_ssize_t i, length - cdef list members - cdef object idx, key - - length = len(index) - - for i in range(length): - key = util.get_value_1d(labels, i) - - if _checknull(key): - continue - - idx = index[i] - if key in result: - members = result[key] - members.append(idx) - else: - result[key] = [idx] - - return result - -@cython.wraparound(False) -@cython.boundscheck(False) -def groupby_int64(ndarray[int64_t] index, ndarray labels): - cdef dict result = {} - cdef Py_ssize_t i, length - cdef list members - cdef object idx, key - - length = len(index) - - for i in range(length): - key = util.get_value_1d(labels, i) - - if _checknull(key): - continue - - idx = index[i] - if key in result: - members = result[key] - members.append(idx) - else: - result[key] = [idx] - - return result - -@cython.wraparound(False) -@cython.boundscheck(False) -def groupby_bool(ndarray[uint8_t] index, ndarray labels): - cdef dict result = {} - cdef Py_ssize_t i, length - cdef list members - cdef object idx, key - - length = len(index) - - for i in range(length): - key = util.get_value_1d(labels, i) - - if _checknull(key): - continue - - idx = index[i] - if key in result: - members = result[key] - members.append(idx) - else: - result[key] = [idx] - - return result - - -@cython.wraparound(False) -@cython.boundscheck(False) -def arrmap_float64(ndarray[float64_t] index, object func): - cdef Py_ssize_t length = index.shape[0] - cdef Py_ssize_t i = 0 - - cdef ndarray[object] result = np.empty(length, dtype=np.object_) - - from pandas.lib import maybe_convert_objects - - for i in range(length): - result[i] = func(index[i]) - - return maybe_convert_objects(result) - -@cython.wraparound(False) -@cython.boundscheck(False) -def arrmap_float32(ndarray[float32_t] index, object func): - cdef Py_ssize_t length = index.shape[0] - cdef Py_ssize_t i = 0 - - cdef ndarray[object] result = np.empty(length, dtype=np.object_) - - from pandas.lib import maybe_convert_objects - - for i in range(length): - result[i] = func(index[i]) - - return maybe_convert_objects(result) - -@cython.wraparound(False) -@cython.boundscheck(False) -def arrmap_object(ndarray[object] index, object func): - cdef Py_ssize_t length = index.shape[0] - cdef Py_ssize_t i = 0 - - cdef ndarray[object] result = np.empty(length, dtype=np.object_) - - from pandas.lib import maybe_convert_objects - - for i in range(length): - result[i] = func(index[i]) - - return maybe_convert_objects(result) - -@cython.wraparound(False) -@cython.boundscheck(False) -def arrmap_int8(ndarray[int8_t] index, object func): - cdef Py_ssize_t length = index.shape[0] - cdef Py_ssize_t i = 0 - - cdef ndarray[object] result = np.empty(length, dtype=np.object_) - - from pandas.lib import maybe_convert_objects - - for i in range(length): - result[i] = func(index[i]) - - return maybe_convert_objects(result) - -@cython.wraparound(False) -@cython.boundscheck(False) -def arrmap_int16(ndarray[int16_t] index, object func): - cdef Py_ssize_t length = index.shape[0] - cdef Py_ssize_t i = 0 - - cdef ndarray[object] result = np.empty(length, 
dtype=np.object_) - - from pandas.lib import maybe_convert_objects - - for i in range(length): - result[i] = func(index[i]) - - return maybe_convert_objects(result) - -@cython.wraparound(False) -@cython.boundscheck(False) -def arrmap_int32(ndarray[int32_t] index, object func): - cdef Py_ssize_t length = index.shape[0] - cdef Py_ssize_t i = 0 - - cdef ndarray[object] result = np.empty(length, dtype=np.object_) - - from pandas.lib import maybe_convert_objects - - for i in range(length): - result[i] = func(index[i]) - - return maybe_convert_objects(result) - -@cython.wraparound(False) -@cython.boundscheck(False) -def arrmap_int64(ndarray[int64_t] index, object func): - cdef Py_ssize_t length = index.shape[0] - cdef Py_ssize_t i = 0 - - cdef ndarray[object] result = np.empty(length, dtype=np.object_) - - from pandas.lib import maybe_convert_objects - - for i in range(length): - result[i] = func(index[i]) - - return maybe_convert_objects(result) - -@cython.wraparound(False) -@cython.boundscheck(False) -def arrmap_bool(ndarray[uint8_t] index, object func): - cdef Py_ssize_t length = index.shape[0] - cdef Py_ssize_t i = 0 - - cdef ndarray[object] result = np.empty(length, dtype=np.object_) - - from pandas.lib import maybe_convert_objects - - for i in range(length): - result[i] = func(index[i]) - - return maybe_convert_objects(result) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def take_2d_axis0_float64(ndarray[float64_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): - cdef: - Py_ssize_t i, j, k, n, idx - ndarray[float64_t, ndim=2] outbuf - float64_t fv - - n = len(indexer) - k = values.shape[1] - - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - - if False and _checknan(fill_value): - for i in range(n): + if True and _checknan(fill_value): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: for j from 0 <= j < k: @@ -3173,73 +2735,63 @@ def take_2d_axis0_float64(ndarray[float64_t, ndim=2] values, outbuf[i, j] = values[idx, j] else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis0_float32(ndarray[float32_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_bool_object(ndarray[uint8_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[float32_t, ndim=2] outbuf - float32_t fv + ndarray[object, ndim=2] outbuf = out + object fv n = len(indexer) k = values.shape[1] - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - if False and _checknan(fill_value): - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: for j from 0 <= j < k: raise ValueError('No NA values allowed') else: for j from 0 <= j < k: - outbuf[i, j] = values[idx, j] + outbuf[i, j] = True if values[idx, j] > 0 else False else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): - outbuf[i, j] = values[idx, j] + for j from 0 <= j < k: + outbuf[i, j] = True if values[idx, j] > 0 else False @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis0_object(ndarray[object, ndim=2] values, - 
ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int8_int8(ndarray[int8_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[object, ndim=2] outbuf - object fv + ndarray[int8_t, ndim=2] outbuf = out + int8_t fv n = len(indexer) k = values.shape[1] - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - - if False and _checknan(fill_value): - for i in range(n): + if True and _checknan(fill_value): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: for j from 0 <= j < k: @@ -3249,35 +2801,30 @@ def take_2d_axis0_object(ndarray[object, ndim=2] values, outbuf[i, j] = values[idx, j] else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis0_int8(ndarray[int8_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int8_int32(ndarray[int8_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int8_t, ndim=2] outbuf - int8_t fv + ndarray[int32_t, ndim=2] outbuf = out + int32_t fv n = len(indexer) k = values.shape[1] - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - if True and _checknan(fill_value): - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: for j from 0 <= j < k: @@ -3287,35 +2834,30 @@ def take_2d_axis0_int8(ndarray[int8_t, ndim=2] values, outbuf[i, j] = values[idx, j] else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis0_int16(ndarray[int16_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int8_int64(ndarray[int8_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int16_t, ndim=2] outbuf - int16_t fv + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv n = len(indexer) k = values.shape[1] - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - if True and _checknan(fill_value): - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: for j from 0 <= j < k: @@ -3325,35 +2867,30 @@ def take_2d_axis0_int16(ndarray[int16_t, ndim=2] values, outbuf[i, j] = values[idx, j] else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis0_int32(ndarray[int32_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int8_float64(ndarray[int8_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int32_t, ndim=2] outbuf - int32_t fv + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv n = len(indexer) k = values.shape[1] - if out is None: - outbuf = np.empty((n, k), 
dtype=values.dtype) - else: - outbuf = out - - if True and _checknan(fill_value): - for i in range(n): + if False and _checknan(fill_value): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: for j from 0 <= j < k: @@ -3363,35 +2900,30 @@ def take_2d_axis0_int32(ndarray[int32_t, ndim=2] values, outbuf[i, j] = values[idx, j] else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis0_int64(ndarray[int64_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int16_int16(ndarray[int16_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int64_t, ndim=2] outbuf - int64_t fv + ndarray[int16_t, ndim=2] outbuf = out + int16_t fv n = len(indexer) k = values.shape[1] - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - if True and _checknan(fill_value): - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: for j from 0 <= j < k: @@ -3401,35 +2933,30 @@ def take_2d_axis0_int64(ndarray[int64_t, ndim=2] values, outbuf[i, j] = values[idx, j] else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis0_bool(ndarray[uint8_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int16_int32(ndarray[int16_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[uint8_t, ndim=2] outbuf - uint8_t fv + ndarray[int32_t, ndim=2] outbuf = out + int32_t fv n = len(indexer) k = values.shape[1] - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - if True and _checknan(fill_value): - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: for j from 0 <= j < k: @@ -3439,1775 +2966,2416 @@ def take_2d_axis0_bool(ndarray[uint8_t, ndim=2] values, outbuf[i, j] = values[idx, j] else: fv = fill_value - for i in range(n): + for i from 0 <= i < n: idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = values[idx, j] - @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis1_float64(ndarray[float64_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int16_int64(ndarray[int16_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[float64_t, ndim=2] outbuf - float64_t fv - - n = len(values) - k = len(indexer) - - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv - if False and _checknan(fill_value): - for j in range(k): - idx = indexer[j] + n = len(indexer) + k = values.shape[1] + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for i in range(n): - 
outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] else: fv = fill_value - for j in range(k): - idx = indexer[j] - + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis1_float32(ndarray[float32_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int16_float64(ndarray[int16_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[float32_t, ndim=2] outbuf - float32_t fv - - n = len(values) - k = len(indexer) + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out + n = len(indexer) + k = values.shape[1] if False and _checknan(fill_value): - for j in range(k): - idx = indexer[j] - + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] else: fv = fill_value - for j in range(k): - idx = indexer[j] - + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis1_object(ndarray[object, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int32_int32(ndarray[int32_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[object, ndim=2] outbuf - object fv - - n = len(values) - k = len(indexer) - - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out + ndarray[int32_t, ndim=2] outbuf = out + int32_t fv - if False and _checknan(fill_value): - for j in range(k): - idx = indexer[j] + n = len(indexer) + k = values.shape[1] + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] else: fv = fill_value - for j in range(k): - idx = indexer[j] - + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis1_int8(ndarray[int8_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int32_int64(ndarray[int32_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int8_t, ndim=2] outbuf - int8_t fv - - n = len(values) - k = len(indexer) + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out + n = len(indexer) + k = values.shape[1] if True and _checknan(fill_value): - for j in range(k): - 
idx = indexer[j] - + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] else: fv = fill_value - for j in range(k): - idx = indexer[j] - + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis1_int16(ndarray[int16_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int32_float64(ndarray[int32_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int16_t, ndim=2] outbuf - int16_t fv - - n = len(values) - k = len(indexer) - - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv - if True and _checknan(fill_value): - for j in range(k): - idx = indexer[j] + n = len(indexer) + k = values.shape[1] + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] else: fv = fill_value - for j in range(k): - idx = indexer[j] - + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis1_int32(ndarray[int32_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int64_int64(ndarray[int64_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int32_t, ndim=2] outbuf - int32_t fv - - n = len(values) - k = len(indexer) + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out + n = len(indexer) + k = values.shape[1] if True and _checknan(fill_value): - for j in range(k): - idx = indexer[j] - + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] else: fv = fill_value - for j in range(k): - idx = indexer[j] - + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis1_int64(ndarray[int64_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_int64_float64(ndarray[int64_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int64_t, ndim=2] outbuf - int64_t fv - - n = len(values) - k = len(indexer) - - if out is None: - outbuf = np.empty((n, k), 
dtype=values.dtype) - else: - outbuf = out + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv - if True and _checknan(fill_value): - for j in range(k): - idx = indexer[j] + n = len(indexer) + k = values.shape[1] + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] else: fv = fill_value - for j in range(k): - idx = indexer[j] - + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_axis1_bool(ndarray[uint8_t, ndim=2] values, - ndarray[int64_t] indexer, - out=None, fill_value=np.nan): +def take_2d_axis0_float32_float32(ndarray[float32_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[uint8_t, ndim=2] outbuf - uint8_t fv - - n = len(values) - k = len(indexer) - - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out + ndarray[float32_t, ndim=2] outbuf = out + float32_t fv - if True and _checknan(fill_value): - for j in range(k): - idx = indexer[j] + n = len(indexer) + k = values.shape[1] + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for i in range(n): - outbuf[i, j] = values[i, idx] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] else: fv = fill_value - for j in range(k): - idx = indexer[j] - + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for i in range(n): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for i in range(n): - outbuf[i, j] = values[i, idx] - + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_multi_float64(ndarray[float64_t, ndim=2] values, - ndarray[int64_t] idx0, - ndarray[int64_t] idx1, - out=None, fill_value=np.nan): +def take_2d_axis0_float32_float64(ndarray[float32_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[float64_t, ndim=2] outbuf + ndarray[float64_t, ndim=2] outbuf = out float64_t fv - n = len(idx0) - k = len(idx1) - - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - + n = len(indexer) + k = values.shape[1] if False and _checknan(fill_value): - for i in range(n): - idx = idx0[i] + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for j in range(k): - if idx1[j] == -1: - raise ValueError('No NA values allowed') - else: - outbuf[i, j] = values[idx, idx1[j]] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] else: fv = fill_value - for i in range(n): - idx = idx0[i] + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): - if idx1[j] == -1: - outbuf[i, j] = fv - else: - outbuf[i, j] = values[idx, idx1[j]] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def 
take_2d_multi_float32(ndarray[float32_t, ndim=2] values, - ndarray[int64_t] idx0, - ndarray[int64_t] idx1, - out=None, fill_value=np.nan): +def take_2d_axis0_float64_float64(ndarray[float64_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[float32_t, ndim=2] outbuf - float32_t fv - - n = len(idx0) - k = len(idx1) - - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv + n = len(indexer) + k = values.shape[1] if False and _checknan(fill_value): - for i in range(n): - idx = idx0[i] + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for j in range(k): - if idx1[j] == -1: - raise ValueError('No NA values allowed') - else: - outbuf[i, j] = values[idx, idx1[j]] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] else: fv = fill_value - for i in range(n): - idx = idx0[i] + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): - if idx1[j] == -1: - outbuf[i, j] = fv - else: - outbuf[i, j] = values[idx, idx1[j]] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_multi_object(ndarray[object, ndim=2] values, - ndarray[int64_t] idx0, - ndarray[int64_t] idx1, - out=None, fill_value=np.nan): +def take_2d_axis0_object_object(ndarray[object, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[object, ndim=2] outbuf + ndarray[object, ndim=2] outbuf = out object fv - n = len(idx0) - k = len(idx1) - - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out - + n = len(indexer) + k = values.shape[1] if False and _checknan(fill_value): - for i in range(n): - idx = idx0[i] + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: raise ValueError('No NA values allowed') else: - for j in range(k): - if idx1[j] == -1: - raise ValueError('No NA values allowed') - else: - outbuf[i, j] = values[idx, idx1[j]] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] else: fv = fill_value - for i in range(n): - idx = idx0[i] + for i from 0 <= i < n: + idx = indexer[i] if idx == -1: - for j in range(k): + for j from 0 <= j < k: outbuf[i, j] = fv else: - for j in range(k): - if idx1[j] == -1: - outbuf[i, j] = fv - else: - outbuf[i, j] = values[idx, idx1[j]] + for j from 0 <= j < k: + outbuf[i, j] = values[idx, j] + @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_multi_int8(ndarray[int8_t, ndim=2] values, - ndarray[int64_t] idx0, - ndarray[int64_t] idx1, - out=None, fill_value=np.nan): +def take_2d_axis1_bool_bool(ndarray[uint8_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int8_t, ndim=2] outbuf - int8_t fv - - n = len(idx0) - k = len(idx1) - - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) - else: - outbuf = out + ndarray[uint8_t, ndim=2] outbuf = out + uint8_t fv + n = len(values) + k = len(indexer) if True and _checknan(fill_value): - for i in range(n): - idx = idx0[i] + for j from 0 <= j < k: + idx = indexer[j] if idx == -1: - for j in range(k): + for i from 0 <= i < n: raise ValueError('No NA values allowed') else: - for j in range(k): - if idx1[j] == 
-1: - raise ValueError('No NA values allowed') - else: - outbuf[i, j] = values[idx, idx1[j]] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: fv = fill_value - for i in range(n): - idx = idx0[i] + for j from 0 <= j < k: + idx = indexer[j] if idx == -1: - for j in range(k): + for i from 0 <= i < n: outbuf[i, j] = fv else: - for j in range(k): - if idx1[j] == -1: - outbuf[i, j] = fv - else: - outbuf[i, j] = values[idx, idx1[j]] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_multi_int16(ndarray[int16_t, ndim=2] values, - ndarray[int64_t] idx0, - ndarray[int64_t] idx1, - out=None, fill_value=np.nan): +def take_2d_axis1_bool_object(ndarray[uint8_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int16_t, ndim=2] outbuf - int16_t fv + ndarray[object, ndim=2] outbuf = out + object fv - n = len(idx0) - k = len(idx1) + n = len(values) + k = len(indexer) - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) + if False and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') + else: + for i from 0 <= i < n: + outbuf[i, j] = True if values[i, idx] > 0 else False else: - outbuf = out + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv + else: + for i from 0 <= i < n: + outbuf[i, j] = True if values[i, idx] > 0 else False + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_axis1_int8_int8(ndarray[int8_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int8_t, ndim=2] outbuf = out + int8_t fv + n = len(values) + k = len(indexer) if True and _checknan(fill_value): - for i in range(n): - idx = idx0[i] + for j from 0 <= j < k: + idx = indexer[j] if idx == -1: - for j in range(k): + for i from 0 <= i < n: raise ValueError('No NA values allowed') else: - for j in range(k): - if idx1[j] == -1: - raise ValueError('No NA values allowed') - else: - outbuf[i, j] = values[idx, idx1[j]] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: fv = fill_value - for i in range(n): - idx = idx0[i] + for j from 0 <= j < k: + idx = indexer[j] if idx == -1: - for j in range(k): + for i from 0 <= i < n: outbuf[i, j] = fv else: - for j in range(k): - if idx1[j] == -1: - outbuf[i, j] = fv - else: - outbuf[i, j] = values[idx, idx1[j]] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_multi_int32(ndarray[int32_t, ndim=2] values, - ndarray[int64_t] idx0, - ndarray[int64_t] idx1, - out=None, fill_value=np.nan): +def take_2d_axis1_int8_int32(ndarray[int8_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int32_t, ndim=2] outbuf + ndarray[int32_t, ndim=2] outbuf = out int32_t fv - n = len(idx0) - k = len(idx1) + n = len(values) + k = len(indexer) - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) + if True and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: - outbuf = out + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, 
j] = fv + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_axis1_int8_int64(ndarray[int8_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv + n = len(values) + k = len(indexer) if True and _checknan(fill_value): - for i in range(n): - idx = idx0[i] + for j from 0 <= j < k: + idx = indexer[j] if idx == -1: - for j in range(k): + for i from 0 <= i < n: raise ValueError('No NA values allowed') else: - for j in range(k): - if idx1[j] == -1: - raise ValueError('No NA values allowed') - else: - outbuf[i, j] = values[idx, idx1[j]] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: fv = fill_value - for i in range(n): - idx = idx0[i] + for j from 0 <= j < k: + idx = indexer[j] if idx == -1: - for j in range(k): + for i from 0 <= i < n: outbuf[i, j] = fv else: - for j in range(k): - if idx1[j] == -1: - outbuf[i, j] = fv - else: - outbuf[i, j] = values[idx, idx1[j]] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_multi_int64(ndarray[int64_t, ndim=2] values, - ndarray[int64_t] idx0, - ndarray[int64_t] idx1, - out=None, fill_value=np.nan): +def take_2d_axis1_int8_float64(ndarray[int8_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int64_t, ndim=2] outbuf - int64_t fv + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv - n = len(idx0) - k = len(idx1) + n = len(values) + k = len(indexer) - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) + if False and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: - outbuf = out + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_axis1_int16_int16(ndarray[int16_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int16_t, ndim=2] outbuf = out + int16_t fv + n = len(values) + k = len(indexer) if True and _checknan(fill_value): - for i in range(n): - idx = idx0[i] + for j from 0 <= j < k: + idx = indexer[j] if idx == -1: - for j in range(k): + for i from 0 <= i < n: raise ValueError('No NA values allowed') else: - for j in range(k): - if idx1[j] == -1: - raise ValueError('No NA values allowed') - else: - outbuf[i, j] = values[idx, idx1[j]] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: fv = fill_value - for i in range(n): - idx = idx0[i] + for j from 0 <= j < k: + idx = indexer[j] if idx == -1: - for j in range(k): + for i from 0 <= i < n: outbuf[i, j] = fv else: - for j in range(k): - if idx1[j] == -1: - outbuf[i, j] = fv - else: - outbuf[i, j] = values[idx, idx1[j]] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] @cython.wraparound(False) @cython.boundscheck(False) -def take_2d_multi_bool(ndarray[uint8_t, ndim=2] values, - ndarray[int64_t] idx0, - ndarray[int64_t] idx1, - out=None, fill_value=np.nan): +def take_2d_axis1_int16_int32(ndarray[int16_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: Py_ssize_t i, 
j, k, n, idx - ndarray[uint8_t, ndim=2] outbuf - uint8_t fv + ndarray[int32_t, ndim=2] outbuf = out + int32_t fv - n = len(idx0) - k = len(idx1) + n = len(values) + k = len(indexer) - if out is None: - outbuf = np.empty((n, k), dtype=values.dtype) + if True and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: - outbuf = out + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_axis1_int16_int64(ndarray[int16_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv + n = len(values) + k = len(indexer) if True and _checknan(fill_value): - for i in range(n): - idx = idx0[i] + for j from 0 <= j < k: + idx = indexer[j] if idx == -1: - for j in range(k): + for i from 0 <= i < n: raise ValueError('No NA values allowed') else: - for j in range(k): - if idx1[j] == -1: - raise ValueError('No NA values allowed') - else: - outbuf[i, j] = values[idx, idx1[j]] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: fv = fill_value - for i in range(n): - idx = idx0[i] + for j from 0 <= j < k: + idx = indexer[j] if idx == -1: - for j in range(k): + for i from 0 <= i < n: outbuf[i, j] = fv else: - for j in range(k): - if idx1[j] == -1: - outbuf[i, j] = fv - else: - outbuf[i, j] = values[idx, idx1[j]] - + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] -@cython.boundscheck(False) @cython.wraparound(False) -def diff_2d_float64(ndarray[float64_t, ndim=2] arr, - ndarray[float64_t, ndim=2] out, - Py_ssize_t periods, int axis): +@cython.boundscheck(False) +def take_2d_axis1_int16_float64(ndarray[int16_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, sx, sy + Py_ssize_t i, j, k, n, idx + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv - sx, sy = ( arr).shape - if arr.flags.f_contiguous: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for j in range(sy): - for i in range(start, stop): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy + n = len(values) + k = len(indexer) + + if False and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') else: - start, stop = 0, sy + periods - for j in range(start, stop): - for i in range(sx): - out[i, j] = arr[i, j] - arr[i, j - periods] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for i in range(start, stop): - for j in range(sy): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv else: - start, stop = 0, sy + periods - for i in range(sx): - for j in range(start, stop): - out[i, j] = arr[i, j] - arr[i, j - periods] -@cython.boundscheck(False) + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + @cython.wraparound(False) -def 
diff_2d_float32(ndarray[float32_t, ndim=2] arr, - ndarray[float32_t, ndim=2] out, - Py_ssize_t periods, int axis): +@cython.boundscheck(False) +def take_2d_axis1_int32_int32(ndarray[int32_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, sx, sy + Py_ssize_t i, j, k, n, idx + ndarray[int32_t, ndim=2] outbuf = out + int32_t fv - sx, sy = ( arr).shape - if arr.flags.f_contiguous: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for j in range(sy): - for i in range(start, stop): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy + n = len(values) + k = len(indexer) + + if True and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') else: - start, stop = 0, sy + periods - for j in range(start, stop): - for i in range(sx): - out[i, j] = arr[i, j] - arr[i, j - periods] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for i in range(start, stop): - for j in range(sy): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv else: - start, stop = 0, sy + periods - for i in range(sx): - for j in range(start, stop): - out[i, j] = arr[i, j] - arr[i, j - periods] -@cython.boundscheck(False) + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + @cython.wraparound(False) -def diff_2d_int8(ndarray[int8_t, ndim=2] arr, - ndarray[float32_t, ndim=2] out, - Py_ssize_t periods, int axis): +@cython.boundscheck(False) +def take_2d_axis1_int32_int64(ndarray[int32_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, sx, sy + Py_ssize_t i, j, k, n, idx + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv - sx, sy = ( arr).shape - if arr.flags.f_contiguous: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for j in range(sy): - for i in range(start, stop): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy + n = len(values) + k = len(indexer) + + if True and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') else: - start, stop = 0, sy + periods - for j in range(start, stop): - for i in range(sx): - out[i, j] = arr[i, j] - arr[i, j - periods] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for i in range(start, stop): - for j in range(sy): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv else: - start, stop = 0, sy + periods - for i in range(sx): - for j in range(start, stop): - out[i, j] = arr[i, j] - arr[i, j - periods] -@cython.boundscheck(False) + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + @cython.wraparound(False) -def diff_2d_int16(ndarray[int16_t, ndim=2] arr, - ndarray[float32_t, ndim=2] out, - Py_ssize_t periods, int axis): 
+@cython.boundscheck(False) +def take_2d_axis1_int32_float64(ndarray[int32_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, sx, sy + Py_ssize_t i, j, k, n, idx + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv - sx, sy = ( arr).shape - if arr.flags.f_contiguous: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for j in range(sy): - for i in range(start, stop): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy + n = len(values) + k = len(indexer) + + if False and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') else: - start, stop = 0, sy + periods - for j in range(start, stop): - for i in range(sx): - out[i, j] = arr[i, j] - arr[i, j - periods] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for i in range(start, stop): - for j in range(sy): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv else: - start, stop = 0, sy + periods - for i in range(sx): - for j in range(start, stop): - out[i, j] = arr[i, j] - arr[i, j - periods] -@cython.boundscheck(False) + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + @cython.wraparound(False) -def diff_2d_int32(ndarray[int32_t, ndim=2] arr, - ndarray[float64_t, ndim=2] out, - Py_ssize_t periods, int axis): +@cython.boundscheck(False) +def take_2d_axis1_int64_int64(ndarray[int64_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, sx, sy + Py_ssize_t i, j, k, n, idx + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv - sx, sy = ( arr).shape - if arr.flags.f_contiguous: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for j in range(sy): - for i in range(start, stop): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy + n = len(values) + k = len(indexer) + + if True and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') else: - start, stop = 0, sy + periods - for j in range(start, stop): - for i in range(sx): - out[i, j] = arr[i, j] - arr[i, j - periods] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for i in range(start, stop): - for j in range(sy): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv else: - start, stop = 0, sy + periods - for i in range(sx): - for j in range(start, stop): - out[i, j] = arr[i, j] - arr[i, j - periods] -@cython.boundscheck(False) + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + @cython.wraparound(False) -def diff_2d_int64(ndarray[int64_t, ndim=2] arr, - ndarray[float64_t, ndim=2] out, - Py_ssize_t periods, int axis): +@cython.boundscheck(False) +def take_2d_axis1_int64_float64(ndarray[int64_t, ndim=2] values, + 
ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, sx, sy + Py_ssize_t i, j, k, n, idx + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv - sx, sy = ( arr).shape - if arr.flags.f_contiguous: - if axis == 0: - if periods >= 0: - start, stop = periods, sx - else: - start, stop = 0, sx + periods - for j in range(sy): - for i in range(start, stop): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy + n = len(values) + k = len(indexer) + + if False and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') else: - start, stop = 0, sy + periods - for j in range(start, stop): - for i in range(sx): - out[i, j] = arr[i, j] - arr[i, j - periods] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: - if axis == 0: - if periods >= 0: - start, stop = periods, sx + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv else: - start, stop = 0, sx + periods - for i in range(start, stop): - for j in range(sy): - out[i, j] = arr[i, j] - arr[i - periods, j] - else: - if periods >= 0: - start, stop = periods, sy - else: - start, stop = 0, sy + periods - for i in range(sx): - for j in range(start, stop): - out[i, j] = arr[i, j] - arr[i, j - periods] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] @cython.wraparound(False) -@cython.wraparound(False) -def group_last_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' +@cython.boundscheck(False) +def take_2d_axis1_float32_float32(ndarray[float32_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, N, K, lab - float64_t val, count - ndarray[float64_t, ndim=2] resx - ndarray[int64_t, ndim=2] nobs - - nobs = np.zeros(( out).shape, dtype=np.int64) - resx = np.empty_like(out) - - N, K = ( values).shape - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] + Py_ssize_t i, j, k, n, idx + ndarray[float32_t, ndim=2] outbuf = out + float32_t fv - # not nan - if val == val: - nobs[lab, j] += 1 - resx[lab, j] = val + n = len(values) + k = len(indexer) - for i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan + if False and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') else: - out[i, j] = resx[i, j] -@cython.wraparound(False) + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + else: + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + @cython.wraparound(False) -def group_last_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' +@cython.boundscheck(False) +def take_2d_axis1_float32_float64(ndarray[float32_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, N, K, lab - float32_t val, count - ndarray[float32_t, ndim=2] resx - ndarray[int64_t, ndim=2] nobs - - nobs = np.zeros(( out).shape, dtype=np.int64) - resx = np.empty_like(out) + 
Py_ssize_t i, j, k, n, idx + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv - N, K = ( values).shape + n = len(values) + k = len(indexer) - for i in range(N): - lab = labels[i] - if lab < 0: - continue + if False and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + else: + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] - counts[lab] += 1 - for j in range(K): - val = values[i, j] +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_axis1_float64_float64(ndarray[float64_t, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv - # not nan - if val == val: - nobs[lab, j] += 1 - resx[lab, j] = val + n = len(values) + k = len(indexer) - for i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan + if False and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') else: - out[i, j] = resx[i, j] + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] + else: + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] @cython.wraparound(False) -@cython.wraparound(False) -def group_last_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' +@cython.boundscheck(False) +def take_2d_axis1_object_object(ndarray[object, ndim=2] values, + ndarray[int64_t] indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, N, K, ngroups, b - float64_t val, count - ndarray[float64_t, ndim=2] resx, nobs + Py_ssize_t i, j, k, n, idx + ndarray[object, ndim=2] outbuf = out + object fv - nobs = np.zeros_like(out) - resx = np.empty_like(out) + n = len(values) + k = len(indexer) - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) + if False and _checknan(fill_value): + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + raise ValueError('No NA values allowed') + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] else: - ngroups = len(bins) + 1 - - N, K = ( values).shape + fv = fill_value + for j from 0 <= j < k: + idx = indexer[j] + if idx == -1: + for i from 0 <= i < n: + outbuf[i, j] = fv + else: + for i from 0 <= i < n: + outbuf[i, j] = values[i, idx] - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - counts[b] += 1 - for j in range(K): - val = values[i, j] +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_bool_bool(ndarray[uint8_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[uint8_t, ndim=2] outbuf = out + uint8_t fv - # not nan - if val == val: - nobs[b, j] += 1 - resx[b, j] = val + n = len(idx0) + k = len(idx1) - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + 
if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') else: - out[i, j] = resx[i, j] -@cython.wraparound(False) + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] + @cython.wraparound(False) -def group_last_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' +@cython.boundscheck(False) +def take_2d_multi_bool_object(ndarray[uint8_t, ndim=2] values, + indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, N, K, ngroups, b - float32_t val, count - ndarray[float32_t, ndim=2] resx, nobs + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[object, ndim=2] outbuf = out + object fv - nobs = np.zeros_like(out) - resx = np.empty_like(out) + n = len(idx0) + k = len(idx1) - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = True if values[idx, idx1[j]] > 0 else False else: - ngroups = len(bins) + 1 + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = True if values[idx, idx1[j]] > 0 else False - N, K = ( values).shape - - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_int8_int8(ndarray[int8_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[int8_t, ndim=2] outbuf = out + int8_t fv - # not nan - if val == val: - nobs[b, j] += 1 - resx[b, j] = val + n = len(idx0) + k = len(idx1) - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') else: - out[i, j] = resx[i, j] + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] -@cython.boundscheck(False) @cython.wraparound(False) -def group_nth_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] labels, int64_t rank): - ''' - Only aggregates on axis=0 - ''' +@cython.boundscheck(False) +def take_2d_multi_int8_int32(ndarray[int8_t, ndim=2] values, + indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, 
N, K, lab - float64_t val, count - ndarray[float64_t, ndim=2] resx - ndarray[int64_t, ndim=2] nobs - - nobs = np.zeros(( out).shape, dtype=np.int64) - resx = np.empty_like(out) - - N, K = ( values).shape - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[int32_t, ndim=2] outbuf = out + int32_t fv - # not nan - if val == val: - nobs[lab, j] += 1 - if nobs[lab, j] == rank: - resx[lab, j] = val + n = len(idx0) + k = len(idx1) - for i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') else: - out[i, j] = resx[i, j] -@cython.boundscheck(False) + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] + @cython.wraparound(False) -def group_nth_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] labels, int64_t rank): - ''' - Only aggregates on axis=0 - ''' +@cython.boundscheck(False) +def take_2d_multi_int8_int64(ndarray[int8_t, ndim=2] values, + indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, N, K, lab - float32_t val, count - ndarray[float32_t, ndim=2] resx - ndarray[int64_t, ndim=2] nobs - - nobs = np.zeros(( out).shape, dtype=np.int64) - resx = np.empty_like(out) + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv - N, K = ( values).shape + n = len(idx0) + k = len(idx1) - for i in range(N): - lab = labels[i] - if lab < 0: - continue + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] - counts[lab] += 1 - for j in range(K): - val = values[i, j] +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_int8_float64(ndarray[int8_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv - # not nan - if val == val: - nobs[lab, j] += 1 - if nobs[lab, j] == rank: - resx[lab, j] = val + n = len(idx0) + k = len(idx1) - for i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') else: - out[i, j] = resx[i, j] + for j from 0 <= j < k: + if idx1[j] == -1: + raise 
ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] -@cython.boundscheck(False) @cython.wraparound(False) -def group_nth_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins, int64_t rank): - ''' - Only aggregates on axis=0 - ''' +@cython.boundscheck(False) +def take_2d_multi_int16_int16(ndarray[int16_t, ndim=2] values, + indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, N, K, ngroups, b - float64_t val, count - ndarray[float64_t, ndim=2] resx, nobs + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[int16_t, ndim=2] outbuf = out + int16_t fv - nobs = np.zeros_like(out) - resx = np.empty_like(out) + n = len(idx0) + k = len(idx1) - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] else: - ngroups = len(bins) + 1 - - N, K = ( values).shape + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_int16_int32(ndarray[int16_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[int32_t, ndim=2] outbuf = out + int32_t fv - counts[b] += 1 - for j in range(K): - val = values[i, j] + n = len(idx0) + k = len(idx1) - # not nan - if val == val: - nobs[b, j] += 1 - if nobs[b, j] == rank: - resx[b, j] = val + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_int16_int64(ndarray[int16_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv + + n = len(idx0) + k = len(idx1) + + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') else: - out[i, j] = resx[i, j] + for j from 0 <= j < k: + if idx1[j] == -1: + raise 
ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] + +@cython.wraparound(False) @cython.boundscheck(False) +def take_2d_multi_int16_float64(ndarray[int16_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv + + n = len(idx0) + k = len(idx1) + + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] + @cython.wraparound(False) -def group_nth_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins, int64_t rank): - ''' - Only aggregates on axis=0 - ''' +@cython.boundscheck(False) +def take_2d_multi_int32_int32(ndarray[int32_t, ndim=2] values, + indexer, + out, fill_value=np.nan): cdef: - Py_ssize_t i, j, N, K, ngroups, b - float32_t val, count - ndarray[float32_t, ndim=2] resx, nobs + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[int32_t, ndim=2] outbuf = out + int32_t fv - nobs = np.zeros_like(out) - resx = np.empty_like(out) + n = len(idx0) + k = len(idx1) - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] else: - ngroups = len(bins) + 1 + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] - N, K = ( values).shape +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_int32_int64(ndarray[int32_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 + n = len(idx0) + k = len(idx1) - counts[b] += 1 - for j in range(K): - val = values[i, j] + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for 
j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] - # not nan - if val == val: - nobs[b, j] += 1 - if nobs[b, j] == rank: - resx[b, j] = val +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_int32_float64(ndarray[int32_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan + n = len(idx0) + k = len(idx1) + + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_int64_int64(ndarray[int64_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[int64_t, ndim=2] outbuf = out + int64_t fv + + n = len(idx0) + k = len(idx1) + + if True and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_int64_float64(ndarray[int64_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv + + n = len(idx0) + k = len(idx1) + + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_float32_float32(ndarray[float32_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[float32_t, ndim=2] outbuf = out + float32_t fv + + n = len(idx0) + k = len(idx1) + + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values 
allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_float32_float64(ndarray[float32_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv + + n = len(idx0) + k = len(idx1) + + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_float64_float64(ndarray[float64_t, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[float64_t, ndim=2] outbuf = out + float64_t fv + + n = len(idx0) + k = len(idx1) + + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_2d_multi_object_object(ndarray[object, ndim=2] values, + indexer, + out, fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + ndarray[object, ndim=2] outbuf = out + object fv + + n = len(idx0) + k = len(idx1) + + if False and _checknan(fill_value): + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + raise ValueError('No NA values allowed') + else: + for j from 0 <= j < k: + if idx1[j] == -1: + raise ValueError('No NA values allowed') + else: + outbuf[i, j] = values[idx, idx1[j]] + else: + fv = fill_value + for i from 0 <= i < n: + idx = idx0[i] + if idx == -1: + for j from 0 <= j < k: + outbuf[i, j] = fv + else: + for j from 0 <= j < k: + if idx1[j] == -1: + outbuf[i, j] = fv + else: + outbuf[i, j] = values[idx, idx1[j]] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def diff_2d_float64(ndarray[float64_t, ndim=2] arr, + ndarray[float64_t, ndim=2] out, + Py_ssize_t periods, int axis): + cdef: + Py_ssize_t i, j, sx, sy + + sx, sy = ( arr).shape + if arr.flags.f_contiguous: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for j in range(sy): + for i in 
range(start, stop): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for j in range(start, stop): + for i in range(sx): + out[i, j] = arr[i, j] - arr[i, j - periods] + else: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for i in range(start, stop): + for j in range(sy): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for i in range(sx): + for j in range(start, stop): + out[i, j] = arr[i, j] - arr[i, j - periods] +@cython.boundscheck(False) +@cython.wraparound(False) +def diff_2d_float32(ndarray[float32_t, ndim=2] arr, + ndarray[float32_t, ndim=2] out, + Py_ssize_t periods, int axis): + cdef: + Py_ssize_t i, j, sx, sy + + sx, sy = ( arr).shape + if arr.flags.f_contiguous: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for j in range(sy): + for i in range(start, stop): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for j in range(start, stop): + for i in range(sx): + out[i, j] = arr[i, j] - arr[i, j - periods] + else: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for i in range(start, stop): + for j in range(sy): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for i in range(sx): + for j in range(start, stop): + out[i, j] = arr[i, j] - arr[i, j - periods] +@cython.boundscheck(False) +@cython.wraparound(False) +def diff_2d_int8(ndarray[int8_t, ndim=2] arr, + ndarray[float32_t, ndim=2] out, + Py_ssize_t periods, int axis): + cdef: + Py_ssize_t i, j, sx, sy + + sx, sy = ( arr).shape + if arr.flags.f_contiguous: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for j in range(sy): + for i in range(start, stop): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for j in range(start, stop): + for i in range(sx): + out[i, j] = arr[i, j] - arr[i, j - periods] + else: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for i in range(start, stop): + for j in range(sy): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for i in range(sx): + for j in range(start, stop): + out[i, j] = arr[i, j] - arr[i, j - periods] +@cython.boundscheck(False) +@cython.wraparound(False) +def diff_2d_int16(ndarray[int16_t, ndim=2] arr, + ndarray[float32_t, ndim=2] out, + Py_ssize_t periods, int axis): + cdef: + Py_ssize_t i, j, sx, sy + + sx, sy = ( arr).shape + if arr.flags.f_contiguous: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for j in range(sy): + for i in range(start, stop): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for j in range(start, stop): + for i in range(sx): + out[i, j] = arr[i, j] - arr[i, j - periods] + else: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + 
for i in range(start, stop): + for j in range(sy): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for i in range(sx): + for j in range(start, stop): + out[i, j] = arr[i, j] - arr[i, j - periods] +@cython.boundscheck(False) +@cython.wraparound(False) +def diff_2d_int32(ndarray[int32_t, ndim=2] arr, + ndarray[float64_t, ndim=2] out, + Py_ssize_t periods, int axis): + cdef: + Py_ssize_t i, j, sx, sy + + sx, sy = ( arr).shape + if arr.flags.f_contiguous: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for j in range(sy): + for i in range(start, stop): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for j in range(start, stop): + for i in range(sx): + out[i, j] = arr[i, j] - arr[i, j - periods] + else: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for i in range(start, stop): + for j in range(sy): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for i in range(sx): + for j in range(start, stop): + out[i, j] = arr[i, j] - arr[i, j - periods] +@cython.boundscheck(False) +@cython.wraparound(False) +def diff_2d_int64(ndarray[int64_t, ndim=2] arr, + ndarray[float64_t, ndim=2] out, + Py_ssize_t periods, int axis): + cdef: + Py_ssize_t i, j, sx, sy + + sx, sy = ( arr).shape + if arr.flags.f_contiguous: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for j in range(sy): + for i in range(start, stop): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy else: - out[i, j] = resx[i, j] + start, stop = 0, sy + periods + for j in range(start, stop): + for i in range(sx): + out[i, j] = arr[i, j] - arr[i, j - periods] + else: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for i in range(start, stop): + for j in range(sy): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for i in range(sx): + for j in range(start, stop): + out[i, j] = arr[i, j] - arr[i, j - periods] -@cython.boundscheck(False) @cython.wraparound(False) -def group_add_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] labels): +@cython.wraparound(False) +def group_last_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] labels): ''' Only aggregates on axis=0 ''' cdef: Py_ssize_t i, j, N, K, lab float64_t val, count - ndarray[float64_t, ndim=2] sumx, nobs + ndarray[float64_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) + nobs = np.zeros(( out).shape, dtype=np.int64) + resx = np.empty_like(out) N, K = ( values).shape - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - sumx[lab, j] += val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue + for i in range(N): + lab = labels[i] + if lab < 0: + continue - counts[lab] += 1 - val = values[i, 0] + 
counts[lab] += 1 + for j in range(K): + val = values[i, j] # not nan if val == val: - nobs[lab, 0] += 1 - sumx[lab, 0] += val + nobs[lab, j] += 1 + resx[lab, j] = val for i in range(len(counts)): for j in range(K): if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = sumx[i, j] -@cython.boundscheck(False) + out[i, j] = resx[i, j] @cython.wraparound(False) -def group_add_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] labels): +@cython.wraparound(False) +def group_last_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] labels): ''' Only aggregates on axis=0 ''' cdef: Py_ssize_t i, j, N, K, lab float32_t val, count - ndarray[float32_t, ndim=2] sumx, nobs + ndarray[float32_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) + nobs = np.zeros(( out).shape, dtype=np.int64) + resx = np.empty_like(out) N, K = ( values).shape - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - sumx[lab, j] += val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue + for i in range(N): + lab = labels[i] + if lab < 0: + continue - counts[lab] += 1 - val = values[i, 0] + counts[lab] += 1 + for j in range(K): + val = values[i, j] # not nan if val == val: - nobs[lab, 0] += 1 - sumx[lab, 0] += val + nobs[lab, j] += 1 + resx[lab, j] = val for i in range(len(counts)): for j in range(K): if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = sumx[i, j] + out[i, j] = resx[i, j] -@cython.boundscheck(False) @cython.wraparound(False) -def group_add_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): +@cython.wraparound(False) +def group_last_bin_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] bins): ''' Only aggregates on axis=0 ''' cdef: - Py_ssize_t i, j, N, K, ngroups, b, nbins + Py_ssize_t i, j, N, K, ngroups, b float64_t val, count - ndarray[float64_t, ndim=2] sumx, nobs + ndarray[float64_t, ndim=2] resx, nobs nobs = np.zeros_like(out) - sumx = np.zeros_like(out) + resx = np.empty_like(out) if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: ngroups = len(bins) + 1 + N, K = ( values).shape b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 - counts[b] += 1 - val = values[i, 0] + counts[b] += 1 + for j in range(K): + val = values[i, j] # not nan if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val + nobs[b, j] += 1 + resx[b, j] = val for i in range(ngroups): for j in range(K): if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = sumx[i, j] -@cython.boundscheck(False) + out[i, j] = resx[i, j] @cython.wraparound(False) -def group_add_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): +@cython.wraparound(False) +def group_last_bin_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + 
ndarray[float32_t, ndim=2] values, + ndarray[int64_t] bins): ''' Only aggregates on axis=0 ''' cdef: - Py_ssize_t i, j, N, K, ngroups, b, nbins + Py_ssize_t i, j, N, K, ngroups, b float32_t val, count - ndarray[float32_t, ndim=2] sumx, nobs + ndarray[float32_t, ndim=2] resx, nobs nobs = np.zeros_like(out) - sumx = np.zeros_like(out) + resx = np.empty_like(out) if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: ngroups = len(bins) + 1 + N, K = ( values).shape b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 - counts[b] += 1 - val = values[i, 0] + counts[b] += 1 + for j in range(K): + val = values[i, j] # not nan if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val + nobs[b, j] += 1 + resx[b, j] = val for i in range(ngroups): for j in range(K): if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = sumx[i, j] + out[i, j] = resx[i, j] @cython.boundscheck(False) @cython.wraparound(False) -def group_prod_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] labels): +def group_nth_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] labels, int64_t rank): ''' Only aggregates on axis=0 ''' cdef: Py_ssize_t i, j, N, K, lab float64_t val, count - ndarray[float64_t, ndim=2] prodx, nobs - - nobs = np.zeros_like(out) - prodx = np.ones_like(out) - - N, K = ( values).shape - - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - prodx[lab, j] *= val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue + ndarray[float64_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs - counts[lab] += 1 - val = values[i, 0] + nobs = np.zeros(( out).shape, dtype=np.int64) + resx = np.empty_like(out) + + N, K = ( values).shape + + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] # not nan if val == val: - nobs[lab, 0] += 1 - prodx[lab, 0] *= val + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val for i in range(len(counts)): for j in range(K): if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = prodx[i, j] + out[i, j] = resx[i, j] @cython.boundscheck(False) @cython.wraparound(False) -def group_prod_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] labels): +def group_nth_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] labels, int64_t rank): ''' Only aggregates on axis=0 ''' cdef: Py_ssize_t i, j, N, K, lab float32_t val, count - ndarray[float32_t, ndim=2] prodx, nobs + ndarray[float32_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs - nobs = np.zeros_like(out) - prodx = np.ones_like(out) + nobs = np.zeros(( out).shape, dtype=np.int64) + resx = np.empty_like(out) N, K = ( values).shape - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, 
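
# -- Reference sketch for the semantics of the new group_last_* / group_nth_*
#    kernels above: per group (labels >= 0, -1 meaning "skip this row"), keep
#    the last, respectively the rank-th (1-based), non-NaN value of each
#    column; groups with no observation come back as NaN.  The names and the
#    explicit `ngroups` argument below are illustrative assumptions, not part
#    of the diff:
import numpy as np

def group_nth_ref(values, labels, rank, ngroups):
    N, K = values.shape
    nobs = np.zeros((ngroups, K), dtype=np.int64)
    out = np.full((ngroups, K), np.nan)
    for i in range(N):
        lab = labels[i]
        if lab < 0:
            continue
        for j in range(K):
            val = values[i, j]
            if val == val:                 # skip NaN, like the kernel does
                nobs[lab, j] += 1
                if nobs[lab, j] == rank:   # rank-th non-NaN value wins
                    out[lab, j] = val
    return out

def group_last_ref(values, labels, ngroups):
    # "last" is the same accumulation, but every non-NaN value overwrites
    # the previous one, so the final write is the last non-NaN per group.
    out = np.full((ngroups, values.shape[1]), np.nan)
    for i, lab in enumerate(labels):
        if lab < 0:
            continue
        row = values[i]
        mask = row == row
        out[lab, mask] = row[mask]
    return out
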
j] += 1 - prodx[lab, j] *= val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue + for i in range(N): + lab = labels[i] + if lab < 0: + continue - counts[lab] += 1 - val = values[i, 0] + counts[lab] += 1 + for j in range(K): + val = values[i, j] # not nan if val == val: - nobs[lab, 0] += 1 - prodx[lab, 0] *= val + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val for i in range(len(counts)): for j in range(K): if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = prodx[i, j] + out[i, j] = resx[i, j] @cython.boundscheck(False) @cython.wraparound(False) -def group_prod_bin_float64(ndarray[float64_t, ndim=2] out, +def group_nth_bin_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): + ndarray[int64_t] bins, int64_t rank): ''' Only aggregates on axis=0 ''' cdef: Py_ssize_t i, j, N, K, ngroups, b float64_t val, count - ndarray[float64_t, ndim=2] prodx, nobs + ndarray[float64_t, ndim=2] resx, nobs nobs = np.zeros_like(out) - prodx = np.ones_like(out) + resx = np.empty_like(out) if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: ngroups = len(bins) + 1 + N, K = ( values).shape b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - prodx[b, j] *= val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 - counts[b] += 1 - val = values[i, 0] + counts[b] += 1 + for j in range(K): + val = values[i, j] # not nan if val == val: - nobs[b, 0] += 1 - prodx[b, 0] *= val + nobs[b, j] += 1 + if nobs[b, j] == rank: + resx[b, j] = val for i in range(ngroups): for j in range(K): if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = prodx[i, j] + out[i, j] = resx[i, j] @cython.boundscheck(False) @cython.wraparound(False) -def group_prod_bin_float32(ndarray[float32_t, ndim=2] out, +def group_nth_bin_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): + ndarray[int64_t] bins, int64_t rank): ''' Only aggregates on axis=0 ''' cdef: Py_ssize_t i, j, N, K, ngroups, b float32_t val, count - ndarray[float32_t, ndim=2] prodx, nobs + ndarray[float32_t, ndim=2] resx, nobs nobs = np.zeros_like(out) - prodx = np.ones_like(out) + resx = np.empty_like(out) if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: ngroups = len(bins) + 1 + N, K = ( values).shape b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - prodx[b, j] *= val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 - counts[b] += 1 - val = values[i, 0] + counts[b] += 1 + for j in range(K): + val = values[i, j] # not nan if val == val: - nobs[b, 0] += 1 - prodx[b, 0] *= val + nobs[b, j] += 1 + if nobs[b, j] == rank: + resx[b, j] = val for i in range(ngroups): for j in range(K): if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = prodx[i, j] + out[i, j] = resx[i, j] -@cython.wraparound(False) @cython.boundscheck(False) -def group_var_float64(ndarray[float64_t, ndim=2] out, +@cython.wraparound(False) +def group_add_float64(ndarray[float64_t, ndim=2] out, 
ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): + ''' + Only aggregates on axis=0 + ''' cdef: Py_ssize_t i, j, N, K, lab - float64_t val, ct - ndarray[float64_t, ndim=2] nobs, sumx, sumxx + float64_t val, count + ndarray[float64_t, ndim=2] sumx, nobs nobs = np.zeros_like(out) sumx = np.zeros_like(out) - sumxx = np.zeros_like(out) N, K = ( values).shape if K > 1: for i in range(N): - lab = labels[i] if lab < 0: continue counts[lab] += 1 - for j in range(K): val = values[i, j] @@ -5215,57 +5383,52 @@ def group_var_float64(ndarray[float64_t, ndim=2] out, if val == val: nobs[lab, j] += 1 sumx[lab, j] += val - sumxx[lab, j] += val * val else: for i in range(N): - lab = labels[i] if lab < 0: continue counts[lab] += 1 val = values[i, 0] + # not nan if val == val: nobs[lab, 0] += 1 sumx[lab, 0] += val - sumxx[lab, 0] += val * val - for i in range(len(counts)): for j in range(K): - ct = nobs[i, j] - if ct < 2: + if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) / - (ct * ct - ct)) -@cython.wraparound(False) + out[i, j] = sumx[i, j] @cython.boundscheck(False) -def group_var_float32(ndarray[float32_t, ndim=2] out, +@cython.wraparound(False) +def group_add_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] labels): + ''' + Only aggregates on axis=0 + ''' cdef: Py_ssize_t i, j, N, K, lab - float32_t val, ct - ndarray[float32_t, ndim=2] nobs, sumx, sumxx + float32_t val, count + ndarray[float32_t, ndim=2] sumx, nobs nobs = np.zeros_like(out) sumx = np.zeros_like(out) - sumxx = np.zeros_like(out) N, K = ( values).shape if K > 1: for i in range(N): - lab = labels[i] if lab < 0: continue counts[lab] += 1 - for j in range(K): val = values[i, j] @@ -5273,53 +5436,48 @@ def group_var_float32(ndarray[float32_t, ndim=2] out, if val == val: nobs[lab, j] += 1 sumx[lab, j] += val - sumxx[lab, j] += val * val else: for i in range(N): - lab = labels[i] if lab < 0: continue counts[lab] += 1 val = values[i, 0] + # not nan if val == val: nobs[lab, 0] += 1 sumx[lab, 0] += val - sumxx[lab, 0] += val * val - for i in range(len(counts)): for j in range(K): - ct = nobs[i, j] - if ct < 2: + if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) / - (ct * ct - ct)) + out[i, j] = sumx[i, j] -@cython.wraparound(False) @cython.boundscheck(False) -def group_var_bin_float64(ndarray[float64_t, ndim=2] out, +@cython.wraparound(False) +def group_add_bin_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] bins): - + ''' + Only aggregates on axis=0 + ''' cdef: - Py_ssize_t i, j, N, K, ngroups, b - float64_t val, ct - ndarray[float64_t, ndim=2] nobs, sumx, sumxx + Py_ssize_t i, j, N, K, ngroups, b, nbins + float64_t val, count + ndarray[float64_t, ndim=2] sumx, nobs nobs = np.zeros_like(out) sumx = np.zeros_like(out) - sumxx = np.zeros_like(out) if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: ngroups = len(bins) + 1 - N, K = ( values).shape b = 0 @@ -5329,7 +5487,6 @@ def group_var_bin_float64(ndarray[float64_t, ndim=2] out, b += 1 counts[b] += 1 - for j in range(K): val = values[i, j] @@ -5337,7 +5494,6 @@ def group_var_bin_float64(ndarray[float64_t, ndim=2] out, if val == val: nobs[b, j] += 1 sumx[b, j] += val - sumxx[b, j] += val * val else: for i in range(N): while b < ngroups - 1 and i >= bins[b]: @@ -5350,37 +5506,34 @@ def 
group_var_bin_float64(ndarray[float64_t, ndim=2] out, if val == val: nobs[b, 0] += 1 sumx[b, 0] += val - sumxx[b, 0] += val * val for i in range(ngroups): for j in range(K): - ct = nobs[i, j] - if ct < 2: + if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) / - (ct * ct - ct)) -@cython.wraparound(False) + out[i, j] = sumx[i, j] @cython.boundscheck(False) -def group_var_bin_float32(ndarray[float32_t, ndim=2] out, +@cython.wraparound(False) +def group_add_bin_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] bins): - + ''' + Only aggregates on axis=0 + ''' cdef: - Py_ssize_t i, j, N, K, ngroups, b - float32_t val, ct - ndarray[float32_t, ndim=2] nobs, sumx, sumxx + Py_ssize_t i, j, N, K, ngroups, b, nbins + float32_t val, count + ndarray[float32_t, ndim=2] sumx, nobs nobs = np.zeros_like(out) sumx = np.zeros_like(out) - sumxx = np.zeros_like(out) if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: ngroups = len(bins) + 1 - N, K = ( values).shape b = 0 @@ -5390,7 +5543,6 @@ def group_var_bin_float32(ndarray[float32_t, ndim=2] out, b += 1 counts[b] += 1 - for j in range(K): val = values[i, j] @@ -5398,7 +5550,6 @@ def group_var_bin_float32(ndarray[float32_t, ndim=2] out, if val == val: nobs[b, j] += 1 sumx[b, j] += val - sumxx[b, j] += val * val else: for i in range(N): while b < ngroups - 1 and i >= bins[b]: @@ -5411,30 +5562,30 @@ def group_var_bin_float32(ndarray[float32_t, ndim=2] out, if val == val: nobs[b, 0] += 1 sumx[b, 0] += val - sumxx[b, 0] += val * val for i in range(ngroups): for j in range(K): - ct = nobs[i, j] - if ct < 2: + if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) / - (ct * ct - ct)) + out[i, j] = sumx[i, j] -@cython.wraparound(False) @cython.boundscheck(False) -def group_mean_float64(ndarray[float64_t, ndim=2] out, +@cython.wraparound(False) +def group_prod_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): + ''' + Only aggregates on axis=0 + ''' cdef: Py_ssize_t i, j, N, K, lab float64_t val, count - ndarray[float64_t, ndim=2] sumx, nobs + ndarray[float64_t, ndim=2] prodx, nobs nobs = np.zeros_like(out) - sumx = np.zeros_like(out) + prodx = np.ones_like(out) N, K = ( values).shape @@ -5447,10 +5598,11 @@ def group_mean_float64(ndarray[float64_t, ndim=2] out, counts[lab] += 1 for j in range(K): val = values[i, j] + # not nan if val == val: nobs[lab, j] += 1 - sumx[lab, j] += val + prodx[lab, j] *= val else: for i in range(N): lab = labels[i] @@ -5459,31 +5611,34 @@ def group_mean_float64(ndarray[float64_t, ndim=2] out, counts[lab] += 1 val = values[i, 0] + # not nan if val == val: nobs[lab, 0] += 1 - sumx[lab, 0] += val + prodx[lab, 0] *= val for i in range(len(counts)): for j in range(K): - count = nobs[i, j] if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = sumx[i, j] / count -@cython.wraparound(False) + out[i, j] = prodx[i, j] @cython.boundscheck(False) -def group_mean_float32(ndarray[float32_t, ndim=2] out, +@cython.wraparound(False) +def group_prod_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] labels): + ''' + Only aggregates on axis=0 + ''' cdef: Py_ssize_t i, j, N, K, lab float32_t val, count - ndarray[float32_t, ndim=2] sumx, nobs + ndarray[float32_t, ndim=2] prodx, nobs nobs = np.zeros_like(out) - sumx = np.zeros_like(out) + prodx 
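
# -- Reference sketch for group_add_* and the *_bin_* convention used above:
#    the label-based kernels take one int64 label per row (-1 = skip), while
#    the bin-based kernels take increasing group end positions `bins`, with
#    ngroups = len(bins) when the last edge equals len(values) and
#    len(bins) + 1 otherwise.  NaNs are skipped and empty groups yield NaN.
#    Function names below are assumptions for illustration:
import numpy as np

def group_add_ref(values, labels, ngroups):
    N, K = values.shape
    nobs = np.zeros((ngroups, K))
    sumx = np.zeros((ngroups, K))
    for i in range(N):
        lab = labels[i]
        if lab < 0:
            continue
        row = values[i]
        mask = row == row                     # non-NaN entries only
        nobs[lab] += mask
        sumx[lab] += np.where(mask, row, 0.0)
    return np.where(nobs > 0, sumx, np.nan)

def bins_to_labels(bins, nvalues):
    # Translate the bin-edge convention of the *_bin_* kernels into labels:
    # group b covers rows bins[b - 1] <= i < bins[b] (group 0 starts at 0).
    ngroups = len(bins) if bins[-1] == nvalues else len(bins) + 1
    labels = np.zeros(nvalues, dtype=np.int64)
    b = 0
    for i in range(nvalues):
        while b < ngroups - 1 and i >= bins[b]:
            b += 1
        labels[i] = b
    return labels, ngroups
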
= np.ones_like(out) N, K = ( values).shape @@ -5496,10 +5651,11 @@ def group_mean_float32(ndarray[float32_t, ndim=2] out, counts[lab] += 1 for j in range(K): val = values[i, j] + # not nan if val == val: nobs[lab, j] += 1 - sumx[lab, j] += val + prodx[lab, j] *= val else: for i in range(N): lab = labels[i] @@ -5508,37 +5664,41 @@ def group_mean_float32(ndarray[float32_t, ndim=2] out, counts[lab] += 1 val = values[i, 0] + # not nan if val == val: nobs[lab, 0] += 1 - sumx[lab, 0] += val + prodx[lab, 0] *= val for i in range(len(counts)): for j in range(K): - count = nobs[i, j] if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = sumx[i, j] / count - + out[i, j] = prodx[i, j] -def group_mean_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): +@cython.boundscheck(False) +@cython.wraparound(False) +def group_prod_bin_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] bins): + ''' + Only aggregates on axis=0 + ''' cdef: Py_ssize_t i, j, N, K, ngroups, b float64_t val, count - ndarray[float64_t, ndim=2] sumx, nobs + ndarray[float64_t, ndim=2] prodx, nobs nobs = np.zeros_like(out) - sumx = np.zeros_like(out) + prodx = np.ones_like(out) - N, K = ( values).shape if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: ngroups = len(bins) + 1 + N, K = ( values).shape b = 0 if K > 1: @@ -5553,7 +5713,7 @@ def group_mean_bin_float64(ndarray[float64_t, ndim=2] out, # not nan if val == val: nobs[b, j] += 1 - sumx[b, j] += val + prodx[b, j] *= val else: for i in range(N): while b < ngroups - 1 and i >= bins[b]: @@ -5565,33 +5725,36 @@ def group_mean_bin_float64(ndarray[float64_t, ndim=2] out, # not nan if val == val: nobs[b, 0] += 1 - sumx[b, 0] += val + prodx[b, 0] *= val for i in range(ngroups): for j in range(K): - count = nobs[i, j] if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = sumx[i, j] / count - -def group_mean_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): + out[i, j] = prodx[i, j] +@cython.boundscheck(False) +@cython.wraparound(False) +def group_prod_bin_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] bins): + ''' + Only aggregates on axis=0 + ''' cdef: Py_ssize_t i, j, N, K, ngroups, b float32_t val, count - ndarray[float32_t, ndim=2] sumx, nobs + ndarray[float32_t, ndim=2] prodx, nobs nobs = np.zeros_like(out) - sumx = np.zeros_like(out) + prodx = np.ones_like(out) - N, K = ( values).shape if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: ngroups = len(bins) + 1 + N, K = ( values).shape b = 0 if K > 1: @@ -5606,7 +5769,7 @@ def group_mean_bin_float32(ndarray[float32_t, ndim=2] out, # not nan if val == val: nobs[b, j] += 1 - sumx[b, j] += val + prodx[b, j] *= val else: for i in range(N): while b < ngroups - 1 and i >= bins[b]: @@ -5618,149 +5781,147 @@ def group_mean_bin_float32(ndarray[float32_t, ndim=2] out, # not nan if val == val: nobs[b, 0] += 1 - sumx[b, 0] += val + prodx[b, 0] *= val for i in range(ngroups): for j in range(K): - count = nobs[i, j] if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = sumx[i, j] / count + out[i, j] = prodx[i, j] @cython.wraparound(False) @cython.boundscheck(False) -def group_min_float64(ndarray[float64_t, ndim=2] out, +def group_var_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, 
ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' cdef: Py_ssize_t i, j, N, K, lab - float64_t val, count - ndarray[float64_t, ndim=2] minx, nobs + float64_t val, ct + ndarray[float64_t, ndim=2] nobs, sumx, sumxx nobs = np.zeros_like(out) - - minx = np.empty_like(out) - minx.fill(np.inf) + sumx = np.zeros_like(out) + sumxx = np.zeros_like(out) N, K = ( values).shape if K > 1: for i in range(N): + lab = labels[i] if lab < 0: continue counts[lab] += 1 + for j in range(K): val = values[i, j] # not nan if val == val: nobs[lab, j] += 1 - if val < minx[lab, j]: - minx[lab, j] = val + sumx[lab, j] += val + sumxx[lab, j] += val * val else: for i in range(N): + lab = labels[i] if lab < 0: continue counts[lab] += 1 val = values[i, 0] - # not nan if val == val: nobs[lab, 0] += 1 - if val < minx[lab, 0]: - minx[lab, 0] = val + sumx[lab, 0] += val + sumxx[lab, 0] += val * val + for i in range(len(counts)): for j in range(K): - if nobs[i, j] == 0: + ct = nobs[i, j] + if ct < 2: out[i, j] = nan else: - out[i, j] = minx[i, j] + out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) / + (ct * ct - ct)) @cython.wraparound(False) @cython.boundscheck(False) -def group_min_float32(ndarray[float32_t, ndim=2] out, +def group_var_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' cdef: Py_ssize_t i, j, N, K, lab - float32_t val, count - ndarray[float32_t, ndim=2] minx, nobs + float32_t val, ct + ndarray[float32_t, ndim=2] nobs, sumx, sumxx nobs = np.zeros_like(out) - - minx = np.empty_like(out) - minx.fill(np.inf) + sumx = np.zeros_like(out) + sumxx = np.zeros_like(out) N, K = ( values).shape if K > 1: for i in range(N): + lab = labels[i] if lab < 0: continue counts[lab] += 1 + for j in range(K): val = values[i, j] # not nan if val == val: nobs[lab, j] += 1 - if val < minx[lab, j]: - minx[lab, j] = val + sumx[lab, j] += val + sumxx[lab, j] += val * val else: for i in range(N): + lab = labels[i] if lab < 0: continue counts[lab] += 1 val = values[i, 0] - # not nan if val == val: nobs[lab, 0] += 1 - if val < minx[lab, 0]: - minx[lab, 0] = val + sumx[lab, 0] += val + sumxx[lab, 0] += val * val + for i in range(len(counts)): for j in range(K): - if nobs[i, j] == 0: + ct = nobs[i, j] + if ct < 2: out[i, j] = nan else: - out[i, j] = minx[i, j] + out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) / + (ct * ct - ct)) @cython.wraparound(False) @cython.boundscheck(False) -def group_min_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' +def group_var_bin_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] bins): + cdef: Py_ssize_t i, j, N, K, ngroups, b - float64_t val, count - ndarray[float64_t, ndim=2] minx, nobs + float64_t val, ct + ndarray[float64_t, ndim=2] nobs, sumx, sumxx nobs = np.zeros_like(out) - - minx = np.empty_like(out) - minx.fill(np.inf) + sumx = np.zeros_like(out) + sumxx = np.zeros_like(out) if bins[len(bins) - 1] == len(values): ngroups = len(bins) @@ -5776,14 +5937,15 @@ def group_min_bin_float64(ndarray[float64_t, ndim=2] out, b += 1 counts[b] += 1 + for j in range(K): val = values[i, j] # not nan if val == val: nobs[b, j] += 1 - if val < minx[b, j]: - minx[b, j] = val + sumx[b, j] += val + sumxx[b, j] += val * val else: for i in 
range(N): while b < ngroups - 1 and i >= bins[b]: @@ -5795,33 +5957,32 @@ def group_min_bin_float64(ndarray[float64_t, ndim=2] out, # not nan if val == val: nobs[b, 0] += 1 - if val < minx[b, 0]: - minx[b, 0] = val + sumx[b, 0] += val + sumxx[b, 0] += val * val for i in range(ngroups): for j in range(K): - if nobs[i, j] == 0: + ct = nobs[i, j] + if ct < 2: out[i, j] = nan else: - out[i, j] = minx[i, j] + out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) / + (ct * ct - ct)) @cython.wraparound(False) @cython.boundscheck(False) -def group_min_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' +def group_var_bin_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] bins): + cdef: Py_ssize_t i, j, N, K, ngroups, b - float32_t val, count - ndarray[float32_t, ndim=2] minx, nobs + float32_t val, ct + ndarray[float32_t, ndim=2] nobs, sumx, sumxx nobs = np.zeros_like(out) - - minx = np.empty_like(out) - minx.fill(np.inf) + sumx = np.zeros_like(out) + sumxx = np.zeros_like(out) if bins[len(bins) - 1] == len(values): ngroups = len(bins) @@ -5837,14 +5998,15 @@ def group_min_bin_float32(ndarray[float32_t, ndim=2] out, b += 1 counts[b] += 1 + for j in range(K): val = values[i, j] # not nan if val == val: nobs[b, j] += 1 - if val < minx[b, j]: - minx[b, j] = val + sumx[b, j] += val + sumxx[b, j] += val * val else: for i in range(N): while b < ngroups - 1 and i >= bins[b]: @@ -5856,34 +6018,31 @@ def group_min_bin_float32(ndarray[float32_t, ndim=2] out, # not nan if val == val: nobs[b, 0] += 1 - if val < minx[b, 0]: - minx[b, 0] = val + sumx[b, 0] += val + sumxx[b, 0] += val * val for i in range(ngroups): for j in range(K): - if nobs[i, j] == 0: + ct = nobs[i, j] + if ct < 2: out[i, j] = nan else: - out[i, j] = minx[i, j] + out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) / + (ct * ct - ct)) @cython.wraparound(False) @cython.boundscheck(False) -def group_max_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' +def group_mean_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] labels): cdef: Py_ssize_t i, j, N, K, lab float64_t val, count - ndarray[float64_t, ndim=2] maxx, nobs + ndarray[float64_t, ndim=2] sumx, nobs nobs = np.zeros_like(out) - - maxx = np.empty_like(out) - maxx.fill(-np.inf) + sumx = np.zeros_like(out) N, K = ( values).shape @@ -5896,12 +6055,10 @@ def group_max_float64(ndarray[float64_t, ndim=2] out, counts[lab] += 1 for j in range(K): val = values[i, j] - # not nan if val == val: nobs[lab, j] += 1 - if val > maxx[lab, j]: - maxx[lab, j] = val + sumx[lab, j] += val else: for i in range(N): lab = labels[i] @@ -5910,37 +6067,31 @@ def group_max_float64(ndarray[float64_t, ndim=2] out, counts[lab] += 1 val = values[i, 0] - # not nan if val == val: nobs[lab, 0] += 1 - if val > maxx[lab, 0]: - maxx[lab, 0] = val + sumx[lab, 0] += val for i in range(len(counts)): for j in range(K): + count = nobs[i, j] if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = maxx[i, j] + out[i, j] = sumx[i, j] / count @cython.wraparound(False) @cython.boundscheck(False) -def group_max_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] labels): - ''' 
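
# -- Reference note: the group_var_* kernels above accumulate per-group sums
#    (sumx) and sums of squares (sumxx) of the non-NaN values and then emit
#        (ct * sumxx - sumx**2) / (ct**2 - ct)
#    which is algebraically the unbiased (ddof=1) sample variance
#    sum((x - mean)**2) / (ct - 1), with NaN wherever a group has fewer than
#    two observations.  Quick numerical check of that identity:
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=11)

ct = len(x)
sumx = x.sum()
sumxx = (x * x).sum()
kernel_style = (ct * sumxx - sumx * sumx) / (ct * ct - ct)

assert np.isclose(kernel_style, x.var(ddof=1))
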
- Only aggregates on axis=0 - ''' +def group_mean_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] labels): cdef: Py_ssize_t i, j, N, K, lab float32_t val, count - ndarray[float32_t, ndim=2] maxx, nobs + ndarray[float32_t, ndim=2] sumx, nobs nobs = np.zeros_like(out) - - maxx = np.empty_like(out) - maxx.fill(-np.inf) + sumx = np.zeros_like(out) N, K = ( values).shape @@ -5953,12 +6104,10 @@ def group_max_float32(ndarray[float32_t, ndim=2] out, counts[lab] += 1 for j in range(K): val = values[i, j] - # not nan if val == val: nobs[lab, j] += 1 - if val > maxx[lab, j]: - maxx[lab, j] = val + sumx[lab, j] += val else: for i in range(N): lab = labels[i] @@ -5967,45 +6116,38 @@ def group_max_float32(ndarray[float32_t, ndim=2] out, counts[lab] += 1 val = values[i, 0] - # not nan if val == val: nobs[lab, 0] += 1 - if val > maxx[lab, 0]: - maxx[lab, 0] = val + sumx[lab, 0] += val for i in range(len(counts)): for j in range(K): + count = nobs[i, j] if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = maxx[i, j] + out[i, j] = sumx[i, j] / count -@cython.wraparound(False) -@cython.boundscheck(False) -def group_max_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' + +def group_mean_bin_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] bins): cdef: Py_ssize_t i, j, N, K, ngroups, b float64_t val, count - ndarray[float64_t, ndim=2] maxx, nobs + ndarray[float64_t, ndim=2] sumx, nobs nobs = np.zeros_like(out) - maxx = np.empty_like(out) - maxx.fill(-np.inf) + sumx = np.zeros_like(out) + N, K = ( values).shape if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: ngroups = len(bins) + 1 - N, K = ( values).shape - b = 0 if K > 1: for i in range(N): @@ -6019,8 +6161,7 @@ def group_max_bin_float64(ndarray[float64_t, ndim=2] out, # not nan if val == val: nobs[b, j] += 1 - if val > maxx[b, j]: - maxx[b, j] = val + sumx[b, j] += val else: for i in range(N): while b < ngroups - 1 and i >= bins[b]: @@ -6032,40 +6173,34 @@ def group_max_bin_float64(ndarray[float64_t, ndim=2] out, # not nan if val == val: nobs[b, 0] += 1 - if val > maxx[b, 0]: - maxx[b, 0] = val + sumx[b, 0] += val for i in range(ngroups): for j in range(K): + count = nobs[i, j] if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = maxx[i, j] -@cython.wraparound(False) -@cython.boundscheck(False) -def group_max_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' + out[i, j] = sumx[i, j] / count + +def group_mean_bin_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] bins): cdef: Py_ssize_t i, j, N, K, ngroups, b float32_t val, count - ndarray[float32_t, ndim=2] maxx, nobs + ndarray[float32_t, ndim=2] sumx, nobs nobs = np.zeros_like(out) - maxx = np.empty_like(out) - maxx.fill(-np.inf) + sumx = np.zeros_like(out) + N, K = ( values).shape if bins[len(bins) - 1] == len(values): ngroups = len(bins) else: ngroups = len(bins) + 1 - N, K = ( values).shape - b = 0 if K > 1: for i in range(N): @@ -6079,8 +6214,7 @@ def group_max_bin_float32(ndarray[float32_t, ndim=2] out, # not nan if val == val: nobs[b, j] += 1 - if val > maxx[b, j]: - maxx[b, j] = val + sumx[b, j] += val else: for i in 
range(N): while b < ngroups - 1 and i >= bins[b]: @@ -6092,103 +6226,149 @@ def group_max_bin_float32(ndarray[float32_t, ndim=2] out, # not nan if val == val: nobs[b, 0] += 1 - if val > maxx[b, 0]: - maxx[b, 0] = val + sumx[b, 0] += val for i in range(ngroups): for j in range(K): + count = nobs[i, j] if nobs[i, j] == 0: out[i, j] = nan else: - out[i, j] = maxx[i, j] + out[i, j] = sumx[i, j] / count @cython.wraparound(False) @cython.boundscheck(False) -def group_ohlc_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): +def group_min_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] labels): ''' Only aggregates on axis=0 ''' cdef: - Py_ssize_t i, j, N, K, ngroups, b + Py_ssize_t i, j, N, K, lab float64_t val, count - float64_t vopen, vhigh, vlow, vclose, NA - bint got_first = 0 + ndarray[float64_t, ndim=2] minx, nobs - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 + nobs = np.zeros_like(out) + + minx = np.empty_like(out) + minx.fill(np.inf) N, K = ( values).shape - if out.shape[1] != 4: - raise ValueError('Output array must have 4 columns') + if K > 1: + for i in range(N): + lab = labels[i] + if lab < 0: + continue - NA = np.nan + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + if val < minx[lab, j]: + minx[lab, j] = val + else: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + val = values[i, 0] + + # not nan + if val == val: + nobs[lab, 0] += 1 + if val < minx[lab, 0]: + minx[lab, 0] = val + + for i in range(len(counts)): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = minx[i, j] +@cython.wraparound(False) +@cython.boundscheck(False) +def group_min_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] labels): + ''' + Only aggregates on axis=0 + ''' + cdef: + Py_ssize_t i, j, N, K, lab + float32_t val, count + ndarray[float32_t, ndim=2] minx, nobs + + nobs = np.zeros_like(out) + + minx = np.empty_like(out) + minx.fill(np.inf) + + N, K = ( values).shape - b = 0 if K > 1: - raise NotImplementedError + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + if val < minx[lab, j]: + minx[lab, j] = val else: for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - if not got_first: - out[b, 0] = NA - out[b, 1] = NA - out[b, 2] = NA - out[b, 3] = NA - else: - out[b, 0] = vopen - out[b, 1] = vhigh - out[b, 2] = vlow - out[b, 3] = vclose - b += 1 - got_first = 0 + lab = labels[i] + if lab < 0: + continue - counts[b] += 1 + counts[lab] += 1 val = values[i, 0] # not nan if val == val: - if not got_first: - got_first = 1 - vopen = val - vlow = val - vhigh = val - else: - if val < vlow: - vlow = val - if val > vhigh: - vhigh = val - vclose = val + nobs[lab, 0] += 1 + if val < minx[lab, 0]: + minx[lab, 0] = val + + for i in range(len(counts)): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = minx[i, j] - if not got_first: - out[b, 0] = NA - out[b, 1] = NA - out[b, 2] = NA - out[b, 3] = NA - else: - out[b, 0] = vopen - out[b, 1] = vhigh - out[b, 2] = vlow - out[b, 3] = vclose @cython.wraparound(False) @cython.boundscheck(False) -def 
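
# -- Reference sketch: group_mean_* is the group_add_* accumulation divided by
#    the per-group count of non-NaN values, with NaN for empty groups (the
#    kernels skip NaN inputs rather than propagating them).  Names below are
#    illustrative:
import numpy as np

def group_mean_ref(values, labels, ngroups):
    N, K = values.shape
    nobs = np.zeros((ngroups, K))
    sumx = np.zeros((ngroups, K))
    for i in range(N):
        lab = labels[i]
        if lab < 0:
            continue
        row = values[i]
        mask = row == row
        nobs[lab] += mask
        sumx[lab] += np.where(mask, row, 0.0)
    out = np.full((ngroups, K), np.nan)
    np.divide(sumx, nobs, out=out, where=nobs > 0)  # NaN stays where nobs == 0
    return out
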
group_ohlc_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): +def group_min_bin_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] bins): ''' Only aggregates on axis=0 ''' cdef: Py_ssize_t i, j, N, K, ngroups, b - float32_t val, count - float32_t vopen, vhigh, vlow, vclose, NA - bint got_first = 0 + float64_t val, count + ndarray[float64_t, ndim=2] minx, nobs + + nobs = np.zeros_like(out) + + minx = np.empty_like(out) + minx.fill(np.inf) if bins[len(bins) - 1] == len(values): ngroups = len(bins) @@ -6197,691 +6377,723 @@ def group_ohlc_float32(ndarray[float32_t, ndim=2] out, N, K = ( values).shape - if out.shape[1] != 4: - raise ValueError('Output array must have 4 columns') - - NA = np.nan - b = 0 if K > 1: - raise NotImplementedError + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 + + counts[b] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[b, j] += 1 + if val < minx[b, j]: + minx[b, j] = val else: for i in range(N): while b < ngroups - 1 and i >= bins[b]: - if not got_first: - out[b, 0] = NA - out[b, 1] = NA - out[b, 2] = NA - out[b, 3] = NA - else: - out[b, 0] = vopen - out[b, 1] = vhigh - out[b, 2] = vlow - out[b, 3] = vclose b += 1 - got_first = 0 counts[b] += 1 val = values[i, 0] # not nan if val == val: - if not got_first: - got_first = 1 - vopen = val - vlow = val - vhigh = val - else: - if val < vlow: - vlow = val - if val > vhigh: - vhigh = val - vclose = val - - if not got_first: - out[b, 0] = NA - out[b, 1] = NA - out[b, 2] = NA - out[b, 3] = NA - else: - out[b, 0] = vopen - out[b, 1] = vhigh - out[b, 2] = vlow - out[b, 3] = vclose + nobs[b, 0] += 1 + if val < minx[b, 0]: + minx[b, 0] = val + for i in range(ngroups): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = minx[i, j] @cython.wraparound(False) @cython.boundscheck(False) -def left_join_indexer_unique_float64(ndarray[float64_t] left, - ndarray[float64_t] right): +def group_min_bin_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] bins): + ''' + Only aggregates on axis=0 + ''' cdef: - Py_ssize_t i, j, nleft, nright - ndarray[int64_t] indexer - float64_t lval, rval - - i = 0 - j = 0 - nleft = len(left) - nright = len(right) - - indexer = np.empty(nleft, dtype=np.int64) - while True: - if i == nleft: - break - - if j == nright: - indexer[i] = -1 - i += 1 - continue + Py_ssize_t i, j, N, K, ngroups, b + float32_t val, count + ndarray[float32_t, ndim=2] minx, nobs - rval = right[j] + nobs = np.zeros_like(out) - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 + minx = np.empty_like(out) + minx.fill(np.inf) - if left[i] == right[j]: - indexer[i] = j - i += 1 - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 - j += 1 - elif left[i] > rval: - indexer[i] = -1 - j += 1 - else: - indexer[i] = -1 - i += 1 - return indexer + if bins[len(bins) - 1] == len(values): + ngroups = len(bins) + else: + ngroups = len(bins) + 1 -@cython.wraparound(False) -@cython.boundscheck(False) -def left_join_indexer_unique_float32(ndarray[float32_t] left, - ndarray[float32_t] right): - cdef: - Py_ssize_t i, j, nleft, nright - ndarray[int64_t] indexer - float32_t lval, rval + N, K = ( values).shape - i = 0 - j = 0 - nleft = len(left) - nright = len(right) + b = 0 + if K > 1: + for i in range(N): + while b 
< ngroups - 1 and i >= bins[b]: + b += 1 - indexer = np.empty(nleft, dtype=np.int64) - while True: - if i == nleft: - break + counts[b] += 1 + for j in range(K): + val = values[i, j] - if j == nright: - indexer[i] = -1 - i += 1 - continue + # not nan + if val == val: + nobs[b, j] += 1 + if val < minx[b, j]: + minx[b, j] = val + else: + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 - rval = right[j] + counts[b] += 1 + val = values[i, 0] - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 + # not nan + if val == val: + nobs[b, 0] += 1 + if val < minx[b, 0]: + minx[b, 0] = val - if left[i] == right[j]: - indexer[i] = j - i += 1 - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 - j += 1 - elif left[i] > rval: - indexer[i] = -1 - j += 1 - else: - indexer[i] = -1 - i += 1 - return indexer + for i in range(ngroups): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = minx[i, j] @cython.wraparound(False) @cython.boundscheck(False) -def left_join_indexer_unique_object(ndarray[object] left, - ndarray[object] right): +def group_max_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] labels): + ''' + Only aggregates on axis=0 + ''' cdef: - Py_ssize_t i, j, nleft, nright - ndarray[int64_t] indexer - object lval, rval + Py_ssize_t i, j, N, K, lab + float64_t val, count + ndarray[float64_t, ndim=2] maxx, nobs - i = 0 - j = 0 - nleft = len(left) - nright = len(right) + nobs = np.zeros_like(out) - indexer = np.empty(nleft, dtype=np.int64) - while True: - if i == nleft: - break + maxx = np.empty_like(out) + maxx.fill(-np.inf) - if j == nright: - indexer[i] = -1 - i += 1 - continue + N, K = ( values).shape - rval = right[j] + if K > 1: + for i in range(N): + lab = labels[i] + if lab < 0: + continue - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 + counts[lab] += 1 + for j in range(K): + val = values[i, j] - if left[i] == right[j]: - indexer[i] = j - i += 1 - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 - j += 1 - elif left[i] > rval: - indexer[i] = -1 - j += 1 - else: - indexer[i] = -1 - i += 1 - return indexer + # not nan + if val == val: + nobs[lab, j] += 1 + if val > maxx[lab, j]: + maxx[lab, j] = val + else: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + val = values[i, 0] + + # not nan + if val == val: + nobs[lab, 0] += 1 + if val > maxx[lab, 0]: + maxx[lab, 0] = val + for i in range(len(counts)): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = maxx[i, j] @cython.wraparound(False) @cython.boundscheck(False) -def left_join_indexer_unique_int8(ndarray[int8_t] left, - ndarray[int8_t] right): +def group_max_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] labels): + ''' + Only aggregates on axis=0 + ''' cdef: - Py_ssize_t i, j, nleft, nright - ndarray[int64_t] indexer - int8_t lval, rval + Py_ssize_t i, j, N, K, lab + float32_t val, count + ndarray[float32_t, ndim=2] maxx, nobs - i = 0 - j = 0 - nleft = len(left) - nright = len(right) + nobs = np.zeros_like(out) - indexer = np.empty(nleft, dtype=np.int64) - while True: - if i == nleft: - break + maxx = np.empty_like(out) + maxx.fill(-np.inf) - if j == nright: - indexer[i] = -1 - i += 1 - continue + N, K = ( values).shape - rval = right[j] + if K > 1: + for i in range(N): + lab = labels[i] + if lab < 0: + 
continue - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 + counts[lab] += 1 + for j in range(K): + val = values[i, j] - if left[i] == right[j]: - indexer[i] = j - i += 1 - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 - j += 1 - elif left[i] > rval: - indexer[i] = -1 - j += 1 - else: - indexer[i] = -1 - i += 1 - return indexer + # not nan + if val == val: + nobs[lab, j] += 1 + if val > maxx[lab, j]: + maxx[lab, j] = val + else: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + val = values[i, 0] + + # not nan + if val == val: + nobs[lab, 0] += 1 + if val > maxx[lab, 0]: + maxx[lab, 0] = val + + for i in range(len(counts)): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = maxx[i, j] @cython.wraparound(False) @cython.boundscheck(False) -def left_join_indexer_unique_int16(ndarray[int16_t] left, - ndarray[int16_t] right): +def group_max_bin_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] bins): + ''' + Only aggregates on axis=0 + ''' cdef: - Py_ssize_t i, j, nleft, nright - ndarray[int64_t] indexer - int16_t lval, rval + Py_ssize_t i, j, N, K, ngroups, b + float64_t val, count + ndarray[float64_t, ndim=2] maxx, nobs - i = 0 - j = 0 - nleft = len(left) - nright = len(right) + nobs = np.zeros_like(out) + maxx = np.empty_like(out) + maxx.fill(-np.inf) - indexer = np.empty(nleft, dtype=np.int64) - while True: - if i == nleft: - break + if bins[len(bins) - 1] == len(values): + ngroups = len(bins) + else: + ngroups = len(bins) + 1 - if j == nright: - indexer[i] = -1 - i += 1 - continue + N, K = ( values).shape - rval = right[j] + b = 0 + if K > 1: + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 + counts[b] += 1 + for j in range(K): + val = values[i, j] - if left[i] == right[j]: - indexer[i] = j - i += 1 - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 - j += 1 - elif left[i] > rval: - indexer[i] = -1 - j += 1 - else: - indexer[i] = -1 - i += 1 - return indexer + # not nan + if val == val: + nobs[b, j] += 1 + if val > maxx[b, j]: + maxx[b, j] = val + else: + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 + + counts[b] += 1 + val = values[i, 0] + + # not nan + if val == val: + nobs[b, 0] += 1 + if val > maxx[b, 0]: + maxx[b, 0] = val + for i in range(ngroups): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = maxx[i, j] @cython.wraparound(False) @cython.boundscheck(False) -def left_join_indexer_unique_int32(ndarray[int32_t] left, - ndarray[int32_t] right): +def group_max_bin_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] bins): + ''' + Only aggregates on axis=0 + ''' cdef: - Py_ssize_t i, j, nleft, nright - ndarray[int64_t] indexer - int32_t lval, rval + Py_ssize_t i, j, N, K, ngroups, b + float32_t val, count + ndarray[float32_t, ndim=2] maxx, nobs - i = 0 - j = 0 - nleft = len(left) - nright = len(right) + nobs = np.zeros_like(out) + maxx = np.empty_like(out) + maxx.fill(-np.inf) - indexer = np.empty(nleft, dtype=np.int64) - while True: - if i == nleft: - break + if bins[len(bins) - 1] == len(values): + ngroups = len(bins) + else: + ngroups = len(bins) + 1 - if j == nright: - indexer[i] = -1 - i += 1 - continue + N, K = ( values).shape - rval = right[j] + b = 0 + if K > 1: + 
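
# -- Reference sketch: the group_min_* / group_max_* kernels above keep a
#    running per-group minimum (seeded with +inf) or maximum (seeded with
#    -inf) over the non-NaN values and write NaN for groups with no
#    observation.  A combined pure-NumPy reference (names are illustrative):
import numpy as np

def group_minmax_ref(values, labels, ngroups, how="min"):
    N, K = values.shape
    init = np.inf if how == "min" else -np.inf
    best = np.full((ngroups, K), init)
    nobs = np.zeros((ngroups, K), dtype=np.int64)
    op = np.minimum if how == "min" else np.maximum
    for i in range(N):
        lab = labels[i]
        if lab < 0:
            continue
        row = values[i]
        mask = row == row
        nobs[lab] += mask
        best[lab] = np.where(mask, op(best[lab], row), best[lab])
    return np.where(nobs > 0, best, np.nan)
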
for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 + counts[b] += 1 + for j in range(K): + val = values[i, j] - if left[i] == right[j]: - indexer[i] = j - i += 1 - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 - j += 1 - elif left[i] > rval: - indexer[i] = -1 - j += 1 - else: - indexer[i] = -1 - i += 1 - return indexer + # not nan + if val == val: + nobs[b, j] += 1 + if val > maxx[b, j]: + maxx[b, j] = val + else: + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 + + counts[b] += 1 + val = values[i, 0] + + # not nan + if val == val: + nobs[b, 0] += 1 + if val > maxx[b, 0]: + maxx[b, 0] = val + + for i in range(ngroups): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = maxx[i, j] @cython.wraparound(False) @cython.boundscheck(False) -def left_join_indexer_unique_int64(ndarray[int64_t] left, - ndarray[int64_t] right): +def group_ohlc_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] bins): + ''' + Only aggregates on axis=0 + ''' cdef: - Py_ssize_t i, j, nleft, nright - ndarray[int64_t] indexer - int64_t lval, rval - - i = 0 - j = 0 - nleft = len(left) - nright = len(right) + Py_ssize_t i, j, N, K, ngroups, b + float64_t val, count + float64_t vopen, vhigh, vlow, vclose, NA + bint got_first = 0 - indexer = np.empty(nleft, dtype=np.int64) - while True: - if i == nleft: - break + if bins[len(bins) - 1] == len(values): + ngroups = len(bins) + else: + ngroups = len(bins) + 1 - if j == nright: - indexer[i] = -1 - i += 1 - continue + N, K = ( values).shape - rval = right[j] + if out.shape[1] != 4: + raise ValueError('Output array must have 4 columns') - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 + NA = np.nan - if left[i] == right[j]: - indexer[i] = j - i += 1 - while i < nleft - 1 and left[i] == rval: - indexer[i] = j - i += 1 - j += 1 - elif left[i] > rval: - indexer[i] = -1 - j += 1 - else: - indexer[i] = -1 - i += 1 - return indexer + b = 0 + if K > 1: + raise NotImplementedError + else: + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + if not got_first: + out[b, 0] = NA + out[b, 1] = NA + out[b, 2] = NA + out[b, 3] = NA + else: + out[b, 0] = vopen + out[b, 1] = vhigh + out[b, 2] = vlow + out[b, 3] = vclose + b += 1 + got_first = 0 + counts[b] += 1 + val = values[i, 0] + # not nan + if val == val: + if not got_first: + got_first = 1 + vopen = val + vlow = val + vhigh = val + else: + if val < vlow: + vlow = val + if val > vhigh: + vhigh = val + vclose = val -def left_join_indexer_float64(ndarray[float64_t] left, - ndarray[float64_t] right): + if not got_first: + out[b, 0] = NA + out[b, 1] = NA + out[b, 2] = NA + out[b, 3] = NA + else: + out[b, 0] = vopen + out[b, 1] = vhigh + out[b, 2] = vlow + out[b, 3] = vclose +@cython.wraparound(False) +@cython.boundscheck(False) +def group_ohlc_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] bins): ''' - Two-pass algorithm for monotonic indexes. 
Handles many-to-one merges + Only aggregates on axis=0 ''' cdef: - Py_ssize_t i, j, k, nright, nleft, count - float64_t lval, rval - ndarray[int64_t] lindexer, rindexer - ndarray[float64_t] result + Py_ssize_t i, j, N, K, ngroups, b + float32_t val, count + float32_t vopen, vhigh, vlow, vclose, NA + bint got_first = 0 - nleft = len(left) - nright = len(right) + if bins[len(bins) - 1] == len(values): + ngroups = len(bins) + else: + ngroups = len(bins) + 1 - i = 0 - j = 0 - count = 0 - if nleft > 0: - while i < nleft: - if j == nright: - count += nleft - i - break + N, K = ( values).shape - lval = left[i] - rval = right[j] + if out.shape[1] != 4: + raise ValueError('Output array must have 4 columns') - if lval == rval: - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 + NA = np.nan + + b = 0 + if K > 1: + raise NotImplementedError + else: + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + if not got_first: + out[b, 0] = NA + out[b, 1] = NA + out[b, 2] = NA + out[b, 3] = NA else: - # end of the road - break - elif lval < rval: - count += 1 - i += 1 - else: - j += 1 + out[b, 0] = vopen + out[b, 1] = vhigh + out[b, 2] = vlow + out[b, 3] = vclose + b += 1 + got_first = 0 - # do it again now that result size is known + counts[b] += 1 + val = values[i, 0] - lindexer = np.empty(count, dtype=np.int64) - rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.float64) + # not nan + if val == val: + if not got_first: + got_first = 1 + vopen = val + vlow = val + vhigh = val + else: + if val < vlow: + vlow = val + if val > vhigh: + vhigh = val + vclose = val - i = 0 - j = 0 - count = 0 - if nleft > 0: - while i < nleft: - if j == nright: - while i < nleft: - lindexer[count] = i - rindexer[count] = -1 - result[count] = left[i] - i += 1 - count += 1 - break + if not got_first: + out[b, 0] = NA + out[b, 1] = NA + out[b, 2] = NA + out[b, 3] = NA + else: + out[b, 0] = vopen + out[b, 1] = vhigh + out[b, 2] = vlow + out[b, 3] = vclose - lval = left[i] - rval = right[j] +@cython.wraparound(False) +@cython.boundscheck(False) +def left_join_indexer_unique_float64(ndarray[float64_t] left, + ndarray[float64_t] right): + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[int64_t] indexer + float64_t lval, rval - if lval == rval: - lindexer[count] = i - rindexer[count] = j - result[count] = lval - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 - else: - # end of the road - break - elif lval < rval: - lindexer[count] = i - rindexer[count] = -1 - result[count] = left[i] - count += 1 - i += 1 - else: - j += 1 + i = 0 + j = 0 + nleft = len(left) + nright = len(right) - return result, lindexer, rindexer + indexer = np.empty(nleft, dtype=np.int64) + while True: + if i == nleft: + break + if j == nright: + indexer[i] = -1 + i += 1 + continue -def left_join_indexer_float32(ndarray[float32_t] left, - ndarray[float32_t] right): - ''' - Two-pass algorithm for monotonic indexes. 
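
# -- Reference sketch: the group_ohlc_* kernels above walk a single column of
#    values through consecutive bins and record, per bin, the first (open),
#    largest (high), smallest (low) and last (close) non-NaN value; empty bins
#    get four NaNs and more than one input column raises NotImplementedError.
#    A label-based reference of the same semantics (names are assumptions):
import numpy as np

def group_ohlc_ref(values, labels, ngroups):
    out = np.full((ngroups, 4), np.nan)    # columns: open, high, low, close
    for i, lab in enumerate(labels):
        val = values[i]
        if lab < 0 or val != val:          # skip unlabeled rows and NaN
            continue
        if np.isnan(out[lab, 0]):          # first observation in this bin
            out[lab] = (val, val, val, val)
        else:
            out[lab, 1] = max(out[lab, 1], val)
            out[lab, 2] = min(out[lab, 2], val)
            out[lab, 3] = val
    return out
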
Handles many-to-one merges - ''' + rval = right[j] + + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 + + if left[i] == right[j]: + indexer[i] = j + i += 1 + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 + j += 1 + elif left[i] > rval: + indexer[i] = -1 + j += 1 + else: + indexer[i] = -1 + i += 1 + return indexer + +@cython.wraparound(False) +@cython.boundscheck(False) +def left_join_indexer_unique_float32(ndarray[float32_t] left, + ndarray[float32_t] right): cdef: - Py_ssize_t i, j, k, nright, nleft, count + Py_ssize_t i, j, nleft, nright + ndarray[int64_t] indexer float32_t lval, rval - ndarray[int64_t] lindexer, rindexer - ndarray[float32_t] result + i = 0 + j = 0 nleft = len(left) nright = len(right) - i = 0 - j = 0 - count = 0 - if nleft > 0: - while i < nleft: - if j == nright: - count += nleft - i - break + indexer = np.empty(nleft, dtype=np.int64) + while True: + if i == nleft: + break - lval = left[i] - rval = right[j] + if j == nright: + indexer[i] = -1 + i += 1 + continue - if lval == rval: - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 - else: - # end of the road - break - elif lval < rval: - count += 1 - i += 1 - else: - j += 1 + rval = right[j] - # do it again now that result size is known + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 - lindexer = np.empty(count, dtype=np.int64) - rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.float32) + if left[i] == right[j]: + indexer[i] = j + i += 1 + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 + j += 1 + elif left[i] > rval: + indexer[i] = -1 + j += 1 + else: + indexer[i] = -1 + i += 1 + return indexer + +@cython.wraparound(False) +@cython.boundscheck(False) +def left_join_indexer_unique_object(ndarray[object] left, + ndarray[object] right): + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[int64_t] indexer + object lval, rval i = 0 j = 0 - count = 0 - if nleft > 0: - while i < nleft: - if j == nright: - while i < nleft: - lindexer[count] = i - rindexer[count] = -1 - result[count] = left[i] - i += 1 - count += 1 - break + nleft = len(left) + nright = len(right) - lval = left[i] - rval = right[j] + indexer = np.empty(nleft, dtype=np.int64) + while True: + if i == nleft: + break - if lval == rval: - lindexer[count] = i - rindexer[count] = j - result[count] = lval - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 - else: - # end of the road - break - elif lval < rval: - lindexer[count] = i - rindexer[count] = -1 - result[count] = left[i] - count += 1 - i += 1 - else: - j += 1 + if j == nright: + indexer[i] = -1 + i += 1 + continue - return result, lindexer, rindexer + rval = right[j] + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 -def left_join_indexer_object(ndarray[object] left, - ndarray[object] right): - ''' - Two-pass algorithm for monotonic indexes. 
Handles many-to-one merges - ''' + if left[i] == right[j]: + indexer[i] = j + i += 1 + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 + j += 1 + elif left[i] > rval: + indexer[i] = -1 + j += 1 + else: + indexer[i] = -1 + i += 1 + return indexer + +@cython.wraparound(False) +@cython.boundscheck(False) +def left_join_indexer_unique_int32(ndarray[int32_t] left, + ndarray[int32_t] right): cdef: - Py_ssize_t i, j, k, nright, nleft, count - object lval, rval - ndarray[int64_t] lindexer, rindexer - ndarray[object] result + Py_ssize_t i, j, nleft, nright + ndarray[int64_t] indexer + int32_t lval, rval + i = 0 + j = 0 nleft = len(left) nright = len(right) - i = 0 - j = 0 - count = 0 - if nleft > 0: - while i < nleft: - if j == nright: - count += nleft - i - break + indexer = np.empty(nleft, dtype=np.int64) + while True: + if i == nleft: + break - lval = left[i] - rval = right[j] + if j == nright: + indexer[i] = -1 + i += 1 + continue - if lval == rval: - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 - else: - # end of the road - break - elif lval < rval: - count += 1 - i += 1 - else: - j += 1 + rval = right[j] - # do it again now that result size is known + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 + + if left[i] == right[j]: + indexer[i] = j + i += 1 + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 + j += 1 + elif left[i] > rval: + indexer[i] = -1 + j += 1 + else: + indexer[i] = -1 + i += 1 + return indexer - lindexer = np.empty(count, dtype=np.int64) - rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=object) +@cython.wraparound(False) +@cython.boundscheck(False) +def left_join_indexer_unique_int64(ndarray[int64_t] left, + ndarray[int64_t] right): + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[int64_t] indexer + int64_t lval, rval i = 0 j = 0 - count = 0 - if nleft > 0: - while i < nleft: - if j == nright: - while i < nleft: - lindexer[count] = i - rindexer[count] = -1 - result[count] = left[i] - i += 1 - count += 1 - break + nleft = len(left) + nright = len(right) - lval = left[i] - rval = right[j] + indexer = np.empty(nleft, dtype=np.int64) + while True: + if i == nleft: + break - if lval == rval: - lindexer[count] = i - rindexer[count] = j - result[count] = lval - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 - else: - # end of the road - break - elif lval < rval: - lindexer[count] = i - rindexer[count] = -1 - result[count] = left[i] - count += 1 + if j == nright: + indexer[i] = -1 + i += 1 + continue + + rval = right[j] + + while i < nleft - 1 and left[i] == rval: + indexer[i] = j + i += 1 + + if left[i] == right[j]: + indexer[i] = j + i += 1 + while i < nleft - 1 and left[i] == rval: + indexer[i] = j i += 1 - else: - j += 1 + j += 1 + elif left[i] > rval: + indexer[i] = -1 + j += 1 + else: + indexer[i] = -1 + i += 1 + return indexer - return result, lindexer, rindexer -def left_join_indexer_int8(ndarray[int8_t] left, - ndarray[int8_t] right): +def left_join_indexer_float64(ndarray[float64_t] left, + ndarray[float64_t] right): ''' Two-pass algorithm for monotonic indexes. 
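
# -- Reference sketch: the left_join_indexer_unique_* functions above walk two
#    monotonically increasing arrays (the right side effectively acting as the
#    unique index) and return, for every element of `left`, the position of
#    the equal element in `right`, or -1 when there is no match -- i.e. the
#    indexer that realigns `right` onto `left` for a left join.  Sorted inputs
#    are assumed here, as in the Cython version; the name is illustrative:
import numpy as np

def left_join_indexer_unique_ref(left, right):
    indexer = np.empty(len(left), dtype=np.int64)
    j = 0
    for i, lval in enumerate(left):
        while j < len(right) and right[j] < lval:
            j += 1
        indexer[i] = j if j < len(right) and right[j] == lval else -1
    return indexer

# e.g. left = [1, 2, 2, 4], right = [2, 3, 4]  ->  [-1, 0, 0, 2]
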
Handles many-to-one merges ''' cdef: Py_ssize_t i, j, k, nright, nleft, count - int8_t lval, rval + float64_t lval, rval ndarray[int64_t] lindexer, rindexer - ndarray[int8_t] result + ndarray[float64_t] result nleft = len(left) nright = len(right) @@ -6924,7 +7136,7 @@ def left_join_indexer_int8(ndarray[int8_t] left, lindexer = np.empty(count, dtype=np.int64) rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.int8) + result = np.empty(count, dtype=np.float64) i = 0 j = 0 @@ -6974,16 +7186,16 @@ def left_join_indexer_int8(ndarray[int8_t] left, return result, lindexer, rindexer -def left_join_indexer_int16(ndarray[int16_t] left, - ndarray[int16_t] right): +def left_join_indexer_float32(ndarray[float32_t] left, + ndarray[float32_t] right): ''' Two-pass algorithm for monotonic indexes. Handles many-to-one merges ''' cdef: Py_ssize_t i, j, k, nright, nleft, count - int16_t lval, rval + float32_t lval, rval ndarray[int64_t] lindexer, rindexer - ndarray[int16_t] result + ndarray[float32_t] result nleft = len(left) nright = len(right) @@ -7026,7 +7238,7 @@ def left_join_indexer_int16(ndarray[int16_t] left, lindexer = np.empty(count, dtype=np.int64) rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.int16) + result = np.empty(count, dtype=np.float32) i = 0 j = 0 @@ -7076,16 +7288,16 @@ def left_join_indexer_int16(ndarray[int16_t] left, return result, lindexer, rindexer -def left_join_indexer_int32(ndarray[int32_t] left, - ndarray[int32_t] right): +def left_join_indexer_object(ndarray[object] left, + ndarray[object] right): ''' Two-pass algorithm for monotonic indexes. Handles many-to-one merges ''' cdef: Py_ssize_t i, j, k, nright, nleft, count - int32_t lval, rval + object lval, rval ndarray[int64_t] lindexer, rindexer - ndarray[int32_t] result + ndarray[object] result nleft = len(left) nright = len(right) @@ -7128,7 +7340,7 @@ def left_join_indexer_int32(ndarray[int32_t] left, lindexer = np.empty(count, dtype=np.int64) rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.int32) + result = np.empty(count, dtype=object) i = 0 j = 0 @@ -7178,16 +7390,16 @@ def left_join_indexer_int32(ndarray[int32_t] left, return result, lindexer, rindexer -def left_join_indexer_int64(ndarray[int64_t] left, - ndarray[int64_t] right): +def left_join_indexer_int32(ndarray[int32_t] left, + ndarray[int32_t] right): ''' Two-pass algorithm for monotonic indexes. 
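
# -- Reference sketch: the non-"unique" left_join_indexer_* functions above use
#    a two-pass strategy on sorted inputs -- a first walk only counts how many
#    output rows the join produces, then the key array and both indexers are
#    allocated at exactly that size and filled by a second, identical walk.
#    A compact reference of the resulting output triple for the many-to-one
#    case named in the docstring (unique right keys; name is illustrative):
import numpy as np

def left_join_indexer_ref(left, right):
    lindexer, rindexer, result = [], [], []
    j = 0
    for i, lval in enumerate(left):
        while j < len(right) and right[j] < lval:
            j += 1
        lindexer.append(i)
        result.append(lval)
        if j < len(right) and right[j] == lval:
            rindexer.append(j)
        else:
            rindexer.append(-1)          # no match on the right side
    return (np.asarray(result),
            np.asarray(lindexer, dtype=np.int64),
            np.asarray(rindexer, dtype=np.int64))
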
Handles many-to-one merges ''' cdef: Py_ssize_t i, j, k, nright, nleft, count - int64_t lval, rval + int32_t lval, rval ndarray[int64_t] lindexer, rindexer - ndarray[int64_t] result + ndarray[int32_t] result nleft = len(left) nright = len(right) @@ -7230,7 +7442,7 @@ def left_join_indexer_int64(ndarray[int64_t] left, lindexer = np.empty(count, dtype=np.int64) rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.int64) + result = np.empty(count, dtype=np.int32) i = 0 j = 0 @@ -7280,144 +7492,16 @@ def left_join_indexer_int64(ndarray[int64_t] left, return result, lindexer, rindexer -@cython.wraparound(False) -@cython.boundscheck(False) -def outer_join_indexer_float64(ndarray[float64_t] left, - ndarray[float64_t] right): - cdef: - Py_ssize_t i, j, nright, nleft, count - float64_t lval, rval - ndarray[int64_t] lindexer, rindexer - ndarray[float64_t] result - - nleft = len(left) - nright = len(right) - - i = 0 - j = 0 - count = 0 - if nleft == 0: - count = nright - elif nright == 0: - count = nleft - else: - while True: - if i == nleft: - count += nright - j - break - if j == nright: - count += nleft - i - break - - lval = left[i] - rval = right[j] - if lval == rval: - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 - else: - # end of the road - break - elif lval < rval: - count += 1 - i += 1 - else: - count += 1 - j += 1 - - lindexer = np.empty(count, dtype=np.int64) - rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.float64) - - # do it again, but populate the indexers / result - - i = 0 - j = 0 - count = 0 - if nleft == 0: - for j in range(nright): - lindexer[j] = -1 - rindexer[j] = j - result[j] = right[j] - elif nright == 0: - for i in range(nright): - lindexer[i] = i - rindexer[i] = -1 - result[i] = left[i] - else: - while True: - if i == nleft: - while j < nright: - lindexer[count] = -1 - rindexer[count] = j - result[count] = right[j] - count += 1 - j += 1 - break - if j == nright: - while i < nleft: - lindexer[count] = i - rindexer[count] = -1 - result[count] = left[i] - count += 1 - i += 1 - break - - lval = left[i] - rval = right[j] - - if lval == rval: - lindexer[count] = i - rindexer[count] = j - result[count] = lval - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 - else: - # end of the road - break - elif lval < rval: - lindexer[count] = i - rindexer[count] = -1 - result[count] = lval - count += 1 - i += 1 - else: - lindexer[count] = -1 - rindexer[count] = j - result[count] = rval - count += 1 - j += 1 - - return result, lindexer, rindexer - -@cython.wraparound(False) -@cython.boundscheck(False) -def outer_join_indexer_float32(ndarray[float32_t] left, - ndarray[float32_t] right): +def left_join_indexer_int64(ndarray[int64_t] left, + ndarray[int64_t] right): + ''' + Two-pass algorithm for monotonic indexes. 
Handles many-to-one merges + ''' cdef: - Py_ssize_t i, j, nright, nleft, count - float32_t lval, rval + Py_ssize_t i, j, k, nright, nleft, count + int64_t lval, rval ndarray[int64_t] lindexer, rindexer - ndarray[float32_t] result + ndarray[int64_t] result nleft = len(left) nright = len(right) @@ -7425,21 +7509,15 @@ def outer_join_indexer_float32(ndarray[float32_t] left, i = 0 j = 0 count = 0 - if nleft == 0: - count = nright - elif nright == 0: - count = nleft - else: - while True: - if i == nleft: - count += nright - j - break + if nleft > 0: + while i < nleft: if j == nright: count += nleft - i break lval = left[i] rval = right[j] + if lval == rval: count += 1 if i < nleft - 1: @@ -7460,45 +7538,26 @@ def outer_join_indexer_float32(ndarray[float32_t] left, count += 1 i += 1 else: - count += 1 j += 1 + # do it again now that result size is known + lindexer = np.empty(count, dtype=np.int64) rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.float32) - - # do it again, but populate the indexers / result + result = np.empty(count, dtype=np.int64) i = 0 j = 0 count = 0 - if nleft == 0: - for j in range(nright): - lindexer[j] = -1 - rindexer[j] = j - result[j] = right[j] - elif nright == 0: - for i in range(nright): - lindexer[i] = i - rindexer[i] = -1 - result[i] = left[i] - else: - while True: - if i == nleft: - while j < nright: - lindexer[count] = -1 - rindexer[count] = j - result[count] = right[j] - count += 1 - j += 1 - break + if nleft > 0: + while i < nleft: if j == nright: while i < nleft: lindexer[count] = i rindexer[count] = -1 result[count] = left[i] - count += 1 i += 1 + count += 1 break lval = left[i] @@ -7526,27 +7585,24 @@ def outer_join_indexer_float32(ndarray[float32_t] left, elif lval < rval: lindexer[count] = i rindexer[count] = -1 - result[count] = lval + result[count] = left[i] count += 1 i += 1 else: - lindexer[count] = -1 - rindexer[count] = j - result[count] = rval - count += 1 j += 1 return result, lindexer, rindexer + @cython.wraparound(False) @cython.boundscheck(False) -def outer_join_indexer_object(ndarray[object] left, - ndarray[object] right): +def outer_join_indexer_float64(ndarray[float64_t] left, + ndarray[float64_t] right): cdef: Py_ssize_t i, j, nright, nleft, count - object lval, rval + float64_t lval, rval ndarray[int64_t] lindexer, rindexer - ndarray[object] result + ndarray[float64_t] result nleft = len(left) nright = len(right) @@ -7594,7 +7650,7 @@ def outer_join_indexer_object(ndarray[object] left, lindexer = np.empty(count, dtype=np.int64) rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=object) + result = np.empty(count, dtype=np.float64) # do it again, but populate the indexers / result @@ -7669,13 +7725,13 @@ def outer_join_indexer_object(ndarray[object] left, @cython.wraparound(False) @cython.boundscheck(False) -def outer_join_indexer_int8(ndarray[int8_t] left, - ndarray[int8_t] right): +def outer_join_indexer_float32(ndarray[float32_t] left, + ndarray[float32_t] right): cdef: Py_ssize_t i, j, nright, nleft, count - int8_t lval, rval + float32_t lval, rval ndarray[int64_t] lindexer, rindexer - ndarray[int8_t] result + ndarray[float32_t] result nleft = len(left) nright = len(right) @@ -7723,7 +7779,7 @@ def outer_join_indexer_int8(ndarray[int8_t] left, lindexer = np.empty(count, dtype=np.int64) rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.int8) + result = np.empty(count, dtype=np.float32) # do it again, but populate the indexers / result @@ -7798,13 +7854,13 
@@ def outer_join_indexer_int8(ndarray[int8_t] left, @cython.wraparound(False) @cython.boundscheck(False) -def outer_join_indexer_int16(ndarray[int16_t] left, - ndarray[int16_t] right): +def outer_join_indexer_object(ndarray[object] left, + ndarray[object] right): cdef: Py_ssize_t i, j, nright, nleft, count - int16_t lval, rval + object lval, rval ndarray[int64_t] lindexer, rindexer - ndarray[int16_t] result + ndarray[object] result nleft = len(left) nright = len(right) @@ -7852,7 +7908,7 @@ def outer_join_indexer_int16(ndarray[int16_t] left, lindexer = np.empty(count, dtype=np.int64) rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.int16) + result = np.empty(count, dtype=object) # do it again, but populate the indexers / result @@ -8463,192 +8519,6 @@ def inner_join_indexer_object(ndarray[object] left, return result, lindexer, rindexer -@cython.wraparound(False) -@cython.boundscheck(False) -def inner_join_indexer_int8(ndarray[int8_t] left, - ndarray[int8_t] right): - ''' - Two-pass algorithm for monotonic indexes. Handles many-to-one merges - ''' - cdef: - Py_ssize_t i, j, k, nright, nleft, count - int8_t lval, rval - ndarray[int64_t] lindexer, rindexer - ndarray[int8_t] result - - nleft = len(left) - nright = len(right) - - i = 0 - j = 0 - count = 0 - if nleft > 0 and nright > 0: - while True: - if i == nleft: - break - if j == nright: - break - - lval = left[i] - rval = right[j] - if lval == rval: - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 - else: - # end of the road - break - elif lval < rval: - i += 1 - else: - j += 1 - - # do it again now that result size is known - - lindexer = np.empty(count, dtype=np.int64) - rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.int8) - - i = 0 - j = 0 - count = 0 - if nleft > 0 and nright > 0: - while True: - if i == nleft: - break - if j == nright: - break - - lval = left[i] - rval = right[j] - if lval == rval: - lindexer[count] = i - rindexer[count] = j - result[count] = rval - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 - else: - # end of the road - break - elif lval < rval: - i += 1 - else: - j += 1 - - return result, lindexer, rindexer - -@cython.wraparound(False) -@cython.boundscheck(False) -def inner_join_indexer_int16(ndarray[int16_t] left, - ndarray[int16_t] right): - ''' - Two-pass algorithm for monotonic indexes. 
Handles many-to-one merges - ''' - cdef: - Py_ssize_t i, j, k, nright, nleft, count - int16_t lval, rval - ndarray[int64_t] lindexer, rindexer - ndarray[int16_t] result - - nleft = len(left) - nright = len(right) - - i = 0 - j = 0 - count = 0 - if nleft > 0 and nright > 0: - while True: - if i == nleft: - break - if j == nright: - break - - lval = left[i] - rval = right[j] - if lval == rval: - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 - else: - # end of the road - break - elif lval < rval: - i += 1 - else: - j += 1 - - # do it again now that result size is known - - lindexer = np.empty(count, dtype=np.int64) - rindexer = np.empty(count, dtype=np.int64) - result = np.empty(count, dtype=np.int16) - - i = 0 - j = 0 - count = 0 - if nleft > 0 and nright > 0: - while True: - if i == nleft: - break - if j == nright: - break - - lval = left[i] - rval = right[j] - if lval == rval: - lindexer[count] = i - rindexer[count] = j - result[count] = rval - count += 1 - if i < nleft - 1: - if j < nright - 1 and right[j + 1] == rval: - j += 1 - else: - i += 1 - if left[i] != rval: - j += 1 - elif j < nright - 1: - j += 1 - if lval != right[j]: - i += 1 - else: - # end of the road - break - elif lval < rval: - i += 1 - else: - j += 1 - - return result, lindexer, rindexer - @cython.wraparound(False) @cython.boundscheck(False) def inner_join_indexer_int32(ndarray[int32_t] left, diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 1017f9cd7c503..e9f19e0fbad11 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -286,59 +286,295 @@ class TestTake(unittest.TestCase): _multiprocess_can_split_ = True def test_1d_with_out(self): - def _test_dtype(dtype): - out = np.empty(5, dtype=dtype) - arr = np.random.randn(10).astype(dtype) - indexer = [0, 2, 4, 7, 1] + def _test_dtype(dtype, can_hold_na): + data = np.random.randint(0, 2, 4).astype(dtype) - arr.take(indexer, out=out) - expected = arr.take(indexer) + indexer = [2, 1, 0, 1] + out = np.empty(4, dtype=dtype) + com.take_1d(data, indexer, out=out) + expected = data.take(indexer) tm.assert_almost_equal(out, expected) - _test_dtype(np.float64) - _test_dtype(np.float32) - _test_dtype(np.int32) - _test_dtype(np.int64) - _test_dtype(np.object_) - _test_dtype(np.bool) - - def test_1d_upcast_with_out(self): - def _test_dtype(dtype): + indexer = [2, 1, 0, -1] out = np.empty(4, dtype=dtype) - data = np.random.randint(0, 2, 5).astype(dtype) + if can_hold_na: + com.take_1d(data, indexer, out=out) + expected = data.take(indexer) + expected[3] = np.nan + tm.assert_almost_equal(out, expected) + else: + self.assertRaises(Exception, com.take_1d, data, + indexer, out=out) + # no exception o/w + data.take(indexer, out=out) + + _test_dtype(np.float64, True) + _test_dtype(np.float32, True) + _test_dtype(np.uint64, False) + _test_dtype(np.uint32, False) + _test_dtype(np.uint16, False) + _test_dtype(np.uint8, False) + _test_dtype(np.int64, False) + _test_dtype(np.int32, False) + _test_dtype(np.int16, False) + _test_dtype(np.int8, False) + _test_dtype(np.object_, True) + _test_dtype(np.bool, False) + + def test_1d_fill_nonna(self): + def _test_dtype(dtype, fill_value, out_dtype): + data = np.random.randint(0, 2, 4).astype(dtype) indexer = [2, 1, 0, -1] - self.assertRaises(Exception, com.take_1d, data, - indexer, out=out) - _test_dtype(np.int64) - _test_dtype(np.int32) - 
_test_dtype(np.int16) - _test_dtype(np.int8) - _test_dtype(np.bool) + result = com.take_1d(data, indexer, fill_value=fill_value) + assert((result[[0, 1, 2]] == data[[2, 1, 0]]).all()) + assert(result[3] == fill_value) + assert(result.dtype == out_dtype) + + indexer = [2, 1, 0, 1] + + result = com.take_1d(data, indexer, fill_value=fill_value) + assert((result[[0, 1, 2, 3]] == data[indexer]).all()) + assert(result.dtype == dtype) + + _test_dtype(np.int8, np.int16(127), np.int8) + _test_dtype(np.int8, np.int16(128), np.int16) + _test_dtype(np.int32, 1, np.int32) + _test_dtype(np.int32, 2.0, np.float64) + _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) + _test_dtype(np.int32, True, np.object_) + _test_dtype(np.int32, '', np.object_) + _test_dtype(np.float64, 1, np.float64) + _test_dtype(np.float64, 2.0, np.float64) + _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) + _test_dtype(np.float64, True, np.object_) + _test_dtype(np.float64, '', np.object_) + _test_dtype(np.complex128, 1, np.complex128) + _test_dtype(np.complex128, 2.0, np.complex128) + _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) + _test_dtype(np.complex128, True, np.object_) + _test_dtype(np.complex128, '', np.object_) + _test_dtype(np.bool_, 1, np.object_) + _test_dtype(np.bool_, 2.0, np.object_) + _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) + _test_dtype(np.bool_, True, np.bool_) + _test_dtype(np.bool_, '', np.object_) + + def test_2d_with_out(self): + def _test_dtype(dtype, can_hold_na): + data = np.random.randint(0, 2, (5, 3)).astype(dtype) - def test_2d_upcast_with_out(self): - def _test_dtype(dtype): + indexer = [2, 1, 0, 1] out0 = np.empty((4, 3), dtype=dtype) out1 = np.empty((5, 4), dtype=dtype) + com.take_nd(data, indexer, out=out0, axis=0) + com.take_nd(data, indexer, out=out1, axis=1) + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + indexer = [2, 1, 0, -1] + out0 = np.empty((4, 3), dtype=dtype) + out1 = np.empty((5, 4), dtype=dtype) + if can_hold_na: + com.take_nd(data, indexer, out=out0, axis=0) + com.take_nd(data, indexer, out=out1, axis=1) + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + expected0[3, :] = np.nan + expected1[:, 3] = np.nan + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + else: + self.assertRaises(Exception, com.take_nd, data, + indexer, out=out0, axis=0) + self.assertRaises(Exception, com.take_nd, data, + indexer, out=out1, axis=1) + # no exception o/w + data.take(indexer, out=out0, axis=0) + data.take(indexer, out=out1, axis=1) + + _test_dtype(np.float64, True) + _test_dtype(np.float32, True) + _test_dtype(np.uint64, False) + _test_dtype(np.uint32, False) + _test_dtype(np.uint16, False) + _test_dtype(np.uint8, False) + _test_dtype(np.int64, False) + _test_dtype(np.int32, False) + _test_dtype(np.int16, False) + _test_dtype(np.int8, False) + _test_dtype(np.object_, True) + _test_dtype(np.bool, False) + + def test_2d_fill_nonna(self): + def _test_dtype(dtype, fill_value, out_dtype): data = np.random.randint(0, 2, (5, 3)).astype(dtype) indexer = [2, 1, 0, -1] - self.assertRaises(Exception, com.take_2d, data, - indexer, out=out0, axis=0) - self.assertRaises(Exception, com.take_2d, data, - indexer, out=out1, axis=1) - # no exception o/w - data.take(indexer, out=out0, axis=0) - data.take(indexer, out=out1, axis=1) + result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert((result[[0, 1, 
2], :] == data[[2, 1, 0], :]).all()) + assert((result[3, :] == fill_value).all()) + assert(result.dtype == out_dtype) + + result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert((result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all()) + assert((result[:, 3] == fill_value).all()) + assert(result.dtype == out_dtype) + + indexer = [2, 1, 0, 1] + + result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert((result[[0, 1, 2, 3], :] == data[indexer, :]).all()) + assert(result.dtype == dtype) + + result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert((result[:, [0, 1, 2, 3]] == data[:, indexer]).all()) + assert(result.dtype == dtype) + + _test_dtype(np.int8, np.int16(127), np.int8) + _test_dtype(np.int8, np.int16(128), np.int16) + _test_dtype(np.int32, 1, np.int32) + _test_dtype(np.int32, 2.0, np.float64) + _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) + _test_dtype(np.int32, True, np.object_) + _test_dtype(np.int32, '', np.object_) + _test_dtype(np.float64, 1, np.float64) + _test_dtype(np.float64, 2.0, np.float64) + _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) + _test_dtype(np.float64, True, np.object_) + _test_dtype(np.float64, '', np.object_) + _test_dtype(np.complex128, 1, np.complex128) + _test_dtype(np.complex128, 2.0, np.complex128) + _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) + _test_dtype(np.complex128, True, np.object_) + _test_dtype(np.complex128, '', np.object_) + _test_dtype(np.bool_, 1, np.object_) + _test_dtype(np.bool_, 2.0, np.object_) + _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) + _test_dtype(np.bool_, True, np.bool_) + _test_dtype(np.bool_, '', np.object_) + + def test_3d_with_out(self): + def _test_dtype(dtype, can_hold_na): + data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) + + indexer = [2, 1, 0, 1] + out0 = np.empty((4, 4, 3), dtype=dtype) + out1 = np.empty((5, 4, 3), dtype=dtype) + out2 = np.empty((5, 4, 4), dtype=dtype) + com.take_nd(data, indexer, out=out0, axis=0) + com.take_nd(data, indexer, out=out1, axis=1) + com.take_nd(data, indexer, out=out2, axis=2) + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + expected2 = data.take(indexer, axis=2) + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + tm.assert_almost_equal(out2, expected2) - _test_dtype(np.int64) - _test_dtype(np.int32) - _test_dtype(np.int16) - _test_dtype(np.int8) - _test_dtype(np.bool) + indexer = [2, 1, 0, -1] + out0 = np.empty((4, 4, 3), dtype=dtype) + out1 = np.empty((5, 4, 3), dtype=dtype) + out2 = np.empty((5, 4, 4), dtype=dtype) + if can_hold_na: + com.take_nd(data, indexer, out=out0, axis=0) + com.take_nd(data, indexer, out=out1, axis=1) + com.take_nd(data, indexer, out=out2, axis=2) + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + expected2 = data.take(indexer, axis=2) + expected0[3, :, :] = np.nan + expected1[:, 3, :] = np.nan + expected2[:, :, 3] = np.nan + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + tm.assert_almost_equal(out2, expected2) + else: + self.assertRaises(Exception, com.take_nd, data, + indexer, out=out0, axis=0) + self.assertRaises(Exception, com.take_nd, data, + indexer, out=out1, axis=1) + self.assertRaises(Exception, com.take_nd, data, + indexer, out=out2, axis=2) + # no exception o/w + data.take(indexer, out=out0, axis=0) + data.take(indexer, out=out1, axis=1) + data.take(indexer, out=out2, axis=2) + + _test_dtype(np.float64, True) + _test_dtype(np.float32, 
True) + _test_dtype(np.uint64, False) + _test_dtype(np.uint32, False) + _test_dtype(np.uint16, False) + _test_dtype(np.uint8, False) + _test_dtype(np.int64, False) + _test_dtype(np.int32, False) + _test_dtype(np.int16, False) + _test_dtype(np.int8, False) + _test_dtype(np.object_, True) + _test_dtype(np.bool, False) + + def test_3d_fill_nonna(self): + def _test_dtype(dtype, fill_value, out_dtype): + data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) + + indexer = [2, 1, 0, -1] + + result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert((result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all()) + assert((result[3, :, :] == fill_value).all()) + assert(result.dtype == out_dtype) + + result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert((result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all()) + assert((result[:, 3, :] == fill_value).all()) + assert(result.dtype == out_dtype) + + result = com.take_nd(data, indexer, axis=2, fill_value=fill_value) + assert((result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all()) + assert((result[:, :, 3] == fill_value).all()) + assert(result.dtype == out_dtype) + + indexer = [2, 1, 0, 1] + + result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert((result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all()) + assert(result.dtype == dtype) + + result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert((result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all()) + assert(result.dtype == dtype) + + result = com.take_nd(data, indexer, axis=2, fill_value=fill_value) + assert((result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all()) + assert(result.dtype == dtype) + + _test_dtype(np.int8, np.int16(127), np.int8) + _test_dtype(np.int8, np.int16(128), np.int16) + _test_dtype(np.int32, 1, np.int32) + _test_dtype(np.int32, 2.0, np.float64) + _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) + _test_dtype(np.int32, True, np.object_) + _test_dtype(np.int32, '', np.object_) + _test_dtype(np.float64, 1, np.float64) + _test_dtype(np.float64, 2.0, np.float64) + _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) + _test_dtype(np.float64, True, np.object_) + _test_dtype(np.float64, '', np.object_) + _test_dtype(np.complex128, 1, np.complex128) + _test_dtype(np.complex128, 2.0, np.complex128) + _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) + _test_dtype(np.complex128, True, np.object_) + _test_dtype(np.complex128, '', np.object_) + _test_dtype(np.bool_, 1, np.object_) + _test_dtype(np.bool_, 2.0, np.object_) + _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) + _test_dtype(np.bool_, True, np.bool_) + _test_dtype(np.bool_, '', np.object_) def test_1d_other_dtypes(self): arr = np.random.randn(10).astype(np.float32) @@ -355,13 +591,13 @@ def test_2d_other_dtypes(self): indexer = [1, 2, 3, -1] # axis=0 - result = com.take_2d(arr, indexer, axis=0) + result = com.take_nd(arr, indexer, axis=0) expected = arr.take(indexer, axis=0) expected[-1] = np.nan tm.assert_almost_equal(result, expected) # axis=1 - result = com.take_2d(arr, indexer, axis=1) + result = com.take_nd(arr, indexer, axis=1) expected = arr.take(indexer, axis=1) expected[:, -1] = np.nan tm.assert_almost_equal(result, expected) @@ -381,15 +617,15 @@ def test_2d_bool(self): [1, 0, 1], [0, 1, 1]], dtype=bool) - result = com.take_2d(arr, [0, 2, 2, 1]) + result = com.take_nd(arr, [0, 2, 2, 1]) expected = arr.take([0, 2, 2, 1], axis=0) self.assert_(np.array_equal(result, expected)) - result = com.take_2d(arr, [0, 2, 2, 1], axis=1) + result = 
com.take_nd(arr, [0, 2, 2, 1], axis=1) expected = arr.take([0, 2, 2, 1], axis=1) self.assert_(np.array_equal(result, expected)) - result = com.take_2d(arr, [0, 2, -1]) + result = com.take_nd(arr, [0, 2, -1]) self.assert_(result.dtype == np.object_) def test_2d_float32(self): @@ -397,28 +633,76 @@ def test_2d_float32(self): indexer = [0, 2, -1, 1, -1] # axis=0 - result = com.take_2d(arr, indexer) + result = com.take_nd(arr, indexer, axis=0) result2 = np.empty_like(result) - com.take_2d(arr, indexer, out=result2) - tm.assert_almost_equal(result, result) + com.take_nd(arr, indexer, axis=0, out=result2) + tm.assert_almost_equal(result, result2) expected = arr.take(indexer, axis=0) - expected[[2, 4]] = np.nan + expected[[2, 4], :] = np.nan tm.assert_almost_equal(result, expected) #### this now accepts a float32! # test with float64 out buffer out = np.empty((len(indexer), arr.shape[1]), dtype='float32') - com.take_2d(arr, indexer, out=out) # it works! + com.take_nd(arr, indexer, out=out) # it works! # axis=1 - result = com.take_2d(arr, indexer, axis=1) + result = com.take_nd(arr, indexer, axis=1) result2 = np.empty_like(result) - com.take_2d(arr, indexer, axis=1, out=result2) - tm.assert_almost_equal(result, result) + com.take_nd(arr, indexer, axis=1, out=result2) + tm.assert_almost_equal(result, result2) expected = arr.take(indexer, axis=1) expected[:, [2, 4]] = np.nan tm.assert_almost_equal(result, expected) + + def test_2d_datetime64(self): + # 2005/01/01 - 2006/01/01 + arr = np.random.randint(11045376L, 11360736L, (5,3))*100000000000 + arr = arr.view(dtype='datetime64[ns]') + indexer = [0, 2, -1, 1, -1] + + # axis=0 + result = com.take_nd(arr, indexer, axis=0) + result2 = np.empty_like(result) + com.take_nd(arr, indexer, axis=0, out=result2) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=0) + expected.view(np.int64)[[2, 4], :] = iNaT + tm.assert_almost_equal(result, expected) + + result = com.take_nd(arr, indexer, axis=0, + fill_value=datetime(2007, 1, 1)) + result2 = np.empty_like(result) + com.take_nd(arr, indexer, out=result2, axis=0, + fill_value=datetime(2007, 1, 1)) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=0) + expected[[2, 4], :] = datetime(2007, 1, 1) + tm.assert_almost_equal(result, expected) + + # axis=1 + result = com.take_nd(arr, indexer, axis=1) + result2 = np.empty_like(result) + com.take_nd(arr, indexer, axis=1, out=result2) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=1) + expected.view(np.int64)[:, [2, 4]] = iNaT + tm.assert_almost_equal(result, expected) + + result = com.take_nd(arr, indexer, axis=1, + fill_value=datetime(2007, 1, 1)) + result2 = np.empty_like(result) + com.take_nd(arr, indexer, out=result2, axis=1, + fill_value=datetime(2007, 1, 1)) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=1) + expected[:, [2, 4]] = datetime(2007, 1, 1) + tm.assert_almost_equal(result, expected) if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 03fdd53ce19af..5cad4a0518ce9 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -8033,10 +8033,6 @@ def test_boolean_set_uncons(self): self.frame[self.frame > 1] = 2 assert_almost_equal(expected, self.frame.values) - def test_boolean_set_mixed_type(self): - bools = self.mixed_frame.applymap(lambda x: x != 2).astype(bool) - self.assertRaises(Exception, 
self.mixed_frame.__setitem__, bools, 2)
-
     def test_xs_view(self):
         dm = DataFrame(np.arange(20.).reshape(4, 5), index=range(4),
                        columns=range(5))
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index 3adfb38e6144b..6d699967915ba 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -715,18 +715,9 @@ def _merge_blocks(self, merge_chunks):
         sofar = 0
         for unit, blk in merge_chunks:
             out_chunk = out[sofar: sofar + len(blk)]
-
-            if unit.indexer is None:
-                # is this really faster than assigning to arr.flat?
-                com.take_fast(blk.values, np.arange(n, dtype=np.int64),
-                              None, False,
-                              axis=self.axis, out=out_chunk)
-            else:
-                # write out the values to the result array
-                com.take_fast(blk.values, unit.indexer,
-                              None, False,
-                              axis=self.axis, out=out_chunk)
-
+            com.take_fast(blk.values, unit.indexer,
+                          None, False, axis=self.axis,
+                          out=out_chunk)
             sofar += len(blk)
 
         # does not sort
@@ -771,10 +762,7 @@ def reindex_block(self, block, axis, ref_items, copy=True):
             mask, need_masking = self.mask_info
 
         if self.indexer is None:
-            if copy:
-                result = block.copy()
-            else:
-                result = block
+            result = block.copy() if copy else block
         else:
             result = block.reindex_axis(self.indexer, mask, need_masking,
                                         axis=axis)
diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py
index ce341b2de8060..2bd69d79ac024 100644
--- a/vb_suite/frame_methods.py
+++ b/vb_suite/frame_methods.py
@@ -54,6 +54,19 @@
 frame_reindex_both_axes_ix = Benchmark('df.ix[idx, idx]', setup,
                                        start_date=datetime(2011, 1, 1))
 
+#----------------------------------------------------------------------
+# reindex with upcasts
+setup = common_setup + """
+df=DataFrame(dict([(c, {
+        0: randint(0, 2, 1000).astype(np.bool_),
+        1: randint(0, 1000, 1000).astype(np.int16),
+        2: randint(0, 1000, 1000).astype(np.int32),
+        3: randint(0, 1000, 1000).astype(np.int64)
+    }[randint(0, 4)]) for c in range(1000)]))
+"""
+
+frame_reindex_upcast = Benchmark('df.reindex(permutation(range(1200)))', setup)
+
 #----------------------------------------------------------------------
 # boolean indexing
 
@@ -71,6 +84,7 @@
 setup = common_setup + """
 df = DataFrame(randn(10000, 100))
 
+
 def f():
     if hasattr(df, '_item_cache'):
         df._item_cache.clear()
diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py
index 58cd67227cf80..77d0e2e27260e 100644
--- a/vb_suite/pandas_vb_common.py
+++ b/vb_suite/pandas_vb_common.py
@@ -2,6 +2,8 @@
 from pandas.util.testing import rands
 from datetime import timedelta
 from numpy.random import randn
+from numpy.random import randint
+from numpy.random import permutation
 import pandas.util.testing as tm
 import random
 import numpy as np
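
Note on the generated join indexers earlier in this patch: each left_join_indexer_unique_<dtype> walks two monotonic arrays with a pair of cursors and records, for every element of left, the matching position in right (or -1), while the non-unique left_join_indexer_<dtype> and outer_join_indexer_<dtype> variants make two passes, one to count the output size and one to fill result/lindexer/rindexer. The following is only a rough pure-Python sketch of the single-pass unique case; the helper name is made up and this is not the generated Cython.

import numpy as np

def left_join_indexer_unique_sketch(left, right):
    # hypothetical illustration of the cursor walk:
    # left and right are monotonically increasing, right has no duplicates
    indexer = np.empty(len(left), dtype=np.int64)
    j = 0
    nright = len(right)
    for i, lval in enumerate(left):
        # advance the right-hand cursor past values that are too small
        while j < nright and right[j] < lval:
            j += 1
        # record the match, or -1 if left[i] has no counterpart in right
        indexer[i] = j if j < nright and right[j] == lval else -1
    return indexer

# left_join_indexer_unique_sketch(np.array([1, 2, 2, 4]), np.array([2, 3, 4]))
# -> array([-1, 0, 0, 2])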
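
Note on the new TestTake cases: they pin down the fill_value contract of com.take_1d / com.take_nd. A fill value the input dtype can hold keeps that dtype; anything else upcasts the result (int to float/complex/object, bool to object, datetime64 filled through its int64 view). The snippet below is a small usage sketch of that contract, assuming the pandas.core.common helpers from this branch; the values and expected dtypes mirror the assertions in the tests above.

import numpy as np
import pandas.core.common as com  # helpers from this branch

arr = np.array([10, 20, 30], dtype=np.int32)
indexer = [2, 0, -1]          # -1 marks a missing position

# a fill value the input dtype can hold keeps the dtype
res = com.take_1d(arr, indexer, fill_value=1)
# res.dtype == np.int32

# a fill value it cannot hold upcasts the result
res = com.take_1d(arr, indexer, fill_value=2.0)
# res.dtype == np.float64, res[2] == 2.0

# bool data with any non-bool fill goes to object
res = com.take_1d(np.array([True, False, True]), indexer, fill_value='')
# res.dtype == np.object_

# the same applies along any axis of an ndarray via take_nd
mat = np.arange(6, dtype=np.int32).reshape(3, 2)
res = com.take_nd(mat, indexer, axis=0, fill_value=2.0)
# res.dtype == np.float64 and the last row is all 2.0

# (the default fill is NaN, so integer input with a -1 in the indexer
#  is likewise expected to come back as float64)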