diff --git a/awkward-cpp/include/awkward/util.h b/awkward-cpp/include/awkward/util.h index b6ff5c7561..ed1e38ff90 100644 --- a/awkward-cpp/include/awkward/util.h +++ b/awkward-cpp/include/awkward/util.h @@ -64,7 +64,7 @@ namespace awkward { quote(const std::string& x); /// @brief Exhaustive list of runtime errors possible in the ForthMachine. - enum class ForthError { + enum class EXPORT_SYMBOL ForthError { // execution can continue none, diff --git a/src/awkward/_broadcasting.py b/src/awkward/_broadcasting.py index 1bbaf9693f..d70da4c64f 100644 --- a/src/awkward/_broadcasting.py +++ b/src/awkward/_broadcasting.py @@ -773,121 +773,74 @@ def broadcast_any_union(): else: nextparameters.append(NO_PARAMETERS) - if not backend.nplike.known_data: - # assert False - union_num_contents = [] - length = None - for x in contents: - if x.is_union: - x._touch_data(recursive=False) - union_num_contents.append(len(x.contents)) - if length is None: - length = x.length - - all_combos = list( - itertools.product(*[range(x) for x in union_num_contents]) - ) - - tags = backend.index_nplike.empty(length, dtype=np.int8) - index = backend.index_nplike.empty(length, dtype=np.int64) - numoutputs = None - outcontents = [] - for combo in all_combos: - nextinputs = [] - i = 0 - for x in inputs: - if isinstance(x, UnionArray): - nextinputs.append(x._contents[combo[i]]) - i += 1 - else: - nextinputs.append(x) - assert len(nextinputs) == len(nextparameters) - outcontents.append( - apply_step( - backend, - nextinputs, - action, - depth, - copy.copy(depth_context), - lateral_context, - options, + union_tags, union_num_contents, length = [], [], unknown_length + for x in contents: + if x.is_union: + tags = x.tags.raw(backend.index_nplike) + union_tags.append(tags) + union_num_contents.append(len(x.contents)) + + if length is unknown_length: + length = tags.shape[0] + elif tags.shape[0] is unknown_length: + continue + elif length != tags.shape[0]: + raise ValueError( + "cannot broadcast UnionArray of length {} " + "with UnionArray of length {}{}".format( + length, + tags.shape[0], + in_function(options), + ) ) - ) - assert isinstance(outcontents[-1], tuple) - if numoutputs is not None: - assert numoutputs == len(outcontents[-1]) - numoutputs = len(outcontents[-1]) - assert numoutputs is not None + tags = backend.index_nplike.empty(length, dtype=np.int8) + index = backend.index_nplike.empty(length, dtype=np.int64) - else: - union_tags, union_num_contents, length = [], [], None - for x in contents: - if x.is_union: - tags = x.tags.raw(backend.index_nplike) - union_tags.append(tags) - union_num_contents.append(len(x.contents)) - if tags.shape[0] is unknown_length: - continue - - if length is None: - length = tags.shape[0] - elif length != tags.shape[0]: - raise ValueError( - "cannot broadcast UnionArray of length {} " - "with UnionArray of length {}{}".format( - length, - tags.shape[0], - in_function(options), - ) - ) - assert length is not unknown_length - - # Stack all union tags - combos = backend.index_nplike.stack(union_tags, axis=-1) - # Build array of indices (c1, c2, c3, ..., cn) of contents in - # (union 1, union 2, union 3, ..., union n) - all_combos = backend.index_nplike.asarray( - list(itertools.product(*[range(x) for x in union_num_contents])) - ) + # Stack all union tags + combos = backend.index_nplike.stack(union_tags, axis=-1) - tags = backend.index_nplike.empty(length, dtype=np.int8) - index = backend.index_nplike.empty(length, dtype=np.int64) - numoutputs, outcontents = None, [] - for tag, combo in enumerate(all_combos): - mask = backend.index_nplike.all(combos == combo, axis=-1) - tags[mask] = tag - index[mask] = backend.index_nplike.arange( - backend.index_nplike.count_nonzero(mask), dtype=np.int64 - ) - nextinputs = [] - i = 0 - for x in inputs: - if isinstance(x, UnionArray): - nextinputs.append(x[mask].project(combo[i])) - i += 1 - elif isinstance(x, Content): - nextinputs.append(x[mask]) - else: - nextinputs.append(x) - outcontents.append( - apply_step( - backend, - nextinputs, - action, - depth, - copy.copy(depth_context), - lateral_context, - options, - ) - ) - assert isinstance(outcontents[-1], tuple) - if numoutputs is None: - numoutputs = len(outcontents[-1]) + # Build array of indices (c1, c2, c3, ..., cn) of contents in + # (union 1, union 2, union 3, ..., union n) + all_combos = list(itertools.product(*[range(x) for x in union_num_contents])) + + numoutputs = None + outcontents = [] + + for tag, j_contents in enumerate(all_combos): + combo = backend.index_nplike.asarray(j_contents, dtype=np.int64) + mask = backend.index_nplike.all(combos == combo, axis=-1) + tags[mask] = tag + index[mask] = backend.index_nplike.arange( + backend.index_nplike.count_nonzero(mask), dtype=np.int64 + ) + nextinputs = [] + it_j_contents = iter(j_contents) + for x in inputs: + if isinstance(x, UnionArray): + nextinputs.append(x[mask].project(next(it_j_contents))) + elif isinstance(x, Content): + nextinputs.append(x[mask]) else: - assert numoutputs == len(outcontents[-1]) + nextinputs.append(x) + outcontents.append( + apply_step( + backend, + nextinputs, + action, + depth, + copy.copy(depth_context), + lateral_context, + options, + ) + ) + assert isinstance(outcontents[-1], tuple) + if numoutputs is None: + numoutputs = len(outcontents[-1]) + else: + assert numoutputs == len(outcontents[-1]) - assert numoutputs is not None + assert numoutputs is not None parameters = parameters_factory(nextparameters, numoutputs) diff --git a/src/awkward/_nplikes/typetracer.py b/src/awkward/_nplikes/typetracer.py index 90f13bd10d..b78bc3018a 100644 --- a/src/awkward/_nplikes/typetracer.py +++ b/src/awkward/_nplikes/typetracer.py @@ -187,7 +187,7 @@ def _new( if not isinstance(shape, tuple): raise TypeError("typetracer shape must be a tuple") - if any(is_unknown_scalar(x) for x in shape): + if not all(isinstance(x, int) or x is unknown_length for x in shape): raise TypeError("typetracer shape must be integers or unknown-length") if not isinstance(dtype, np.dtype): raise TypeError("typetracer dtype must be an instance of np.dtype") @@ -945,7 +945,7 @@ def shape_item_as_index(self, x1: ShapeItem) -> IndexType: elif isinstance(x1, int): return x1 else: - raise TypeError(f"expected None or int type, received {x1}") + raise TypeError(f"expected unknown_length or int type, received {x1}") def index_as_shape_item(self, x1: IndexType) -> ShapeItem: if is_unknown_scalar(x1) and np.issubdtype(x1.dtype, np.integer): @@ -1294,10 +1294,24 @@ def stack( *, axis: int = 0, ) -> TypeTracerArray: + # Ensure all arrays have same ndim + ndim = arrays[0].ndim + assert all(x.ndim == ndim for x in arrays[1:]) + + if axis is None: + assert all(x.ndim == 1 for x in arrays) + elif axis < 0: + axis = ndim + axis + if not 0 <= axis < ndim: + raise ValueError(axis) + for x in arrays: assert isinstance(x, TypeTracerArray) try_touch_data(x) - raise NotImplementedError + + emptyarrays = [numpy.empty_like((0,) * ndim, dtype=a.dtype) for a in arrays] + result = numpy.stack(emptyarrays, axis=axis) + return TypeTracerArray._new(result.dtype, result.shape) def packbits( self, @@ -1436,10 +1450,33 @@ def all( ) -> TypeTracerArray: assert isinstance(x, TypeTracerArray) try_touch_data(x) + + if isinstance(axis, tuple): + raise NotImplementedError + if maybe_out is not None: + raise NotImplementedError + if axis is None: - return TypeTracerArray._new(np.dtype(np.bool_), shape=()) + return self.all( + cast(TypeTracerArray, self.reshape(x, (-1,))), + axis=axis, + keepdims=keepdims, + maybe_out=maybe_out, + ) + + if axis < 0: + axis = axis + x.ndim + + assert 0 <= axis < x.ndim + + if keepdims: + next_shape = list(x.shape) + next_shape[axis] = 1 + return TypeTracerArray._new(np.dtype(np.bool_), shape=tuple(next_shape)) else: - raise NotImplementedError + next_shape = list(x.shape) + del next_shape[axis] + return TypeTracerArray._new(np.dtype(np.bool_), shape=tuple(next_shape)) def any( self, @@ -1449,12 +1486,7 @@ def any( keepdims: bool = False, maybe_out: TypeTracerArray | None = None, ) -> TypeTracerArray: - assert isinstance(x, TypeTracerArray) - try_touch_data(x) - if axis is None: - return TypeTracerArray._new(np.dtype(np.bool_), shape=()) - else: - raise NotImplementedError + return self.all(x, axis=axis, keepdims=keepdims, maybe_out=maybe_out) def count_nonzero( self, x: TypeTracerArray, *, axis: int | tuple[int, ...] | None = None @@ -1476,7 +1508,33 @@ def min( ) -> TypeTracerArray: assert isinstance(x, TypeTracerArray) try_touch_data(x) - raise NotImplementedError + + if isinstance(axis, tuple): + raise NotImplementedError + if maybe_out is not None: + raise NotImplementedError + + if axis is None: + return self.min( + cast(TypeTracerArray, self.reshape(x, (-1,))), + axis=axis, + keepdims=keepdims, + maybe_out=maybe_out, + ) + + if axis < 0: + axis = axis + x.ndim + + assert 0 <= axis < x.ndim + + if keepdims: + next_shape = list(x.shape) + next_shape[axis] = 1 + return TypeTracerArray._new(x.dtype, shape=tuple(next_shape)) + else: + next_shape = list(x.shape) + del next_shape[axis] + return TypeTracerArray._new(x.dtype, shape=tuple(next_shape)) def max( self, @@ -1486,12 +1544,7 @@ def max( keepdims: bool = False, maybe_out: TypeTracerArray | None = None, ) -> TypeTracerArray: - assert isinstance(x, TypeTracerArray) - try_touch_data(x) - if axis is None: - return TypeTracerArray._new(x.dtype, shape=()) - else: - raise NotImplementedError + return self.min(x, axis=axis, keepdims=keepdims, maybe_out=maybe_out) def array_str( self, diff --git a/src/awkward/_slicing.py b/src/awkward/_slicing.py index bb266c3b53..e09407e66d 100644 --- a/src/awkward/_slicing.py +++ b/src/awkward/_slicing.py @@ -2,8 +2,6 @@ from __future__ import annotations -import operator - import awkward as ak from awkward._backends.backend import Backend from awkward._nplikes import to_nplike @@ -15,7 +13,7 @@ from awkward._typing import TYPE_CHECKING, Sequence, TypeAlias, TypeVar if TYPE_CHECKING: - from awkward._nplikes.numpy_like import ArrayLike + from awkward._nplikes.numpy_like import ArrayLike, NumpyLike from awkward.contents.content import Content np = NumpyMetadata.instance() @@ -24,48 +22,27 @@ SliceItem: TypeAlias = "int | slice | str | None | Ellipsis | ArrayLike | Content" -def normalize_slice_item(item, *, backend: Backend): - if backend.index_nplike.is_own_array(item): - if item.ndim != 0: - raise ValueError( - f"slice items must be 0D arrays or Python integers, not {item!r}" - ) - else: - return item - else: - return operator.index(item) - - -def normalize_slice(slice_: slice, *, backend: Backend) -> slice: +def normalize_slice(slice_: slice, *, nplike: NumpyLike) -> slice: """ Args: slice_: slice object - backend: backend of layout + nplike: NumpyLike of array Return a slice of (start, stop, step) for which the slice items have been normalized into index types. """ - index_nplike = backend.index_nplike start = slice_.start stop = slice_.stop step = slice_.step - if index_nplike.known_data: + if nplike.known_data: return slice_ # Unknown lengths mean that the slice index is unknown else: - start = ( - index_nplike.shape_item_as_index(start) - if start is unknown_length - else start - ) - stop = ( - index_nplike.shape_item_as_index(stop) if stop is unknown_length else stop - ) - step = ( - index_nplike.shape_item_as_index(step) if step is unknown_length else step - ) + start = nplike.shape_item_as_index(start) if start is unknown_length else start + stop = nplike.shape_item_as_index(stop) if stop is unknown_length else stop + step = nplike.shape_item_as_index(step) if step is unknown_length else step return slice(start, stop, step) @@ -226,7 +203,7 @@ def normalise_item(item, backend: Backend) -> SliceItem: return normalize_integer_like(item) elif isinstance(item, slice): - return normalize_slice(item, backend=backend) + return normalize_slice(item, nplike=backend.index_nplike) elif isinstance(item, str): return item diff --git a/src/awkward/contents/bytemaskedarray.py b/src/awkward/contents/bytemaskedarray.py index 84742101d3..6881292a2b 100644 --- a/src/awkward/contents/bytemaskedarray.py +++ b/src/awkward/contents/bytemaskedarray.py @@ -390,7 +390,7 @@ def _getitem_at(self, where: IndexType): else: return None - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: if not self._backend.nplike.known_data: self._touch_shape(recursive=False) return self diff --git a/src/awkward/contents/content.py b/src/awkward/contents/content.py index 7315a32ff8..3cefb566e0 100644 --- a/src/awkward/contents/content.py +++ b/src/awkward/contents/content.py @@ -525,7 +525,7 @@ def _getitem(self, where): elif isinstance(where, slice) and where.step is None: # Ensure that start, stop are non-negative! start, stop, _, _ = self._backend.index_nplike.derive_slice_for_length( - normalize_slice(where, backend=self._backend), self.length + normalize_slice(where, nplike=self._backend.index_nplike), self.length ) return self._getitem_range(start, stop) @@ -700,7 +700,7 @@ def _getitem(self, where): def _getitem_at(self, where: IndexType): raise NotImplementedError - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: raise NotImplementedError def _getitem_field( diff --git a/src/awkward/contents/emptyarray.py b/src/awkward/contents/emptyarray.py index 831d8c3810..264855317a 100644 --- a/src/awkward/contents/emptyarray.py +++ b/src/awkward/contents/emptyarray.py @@ -182,7 +182,7 @@ def _getitem_nothing(self): def _getitem_at(self, where: IndexType): raise ak._errors.index_error(self, where, "array is empty") - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: return self def _getitem_field( diff --git a/src/awkward/contents/indexedarray.py b/src/awkward/contents/indexedarray.py index 7f30634e52..b0d5701837 100644 --- a/src/awkward/contents/indexedarray.py +++ b/src/awkward/contents/indexedarray.py @@ -292,7 +292,7 @@ def _getitem_at(self, where: IndexType): raise ak._errors.index_error(self, where) return self._content._getitem_at(self._index[where]) - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: if not self._backend.nplike.known_data: self._touch_shape(recursive=False) return self diff --git a/src/awkward/contents/indexedoptionarray.py b/src/awkward/contents/indexedoptionarray.py index 6373e61161..6772a76c1e 100644 --- a/src/awkward/contents/indexedoptionarray.py +++ b/src/awkward/contents/indexedoptionarray.py @@ -316,7 +316,7 @@ def _getitem_at(self, where: IndexType): else: return self._content._getitem_at(self._index[where]) - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: if not self._backend.nplike.known_data: self._touch_shape(recursive=False) return self diff --git a/src/awkward/contents/listarray.py b/src/awkward/contents/listarray.py index da0ef351bf..bd0bf63828 100644 --- a/src/awkward/contents/listarray.py +++ b/src/awkward/contents/listarray.py @@ -320,7 +320,7 @@ def _getitem_at(self, where: IndexType): start, stop = self._starts[where], self._stops[where] return self._content._getitem_range(start, stop) - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: if not self._backend.nplike.known_data: self._touch_shape(recursive=False) return self diff --git a/src/awkward/contents/listoffsetarray.py b/src/awkward/contents/listoffsetarray.py index e35b32320e..fb1991c4d8 100644 --- a/src/awkward/contents/listoffsetarray.py +++ b/src/awkward/contents/listoffsetarray.py @@ -320,7 +320,7 @@ def _getitem_at(self, where: IndexType): start, stop = self._offsets[where], self._offsets[where + 1] return self._content._getitem_range(start, stop) - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: if not self._backend.nplike.known_data: self._touch_shape(recursive=False) return self diff --git a/src/awkward/contents/numpyarray.py b/src/awkward/contents/numpyarray.py index 0adba74ba4..d136366446 100644 --- a/src/awkward/contents/numpyarray.py +++ b/src/awkward/contents/numpyarray.py @@ -321,7 +321,7 @@ def _getitem_at(self, where: IndexType): else: return out - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: try: out = self._data[start:stop] except IndexError as err: diff --git a/src/awkward/contents/recordarray.py b/src/awkward/contents/recordarray.py index 30697f6f8f..96825938f6 100644 --- a/src/awkward/contents/recordarray.py +++ b/src/awkward/contents/recordarray.py @@ -445,7 +445,7 @@ def _getitem_at(self, where: IndexType): raise ak._errors.index_error(self, where) return Record(self, where) - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: if not self._backend.nplike.known_data: self._touch_shape(recursive=False) diff --git a/src/awkward/contents/regulararray.py b/src/awkward/contents/regulararray.py index c5748d7f37..4fefc1899a 100644 --- a/src/awkward/contents/regulararray.py +++ b/src/awkward/contents/regulararray.py @@ -300,7 +300,7 @@ def _getitem_at(self, where: IndexType): start, stop = where * size_scalar, (where + 1) * size_scalar return self._content._getitem_range(start, stop) - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: index_nplike = self._backend.index_nplike if not index_nplike.known_data: self._touch_shape(recursive=False) diff --git a/src/awkward/contents/unionarray.py b/src/awkward/contents/unionarray.py index 431312c21e..fed19ebe42 100644 --- a/src/awkward/contents/unionarray.py +++ b/src/awkward/contents/unionarray.py @@ -552,7 +552,7 @@ def _getitem_at(self, where: IndexType): tag, index = self._tags[where], self._index[where] return self._contents[tag]._getitem_at(index) - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: if not self._backend.nplike.known_data: self._touch_shape(recursive=False) return self diff --git a/src/awkward/contents/unmaskedarray.py b/src/awkward/contents/unmaskedarray.py index 8afdc6af5d..018146e198 100644 --- a/src/awkward/contents/unmaskedarray.py +++ b/src/awkward/contents/unmaskedarray.py @@ -238,7 +238,7 @@ def _getitem_at(self, where: IndexType): return self._content._getitem_at(where) - def _getitem_range(self, start: SupportsIndex, stop: IndexType) -> Content: + def _getitem_range(self, start: IndexType, stop: IndexType) -> Content: if not self._backend.nplike.known_data: self._touch_shape(recursive=False) return self diff --git a/src/awkward/index.py b/src/awkward/index.py index 984c1cfd24..814a2106ff 100644 --- a/src/awkward/index.py +++ b/src/awkward/index.py @@ -13,6 +13,7 @@ from awkward._nplikes.numpy_like import NumpyLike, NumpyMetadata from awkward._nplikes.shape import ShapeItem from awkward._nplikes.typetracer import TypeTracer +from awkward._slicing import normalize_slice from awkward._typing import Any, DType, Final, Self, cast np: Final = NumpyMetadata.instance() @@ -214,6 +215,9 @@ def form(self) -> str: return _dtype_to_form[self._data.dtype] def __getitem__(self, where): + if isinstance(where, slice): + where = normalize_slice(where, nplike=self.nplike) + out = self._data[where] if hasattr(out, "shape") and len(out.shape) != 0: