Skip to content

Commit

Permalink
fix: touching of unions (#2906)
Browse files (browse the repository at this point in the history)
* fix: touching of unions

* fix: error for unsupported arguments

* refactor: fix type hint

* fix: normalise slice

* ci: simplify test workflow (#2869)

* ci: simplify test workflow

* ci: try exporting ForthError

* ci: use macos11 for now

* ci: rename General to Test

* ci: fix wheelhouse path

* ci: better parameterisation

* ci: try cross-platform

* ci: use glob action

* ci: fix ROOT workflow

* ci: fix defaults

* ci: fixes

* ci: fixes

* fix: remaining paths

* ci: disable PRE
  • Loading branch information
agoose77 authored Dec 20, 2023
1 parent 1c52490 commit a9c2e3b
Show file tree
Hide file tree
Showing 17 changed files with 159 additions and 172 deletions.
2 changes: 1 addition & 1 deletion awkward-cpp/include/awkward/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ namespace awkward {
quote(const std::string& x);

/// @brief Exhaustive list of runtime errors possible in the ForthMachine.
enum class ForthError {
enum class EXPORT_SYMBOL ForthError {
// execution can continue
none,

Expand Down
171 changes: 62 additions & 109 deletions src/awkward/_broadcasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,121 +773,74 @@ def broadcast_any_union():
else:
nextparameters.append(NO_PARAMETERS)

if not backend.nplike.known_data:
# assert False
union_num_contents = []
length = None
for x in contents:
if x.is_union:
x._touch_data(recursive=False)
union_num_contents.append(len(x.contents))
if length is None:
length = x.length

all_combos = list(
itertools.product(*[range(x) for x in union_num_contents])
)

tags = backend.index_nplike.empty(length, dtype=np.int8)
index = backend.index_nplike.empty(length, dtype=np.int64)
numoutputs = None
outcontents = []
for combo in all_combos:
nextinputs = []
i = 0
for x in inputs:
if isinstance(x, UnionArray):
nextinputs.append(x._contents[combo[i]])
i += 1
else:
nextinputs.append(x)
assert len(nextinputs) == len(nextparameters)
outcontents.append(
apply_step(
backend,
nextinputs,
action,
depth,
copy.copy(depth_context),
lateral_context,
options,
union_tags, union_num_contents, length = [], [], unknown_length
for x in contents:
if x.is_union:
tags = x.tags.raw(backend.index_nplike)
union_tags.append(tags)
union_num_contents.append(len(x.contents))

if length is unknown_length:
length = tags.shape[0]
elif tags.shape[0] is unknown_length:
continue
elif length != tags.shape[0]:
raise ValueError(
"cannot broadcast UnionArray of length {} "
"with UnionArray of length {}{}".format(
length,
tags.shape[0],
in_function(options),
)
)
)
assert isinstance(outcontents[-1], tuple)
if numoutputs is not None:
assert numoutputs == len(outcontents[-1])
numoutputs = len(outcontents[-1])

assert numoutputs is not None
tags = backend.index_nplike.empty(length, dtype=np.int8)
index = backend.index_nplike.empty(length, dtype=np.int64)

else:
union_tags, union_num_contents, length = [], [], None
for x in contents:
if x.is_union:
tags = x.tags.raw(backend.index_nplike)
union_tags.append(tags)
union_num_contents.append(len(x.contents))
if tags.shape[0] is unknown_length:
continue

if length is None:
length = tags.shape[0]
elif length != tags.shape[0]:
raise ValueError(
"cannot broadcast UnionArray of length {} "
"with UnionArray of length {}{}".format(
length,
tags.shape[0],
in_function(options),
)
)
assert length is not unknown_length

# Stack all union tags
combos = backend.index_nplike.stack(union_tags, axis=-1)
# Build array of indices (c1, c2, c3, ..., cn) of contents in
# (union 1, union 2, union 3, ..., union n)
all_combos = backend.index_nplike.asarray(
list(itertools.product(*[range(x) for x in union_num_contents]))
)
# Stack all union tags
combos = backend.index_nplike.stack(union_tags, axis=-1)

tags = backend.index_nplike.empty(length, dtype=np.int8)
index = backend.index_nplike.empty(length, dtype=np.int64)
numoutputs, outcontents = None, []
for tag, combo in enumerate(all_combos):
mask = backend.index_nplike.all(combos == combo, axis=-1)
tags[mask] = tag
index[mask] = backend.index_nplike.arange(
backend.index_nplike.count_nonzero(mask), dtype=np.int64
)
nextinputs = []
i = 0
for x in inputs:
if isinstance(x, UnionArray):
nextinputs.append(x[mask].project(combo[i]))
i += 1
elif isinstance(x, Content):
nextinputs.append(x[mask])
else:
nextinputs.append(x)
outcontents.append(
apply_step(
backend,
nextinputs,
action,
depth,
copy.copy(depth_context),
lateral_context,
options,
)
)
assert isinstance(outcontents[-1], tuple)
if numoutputs is None:
numoutputs = len(outcontents[-1])
# Build array of indices (c1, c2, c3, ..., cn) of contents in
# (union 1, union 2, union 3, ..., union n)
all_combos = list(itertools.product(*[range(x) for x in union_num_contents]))

numoutputs = None
outcontents = []

for tag, j_contents in enumerate(all_combos):
combo = backend.index_nplike.asarray(j_contents, dtype=np.int64)
mask = backend.index_nplike.all(combos == combo, axis=-1)
tags[mask] = tag
index[mask] = backend.index_nplike.arange(
backend.index_nplike.count_nonzero(mask), dtype=np.int64
)
nextinputs = []
it_j_contents = iter(j_contents)
for x in inputs:
if isinstance(x, UnionArray):
nextinputs.append(x[mask].project(next(it_j_contents)))
elif isinstance(x, Content):
nextinputs.append(x[mask])
else:
assert numoutputs == len(outcontents[-1])
nextinputs.append(x)
outcontents.append(
apply_step(
backend,
nextinputs,
action,
depth,
copy.copy(depth_context),
lateral_context,
options,
)
)
assert isinstance(outcontents[-1], tuple)
if numoutputs is None:
numoutputs = len(outcontents[-1])
else:
assert numoutputs == len(outcontents[-1])

assert numoutputs is not None
assert numoutputs is not None

parameters = parameters_factory(nextparameters, numoutputs)

Expand Down
89 changes: 71 additions & 18 deletions src/awkward/_nplikes/typetracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def _new(

if not isinstance(shape, tuple):
raise TypeError("typetracer shape must be a tuple")
if any(is_unknown_scalar(x) for x in shape):
if not all(isinstance(x, int) or x is unknown_length for x in shape):
raise TypeError("typetracer shape must be integers or unknown-length")
if not isinstance(dtype, np.dtype):
raise TypeError("typetracer dtype must be an instance of np.dtype")
Expand Down Expand Up @@ -945,7 +945,7 @@ def shape_item_as_index(self, x1: ShapeItem) -> IndexType:
elif isinstance(x1, int):
return x1
else:
raise TypeError(f"expected None or int type, received {x1}")
raise TypeError(f"expected unknown_length or int type, received {x1}")

def index_as_shape_item(self, x1: IndexType) -> ShapeItem:
if is_unknown_scalar(x1) and np.issubdtype(x1.dtype, np.integer):
Expand Down Expand Up @@ -1294,10 +1294,24 @@ def stack(
*,
axis: int = 0,
) -> TypeTracerArray:
# Ensure all arrays have same ndim
ndim = arrays[0].ndim
assert all(x.ndim == ndim for x in arrays[1:])

if axis is None:
assert all(x.ndim == 1 for x in arrays)
elif axis < 0:
axis = ndim + axis
if not 0 <= axis < ndim:
raise ValueError(axis)

for x in arrays:
assert isinstance(x, TypeTracerArray)
try_touch_data(x)
raise NotImplementedError

emptyarrays = [numpy.empty_like((0,) * ndim, dtype=a.dtype) for a in arrays]
result = numpy.stack(emptyarrays, axis=axis)
return TypeTracerArray._new(result.dtype, result.shape)

def packbits(
self,
Expand Down Expand Up @@ -1436,10 +1450,33 @@ def all(
) -> TypeTracerArray:
assert isinstance(x, TypeTracerArray)
try_touch_data(x)

if isinstance(axis, tuple):
raise NotImplementedError
if maybe_out is not None:
raise NotImplementedError

if axis is None:
return TypeTracerArray._new(np.dtype(np.bool_), shape=())
return self.all(
cast(TypeTracerArray, self.reshape(x, (-1,))),
axis=axis,
keepdims=keepdims,
maybe_out=maybe_out,
)

if axis < 0:
axis = axis + x.ndim

assert 0 <= axis < x.ndim

if keepdims:
next_shape = list(x.shape)
next_shape[axis] = 1
return TypeTracerArray._new(np.dtype(np.bool_), shape=tuple(next_shape))
else:
raise NotImplementedError
next_shape = list(x.shape)
del next_shape[axis]
return TypeTracerArray._new(np.dtype(np.bool_), shape=tuple(next_shape))

def any(
self,
Expand All @@ -1449,12 +1486,7 @@ def any(
keepdims: bool = False,
maybe_out: TypeTracerArray | None = None,
) -> TypeTracerArray:
assert isinstance(x, TypeTracerArray)
try_touch_data(x)
if axis is None:
return TypeTracerArray._new(np.dtype(np.bool_), shape=())
else:
raise NotImplementedError
return self.all(x, axis=axis, keepdims=keepdims, maybe_out=maybe_out)

def count_nonzero(
self, x: TypeTracerArray, *, axis: int | tuple[int, ...] | None = None
Expand All @@ -1476,7 +1508,33 @@ def min(
) -> TypeTracerArray:
assert isinstance(x, TypeTracerArray)
try_touch_data(x)
raise NotImplementedError

if isinstance(axis, tuple):
raise NotImplementedError
if maybe_out is not None:
raise NotImplementedError

if axis is None:
return self.min(
cast(TypeTracerArray, self.reshape(x, (-1,))),
axis=axis,
keepdims=keepdims,
maybe_out=maybe_out,
)

if axis < 0:
axis = axis + x.ndim

assert 0 <= axis < x.ndim

if keepdims:
next_shape = list(x.shape)
next_shape[axis] = 1
return TypeTracerArray._new(x.dtype, shape=tuple(next_shape))
else:
next_shape = list(x.shape)
del next_shape[axis]
return TypeTracerArray._new(x.dtype, shape=tuple(next_shape))

def max(
self,
Expand All @@ -1486,12 +1544,7 @@ def max(
keepdims: bool = False,
maybe_out: TypeTracerArray | None = None,
) -> TypeTracerArray:
assert isinstance(x, TypeTracerArray)
try_touch_data(x)
if axis is None:
return TypeTracerArray._new(x.dtype, shape=())
else:
raise NotImplementedError
return self.min(x, axis=axis, keepdims=keepdims, maybe_out=maybe_out)

def array_str(
self,
Expand Down
Loading

0 comments on commit a9c2e3b

Please sign in to comment.