Skip to content

Commit

Permalink
fix: don't use np.asarray on Index or Content objects (#2740)
Browse files (browse the repository at this point in the history)
* test: update tests

* test: cleanup more test lines!

* fix: asarray usage

* fix: re-evaluate!
  • Loading branch information
agoose77 authored Oct 5, 2023
1 parent c377185 commit 6dee1ea
Show file tree
Hide file tree
Showing 18 changed files with 180 additions and 172 deletions.
12 changes: 10 additions & 2 deletions src/awkward/_connect/numexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,14 @@ def action(inputs, **ignore):
or not isinstance(x, ak.contents.Content)
for x in inputs
):
input_primitives = [
x.data if isinstance(x, ak.contents.NumpyArray) else x for x in inputs
]
return (
ak.contents.NumpyArray(
numexpr.evaluate(
expression,
dict(zip(names, inputs)),
dict(zip(names, input_primitives)),
{},
order=order,
casting=casting,
Expand Down Expand Up @@ -138,8 +141,13 @@ def action(inputs, **ignore):
or not isinstance(x, ak.contents.Content)
for x in inputs
):
input_primitives = [
x.data if isinstance(x, ak.contents.NumpyArray) else x for x in inputs
]
return (
ak.contents.NumpyArray(numexpr.re_evaluate(dict(zip(names, inputs)))),
ak.contents.NumpyArray(
numexpr.re_evaluate(dict(zip(names, input_primitives)))
),
)
else:
return None
Expand Down
17 changes: 11 additions & 6 deletions src/awkward/_connect/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ def _array_ufunc_categorical(
assert method == "__call__"
one, two = inputs

one_index = numpy.asarray(one.index)
two_index = numpy.asarray(two.index)
one_index = numpy.asarray(one.index.data)
two_index = numpy.asarray(two.index.data)
one_content = wrap_layout(one.content, behavior)
two_content = wrap_layout(two.content, behavior)

Expand Down Expand Up @@ -280,12 +280,17 @@ def _array_ufunc_string_likes(
right = ak.without_parameters(right, highlevel=False)

# first condition: string lengths must be the same
counts1 = nplike.asarray(
ak._do.reduce(left, ak._reducers.Count(), axis=-1, mask=False)
left_counts_layout = ak._do.reduce(
left, ak._reducers.Count(), axis=-1, mask=False
)
counts2 = nplike.asarray(
ak._do.reduce(right, ak._reducers.Count(), axis=-1, mask=False)
assert left_counts_layout.is_numpy
right_counts_layout = ak._do.reduce(
right, ak._reducers.Count(), axis=-1, mask=False
)
assert right_counts_layout.is_numpy

counts1 = nplike.asarray(left_counts_layout.data)
counts2 = nplike.asarray(right_counts_layout.data)

out = counts1 == counts2

Expand Down
3 changes: 0 additions & 3 deletions src/awkward/contents/emptyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,6 @@ def to_NumpyArray(self, dtype, backend=None):
backend=backend,
)

def __array__(self, **kwargs):
return numpy.empty(0, dtype=np.float64)

def __iter__(self):
return iter([])

Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,7 +1518,7 @@ def _pad_none(self, target, axis, depth, clip):
)

def _to_arrow(self, pyarrow, mask_node, validbytes, length, options):
index = numpy.asarray(self._index, copy=True)
index = numpy.asarray(self._index.data, copy=True)
this_validbytes = self.mask_as_bool(valid_when=True)
index[~this_validbytes] = 0

Expand Down
3 changes: 0 additions & 3 deletions src/awkward/contents/numpyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,6 @@ def to_RegularArray(self):
def maybe_to_NumpyArray(self) -> Self:
return self

def __array__(self, dtype=None):
return self._backend.nplike.asarray(self._data, dtype=dtype)

def __iter__(self):
return iter(self._data)

Expand Down
3 changes: 1 addition & 2 deletions src/awkward/operations/ak_concatenate.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,7 @@ def action(inputs, depth, **kwargs):

for x in nextinputs:
o, f = x._offsets_and_flattened(1, 1)
o = backend.index_nplike.asarray(o)
c = o[1:] - o[:-1]
c = o.data[1:] - o.data[:-1]
backend.index_nplike.add(counts, c, maybe_out=counts)
all_counts.append(c)
all_flatten.append(f)
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/operations/ak_from_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,8 +430,8 @@ def action(node, **kwargs):
):
with numpy._module.errstate(invalid="ignore"):
return ak.contents.NumpyArray(
node.backend.nplike.asarray(real)
+ node.backend.nplike.asarray(imag) * 1j
node.backend.nplike.asarray(real.data)
+ node.backend.nplike.asarray(imag.data) * 1j
)
else:
raise ValueError(
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/operations/ak_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ def mask(array, mask, *, valid_when=True, highlevel=True, behavior=None):
def _impl(array, mask, valid_when, highlevel, behavior):
def action(inputs, backend, **kwargs):
layoutarray, layoutmask = inputs
if isinstance(layoutmask, ak.contents.NumpyArray):
m = backend.nplike.asarray(layoutmask)
if layoutmask.is_numpy:
m = backend.nplike.asarray(layoutmask.data)
if not issubclass(m.dtype.type, (bool, np.bool_)):
raise ValueError(f"mask must have boolean type, not {m.dtype!r}")
bytemask = ak.index.Index8(m.view(np.int8))
Expand Down
14 changes: 8 additions & 6 deletions src/awkward/operations/ak_run_lengths.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,12 @@ def action(layout, **kwargs):
nextcontent, _ = lengths_of(ak.highlevel.Array(layout), None)
return ak.contents.NumpyArray(nextcontent)

if not isinstance(layout, (ak.contents.NumpyArray, ak.contents.EmptyArray)):
if layout.is_unknown:
layout = layout.to_NumpyArray(np.float64)
elif not layout.is_numpy:
raise NotImplementedError("run_lengths on " + type(layout).__name__)

nextcontent, _ = lengths_of(backend.nplike.asarray(layout), None)
nextcontent, _ = lengths_of(backend.nplike.asarray(layout.data), None)
return ak.contents.NumpyArray(nextcontent)

elif layout.branch_depth == (False, 2):
Expand Down Expand Up @@ -197,9 +199,9 @@ def action(layout, **kwargs):
if content.is_indexed:
content = content.project()

if not isinstance(
content, (ak.contents.NumpyArray, ak.contents.EmptyArray)
):
if content.is_unknown:
content = content.to_NumpyArray(np.float64)
elif not content.is_numpy:
raise NotImplementedError(
"run_lengths on "
+ type(layout).__name__
Expand All @@ -208,7 +210,7 @@ def action(layout, **kwargs):
)

nextcontent, nextoffsets = lengths_of(
backend.nplike.asarray(content), offsets - offsets[0]
backend.nplike.asarray(content.data), offsets - offsets[0]
)
return ak.contents.ListOffsetArray(
ak.index.Index64(nextoffsets),
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/operations/ak_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def _impl3(condition, x, y, mergebool, highlevel, behavior):
def action(inputs, **kwargs):
condition, x, y = inputs
if isinstance(condition, ak.contents.NumpyArray):
npcondition = backend.index_nplike.asarray(condition)
npcondition = backend.index_nplike.asarray(condition.data)
tags = ak.index.Index8((npcondition == 0).view(np.int8))
index = ak.index.Index64(
backend.index_nplike.arange(tags.length, dtype=np.int64),
Expand Down
34 changes: 17 additions & 17 deletions tests/test_0002_minimal_listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test():
data = np.array([0, 2, 2, 3], dtype="i8")
offsets = ak.index.Index64(data)

assert np.asarray(offsets).tolist() == [0, 2, 2, 3]
assert np.asarray(offsets.data).tolist() == [0, 2, 2, 3]
assert offsets[0] == 0
assert offsets[1] == 2
assert offsets[2] == 2
Expand All @@ -21,7 +21,7 @@ def test():
data = np.array([0, 2, 2, 3], dtype="i4")
offsets = ak.index.Index32(data)

assert np.asarray(offsets).tolist() == [0, 2, 2, 3]
assert np.asarray(offsets.data).tolist() == [0, 2, 2, 3]
assert offsets[0] == 0
assert offsets[1] == 2
assert offsets[2] == 2
Expand All @@ -34,12 +34,12 @@ def test():
offsets = ak.index.Index32(data)
array = ak.contents.ListOffsetArray(offsets, content)

assert np.asarray(content).tolist() == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]
assert np.asarray(content[0]).tolist() == [0, 1, 2, 3]
assert ak.to_numpy(content).tolist() == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]
assert ak.to_numpy(content[0]).tolist() == [0, 1, 2, 3]
assert content.to_typetracer()[0].form == content[0].form
assert np.asarray(content[1]).tolist() == [4, 5, 6, 7]
assert ak.to_numpy(content[1]).tolist() == [4, 5, 6, 7]
assert content.to_typetracer()[1].form == content[1].form
assert np.asarray(content[2]).tolist() == [8, 9, 10, 11]
assert ak.to_numpy(content[2]).tolist() == [8, 9, 10, 11]
assert content.to_typetracer()[2].form == content[2].form
assert [content[i][j] for i in range(3) for j in range(4)] == [
0,
Expand All @@ -56,17 +56,17 @@ def test():
11,
]

assert np.asarray(array[0]).tolist() == [[0, 1, 2, 3], [4, 5, 6, 7]]
assert ak.to_numpy(array[0]).tolist() == [[0, 1, 2, 3], [4, 5, 6, 7]]
assert array.to_typetracer()[0].form == array[0].form
assert np.asarray(array[1]).tolist() == []
assert ak.to_numpy(array[1]).tolist() == []
assert array.to_typetracer()[1].form == array[1].form
assert np.asarray(array[2]).tolist() == [[8, 9, 10, 11]]
assert ak.to_numpy(array[2]).tolist() == [[8, 9, 10, 11]]
assert array.to_typetracer()[2].form == array[2].form
assert np.asarray(array[1:3][0]).tolist() == []
assert ak.to_numpy(array[1:3][0]).tolist() == []
assert array.to_typetracer()[1:3][0].form == array[1:3][0].form
assert np.asarray(array[1:3][1]).tolist() == [[8, 9, 10, 11]]
assert ak.to_numpy(array[1:3][1]).tolist() == [[8, 9, 10, 11]]
assert array.to_typetracer()[1:3][1].form == array[1:3][1].form
assert np.asarray(array[2:3][0]).tolist() == [[8, 9, 10, 11]]
assert ak.to_numpy(array[2:3][0]).tolist() == [[8, 9, 10, 11]]
assert array.to_typetracer()[2:3][0].form == array[2:3][0].form


Expand All @@ -85,16 +85,16 @@ def test_members():
array = ak.contents.ListOffsetArray(offsets, content)
new = ak.contents.ListOffsetArray(offsets, array)

assert np.asarray(array.offsets).tolist() == [0, 2, 2, 3]
assert np.asarray(array.content).tolist() == [
assert np.asarray(array.offsets.data).tolist() == [0, 2, 2, 3]
assert ak.to_numpy(array.content).tolist() == [
[0, 1, 2, 3],
[4, 5, 6, 7],
[8, 9, 10, 11],
]

assert np.asarray(new.offsets).tolist() == [0, 2, 2, 3]
assert np.asarray(new.content.offsets).tolist() == [0, 2, 2, 3]
assert np.asarray(new.content.content).tolist() == [
assert np.asarray(new.offsets.data).tolist() == [0, 2, 2, 3]
assert np.asarray(new.content.offsets.data).tolist() == [0, 2, 2, 3]
assert ak.to_numpy(new.content.content).tolist() == [
[0, 1, 2, 3],
[4, 5, 6, 7],
[8, 9, 10, 11],
Expand Down
2 changes: 1 addition & 1 deletion tests/test_0006_deep_iteration.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ def test_iterator():
array = ak.contents.ListOffsetArray(offsets, content)

assert list(content) == [1.1, 2.2, 3.3]
assert [np.asarray(x).tolist() for x in array] == [[1.1, 2.2], [], [3.3]]
assert [ak.to_numpy(x).tolist() for x in array] == [[1.1, 2.2], [], [3.3]]
22 changes: 11 additions & 11 deletions tests/test_0074_argsort_and_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,18 +117,18 @@ def test_NumpyArray():

assert to_list(
ak.operations.sort(array2, axis=1, ascending=True, stable=False)
) == to_list(np.sort(np.asarray(array2), axis=1))
) == to_list(np.sort(ak.to_numpy(array2), axis=1))
assert to_list(
ak.operations.sort(array2, axis=0, ascending=True, stable=False)
) == to_list(np.sort(np.asarray(array2), axis=0))
) == to_list(np.sort(ak.to_numpy(array2), axis=0))

assert to_list(
ak.operations.argsort(array2, axis=1, ascending=True, stable=False)
) == to_list(np.argsort(np.asarray(array2), 1))
) == to_list(np.argsort(ak.to_numpy(array2), 1))

assert to_list(
ak.operations.argsort(array2, axis=0, ascending=True, stable=False)
) == to_list(np.argsort(np.asarray(array2), 0))
) == to_list(np.argsort(ak.to_numpy(array2), 0))

with pytest.raises(ValueError) as err:
ak.operations.argsort(array2, axis=2, ascending=True, stable=False)
Expand Down Expand Up @@ -344,20 +344,20 @@ def test_3d():
) # 5

sorted = ak.operations.argsort(array, axis=1, ascending=True, stable=False)
assert to_list(sorted) == to_list(np.argsort(np.asarray(array), 1))
assert to_list(sorted) == to_list(np.argsort(np.asarray(array.data), 1))

sorted = ak.operations.argsort(array, axis=2, ascending=True, stable=False)
assert to_list(sorted) == to_list(np.argsort(np.asarray(array), 2))
assert to_list(sorted) == to_list(np.argsort(np.asarray(array.data), 2))

sorted = ak.operations.sort(array, axis=2, ascending=True, stable=False)
assert to_list(sorted) == to_list(np.sort(np.asarray(array), 2))
assert to_list(sorted) == to_list(np.sort(np.asarray(array.data), 2))

sorted = ak.operations.argsort(array, axis=1, ascending=True, stable=False)

assert to_list(sorted) == to_list(np.argsort(np.asarray(array), 1))
assert to_list(sorted) == to_list(np.argsort(np.asarray(array.data), 1))

sorted = ak.operations.sort(array, axis=1, ascending=True, stable=False)
assert to_list(sorted) == to_list(np.sort(np.asarray(array), 1))
assert to_list(sorted) == to_list(np.sort(np.asarray(array.data), 1))

sorted = ak.operations.sort(array, axis=1, ascending=False, stable=False)
assert to_list(sorted) == [
Expand All @@ -374,11 +374,11 @@ def test_3d():
]

sorted = ak.operations.sort(array, axis=0, ascending=True, stable=False)
assert to_list(sorted) == to_list(np.sort(np.asarray(array), 0))
assert to_list(sorted) == to_list(np.sort(np.asarray(array.data), 0))

assert to_list(
ak.operations.argsort(array, axis=0, ascending=True, stable=False)
) == to_list(np.argsort(np.asarray(array), 0))
) == to_list(np.argsort(np.asarray(array.data), 0))


def test_ByteMaskedArray():
Expand Down
8 changes: 4 additions & 4 deletions tests/test_0093_simplify_uniontypes_and_optiontypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ def test_numpyarray_merge():
one = ak.contents.NumpyArray(np.array([1, 2, 3], dtype=x))
two = ak.contents.NumpyArray(np.array([4, 5], dtype=y))
three = one._mergemany([two])
assert np.asarray(three).dtype == np.dtype(z), "{} {} {} {}".format(
x, y, z, np.asarray(three).dtype.type
assert ak.to_numpy(three).dtype == np.dtype(z), "{} {} {} {}".format(
x, y, z, ak.to_numpy(three).dtype.type
)
assert to_list(three) == to_list(
np.concatenate([np.asarray(one), np.asarray(two)])
np.concatenate([ak.to_numpy(one), ak.to_numpy(two)])
)
assert to_list(one._mergemany([emptyarray])) == to_list(one)
assert to_list(emptyarray._mergemany([one])) == to_list(one)
Expand Down Expand Up @@ -927,7 +927,7 @@ def test_indexedarray_simplify():
index2 = ak.index.Index64(np.array([2, 2, 1, 6, 5], dtype=np.int64))

array2 = ak.contents.IndexedArray.simplified(index2, array)
assert np.asarray(array.index).tolist() == [0, 1, -1, 2, -1, -1, 3, 4]
assert np.asarray(array.index.data).tolist() == [0, 1, -1, 2, -1, -1, 3, 4]
assert to_list(array2) == to_list(array2) == [None, None, "two", "four", None]

assert array2.to_typetracer().form == array2.form
Expand Down
Loading

0 comments on commit 6dee1ea

Please sign in to comment.