From 1d9dc4ab4e66687ebcc7f2b19f5740a45624b3e7 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 24 Oct 2023 09:47:18 +0100 Subject: [PATCH 1/7] fix: support all-None index --- .../src/cpu-kernels/awkward_Index_nones_as_index.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp b/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp index 28c0f3be5e..bef9e2b83d 100644 --- a/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp +++ b/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp @@ -8,12 +8,17 @@ template ERROR awkward_Index_nones_as_index( T* toindex, int64_t length) { - int64_t last_index = 0; + int64_t n_non_null = 0; + // Assuming that `toindex` comprises of contiguous integers, and is zero-based + // Compute the number of non-null values to determine our starting index for (int64_t i = 0; i < length; i++) { - toindex[i] > last_index ? last_index = toindex[i] : last_index; + if (toindex[i] != -1) { + n_non_null++; + } } + // for (int64_t i = 0; i < length; i++) { - toindex[i] == -1 ? toindex[i] = ++last_index : toindex[i]; + toindex[i] == -1 ? toindex[i] = n_non_null++ : toindex[i]; } return success(); } From 202a7c02096a9807d239d13cac1ece9d158c0923 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 24 Oct 2023 11:13:10 +0100 Subject: [PATCH 2/7] refactor: clarify logic --- src/awkward/contents/indexedoptionarray.py | 118 ++++++++++----------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/src/awkward/contents/indexedoptionarray.py b/src/awkward/contents/indexedoptionarray.py index 429076affa..adcb6fb718 100644 --- a/src/awkward/contents/indexedoptionarray.py +++ b/src/awkward/contents/indexedoptionarray.py @@ -1192,39 +1192,42 @@ def _argsort_next( negaxis, starts, nextshifts, nextparents, outlength, ascending, stable ) - # `next._argsort_next` is given the non-None values. We choose to - # sort None values to the end of the list, meaning we need to grow `out` - # to account for these None values. First, we locate these nones within - # their sublists - nulls_merged = False - nulls_index = ak.index.Index64.empty(numnull, self._backend.index_nplike) - assert nulls_index.nplike is self._backend.index_nplike - self._backend.maybe_kernel_error( - self._backend[ - "awkward_IndexedArray_index_of_nulls", - nulls_index.dtype.type, - self._index.dtype.type, - parents.dtype.type, - starts.dtype.type, - ]( - nulls_index.data, - self._index.data, - self._index.length, - parents.data, - starts.data, + # Are we sorting the current dimension? + if (not branch) and negaxis == depth: + # `out` will contain the argsort result for the non-None values, i.e. + # `next._argsort_next` is only given the non-None values. + # When computing argsort for *this* layout, we want to return + # the indices of each `None` value, rather than `None` itself. + # By convention, we choose to sort None values to the end of the + # list, meaning we need to grow `out` to account for the indices of + # the missing values + + # First, let's find the location of these missing values + nulls_index = ak.index.Index64.empty(numnull, self._backend.index_nplike) + assert nulls_index.nplike is self._backend.index_nplike + self._backend.maybe_kernel_error( + self._backend[ + "awkward_IndexedArray_index_of_nulls", + nulls_index.dtype.type, + self._index.dtype.type, + parents.dtype.type, + starts.dtype.type, + ]( + nulls_index.data, + self._index.data, + self._index.length, + parents.data, + starts.data, + ) + ) + # Now, we append these indices to the non-None argsort result + out = out._mergemany( + [ + ak.contents.NumpyArray( + nulls_index.data, parameters=None, backend=self._backend + ) + ] ) - ) - # If we wrap a NumpyArray (i.e., axis=-1), then we want `argmax` to return - # the indices of each `None` value, rather than `None` itself. - # We can test for this condition by seeing whether the NumpyArray of indices - # is mergeable with our content (`out = next._argsort_next result`). - # If so, try to concatenate them at the end of `out`.` - nulls_index_content = ak.contents.NumpyArray( - nulls_index.data, parameters=None, backend=self._backend - ) - if out._mergeable_next(nulls_index_content, True): - out = out._mergemany([nulls_index_content]) - nulls_merged = True nextoutindex = ak.index.Index64.empty( parents.length, self._backend.index_nplike @@ -1252,9 +1255,10 @@ def _argsort_next( ) ) - if nulls_merged: + # Are we sorting the current dimension? + if (not branch) and negaxis == depth: # awkward_IndexedArray_local_preparenext uses -1 to - # indicate `None` values. Given that this code-path runs + # indicate `None` values. Yet, given that *this* code-path runs # only when the `None` value indices are explicitly stored in out, # we need to mapping the -1 values to their corresponding indices # in `out` @@ -1265,34 +1269,30 @@ def _argsort_next( nextoutindex.length, ) ) - - out = ak.contents.IndexedOptionArray.simplified( - nextoutindex, out, parameters=self._parameters - ) - - inject_nones = ( - True - if ( - (numnull is not unknown_length and numnull > 0) - and not branch - and negaxis != depth + return out._carry(nextoutindex, False).copy( + parameters=parameters_union(out._parameters, self._parameters) ) - else False - ) - # If we want the None's at this depth to be injected - # into the dense ([x y z None None]) rearranger result. - # Here, we index the dense content with an index - # that maps the values to the correct locations - if inject_nones: - return ak.contents.IndexedOptionArray.simplified( - outindex, out, parameters=self._parameters - ) - # Otherwise, if we are rearranging (e.g sorting) the contents of this layout, - # then we do NOT want to return an optional layout, - # OR we are branching else: - return out + out = ak.contents.IndexedOptionArray.simplified( + nextoutindex, out, parameters=self._parameters + ) + + inject_nones = numnull is not unknown_length and numnull > 0 + + # If we want the None's at this depth to be injected + # into the dense ([x y z None None]) rearranger result. + # Here, we index the dense content with an index + # that maps the values to the correct locations + if inject_nones: + return ak.contents.IndexedOptionArray.simplified( + outindex, out, parameters=self._parameters + ) + # Otherwise, if we are rearranging (e.g sorting) the contents of this layout, + # then we do NOT want to return an optional layout, + # OR we are branching + else: + return out def _sort_next(self, negaxis, starts, parents, outlength, ascending, stable): assert ( From 815bd219e270252d9d07a1b6d7af9b90979cc2b0 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 24 Oct 2023 11:13:23 +0100 Subject: [PATCH 3/7] test: test argsort cases --- tests/test_2769_argsort_all_none.py | 51 +++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 tests/test_2769_argsort_all_none.py diff --git a/tests/test_2769_argsort_all_none.py b/tests/test_2769_argsort_all_none.py new file mode 100644 index 0000000000..6e0a4acf2d --- /dev/null +++ b/tests/test_2769_argsort_all_none.py @@ -0,0 +1,51 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest # noqa: F401 + +import awkward as ak + + +def test_all_none(): + result = ak.argsort([None, None, None]) + assert ak.is_valid(result) + assert result.to_list() == [0, 1, 2] + assert isinstance(result.layout, ak.contents.NumpyArray) + assert result.type == ak.types.ArrayType(ak.types.NumpyType("int64"), 3) + + +def test_mixed_none_local(): + result = ak.argsort([None, 1, None, 2, 0, None, -1]) + assert ak.is_valid(result) + assert result.to_list() == [6, 4, 1, 3, 0, 2, 5] + assert result.type == ak.types.ArrayType(ak.types.NumpyType("int64"), 7) + + +def test_mixed_none_2d_local(): + result = ak.argsort( + [ + [None, 1, None, 0, None, None, -1], + None, + [None, 2, None, 2, 0, None, -2], + ], + axis=1, + ) + assert ak.is_valid(result) + assert result.to_list() == [[6, 3, 1, 0, 2, 4, 5], None, [6, 4, 1, 3, 0, 2, 5]] + assert result.type == ak.types.ArrayType( + ak.types.OptionType(ak.types.ListType(ak.types.NumpyType("int64"))), 3 + ) + + +def test_mixed_none_2d_nonlocal(): + result = ak.argsort( + [ + [None, 1, None, 0, None, None, -1], + [None, 2, None, 2, 0, None, -2], + ], + axis=0, + ) + assert ak.is_valid(result) + assert result.to_list() == [[0, 0, 0, 0, 1, 0, 1], [1, 1, 1, 1, 0, 1, 0]] + assert result.type == ak.types.ArrayType( + ak.types.ListType(ak.types.NumpyType("int64")), 2 + ) From 0134e28b0ce943478f3624eabf51cf5acdc011a3 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 24 Oct 2023 11:29:03 +0100 Subject: [PATCH 4/7] Revert "refactor: clarify logic" This reverts commit 202a7c02096a9807d239d13cac1ece9d158c0923. --- src/awkward/contents/indexedoptionarray.py | 118 ++++++++++----------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/src/awkward/contents/indexedoptionarray.py b/src/awkward/contents/indexedoptionarray.py index adcb6fb718..429076affa 100644 --- a/src/awkward/contents/indexedoptionarray.py +++ b/src/awkward/contents/indexedoptionarray.py @@ -1192,42 +1192,39 @@ def _argsort_next( negaxis, starts, nextshifts, nextparents, outlength, ascending, stable ) - # Are we sorting the current dimension? - if (not branch) and negaxis == depth: - # `out` will contain the argsort result for the non-None values, i.e. - # `next._argsort_next` is only given the non-None values. - # When computing argsort for *this* layout, we want to return - # the indices of each `None` value, rather than `None` itself. - # By convention, we choose to sort None values to the end of the - # list, meaning we need to grow `out` to account for the indices of - # the missing values - - # First, let's find the location of these missing values - nulls_index = ak.index.Index64.empty(numnull, self._backend.index_nplike) - assert nulls_index.nplike is self._backend.index_nplike - self._backend.maybe_kernel_error( - self._backend[ - "awkward_IndexedArray_index_of_nulls", - nulls_index.dtype.type, - self._index.dtype.type, - parents.dtype.type, - starts.dtype.type, - ]( - nulls_index.data, - self._index.data, - self._index.length, - parents.data, - starts.data, - ) - ) - # Now, we append these indices to the non-None argsort result - out = out._mergemany( - [ - ak.contents.NumpyArray( - nulls_index.data, parameters=None, backend=self._backend - ) - ] + # `next._argsort_next` is given the non-None values. We choose to + # sort None values to the end of the list, meaning we need to grow `out` + # to account for these None values. First, we locate these nones within + # their sublists + nulls_merged = False + nulls_index = ak.index.Index64.empty(numnull, self._backend.index_nplike) + assert nulls_index.nplike is self._backend.index_nplike + self._backend.maybe_kernel_error( + self._backend[ + "awkward_IndexedArray_index_of_nulls", + nulls_index.dtype.type, + self._index.dtype.type, + parents.dtype.type, + starts.dtype.type, + ]( + nulls_index.data, + self._index.data, + self._index.length, + parents.data, + starts.data, ) + ) + # If we wrap a NumpyArray (i.e., axis=-1), then we want `argmax` to return + # the indices of each `None` value, rather than `None` itself. + # We can test for this condition by seeing whether the NumpyArray of indices + # is mergeable with our content (`out = next._argsort_next result`). + # If so, try to concatenate them at the end of `out`.` + nulls_index_content = ak.contents.NumpyArray( + nulls_index.data, parameters=None, backend=self._backend + ) + if out._mergeable_next(nulls_index_content, True): + out = out._mergemany([nulls_index_content]) + nulls_merged = True nextoutindex = ak.index.Index64.empty( parents.length, self._backend.index_nplike @@ -1255,10 +1252,9 @@ def _argsort_next( ) ) - # Are we sorting the current dimension? - if (not branch) and negaxis == depth: + if nulls_merged: # awkward_IndexedArray_local_preparenext uses -1 to - # indicate `None` values. Yet, given that *this* code-path runs + # indicate `None` values. Given that this code-path runs # only when the `None` value indices are explicitly stored in out, # we need to mapping the -1 values to their corresponding indices # in `out` @@ -1269,30 +1265,34 @@ def _argsort_next( nextoutindex.length, ) ) - return out._carry(nextoutindex, False).copy( - parameters=parameters_union(out._parameters, self._parameters) - ) - else: - out = ak.contents.IndexedOptionArray.simplified( - nextoutindex, out, parameters=self._parameters - ) + out = ak.contents.IndexedOptionArray.simplified( + nextoutindex, out, parameters=self._parameters + ) - inject_nones = numnull is not unknown_length and numnull > 0 + inject_nones = ( + True + if ( + (numnull is not unknown_length and numnull > 0) + and not branch + and negaxis != depth + ) + else False + ) - # If we want the None's at this depth to be injected - # into the dense ([x y z None None]) rearranger result. - # Here, we index the dense content with an index - # that maps the values to the correct locations - if inject_nones: - return ak.contents.IndexedOptionArray.simplified( - outindex, out, parameters=self._parameters - ) - # Otherwise, if we are rearranging (e.g sorting) the contents of this layout, - # then we do NOT want to return an optional layout, - # OR we are branching - else: - return out + # If we want the None's at this depth to be injected + # into the dense ([x y z None None]) rearranger result. + # Here, we index the dense content with an index + # that maps the values to the correct locations + if inject_nones: + return ak.contents.IndexedOptionArray.simplified( + outindex, out, parameters=self._parameters + ) + # Otherwise, if we are rearranging (e.g sorting) the contents of this layout, + # then we do NOT want to return an optional layout, + # OR we are branching + else: + return out def _sort_next(self, negaxis, starts, parents, outlength, ascending, stable): assert ( From dfd9660a8e45914599bb50a63bc0e6a35e8479a8 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 24 Oct 2023 11:31:21 +0100 Subject: [PATCH 5/7] fix: drop option where necessary --- src/awkward/contents/indexedoptionarray.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/awkward/contents/indexedoptionarray.py b/src/awkward/contents/indexedoptionarray.py index 429076affa..f1d9568a55 100644 --- a/src/awkward/contents/indexedoptionarray.py +++ b/src/awkward/contents/indexedoptionarray.py @@ -1265,10 +1265,15 @@ def _argsort_next( nextoutindex.length, ) ) - - out = ak.contents.IndexedOptionArray.simplified( - nextoutindex, out, parameters=self._parameters - ) + # Drop the option type: we have ensured that we don't have any + # -1 values in `nextoutindex` now! + out = out._carry(nextoutindex, False).copy( + parameters=parameters_union(out._parameters, self._parameters) + ) + else: + out = ak.contents.IndexedOptionArray.simplified( + nextoutindex, out, parameters=self._parameters + ) inject_nones = ( True From 8cd07572b65dda67f7ae8357c0311cc00a16b6f4 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 24 Oct 2023 11:35:41 +0100 Subject: [PATCH 6/7] docs: improve comment --- awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp b/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp index bef9e2b83d..ad2febc39e 100644 --- a/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp +++ b/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp @@ -9,14 +9,14 @@ ERROR awkward_Index_nones_as_index( T* toindex, int64_t length) { int64_t n_non_null = 0; - // Assuming that `toindex` comprises of contiguous integers, and is zero-based + // Assuming that `toindex` comprises of unique, contiguous integers (or -1), and is zero-based // Compute the number of non-null values to determine our starting index for (int64_t i = 0; i < length; i++) { if (toindex[i] != -1) { n_non_null++; } } - // + // Now set the null-value indices to by monotonically increasing and unique from the final index for (int64_t i = 0; i < length; i++) { toindex[i] == -1 ? toindex[i] = n_non_null++ : toindex[i]; } From d668e0c6d9d0b8e1b91f1aed801b23d9a6c31056 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 24 Oct 2023 17:41:23 +0100 Subject: [PATCH 7/7] fix: update demonstration kernel --- kernel-specification.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel-specification.yml b/kernel-specification.yml index 2a120e505b..7d50a4df45 100644 --- a/kernel-specification.yml +++ b/kernel-specification.yml @@ -4500,14 +4500,14 @@ kernels: description: null definition: | def awkward_Index_nones_as_index(toindex, length): - last_index = 0 + num_non_null = 0 for i in range(length): - if toindex[i] > last_index: - last_index = toindex[i] + if toindex[i] != -1: + num_non_null += 1 for i in range(length): if toindex[i] == -1: - last_index = last_index + 1 - toindex[i] = last_index + toindex[i] = num_non_null + num_non_null += 1 automatic-tests: false manual-tests: []