diff --git a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc index ffbcaf53656f0..8555474ea6da5 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc @@ -95,7 +95,7 @@ struct FixedSizeBinaryTransformExecBase { ctx->Allocate(output_width * input_nstrings)); uint8_t* output_str = values_buffer->mutable_data(); - const uint8_t* input_data = input.GetValues(1); + const uint8_t* input_data = input.GetValues(1, input.offset * input_width); for (int64_t i = 0; i < input_nstrings; i++) { if (!input.IsNull(i)) { const uint8_t* input_string = input_data + i * input_width; @@ -2594,8 +2594,12 @@ struct SliceBytesTransform : StringSliceTransformBase { ((start <= stop) && (step < 0))) { return 0; } - return static_cast(std::max( - static_cast(0), (stop - start + (step - (step > 0 ? 1 : -1))) / step)); + + if (step < 0) { + return std::max(static_cast(0), (stop - start + step + 1) / step); + } + + return std::max(static_cast(0), (stop - start + step - 1) / step); } }; diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 692455a8555da..1e5d82988fb6b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -19,6 +19,7 @@ #include #include #include +#include "arrow/type_fwd.h" #include #include @@ -712,18 +713,31 @@ TEST_F(TestFixedSizeBinaryKernels, BinaryLength) { "[6, null, 6]"); } -TEST_F(TestFixedSizeBinaryKernels, SliceBytesBasic) { +TEST_F(TestFixedSizeBinaryKernels, BinarySliceEmpty) { SliceOptions options{2, 4}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(2), - R"(["ca", "fd"])", &options); + CheckScalarUnary("binary_slice", ArrayFromJSON(fixed_size_binary(0), R"([""])"), + ArrayFromJSON(fixed_size_binary(0), R"([""])"), &options); + + CheckScalarUnary("binary_slice", + ArrayFromJSON(fixed_size_binary(0), R"(["", null, ""])"), + ArrayFromJSON(fixed_size_binary(0), R"(["", null, ""])"), &options); + + CheckUnary("binary_slice", R"([null, null])", fixed_size_binary(2), R"([null, null])", + &options); +} + +TEST_F(TestFixedSizeBinaryKernels, BinarySliceBasic) { + SliceOptions options{2, 4}; + CheckUnary("binary_slice", R"(["abcdef", null, "foobaz"])", fixed_size_binary(2), + R"(["cd", null, "ob"])", &options); SliceOptions options_edgecase_1{-3, 1}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(0), + CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(0), R"(["", ""])", &options_edgecase_1); SliceOptions options_edgecase_2{-10, -3}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(3), - R"(["abc", "def"])", &options_edgecase_2); + CheckUnary("binary_slice", R"(["abcdef", "foobaz", null])", fixed_size_binary(3), + R"(["abc", "foo", null])", &options_edgecase_2); auto input = ArrayFromJSON(this->type(), R"(["foobaz"])"); EXPECT_RAISES_WITH_MESSAGE_THAT( @@ -737,75 +751,75 @@ TEST_F(TestFixedSizeBinaryKernels, SliceBytesBasic) { CallFunction("binary_slice", {input}, &options_invalid)); } -TEST_F(TestFixedSizeBinaryKernels, SliceBytesPosPos) { +TEST_F(TestFixedSizeBinaryKernels, BinarySlicePosPos) { SliceOptions options_step{1, 5, 2}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(2), - R"(["ba", "ed"])", &options_step); + CheckUnary("binary_slice", R"([null, "abcdef", "foobaz"])", fixed_size_binary(2), + R"([null, "bd", "ob"])", &options_step); SliceOptions options_step_neg{5, 0, -2}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(3), - R"(["cab", "fde"])", &options_step_neg); + CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(3), + R"(["fdb", "zbo"])", &options_step_neg); } -TEST_F(TestFixedSizeBinaryKernels, SliceBytesPosNeg) { +TEST_F(TestFixedSizeBinaryKernels, BinarySlicePosNeg) { SliceOptions options{2, -1}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(3), - R"(["cab", "fde"])", &options); + CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(3), + R"(["cde", "oba"])", &options); SliceOptions options_step{1, -1, 2}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(2), - R"(["ba", "ed"])", &options_step); + CheckUnary("binary_slice", R"(["abcdef", null, "foobaz"])", fixed_size_binary(2), + R"(["bd", null, "ob"])", &options_step); SliceOptions options_step_neg{5, -4, -2}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(2), - R"(["ca", "fd"])", &options_step_neg); + CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(2), + R"(["fd", "zb"])", &options_step_neg); options_step_neg.stop = -6; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(3), - R"(["cab", "fde"])", &options_step_neg); + CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(3), + R"(["fdb", "zbo"])", &options_step_neg); } -TEST_F(TestFixedSizeBinaryKernels, SliceBytesNegNeg) { +TEST_F(TestFixedSizeBinaryKernels, BinarySliceNegNeg) { SliceOptions options{-2, -1}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(1), - R"(["b", "e"])", &options); + CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(1), + R"(["e", "a"])", &options); SliceOptions options_step{-4, -1, 2}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(2), - R"(["cb", "fe"])", &options_step); + CheckUnary("binary_slice", R"(["abcdef", "foobaz", null, null])", fixed_size_binary(2), + R"(["ce", "oa", null, null])", &options_step); SliceOptions options_step_neg{-1, -3, -2}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(1), - R"(["c", "f"])", &options_step_neg); + CheckUnary("binary_slice", R"([null, "abcdef", null, "foobaz"])", fixed_size_binary(1), + R"([null, "f", null, "z"])", &options_step_neg); options_step_neg.stop = -4; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(2), - R"(["ca", "fd"])", &options_step_neg); + CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(2), + R"(["fd", "zb"])", &options_step_neg); } -TEST_F(TestFixedSizeBinaryKernels, SliceBytesNegPos) { +TEST_F(TestFixedSizeBinaryKernels, BinarySliceNegPos) { SliceOptions options{-2, 4}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(0), + CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(0), R"(["", ""])", &options); SliceOptions options_step{-4, 5, 2}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(2), - R"(["cb", "fe"])", &options_step); + CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(2), + R"(["ce", "oa"])", &options_step); SliceOptions options_step_neg{-1, 1, -2}; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(2), - R"(["ca", "fd"])", &options_step_neg); + CheckUnary("binary_slice", R"([null, "abcdef", "foobaz", null])", fixed_size_binary(2), + R"([null, "fd", "zb", null])", &options_step_neg); options_step_neg.stop = 0; - CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(3), - R"(["cab", "fde"])", &options_step_neg); + CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(3), + R"(["fdb", "zbo"])", &options_step_neg); } TEST_F(TestFixedSizeBinaryKernels, BinaryReplaceSlice) { ReplaceSliceOptions options{0, 1, "XX"}; CheckUnary("binary_replace_slice", "[]", fixed_size_binary(7), "[]", &options); - CheckUnary("binary_replace_slice", R"([null, "abcdef"])", fixed_size_binary(7), - R"([null, "XXbcdef"])", &options); + CheckUnary("binary_replace_slice", R"(["foobaz", null, "abcdef"])", + fixed_size_binary(7), R"(["XXoobaz", null, "XXbcdef"])", &options); ReplaceSliceOptions options_shrink{0, 2, ""}; CheckUnary("binary_replace_slice", R"([null, "abcdef"])", fixed_size_binary(4), @@ -820,8 +834,8 @@ TEST_F(TestFixedSizeBinaryKernels, BinaryReplaceSlice) { R"([null, "abXXef"])", &options_middle); ReplaceSliceOptions options_neg_start{-3, -2, "XX"}; - CheckUnary("binary_replace_slice", R"([null, "abcdef"])", fixed_size_binary(7), - R"([null, "abcXXef"])", &options_neg_start); + CheckUnary("binary_replace_slice", R"(["foobaz", null, "abcdef"])", + fixed_size_binary(7), R"(["fooXXaz", null, "abcXXef"])", &options_neg_start); ReplaceSliceOptions options_neg_end{2, -2, "XX"}; CheckUnary("binary_replace_slice", R"([null, "abcdef"])", fixed_size_binary(6), @@ -896,7 +910,7 @@ TEST_F(TestFixedSizeBinaryKernels, CountSubstringIgnoreCase) { offset_type(), "[0, null, 0, 1, 1, 1, 2, 2, 1]", &options); MatchSubstringOptions options_empty{"", /*ignore_case=*/true}; - CheckUnary("count_substring", R"([" ", null, "abcABc"])", offset_type(), + CheckUnary("count_substring", R"([" ", null, "abcdef"])", offset_type(), "[7, null, 7]", &options_empty); }