From d7f13ab68a360f20294eb4e696c73a0331d90d07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Wed, 13 Nov 2024 12:16:53 +0100 Subject: [PATCH] datalake/avro: fixed encoding decimal values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Java `BigInteger.toByteArray()` method is used to encode Avro decimals the method returns an array where the first element is the most significant byte of the encoded decimal. This commit fixes Redpanda Avro Bigdecimal encoding. Signed-off-by: Michał Maślanka --- src/v/iceberg/avro_decimal.h | 18 +++++++------- src/v/iceberg/tests/values_avro_test.cc | 32 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/v/iceberg/avro_decimal.h b/src/v/iceberg/avro_decimal.h index 547829f7ad0d..ccba8ca8acec 100644 --- a/src/v/iceberg/avro_decimal.h +++ b/src/v/iceberg/avro_decimal.h @@ -16,20 +16,20 @@ #include namespace iceberg { /** - * Converts a decimal into an array of bytes (big endian) + * Converts a decimal into an array of bytes (big endian), this works the same + * way as the Java's BigInteger.toByteArray() method. */ - inline bytes encode_avro_decimal(absl::int128 decimal) { - auto high_half = absl::Uint128High64(decimal); - auto low_half = absl::Uint128Low64(decimal); + auto high_half = ss::cpu_to_be(absl::Uint128High64(decimal)); + auto low_half = ss::cpu_to_be(absl::Uint128Low64(decimal)); bytes decimal_bytes(bytes::initialized_zero{}, 16); for (int i = 0; i < 8; i++) { // NOLINTNEXTLINE(hicpp-signed-bitwise) - decimal_bytes[i] = (low_half >> (i * 8)) & 0xFF; + decimal_bytes[i] = (high_half >> (i * 8)) & 0xFF; // NOLINTNEXTLINE(hicpp-signed-bitwise) - decimal_bytes[i + 8] = (high_half >> (i * 8)) & 0xFF; + decimal_bytes[i + 8] = (low_half >> (i * 8)) & 0xFF; } return decimal_bytes; @@ -41,12 +41,12 @@ inline absl::int128 decode_avro_decimal(bytes bytes) { int64_t high_half = 0; uint64_t low_half = 0; for (size_t i = 0; i < 8; i++) { - low_half |= static_cast(bytes[i]) << i * 8; // NOLINTNEXTLINE(hicpp-signed-bitwise) - high_half |= static_cast(bytes[8 + i]) << i * 8; + high_half |= static_cast(bytes[i]) << i * 8; + low_half |= static_cast(bytes[i + 8]) << i * 8; } - return absl::MakeInt128(high_half, low_half); + return absl::MakeInt128(ss::be_to_cpu(high_half), ss::be_to_cpu(low_half)); } inline iobuf avro_decimal_to_iobuf(absl::int128 decimal, size_t max_size) { diff --git a/src/v/iceberg/tests/values_avro_test.cc b/src/v/iceberg/tests/values_avro_test.cc index 436d3d5b2ffe..9aab7111d251 100644 --- a/src/v/iceberg/tests/values_avro_test.cc +++ b/src/v/iceberg/tests/values_avro_test.cc @@ -113,3 +113,35 @@ TEST(ValuesAvroTest, TestDecimalConversionsLimitedSize) { ASSERT_EQ( decimal, iobuf_to_avro_decimal(avro_decimal_to_iobuf(decimal, 8))); } + +TEST(ValuesAvroTest, TestDecimalConversionAgainstJavaBigInteger) { + // value of 65536 + ASSERT_EQ( + encode_avro_decimal(absl::MakeInt128(0, 65536)), + bytes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0})); + // value of 35209893291843950283695459221 + ASSERT_EQ( + encode_avro_decimal(absl::MakeInt128(1908732140, 89247320981)), + bytes({0, 0, 0, 0, 113, 196, 240, 236, 0, 0, 0, 20, 199, 142, 11, 149})); + + // value of -18218949492341193300753118315 + ASSERT_EQ( + encode_avro_decimal(absl::MakeInt128(-987651231, 89247320981)), + bytes( + {255, + 255, + 255, + 255, + 197, + 33, + 163, + 97, + 0, + 0, + 0, + 20, + 199, + 142, + 11, + 149})); +}