From cb013bc0190aebe68f425e698dd5ff5e5c94a86c Mon Sep 17 00:00:00 2001 From: ccat3z Date: Fri, 13 Jan 2023 09:56:10 +0000 Subject: [PATCH] fix castVARCHAR on huge date fix https://github.com/oap-project/gazelle_plugin/issues/1207 --- cpp/src/gandiva/precompiled/time.cc | 7 ++--- cpp/src/gandiva/precompiled/time_test.cc | 35 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 87bc11ba97dd2..8ffc086beff25 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -681,7 +681,7 @@ const char* castVARCHAR_date32_int64(gdv_int64 context, gdv_date32 in_day, gdv_int64 month = extractMonth_timestamp(in); gdv_int64 day = extractDay_timestamp(in); - static const int kDateStringLen = 11; + static const int kDateStringLen = 13; // year may exceed 4 digits const int char_buffer_length = kDateStringLen + 1; // snprintf adds \0 char char_buffer[char_buffer_length]; @@ -701,10 +701,7 @@ const char* castVARCHAR_date32_int64(gdv_int64 context, gdv_date32 in_day, return ""; } - *out_len = static_cast(length); - if (*out_len > kDateStringLen) { - *out_len = kDateStringLen; - } + *out_len = static_cast(res); if (*out_len <= 0) { if (*out_len < 0) { diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc index b0312db81e450..dddcd8d9c2f94 100644 --- a/cpp/src/gandiva/precompiled/time_test.cc +++ b/cpp/src/gandiva/precompiled/time_test.cc @@ -18,7 +18,11 @@ #include #include +#include +#include + #include "../execution_context.h" +#include "gandiva/gdv_function_stubs.h" #include "gandiva/precompiled/testing.h" #include "gandiva/precompiled/types.h" @@ -780,4 +784,35 @@ TEST(TestTime, castVarcharDate) { EXPECT_EQ(std::string(out, out_len), "1967-12-01"); } +TEST(TestTime, castVarcharDateWithHugeDate) { + ExecutionContext context; + auto context_ptr = reinterpret_cast(&context); + + // Year > 9999 + 
std::vector> timestamp2dates = { + // A huge timestamp may come from mistakenly passing milliseconds as seconds. + // castVARCHAR(date) guarantees the correct format, but the value may overflow, + // so some cases are skipped. + // {30000000000000ul, "952632-01-22", "9526320122"}, + // {1656932979027ul, "54476-02-12", "544760212"}, + // {1627282640731ul, "53536-07-14", "535360714"}, + {652440000000ul, "22644-12-31", "226441231"}, + {253402300800ul, "10000-01-01", "100000101"}, + }; + + for (auto& [ts_in_sec, fmt10, fmt8] : timestamp2dates) { + gdv_int32 out_len; + gdv_date64 date64 = castDATE_timestamp(ts_in_sec * 1000); + gdv_date32 date32 = castDATE32_date64(date64); + + // Test yyyy-MM-dd format + const char* out = castVARCHAR_date32_int64(context_ptr, date32, 10L, &out_len); + EXPECT_EQ(std::string(out, out_len), fmt10); + + // Test yyyyMMdd format + out = castVARCHAR_date32_int64(context_ptr, date32, 8L, &out_len); + EXPECT_EQ(std::string(out, out_len), fmt8); + } +} + } // namespace gandiva