From 008b5fe6dcd0d6ceeee56b06b231d9901e13cac2 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Sat, 16 Nov 2024 17:39:18 +0100 Subject: [PATCH] Support Duration in JSON Reader (#6683) * Support Duration in JSON Reader * fix clippy --- arrow-cast/src/parse.rs | 4 ++++ arrow-json/src/reader/mod.rs | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index 4bd94c13fe8d..f4c4639c1c08 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -497,6 +497,10 @@ parser_primitive!(Int64Type); parser_primitive!(Int32Type); parser_primitive!(Int16Type); parser_primitive!(Int8Type); +parser_primitive!(DurationNanosecondType); +parser_primitive!(DurationMicrosecondType); +parser_primitive!(DurationMillisecondType); +parser_primitive!(DurationSecondType); impl Parser for TimestampNanosecondType { fn parse(string: &str) -> Option { diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index bcacf6f706b8..ba31b78570c6 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -691,6 +691,10 @@ fn make_decoder( DataType::Time32(TimeUnit::Millisecond) => primitive_decoder!(Time32MillisecondType, data_type), DataType::Time64(TimeUnit::Microsecond) => primitive_decoder!(Time64MicrosecondType, data_type), DataType::Time64(TimeUnit::Nanosecond) => primitive_decoder!(Time64NanosecondType, data_type), + DataType::Duration(TimeUnit::Nanosecond) => primitive_decoder!(DurationNanosecondType, data_type), + DataType::Duration(TimeUnit::Microsecond) => primitive_decoder!(DurationMicrosecondType, data_type), + DataType::Duration(TimeUnit::Millisecond) => primitive_decoder!(DurationMillisecondType, data_type), + DataType::Duration(TimeUnit::Second) => primitive_decoder!(DurationSecondType, data_type), DataType::Decimal128(p, s) => Ok(Box::new(DecimalArrayDecoder::::new(p, s))), DataType::Decimal256(p, s) => Ok(Box::new(DecimalArrayDecoder::::new(p, s))), DataType::Boolean => Ok(Box::::default()), @@ -1330,6 +1334,37 @@ mod tests { test_time::(); } + fn test_duration() { + let buf = r#" + {"a": 1, "b": "2"} + {"a": 3, "b": null} + "#; + + let schema = Arc::new(Schema::new(vec![ + Field::new("a", T::DATA_TYPE, true), + Field::new("b", T::DATA_TYPE, true), + ])); + + let batches = do_read(buf, 1024, true, false, schema); + assert_eq!(batches.len(), 1); + + let col_a = batches[0].column_by_name("a").unwrap().as_primitive::(); + assert_eq!(col_a.null_count(), 0); + assert_eq!(col_a.values(), &[1, 3].map(T::Native::usize_as)); + + let col2 = batches[0].column_by_name("b").unwrap().as_primitive::(); + assert_eq!(col2.null_count(), 1); + assert_eq!(col2.values(), &[2, 0].map(T::Native::usize_as)); + } + + #[test] + fn test_durations() { + test_duration::(); + test_duration::(); + test_duration::(); + test_duration::(); + } + #[test] fn test_delta_checkpoint() { let json = "{\"protocol\":{\"minReaderVersion\":1,\"minWriterVersion\":2}}";