From 1d580ec1996e75bb9be4c1c880871d0dd2718ad4 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Thu, 14 Nov 2024 08:39:54 +0100 Subject: [PATCH] Fix negative decimal e-notation parsing (#6729) --- arrow-cast/src/parse.rs | 28 ++++++++++++++++++++++------ arrow-select/src/zip.rs | 2 +- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index e332e5bbaaec..4bd94c13fe8d 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -841,19 +841,20 @@ pub fn parse_decimal<T: DecimalType>( let base = T::Native::usize_as(10); let bs = s.as_bytes(); - let (bs, negative) = match bs.first() { - Some(b'-') => (&bs[1..], true), - Some(b'+') => (&bs[1..], false), - _ => (bs, false), + let (signed, negative) = match bs.first() { + Some(b'-') => (true, true), + Some(b'+') => (true, false), + _ => (false, false), }; - if bs.is_empty() { + if bs.is_empty() || signed && bs.len() == 1 { return Err(ArrowError::ParseError(format!( "can't parse the string value {s} to decimal" ))); } - let mut bs = bs.iter().enumerate(); + // Iterate over the raw input bytes, skipping the sign if any + let mut bs = bs.iter().enumerate().skip(signed as usize); let mut is_e_notation = false; @@ -2679,6 +2680,21 @@ mod tests { 0i128, 15, ), + ( + "-1e3", + -1000000000i128, + 6, + ), + ( + "+1e3", + 1000000000i128, + 6, + ), + ( + "-1e31", + -10000000000000000000000000000000000000i128, + 6, + ), ]; for (s, i, scale) in edge_tests_128 { let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale); diff --git a/arrow-select/src/zip.rs b/arrow-select/src/zip.rs index 1f317dfd45c6..acb31dfa3bc2 100644 --- a/arrow-select/src/zip.rs +++ b/arrow-select/src/zip.rs @@ -53,7 +53,7 @@ pub fn zip( "all arrays should have the same length".into(), )); } - if truthy_is_scalar && truthy.len() != 1 { + if falsy_is_scalar && falsy.len() != 1 { return Err(ArrowError::InvalidArgumentError( "scalar arrays must have 1 element".into(), ));