From 2bc8b54ba85af4ed976440372ec929c8ab9fbc69 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 18 Jul 2024 17:44:00 +0100 Subject: [PATCH] undo substr --- datafusion/functions/src/unicode/substr.rs | 74 +--------------------- 1 file changed, 3 insertions(+), 71 deletions(-) diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 58e39abea667b..9d15920bb6550 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -19,12 +19,10 @@ use std::any::Any; use std::cmp::max; use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringViewArray}; +use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; use arrow::datatypes::DataType; -use datafusion_common::cast::{ - as_generic_string_array, as_int64_array, as_string_view_array, -}; +use datafusion_common::cast::{as_generic_string_array, as_int64_array}; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -50,10 +48,8 @@ impl SubstrFunc { signature: Signature::one_of( vec![ Exact(vec![Utf8, Int64]), - Exact(vec![Utf8View, Int64]), Exact(vec![LargeUtf8, Int64]), Exact(vec![Utf8, Int64, Int64]), - Exact(vec![Utf8View, Int64, Int64]), Exact(vec![LargeUtf8, Int64, Int64]), ], Volatility::Immutable, @@ -83,13 +79,6 @@ impl ScalarUDFImpl for SubstrFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { DataType::Utf8 => make_scalar_function(substr::, vec![])(args), - DataType::Utf8View => match &args { - // [_, ColumnarValue::Scalar(_)] - // | [_, ColumnarValue::Scalar(_), ColumnarValue::Scalar(_)] => { - // arrow_string::substring - // } - _ => make_scalar_function(substr_view, vec![])(args), - }, DataType::LargeUtf8 => make_scalar_function(substr::, vec![])(args), other => exec_err!("Unsupported data type {other:?} for function substr"), } @@ -100,63 +89,6 @@ impl ScalarUDFImpl for SubstrFunc { } } -// TODO(aduffy): just return a manipulated Utf8View array by modifying the view only. -pub fn substr_view(args: &[ArrayRef]) -> Result { - match args.len() { - 2 => { - let string_view_array = as_string_view_array(&args[0])?; - let start_array = as_int64_array(&args[1])?; - - let result = string_view_array - .iter() - .zip(start_array.iter()) - .map(|(string_view, start)| match (string_view, start) { - (Some(string), Some(start)) => { - if start <= 0 { - Some(string.to_string()) - } else { - Some(string.chars().skip(start as usize - 1).collect()) - } - } - _ => None, - }) - .collect::(); - - Ok(Arc::new(result)) - } - 3 => { - let string_view_array = as_string_view_array(&args[0])?; - let start_array = as_int64_array(&args[1])?; - let count_array = as_int64_array(&args[2])?; - - let result = string_view_array - .iter() - .zip(start_array.iter()) - .zip(count_array.iter()) - .map(|((string, start), count)| match (string, start, count) { - (Some(string), Some(start), Some(count)) => { - if count < 0 { - exec_err!( - "negative substring length not allowed: substr(, {start}, {count})" - ) - } else { - let skip = max(0, start - 1); - let count = max(0, count + (if start < 1 { start - 1 } else { 0 })); - Ok(Some(string.chars().skip(skip as usize).take(count as usize).collect::())) - } - } - _ => Ok(None), - }) - .collect::>()?; - - Ok(Arc::new(result) as ArrayRef) - } - other => { - exec_err!("substr was called with {other} arguments. It requires 2 or 3.") - } - } -} - /// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).) /// substr('alphabet', 3) = 'phabet' /// substr('alphabet', 3, 2) = 'ph' @@ -201,7 +133,7 @@ pub fn substr(args: &[ArrayRef]) -> Result { ) } else { let skip = max(0, start - 1); - let count = max(0, count + (if start < 1 { start - 1 } else { 0 })); + let count = max(0, count + (if start < 1 {start - 1} else {0})); Ok(Some(string.chars().skip(skip as usize).take(count as usize).collect::())) } }