From b48a2a9214f08e3714d8e5156cb6fffe15560122 Mon Sep 17 00:00:00 2001 From: baishen Date: Mon, 9 May 2022 11:46:12 +0800 Subject: [PATCH] support array access elements --- .../src/scalars/semi_structureds/get.rs | 67 ++++++++++++++++++- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/common/functions/src/scalars/semi_structureds/get.rs b/common/functions/src/scalars/semi_structureds/get.rs index 6b9fe21b7556..57413348dca9 100644 --- a/common/functions/src/scalars/semi_structureds/get.rs +++ b/common/functions/src/scalars/semi_structureds/get.rs @@ -15,6 +15,7 @@ use std::fmt; use common_datavalues::prelude::*; +use common_datavalues::with_match_scalar_types_error; use common_exception::ErrorCode; use common_exception::Result; use sqlparser::ast::Value; @@ -35,6 +36,7 @@ pub type GetPathFunction = GetFunctionImpl; #[derive(Clone)] pub struct GetFunctionImpl { + data_type: DataTypeImpl, display_name: String, } @@ -49,7 +51,7 @@ impl GetFunctionImpl GetFunctionImpl { + data_type: data_type.clone(), display_name: display_name.to_string(), })) } @@ -80,7 +83,13 @@ impl Function } fn return_type(&self) -> DataTypeImpl { - NullableType::new_impl(VariantType::new_impl()) + // TODO(b41sh): Support multi-dimensional array access + match &self.data_type { + DataTypeImpl::Array(array_type) => { + NullableType::new_impl(array_type.inner_type().clone()) + } + _ => NullableType::new_impl(VariantType::new_impl()), + } } fn eval( @@ -95,7 +104,12 @@ impl Function build_path_keys(columns[1].column())? 
}; - extract_value_by_path(columns[0].column(), path_keys, input_rows, IGNORE_CASE) + match &self.data_type { + DataTypeImpl::Array(array_type) => { + extract_array_value(array_type, columns[0].column(), path_keys, input_rows) + } + _ => extract_value_by_path(columns[0].column(), path_keys, input_rows, IGNORE_CASE), + } } }
@@ -253,3 +267,63 @@ pub fn extract_value_by_path(
     }
     Ok(builder.build(input_rows))
 }
+
+/// Extracts one element from every array row of `column` for each entry of `path_keys`.
+///
+/// Only single-key paths whose key is a `DataValue::UInt64` index are resolved. A null is
+/// appended when the path is empty or has several keys, when the key is not a `UInt64`,
+/// when the index is past the end of the array, or when the row does not yield an array
+/// value, so exactly one output row is produced per (path key, input row) pair.
+fn extract_array_value(
+    array_type: &ArrayType,
+    column: &ColumnRef,
+    path_keys: Vec<Vec<DataValue>>,
+    input_rows: usize,
+) -> Result<ColumnRef> {
+    // Unwrap a constant column to reach the array column it wraps.
+    let column: &ArrayColumn = if column.is_const() {
+        let const_column: &ConstColumn = Series::check_get(column)?;
+        Series::check_get(const_column.inner())?
+    } else {
+        Series::check_get(column)?
+    };
+
+    let inner_type = array_type.inner_type().data_type_id();
+    with_match_scalar_types_error!(inner_type.to_physical_type(), |$T| {
+        let mut builder = NullableColumnBuilder::<$T>::with_capacity(input_rows);
+
+        for path_key in path_keys.iter() {
+            // TODO(b41sh): Support multi-dimensional array access
+            if path_key.len() != 1 {
+                for _ in 0..column.len() {
+                    builder.append_null();
+                }
+                continue;
+            }
+            let key = &path_key[0];
+            for v in column.iter() {
+                match key {
+                    DataValue::UInt64(k) => {
+                        if let ArrayValueRef::Indexed { column, idx } = v {
+                            let value = column.get(idx);
+                            if let DataValue::Array(vals) = value {
+                                match vals.get(*k as usize) {
+                                    Some(val) => builder.append(*val.into(), true),
+                                    None => builder.append_null(),
+                                }
+                            } else {
+                                // Fetched value is not an array: append a null to keep rows aligned.
+                                builder.append_null();
+                            }
+                        } else {
+                            // Row is not an indexed array reference: append a null to keep rows aligned.
+                            builder.append_null();
+                        }
+                    },
+                    _ => builder.append_null(),
+                }
+            }
+        }
+        Ok(builder.build(input_rows))
+    })
+}