From 2e204edf52b5dd192a995e7eeac91ba7bfa42377 Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Mon, 21 Oct 2024 14:27:20 -0400 Subject: [PATCH 1/4] add page --- .../core/src/bin/print_functions_docs.rs | 18 ++++- datafusion/expr/src/lib.rs | 2 + datafusion/expr/src/udf.rs | 7 -- datafusion/expr/src/udsf.rs | 32 +++++++++ datafusion/functions-nested/src/map.rs | 50 +++++++++++++- dev/update_function_docs.sh | 47 +++++++++++++ .../user-guide/sql/special_functions_new.md | 66 +++++++++++++++++++ 7 files changed, 210 insertions(+), 12 deletions(-) create mode 100644 datafusion/expr/src/udsf.rs create mode 100644 docs/source/user-guide/sql/special_functions_new.md diff --git a/datafusion/core/src/bin/print_functions_docs.rs b/datafusion/core/src/bin/print_functions_docs.rs index d87c3cefe666..924f0c0e95aa 100644 --- a/datafusion/core/src/bin/print_functions_docs.rs +++ b/datafusion/core/src/bin/print_functions_docs.rs @@ -17,8 +17,8 @@ use datafusion::execution::SessionStateDefaults; use datafusion_expr::{ - aggregate_doc_sections, scalar_doc_sections, window_doc_sections, AggregateUDF, - DocSection, Documentation, ScalarUDF, WindowUDF, + aggregate_doc_sections, scalar_doc_sections, special_doc_sections, + window_doc_sections, AggregateUDF, DocSection, Documentation, ScalarUDF, WindowUDF, }; use hashbrown::HashSet; use itertools::Itertools; @@ -35,7 +35,7 @@ fn main() { if args.len() != 2 { panic!( - "Usage: {} type (one of 'aggregate', 'scalar', 'window')", + "Usage: {} type (one of 'aggregate', 'scalar', 'special', 'window')", args[0] ); } @@ -44,6 +44,7 @@ fn main() { let docs = match function_type.as_str() { "aggregate" => print_aggregate_docs(), "scalar" => print_scalar_docs(), + "special" => print_special_docs(), "window" => print_window_docs(), _ => { panic!("Unknown function type: {}", function_type) @@ -73,6 +74,17 @@ fn print_scalar_docs() -> String { print_docs(providers, scalar_doc_sections::doc_sections()) } +fn print_special_docs() -> String { + let mut 
providers: Vec> = vec![]; + + // Iterates through the default_scalar_functions to retrieve the special functions + for f in SessionStateDefaults::default_scalar_functions() { + providers.push(Box::new(f.as_ref().clone())); + } + + print_docs(providers, special_doc_sections::doc_sections()) +} + fn print_window_docs() -> String { let mut providers: Vec> = vec![]; diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index 849d9604808c..3291b76f8fdd 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -35,6 +35,7 @@ mod table_source; mod udaf; mod udf; mod udf_docs; +mod udsf; mod udwf; pub mod conditional_expressions; @@ -96,6 +97,7 @@ pub use udaf::{ }; pub use udf::{scalar_doc_sections, ScalarUDF, ScalarUDFImpl}; pub use udf_docs::{DocSection, Documentation, DocumentationBuilder}; +pub use udsf::special_doc_sections; pub use udwf::{window_doc_sections, ReversedUDWF, WindowUDF, WindowUDFImpl}; pub use window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits}; diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 3759fb18f56d..2fed0bac5a60 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -767,7 +767,6 @@ pub mod scalar_doc_sections { DOC_SECTION_DATETIME, DOC_SECTION_ARRAY, DOC_SECTION_STRUCT, - DOC_SECTION_MAP, DOC_SECTION_HASHING, DOC_SECTION_OTHER, ] @@ -826,12 +825,6 @@ The following regular expression functions are supported:"#, description: None, }; - pub const DOC_SECTION_MAP: DocSection = DocSection { - include: true, - label: "Map Functions", - description: None, - }; - pub const DOC_SECTION_HASHING: DocSection = DocSection { include: true, label: "Hashing Functions", diff --git a/datafusion/expr/src/udsf.rs b/datafusion/expr/src/udsf.rs new file mode 100644 index 000000000000..41ee3b6155ae --- /dev/null +++ b/datafusion/expr/src/udsf.rs @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`SpecialUDF`]: Special User Defined Functions + +pub mod special_doc_sections { + use crate::DocSection; + + pub fn doc_sections() -> Vec { + vec![DOC_SECTION_MAP] + } + + pub const DOC_SECTION_MAP: DocSection = DocSection { + include: true, + label: "Map Functions", + description: None, + }; +} diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index 29afe4a7f3be..38eb286e9ebb 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -17,7 +17,7 @@ use std::any::Any; use std::collections::{HashSet, VecDeque}; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use arrow::array::ArrayData; use arrow_array::{Array, ArrayRef, MapArray, OffsetSizeTrait, StructArray}; @@ -27,7 +27,10 @@ use arrow_schema::{DataType, Field, SchemaBuilder}; use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays}; use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::special_doc_sections::DOC_SECTION_MAP; +use datafusion_expr::{ + ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, +}; use crate::make_array::make_array; 
@@ -238,7 +241,50 @@ impl ScalarUDFImpl for MapFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { make_map_batch(args) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_map_doc()) + } } + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_map_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_MAP) + .with_description( + "Returns an Arrow map with the specified key-value pairs.", + ) + .with_syntax_example("map(key, value)\nmap(key: value)") + .with_sql_example( + r#"```sql +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); +---- +{POST: 41, HEAD: 33, PATCH: } + +SELECT MAP([[1,2], [3,4]], ['a', 'b']); +---- +{[1, 2]: a, [3, 4]: b} + +SELECT MAP { 'a': 1, 'b': 2 }; +---- +{a: 1, b: 2} +```"#, + ) + .with_argument( + "key", + "Expression to be used for key. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed.", + ) + .with_argument( + "value", + "Expression to be used for value. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed.", + ) + .build() + .unwrap() +}) +} + fn get_element_type(data_type: &DataType) -> Result<&DataType> { match data_type { DataType::List(element) => Ok(element.data_type()), diff --git a/dev/update_function_docs.sh b/dev/update_function_docs.sh index f1f26c8b2f58..33ae35e84c43 100755 --- a/dev/update_function_docs.sh +++ b/dev/update_function_docs.sh @@ -298,3 +298,50 @@ npx prettier@2.3.2 --write "$TARGET_FILE" echo "'$TARGET_FILE' successfully updated!" 
+TARGET_FILE="docs/source/user-guide/sql/special_functions_new.md" +PRINT_SPECIAL_FUNCTION_DOCS_COMMAND="cargo run --manifest-path datafusion/core/Cargo.toml --bin print_functions_docs -- special" + +echo "Inserting header" +cat <<'EOF' > "$TARGET_FILE" + + + + +# Special Functions (NEW) + +Note: this documentation is in the process of being migrated to be [automatically created from the codebase]. + +[automatically created from the codebase]: https://github.com/apache/datafusion/issues/12740 + +EOF + +echo "Running CLI and inserting special function docs table" +$PRINT_SPECIAL_FUNCTION_DOCS_COMMAND >> "$TARGET_FILE" + +echo "Running prettier" +npx prettier@2.3.2 --write "$TARGET_FILE" + +echo "'$TARGET_FILE' successfully updated!" \ No newline at end of file diff --git a/docs/source/user-guide/sql/special_functions_new.md b/docs/source/user-guide/sql/special_functions_new.md new file mode 100644 index 000000000000..90507a08714c --- /dev/null +++ b/docs/source/user-guide/sql/special_functions_new.md @@ -0,0 +1,66 @@ + + + + +# Special Functions (NEW) + +Note: this documentation is in the process of being migrated to be [automatically created from the codebase]. + +[automatically created from the codebase]: https://github.com/apache/datafusion/issues/12740 + +## Map Functions + +- [map](#map) + +### `map` + +Returns an Arrow map with the specified key-value pairs. + +``` +map(key, value) +map(key: value) +``` + +#### Arguments + +- **key**: Expression to be used for key. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed. +- **value**: Expression to be used for value. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed. 
+ +#### Example + +```sql +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); +---- +{POST: 41, HEAD: 33, PATCH: } + +SELECT MAP([[1,2], [3,4]], ['a', 'b']); +---- +{[1, 2]: a, [3, 4]: b} + +SELECT MAP { 'a': 1, 'b': 2 }; +---- +{a: 1, b: 2} +``` From 720a4b71660c10ed53364b30797ca62c1a911528 Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Mon, 21 Oct 2024 17:41:13 -0400 Subject: [PATCH 2/4] small fixes --- .../core/src/bin/print_functions_docs.rs | 18 ++----- datafusion/expr/src/lib.rs | 2 - datafusion/expr/src/udf.rs | 7 +++ datafusion/expr/src/udsf.rs | 32 ------------- datafusion/functions-nested/src/map.rs | 4 +- dev/update_function_docs.sh | 48 ------------------- .../user-guide/sql/scalar_functions_new.md | 34 +++++++++++++ .../user-guide/sql/special_functions_new.md | 33 ------------- 8 files changed, 46 insertions(+), 132 deletions(-) delete mode 100644 datafusion/expr/src/udsf.rs diff --git a/datafusion/core/src/bin/print_functions_docs.rs b/datafusion/core/src/bin/print_functions_docs.rs index 924f0c0e95aa..d87c3cefe666 100644 --- a/datafusion/core/src/bin/print_functions_docs.rs +++ b/datafusion/core/src/bin/print_functions_docs.rs @@ -17,8 +17,8 @@ use datafusion::execution::SessionStateDefaults; use datafusion_expr::{ - aggregate_doc_sections, scalar_doc_sections, special_doc_sections, - window_doc_sections, AggregateUDF, DocSection, Documentation, ScalarUDF, WindowUDF, + aggregate_doc_sections, scalar_doc_sections, window_doc_sections, AggregateUDF, + DocSection, Documentation, ScalarUDF, WindowUDF, }; use hashbrown::HashSet; use itertools::Itertools; @@ -35,7 +35,7 @@ fn main() { if args.len() != 2 { panic!( - "Usage: {} type (one of 'aggregate', 'scalar', 'special', 'window')", + "Usage: {} type (one of 'aggregate', 'scalar', 'window')", args[0] ); } @@ -44,7 +44,6 @@ fn main() { let docs = match function_type.as_str() { "aggregate" => print_aggregate_docs(), "scalar" => print_scalar_docs(), - "special" => print_special_docs(), "window" => 
print_window_docs(), _ => { panic!("Unknown function type: {}", function_type) @@ -74,17 +73,6 @@ fn print_scalar_docs() -> String { print_docs(providers, scalar_doc_sections::doc_sections()) } -fn print_special_docs() -> String { - let mut providers: Vec> = vec![]; - - // Iterates through the default_scalar_functions to retrieve the special functions - for f in SessionStateDefaults::default_scalar_functions() { - providers.push(Box::new(f.as_ref().clone())); - } - - print_docs(providers, special_doc_sections::doc_sections()) -} - fn print_window_docs() -> String { let mut providers: Vec> = vec![]; diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index 3291b76f8fdd..849d9604808c 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -35,7 +35,6 @@ mod table_source; mod udaf; mod udf; mod udf_docs; -mod udsf; mod udwf; pub mod conditional_expressions; @@ -97,7 +96,6 @@ pub use udaf::{ }; pub use udf::{scalar_doc_sections, ScalarUDF, ScalarUDFImpl}; pub use udf_docs::{DocSection, Documentation, DocumentationBuilder}; -pub use udsf::special_doc_sections; pub use udwf::{window_doc_sections, ReversedUDWF, WindowUDF, WindowUDFImpl}; pub use window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits}; diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 2fed0bac5a60..fc5b7cdf5ba7 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -768,6 +768,7 @@ pub mod scalar_doc_sections { DOC_SECTION_ARRAY, DOC_SECTION_STRUCT, DOC_SECTION_HASHING, + DOC_SECTION_SPECIAL, DOC_SECTION_OTHER, ] } @@ -831,6 +832,12 @@ The following regular expression functions are supported:"#, description: None, }; + pub const DOC_SECTION_SPECIAL: DocSection = DocSection { + include: true, + label: "Special Functions", + description: None, + }; + pub const DOC_SECTION_OTHER: DocSection = DocSection { include: true, label: "Other Functions", diff --git a/datafusion/expr/src/udsf.rs b/datafusion/expr/src/udsf.rs 
deleted file mode 100644 index 41ee3b6155ae..000000000000 --- a/datafusion/expr/src/udsf.rs +++ /dev/null @@ -1,32 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! [`SpecialUDF`]: Special User Defined Functions - -pub mod special_doc_sections { - use crate::DocSection; - - pub fn doc_sections() -> Vec { - vec![DOC_SECTION_MAP] - } - - pub const DOC_SECTION_MAP: DocSection = DocSection { - include: true, - label: "Map Functions", - description: None, - }; -} diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index 38eb286e9ebb..60261ffac361 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -27,7 +27,7 @@ use arrow_schema::{DataType, Field, SchemaBuilder}; use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays}; use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::special_doc_sections::DOC_SECTION_MAP; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_SPECIAL; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; @@ -252,7 +252,7 @@ static DOCUMENTATION: OnceLock = 
OnceLock::new(); fn get_map_doc() -> &'static Documentation { DOCUMENTATION.get_or_init(|| { Documentation::builder() - .with_doc_section(DOC_SECTION_MAP) + .with_doc_section(DOC_SECTION_SPECIAL) .with_description( "Returns an Arrow map with the specified key-value pairs.", ) diff --git a/dev/update_function_docs.sh b/dev/update_function_docs.sh index 33ae35e84c43..13bc22afcc13 100755 --- a/dev/update_function_docs.sh +++ b/dev/update_function_docs.sh @@ -297,51 +297,3 @@ echo "Running prettier" npx prettier@2.3.2 --write "$TARGET_FILE" echo "'$TARGET_FILE' successfully updated!" - -TARGET_FILE="docs/source/user-guide/sql/special_functions_new.md" -PRINT_SPECIAL_FUNCTION_DOCS_COMMAND="cargo run --manifest-path datafusion/core/Cargo.toml --bin print_functions_docs -- special" - -echo "Inserting header" -cat <<'EOF' > "$TARGET_FILE" - - - - -# Special Functions (NEW) - -Note: this documentation is in the process of being migrated to be [automatically created from the codebase]. - -[automatically created from the codebase]: https://github.com/apache/datafusion/issues/12740 - -EOF - -echo "Running CLI and inserting special function docs table" -$PRINT_SPECIAL_FUNCTION_DOCS_COMMAND >> "$TARGET_FILE" - -echo "Running prettier" -npx prettier@2.3.2 --write "$TARGET_FILE" - -echo "'$TARGET_FILE' successfully updated!" \ No newline at end of file diff --git a/docs/source/user-guide/sql/scalar_functions_new.md b/docs/source/user-guide/sql/scalar_functions_new.md index 1f4ec1c27858..bfe2a28a73d5 100644 --- a/docs/source/user-guide/sql/scalar_functions_new.md +++ b/docs/source/user-guide/sql/scalar_functions_new.md @@ -4054,6 +4054,40 @@ sha512(expression) +-------------------------------------------+ ``` +## Special Functions + +- [map](#map) + +### `map` + +Returns an Arrow map with the specified key-value pairs. + +``` +map(key, value) +map(key: value) +``` + +#### Arguments + +- **key**: Expression to be used for key. 
Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed. +- **value**: Expression to be used for value. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed. + +#### Example + +```sql +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); +---- +{POST: 41, HEAD: 33, PATCH: } + +SELECT MAP([[1,2], [3,4]], ['a', 'b']); +---- +{[1, 2]: a, [3, 4]: b} + +SELECT MAP { 'a': 1, 'b': 2 }; +---- +{a: 1, b: 2} +``` + ## Other Functions - [arrow_cast](#arrow_cast) diff --git a/docs/source/user-guide/sql/special_functions_new.md b/docs/source/user-guide/sql/special_functions_new.md index 90507a08714c..9cdb543bb200 100644 --- a/docs/source/user-guide/sql/special_functions_new.md +++ b/docs/source/user-guide/sql/special_functions_new.md @@ -31,36 +31,3 @@ Note: this documentation is in the process of being migrated to be [automaticall [automatically created from the codebase]: https://github.com/apache/datafusion/issues/12740 -## Map Functions - -- [map](#map) - -### `map` - -Returns an Arrow map with the specified key-value pairs. - -``` -map(key, value) -map(key: value) -``` - -#### Arguments - -- **key**: Expression to be used for key. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed. -- **value**: Expression to be used for value. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed. 
- -#### Example - -```sql -SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); ----- -{POST: 41, HEAD: 33, PATCH: } - -SELECT MAP([[1,2], [3,4]], ['a', 'b']); ----- -{[1, 2]: a, [3, 4]: b} - -SELECT MAP { 'a': 1, 'b': 2 }; ----- -{a: 1, b: 2} -``` From 65afb94f5e60d2ab2294f7f4cfb010daf8c1c43e Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Mon, 21 Oct 2024 17:45:01 -0400 Subject: [PATCH 3/4] delete md --- .../user-guide/sql/special_functions_new.md | 33 ------------------- 1 file changed, 33 deletions(-) delete mode 100644 docs/source/user-guide/sql/special_functions_new.md diff --git a/docs/source/user-guide/sql/special_functions_new.md b/docs/source/user-guide/sql/special_functions_new.md deleted file mode 100644 index 9cdb543bb200..000000000000 --- a/docs/source/user-guide/sql/special_functions_new.md +++ /dev/null @@ -1,33 +0,0 @@ - - - - -# Special Functions (NEW) - -Note: this documentation is in the process of being migrated to be [automatically created from the codebase]. - -[automatically created from the codebase]: https://github.com/apache/datafusion/issues/12740 - From 6f6bdabf18232f9ec3a4127c9a9473b33b03b3ce Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Wed, 23 Oct 2024 16:12:30 -0400 Subject: [PATCH 4/4] Migrate map functions --- datafusion/expr/src/udf.rs | 10 +- datafusion/functions-nested/src/map.rs | 85 +++++---- .../functions-nested/src/map_extract.rs | 49 ++++- datafusion/functions-nested/src/map_keys.rs | 41 +++- datafusion/functions-nested/src/map_values.rs | 41 +++- .../source/user-guide/sql/scalar_functions.md | 145 -------------- .../user-guide/sql/scalar_functions_new.md | 178 ++++++++++++++---- 7 files changed, 324 insertions(+), 225 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index fc5b7cdf5ba7..3759fb18f56d 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -767,8 +767,8 @@ pub mod scalar_doc_sections { DOC_SECTION_DATETIME, DOC_SECTION_ARRAY, DOC_SECTION_STRUCT, 
+ DOC_SECTION_MAP, DOC_SECTION_HASHING, - DOC_SECTION_SPECIAL, DOC_SECTION_OTHER, ] } @@ -826,15 +826,15 @@ The following regular expression functions are supported:"#, description: None, }; - pub const DOC_SECTION_HASHING: DocSection = DocSection { + pub const DOC_SECTION_MAP: DocSection = DocSection { include: true, - label: "Hashing Functions", + label: "Map Functions", description: None, }; - pub const DOC_SECTION_SPECIAL: DocSection = DocSection { + pub const DOC_SECTION_HASHING: DocSection = DocSection { include: true, - label: "Special Functions", + label: "Hashing Functions", description: None, }; diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index 60261ffac361..d7dce3bacbe1 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -27,7 +27,7 @@ use arrow_schema::{DataType, Field, SchemaBuilder}; use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays}; use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_SPECIAL; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; @@ -251,38 +251,57 @@ static DOCUMENTATION: OnceLock = OnceLock::new(); fn get_map_doc() -> &'static Documentation { DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_SPECIAL) - .with_description( - "Returns an Arrow map with the specified key-value pairs.", - ) - .with_syntax_example("map(key, value)\nmap(key: value)") - .with_sql_example( - r#"```sql -SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); ----- -{POST: 41, HEAD: 33, PATCH: } - -SELECT MAP([[1,2], [3,4]], ['a', 'b']); ----- -{[1, 2]: a, [3, 4]: b} - -SELECT MAP { 'a': 1, 'b': 2 }; ----- -{a: 1, b: 2} -```"#, - ) - .with_argument( - "key", - "Expression to be used for key. 
Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed.", - ) - .with_argument( - "value", - "Expression to be used for value. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed.", - ) - .build() - .unwrap() -}) + Documentation::builder() + .with_doc_section(DOC_SECTION_MAP) + .with_description( + "Returns an Arrow map with the specified key-value pairs.\n\n\ + The `make_map` function creates a map from two lists: one for keys and one for values. Each key must be unique and non-null." + ) + .with_syntax_example( + "map(key, value)\nmap(key: value)\nmake_map(['key1', 'key2'], ['value1', 'value2'])" + ) + .with_sql_example( + r#"```sql + -- Using map function + SELECT MAP('type', 'test'); + ---- + {type: test} + + SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); + ---- + {POST: 41, HEAD: 33, PATCH: } + + SELECT MAP([[1,2], [3,4]], ['a', 'b']); + ---- + {[1, 2]: a, [3, 4]: b} + + SELECT MAP { 'a': 1, 'b': 2 }; + ---- + {a: 1, b: 2} + + -- Using make_map function + SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]); + ---- + {POST: 41, HEAD: 33} + + SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]); + ---- + {key1: value1, key2: } + ```"# + ) + .with_argument( + "key", + "For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\ + For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null." + ) + .with_argument( + "value", + "For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\ + For `make_map`: The list of values to be mapped to the corresponding keys." 
+ ) + .build() + .unwrap() + }) } fn get_element_type(data_type: &DataType) -> Result<&DataType> { diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 9f0c4ad29c60..d2bb6595fe76 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -26,9 +26,12 @@ use arrow_buffer::OffsetBuffer; use arrow_schema::Field; use datafusion_common::{cast::as_map_array, exec_err, Result}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; use std::any::Any; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use std::vec; use crate::utils::{get_map_entry_field, make_scalar_function}; @@ -101,6 +104,48 @@ impl ScalarUDFImpl for MapExtract { field.first().unwrap().data_type().clone(), ]) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_map_extract_doc()) + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_map_extract_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_MAP) + .with_description( + "Returns a list containing the value for the given key or an empty list if the key is not present in the map.", + ) + .with_syntax_example("map_extract(map, key)") + .with_sql_example( + r#"```sql +SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); +---- +[1] + +SELECT map_extract(MAP {1: 'one', 2: 'two'}, 2); +---- +['two'] + +SELECT map_extract(MAP {'x': 10, 'y': NULL, 'z': 30}, 'y'); +---- +[] +```"#, + ) + .with_argument( + "map", + "Map expression. Can be a constant, column, or function, and any combination of map operators.", + ) + .with_argument( + "key", + "Key to extract from the map. 
Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed.", + ) + .build() + .unwrap() + }) } fn general_map_extract_inner( diff --git a/datafusion/functions-nested/src/map_keys.rs b/datafusion/functions-nested/src/map_keys.rs index 0b1cebb27c86..f28de1c3b2c7 100644 --- a/datafusion/functions-nested/src/map_keys.rs +++ b/datafusion/functions-nested/src/map_keys.rs @@ -21,12 +21,13 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow_array::{Array, ArrayRef, ListArray}; use arrow_schema::{DataType, Field}; use datafusion_common::{cast::as_map_array, exec_err, Result}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; use datafusion_expr::{ - ArrayFunctionSignature, ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, - Volatility, + ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, + TypeSignature, Volatility, }; use std::any::Any; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; make_udf_expr_and_func!( MapKeysFunc, @@ -81,6 +82,40 @@ impl ScalarUDFImpl for MapKeysFunc { fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { make_scalar_function(map_keys_inner)(args) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_map_keys_doc()) + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_map_keys_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_MAP) + .with_description( + "Returns a list of all keys in the map." + ) + .with_syntax_example("map_keys(map)") + .with_sql_example( + r#"```sql +SELECT map_keys(MAP {'a': 1, 'b': NULL, 'c': 3}); +---- +[a, b, c] + +SELECT map_keys(map([100, 5], [42, 43])); +---- +[100, 5] +```"#, + ) + .with_argument( + "map", + "Map expression. Can be a constant, column, or function, and any combination of map operators." 
+ ) + .build() + .unwrap() + }) } fn map_keys_inner(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions-nested/src/map_values.rs b/datafusion/functions-nested/src/map_values.rs index 58c0d74eed5f..2b19d9fbbc76 100644 --- a/datafusion/functions-nested/src/map_values.rs +++ b/datafusion/functions-nested/src/map_values.rs @@ -21,12 +21,13 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow_array::{Array, ArrayRef, ListArray}; use arrow_schema::{DataType, Field}; use datafusion_common::{cast::as_map_array, exec_err, Result}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; use datafusion_expr::{ - ArrayFunctionSignature, ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, - Volatility, + ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, + TypeSignature, Volatility, }; use std::any::Any; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; make_udf_expr_and_func!( MapValuesFunc, @@ -81,6 +82,40 @@ impl ScalarUDFImpl for MapValuesFunc { fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { make_scalar_function(map_values_inner)(args) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_map_values_doc()) + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_map_values_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_MAP) + .with_description( + "Returns a list of all values in the map." + ) + .with_syntax_example("map_values(map)") + .with_sql_example( + r#"```sql +SELECT map_values(MAP {'a': 1, 'b': NULL, 'c': 3}); +---- +[1, , 3] + +SELECT map_values(map([100, 5], [42, 43])); +---- +[42, 43] +```"#, + ) + .with_argument( + "map", + "Map expression. Can be a constant, column, or function, and any combination of map operators." 
+ ) + .build() + .unwrap() + }) } fn map_values_inner(args: &[ArrayRef]) -> Result { diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 547ea108080e..203411428777 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -199,151 +199,6 @@ Unwraps struct fields into columns. +-----------------------+-----------------------+ ``` -## Map Functions - -- [map](#map) -- [make_map](#make_map) -- [map_extract](#map_extract) -- [map_keys](#map_keys) -- [map_values](#map_values) - -### `map` - -Returns an Arrow map with the specified key-value pairs. - -``` -map(key, value) -map(key: value) -``` - -#### Arguments - -- **key**: Expression to be used for key. - Can be a constant, column, or function, any combination of arithmetic or - string operators, or a named expression of previous listed. -- **value**: Expression to be used for value. - Can be a constant, column, or function, any combination of arithmetic or - string operators, or a named expression of previous listed. - -#### Example - -``` -SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); ----- -{POST: 41, HEAD: 33, PATCH: } - -SELECT MAP([[1,2], [3,4]], ['a', 'b']); ----- -{[1, 2]: a, [3, 4]: b} - -SELECT MAP { 'a': 1, 'b': 2 }; ----- -{a: 1, b: 2} -``` - -### `make_map` - -Returns an Arrow map with the specified key-value pairs. - -``` -make_map(key_1, value_1, ..., key_n, value_n) -``` - -#### Arguments - -- **key_n**: Expression to be used for key. - Can be a constant, column, or function, any combination of arithmetic or - string operators, or a named expression of previous listed. -- **value_n**: Expression to be used for value. - Can be a constant, column, or function, any combination of arithmetic or - string operators, or a named expression of previous listed. 
- -#### Example - -``` -SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null); ----- -{POST: 41, HEAD: 33, PATCH: } -``` - -### `map_extract` - -Return a list containing the value for a given key or an empty list if the key is not contained in the map. - -``` -map_extract(map, key) -``` - -#### Arguments - -- `map`: Map expression. - Can be a constant, column, or function, and any combination of map operators. -- `key`: Key to extract from the map. - Can be a constant, column, or function, any combination of arithmetic or - string operators, or a named expression of previous listed. - -#### Example - -``` -SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); ----- -[1] -``` - -#### Aliases - -- element_at - -### `map_keys` - -Return a list of all keys in the map. - -``` -map_keys(map) -``` - -#### Arguments - -- `map`: Map expression. - Can be a constant, column, or function, and any combination of map operators. - -#### Example - -``` -SELECT map_keys(MAP {'a': 1, 'b': NULL, 'c': 3}); ----- -[a, b, c] - -select map_keys(map([100, 5], [42,43])); ----- -[100, 5] -``` - -### `map_values` - -Return a list of all values in the map. - -``` -map_values(map) -``` - -#### Arguments - -- `map`: Map expression. - Can be a constant, column, or function, and any combination of map operators. 
- -#### Example - -``` -SELECT map_values(MAP {'a': 1, 'b': NULL, 'c': 3}); ----- -[1, , 3] - -select map_values(map([100, 5], [42,43])); ----- -[42, 43] -``` - ## Other Functions See the new documentation [`here`](https://datafusion.apache.org/user-guide/sql/scalar_functions_new.html) diff --git a/docs/source/user-guide/sql/scalar_functions_new.md b/docs/source/user-guide/sql/scalar_functions_new.md index bfe2a28a73d5..7d0280dbc28f 100644 --- a/docs/source/user-guide/sql/scalar_functions_new.md +++ b/docs/source/user-guide/sql/scalar_functions_new.md @@ -3898,6 +3898,150 @@ select struct(a as field_a, b) from t; - row +## Map Functions + +- [element_at](#element_at) +- [map](#map) +- [map_extract](#map_extract) +- [map_keys](#map_keys) +- [map_values](#map_values) + +### `element_at` + +_Alias of [map_extract](#map_extract)._ + +### `map` + +Returns an Arrow map with the specified key-value pairs. + +The `make_map` function creates a map from two lists: one for keys and one for values. Each key must be unique and non-null. + +``` +map(key, value) +map(key: value) +make_map(['key1', 'key2'], ['value1', 'value2']) +``` + +#### Arguments + +- **key**: For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators. + For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null. +- **value**: For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators. + For `make_map`: The list of values to be mapped to the corresponding keys. 
+ +#### Example + +```sql + -- Using map function + SELECT MAP('type', 'test'); + ---- + {type: test} + + SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); + ---- + {POST: 41, HEAD: 33, PATCH: } + + SELECT MAP([[1,2], [3,4]], ['a', 'b']); + ---- + {[1, 2]: a, [3, 4]: b} + + SELECT MAP { 'a': 1, 'b': 2 }; + ---- + {a: 1, b: 2} + + -- Using make_map function + SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]); + ---- + {POST: 41, HEAD: 33} + + SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]); + ---- + {key1: value1, key2: } +``` + + +### `map_extract` + +Returns a list containing the value for the given key or an empty list if the key is not present in the map. + + +``` +map_extract(map, key) +``` + +#### Arguments + +- **map**: Map expression. Can be a constant, column, or function, and any combination of map operators. +- **key**: Key to extract from the map. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed. + +#### Example + +```sql +SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); +---- +[1] + +SELECT map_extract(MAP {1: 'one', 2: 'two'}, 2); +---- +['two'] + +SELECT map_extract(MAP {'x': 10, 'y': NULL, 'z': 30}, 'y'); +---- +[] +``` + +#### Aliases + +- element_at + +### `map_keys` + +Returns a list of all keys in the map. + +``` +map_keys(map) +``` + +#### Arguments + +- **map**: Map expression. Can be a constant, column, or function, and any combination of map operators. 
+ +#### Example + +```sql +SELECT map_values(MAP {'a': 1, 'b': NULL, 'c': 3}); +---- +[1, , 3] + +SELECT map_values(map([100, 5], [42, 43])); +---- +[42, 43] +``` + ## Hashing Functions - [digest](#digest) @@ -4054,40 +4198,6 @@ sha512(expression) +-------------------------------------------+ ``` -## Special Functions - -- [map](#map) - -### `map` - -Returns an Arrow map with the specified key-value pairs. - -``` -map(key, value) -map(key: value) -``` - -#### Arguments - -- **key**: Expression to be used for key. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed. -- **value**: Expression to be used for value. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed. - -#### Example - -```sql -SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); ----- -{POST: 41, HEAD: 33, PATCH: } - -SELECT MAP([[1,2], [3,4]], ['a', 'b']); ----- -{[1, 2]: a, [3, 4]: b} - -SELECT MAP { 'a': 1, 'b': 2 }; ----- -{a: 1, b: 2} -``` - ## Other Functions - [arrow_cast](#arrow_cast)