From 98d8e72ed2b4a85bf09f9a2f2f5df11e1ac2a284 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Tue, 1 Aug 2023 00:52:14 -0700 Subject: [PATCH] Fix default schema in inference for map/list and type validation for get/index functions --- .../sparrow-compiler/src/types/inference.rs | 27 ++++++++-- crates/sparrow-main/tests/e2e/list_tests.rs | 54 +++++++++++++++++++ crates/sparrow-main/tests/e2e/map_tests.rs | 32 +++++++++++ 3 files changed, 109 insertions(+), 4 deletions(-) diff --git a/crates/sparrow-compiler/src/types/inference.rs b/crates/sparrow-compiler/src/types/inference.rs index d64c02331..4ed80e856 100644 --- a/crates/sparrow-compiler/src/types/inference.rs +++ b/crates/sparrow-compiler/src/types/inference.rs @@ -104,6 +104,7 @@ pub(crate) fn instantiate( .iter() .map(|parameter_type| instantiate_type(parameter_type, &solutions)) .collect(); + let instantiated_arguments = arguments.with_values(instantiated_arguments); let instantiated_return = if instantiated_arguments.iter().any(|t| t.is_error()) { FenlType::Error @@ -368,12 +369,18 @@ fn instantiate_type(fenl_type: &FenlType, solutions: &HashMap Field::new("key", t, false), + FenlType::Concrete(t) => Field::new("keys", t, false), other => panic!("expected concrete type, got {:?}", other), }; let value_field = match concrete_value_type { - FenlType::Concrete(t) => Field::new("value", t, false), + FenlType::Concrete(t) => Field::new("values", t, true), other => panic!("expected concrete type, got {:?}", other), }; @@ -388,8 +395,12 @@ fn instantiate_type(fenl_type: &FenlType, solutions: &HashMap Arc::new(Field::new("item", t, false)), + // Note: the `name` and `nullability` here are the standard, and cannot be changed, + // or we will have schema mismatches later during execution. + // + // That said, there's no reason why a later arrow version can't change this behavior. + // TODO: Figure out how to pass user naming and nullability through inference. + FenlType::Concrete(t) => Arc::new(Field::new("item", t, true)), other => panic!("expected concrete type, got {:?}", other), }; @@ -779,6 +790,10 @@ impl TypeConstraints { (FenlType::TypeRef(variable), _) => self.constrain(variable, argument), (FenlType::Collection(p_collection, p_type_vars), arg_type) => match arg_type { FenlType::Concrete(DataType::List(field)) => { + if p_collection != &Collection::List { + return Err(()); + } + debug_assert_eq!( p_type_vars.len(), 1, @@ -790,6 +805,10 @@ impl TypeConstraints { )?; } FenlType::Concrete(DataType::Map(s, _)) => { + if p_collection != &Collection::Map { + return Err(()); + } + debug_assert_eq!( p_type_vars.len(), 2, diff --git a/crates/sparrow-main/tests/e2e/list_tests.rs b/crates/sparrow-main/tests/e2e/list_tests.rs index 13d0921e4..5e41a5bba 100644 --- a/crates/sparrow-main/tests/e2e/list_tests.rs +++ b/crates/sparrow-main/tests/e2e/list_tests.rs @@ -95,6 +95,60 @@ async fn test_index_list_bool_dynamic() { "###); } +#[tokio::test] +async fn test_list_schemas_are_compatible() { + // This query puts a collect() into a record, which + // does schema validation when constructing the struct array. + let hash = QueryFixture::new( + " + let s_list = Input.string_list + let first_elem = s_list | index(0) + let list_with_first_elems = first_elem | collect(max = null) + in { l: Input.string_list, list_with_first_elems } + ", + ) + .run_to_parquet_hash(&list_data_fixture().await) + .await + .unwrap(); + + assert_eq!( + "5F9880AD6B3285D2FA244C508645A98807B38EE51FAF53C86505D12E", + hash + ); +} + +#[tokio::test] +async fn test_using_list_in_get_fails() { + insta::assert_yaml_snapshot!(QueryFixture::new("{ f1: Input.i64_list | get(\"s\") }") + .run_to_csv(&list_data_fixture().await).await.unwrap_err(), @r###" + --- + code: Client specified an invalid argument + message: 1 errors in Fenl statements; see diagnostics + fenl_diagnostics: + - severity: error + code: E0010 + message: Invalid argument type(s) + formatted: + - "error[E0010]: Invalid argument type(s)" + - " --> Query:1:24" + - " |" + - "1 | { f1: Input.i64_list | get(\"s\") }" + - " | ^^^ Invalid types for parameter 'map' in call to 'get'" + - " |" + - " --> internal:1:1" + - " |" + - 1 | $input + - " | ------ Actual type: list" + - " |" + - " --> built-in signature 'get(key: K, map: map) -> V':1:34" + - " |" + - "1 | get(key: K, map: map) -> V" + - " | --------- Expected type: map" + - "" + - "" + "###); +} + #[tokio::test] async fn test_incorrect_index_type() { insta::assert_yaml_snapshot!(QueryFixture::new("{ f1: Input.i64_list | index(\"s\") }") diff --git a/crates/sparrow-main/tests/e2e/map_tests.rs b/crates/sparrow-main/tests/e2e/map_tests.rs index e17f87866..5b21dfdc6 100644 --- a/crates/sparrow-main/tests/e2e/map_tests.rs +++ b/crates/sparrow-main/tests/e2e/map_tests.rs @@ -230,3 +230,35 @@ async fn test_incompatible_key_types() { - "" "###); } + +#[tokio::test] +async fn test_using_map_in_index_fails() { + insta::assert_yaml_snapshot!(QueryFixture::new("{ f1: Input.i64_to_i64 | index(0) }") + .run_to_csv(&map_data_fixture().await).await.unwrap_err(), @r###" + --- + code: Client specified an invalid argument + message: 1 errors in Fenl statements; see diagnostics + fenl_diagnostics: + - severity: error + code: E0010 + message: Invalid argument type(s) + formatted: + - "error[E0010]: Invalid argument type(s)" + - " --> Query:1:26" + - " |" + - "1 | { f1: Input.i64_to_i64 | index(0) }" + - " | ^^^^^ Invalid types for parameter 'list' in call to 'index'" + - " |" + - " --> internal:1:1" + - " |" + - 1 | $input + - " | ------ Actual type: map" + - " |" + - " --> built-in signature 'index(i: i64, list: list) -> T':1:29" + - " |" + - "1 | index(i: i64, list: list) -> T" + - " | ------- Expected type: list" + - "" + - "" + "###); +}