Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug: fix default schema in inference for map/list #577

Merged
merged 1 commit into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions crates/sparrow-compiler/src/types/inference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ pub(crate) fn instantiate(
.iter()
.map(|parameter_type| instantiate_type(parameter_type, &solutions))
.collect();

let instantiated_arguments = arguments.with_values(instantiated_arguments);
let instantiated_return = if instantiated_arguments.iter().any(|t| t.is_error()) {
FenlType::Error
Expand Down Expand Up @@ -368,12 +369,18 @@ fn instantiate_type(fenl_type: &FenlType, solutions: &HashMap<TypeVariable, Fenl
.unwrap_or(FenlType::Concrete(DataType::Null));

// `solutions` map should contain concrete types for all type variables.

// Note that the `name` and `nullability` are the standard, and cannot be changed,
// or we may have schema mismatches later during execution.
//
// That said, there's no reason why a later arrow version can't change this behavior.
// TODO: Figure out how to pass user naming and nullability through inference.
let key_field = match concrete_key_type {
FenlType::Concrete(t) => Field::new("key", t, false),
FenlType::Concrete(t) => Field::new("keys", t, false),
other => panic!("expected concrete type, got {:?}", other),
};
let value_field = match concrete_value_type {
FenlType::Concrete(t) => Field::new("value", t, false),
FenlType::Concrete(t) => Field::new("values", t, true),
other => panic!("expected concrete type, got {:?}", other),
};

Expand All @@ -388,8 +395,12 @@ fn instantiate_type(fenl_type: &FenlType, solutions: &HashMap<TypeVariable, Fenl
.cloned()
.unwrap_or(FenlType::Concrete(DataType::Null));
let field = match concrete_type {
// TODO: Should the fields be nullable?
FenlType::Concrete(t) => Arc::new(Field::new("item", t, false)),
// Note: the `name` and `nullability` here are the standard, and cannot be changed,
// or we will have schema mismatches later during execution.
//
// That said, there's no reason why a later arrow version can't change this behavior.
// TODO: Figure out how to pass user naming and nullability through inference.
FenlType::Concrete(t) => Arc::new(Field::new("item", t, true)),
other => panic!("expected concrete type, got {:?}", other),
};

Expand Down Expand Up @@ -779,6 +790,10 @@ impl TypeConstraints {
(FenlType::TypeRef(variable), _) => self.constrain(variable, argument),
(FenlType::Collection(p_collection, p_type_vars), arg_type) => match arg_type {
FenlType::Concrete(DataType::List(field)) => {
if p_collection != &Collection::List {
return Err(());
}

debug_assert_eq!(
p_type_vars.len(),
1,
Expand All @@ -790,6 +805,10 @@ impl TypeConstraints {
)?;
}
FenlType::Concrete(DataType::Map(s, _)) => {
if p_collection != &Collection::Map {
return Err(());
}

debug_assert_eq!(
p_type_vars.len(),
2,
Expand Down
54 changes: 54 additions & 0 deletions crates/sparrow-main/tests/e2e/list_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,60 @@ async fn test_index_list_bool_dynamic() {
"###);
}

#[tokio::test]
async fn test_list_schemas_are_compatible() {
// This query puts a collect() into a record, which
// does schema validation when constructing the struct array.
let hash = QueryFixture::new(
"
let s_list = Input.string_list
let first_elem = s_list | index(0)
let list_with_first_elems = first_elem | collect(max = null)
in { l: Input.string_list, list_with_first_elems }
",
)
.run_to_parquet_hash(&list_data_fixture().await)
.await
.unwrap();

assert_eq!(
"5F9880AD6B3285D2FA244C508645A98807B38EE51FAF53C86505D12E",
hash
);
}

#[tokio::test]
async fn test_using_list_in_get_fails() {
insta::assert_yaml_snapshot!(QueryFixture::new("{ f1: Input.i64_list | get(\"s\") }")
.run_to_csv(&list_data_fixture().await).await.unwrap_err(), @r###"
---
code: Client specified an invalid argument
message: 1 errors in Fenl statements; see diagnostics
fenl_diagnostics:
- severity: error
code: E0010
message: Invalid argument type(s)
formatted:
- "error[E0010]: Invalid argument type(s)"
- " --> Query:1:24"
- " |"
- "1 | { f1: Input.i64_list | get(\"s\") }"
- " | ^^^ Invalid types for parameter 'map' in call to 'get'"
- " |"
- " --> internal:1:1"
- " |"
- 1 | $input
- " | ------ Actual type: list<i64>"
- " |"
- " --> built-in signature 'get<K: key, V: any>(key: K, map: map<K, V>) -> V':1:34"
- " |"
- "1 | get<K: key, V: any>(key: K, map: map<K, V>) -> V"
- " | --------- Expected type: map<K, V>"
- ""
- ""
"###);
}

#[tokio::test]
async fn test_incorrect_index_type() {
insta::assert_yaml_snapshot!(QueryFixture::new("{ f1: Input.i64_list | index(\"s\") }")
Expand Down
32 changes: 32 additions & 0 deletions crates/sparrow-main/tests/e2e/map_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,3 +230,35 @@ async fn test_incompatible_key_types() {
- ""
"###);
}

#[tokio::test]
async fn test_using_map_in_index_fails() {
insta::assert_yaml_snapshot!(QueryFixture::new("{ f1: Input.i64_to_i64 | index(0) }")
.run_to_csv(&map_data_fixture().await).await.unwrap_err(), @r###"
---
code: Client specified an invalid argument
message: 1 errors in Fenl statements; see diagnostics
fenl_diagnostics:
- severity: error
code: E0010
message: Invalid argument type(s)
formatted:
- "error[E0010]: Invalid argument type(s)"
- " --> Query:1:26"
- " |"
- "1 | { f1: Input.i64_to_i64 | index(0) }"
- " | ^^^^^ Invalid types for parameter 'list' in call to 'index'"
- " |"
- " --> internal:1:1"
- " |"
- 1 | $input
- " | ------ Actual type: map<i64, i64>"
- " |"
- " --> built-in signature 'index<T: any>(i: i64, list: list<T>) -> T':1:29"
- " |"
- "1 | index<T: any>(i: i64, list: list<T>) -> T"
- " | ------- Expected type: list<T>"
- ""
- ""
"###);
}
Loading