Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve columns names when SELECT * and JOIN #268

Merged
merged 19 commits into from
Mar 19, 2024
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
## [0.9.15] - 2024-02-20
### Changed
- SELECT * with JOINs preserve the column names when there is no ambiguity [#268](https://github.com/Qrlew/qrlew/pull/268)


## [0.9.14] - 2024-01-30
### Added
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
authors = ["Nicolas Grislain <ng@sarus.tech>"]
name = "qrlew"
version = "0.9.14"
version = "0.9.15"
edition = "2021"
description = "Sarus Qrlew Engine"
documentation = "https://docs.rs/qrlew"
Expand Down
37 changes: 30 additions & 7 deletions src/relation/rewriting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,19 +203,28 @@ impl Join {
}

/// Replace the duplicates fields specified in `columns` by their coalesce expression
/// Its mimicks teh behaviour of USING in SQL
/// It mimics the behavior of USING in SQL
/// If preserve_input_names is True, the fields of the resulting relation
/// will be named as in the JOINs inputs if they are not ambiguous.
pub fn remove_duplicates_and_coalesce(
self,
vec: Vec<String>,
columns: &Hierarchy<Identifier>,
preserve_input_names: bool,
) -> Relation {
let fields = self

let fields_in_vec = self
.field_inputs()
.filter_map(|(name, id)| {
let col = id.as_ref().last().unwrap();
if id.as_ref().first().unwrap().as_str() == LEFT_INPUT_NAME && vec.contains(col) {
let final_col_name = if preserve_input_names {
col.to_string()
} else {
name
};
Some((
name,
final_col_name,
Expr::coalesce(
Expr::col(columns[[LEFT_INPUT_NAME, col]].as_ref().last().unwrap()),
Expr::col(columns[[RIGHT_INPUT_NAME, col]].as_ref().last().unwrap()),
Expand All @@ -224,12 +233,26 @@ impl Join {
} else {
None
}
})
.chain(self.field_inputs().filter_map(|(name, id)| {
}).collect::<Vec<_>>();

let fields_not_in_vec = self
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really obscure, can you separate well between name preservation and collision management?

.field_inputs()
.filter_map(|(name, id)| {
let col = id.as_ref().last().unwrap();
(!vec.contains(col)).then_some((name.clone(), Expr::col(name)))
}))
let final_col_name = if preserve_input_names && columns.get(&[col.clone()]).is_some() {
col.to_string()
} else {
name.clone()
};
(!vec.contains(col)).then_some((final_col_name, Expr::col(name)))
})
.collect::<Vec<_>>();

let fields = fields_in_vec
.into_iter()
.chain(fields_not_in_vec.into_iter())
.collect::<Vec<_>>();

Relation::map()
.input(Relation::from(self))
.with_iter(fields)
Expand Down
111 changes: 100 additions & 11 deletions src/sql/relation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a,
fn try_from_table_with_joins(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is becoming too complex, can you try to split it in independent sub-functions with explicit names ?

&self,
table_with_joins: &'a ast::TableWithJoins,
preserve_input_names: bool,
) -> Result<RelationWithColumns> {
// Process the relation
// Then the JOIN if needed
Expand All @@ -273,9 +274,9 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a,
let all_columns = left_columns.with(right_columns);
let operator = self.try_from_join_operator_with_columns(
&ast_join.join_operator,
// &all_columns.filter_map(|i| Some(i.split_last().ok()?.0)),//TODO remove this
&all_columns,
)?;

// We build a Join
let join: Join = Relation::join()
.operator(operator)
Expand All @@ -284,19 +285,22 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a,
.build();

// We collect column mapping inputs should map to new names (hence the inversion)
let new_columns: Hierarchy<Identifier> =
join.field_inputs().map(|(f, i)| (i, f.into())).collect();
let composed_columns = all_columns.and_then(new_columns.clone());
let join_columns: Hierarchy<Identifier> =join
.field_inputs()
.map(|(f, i)| (i, f.into()))
.collect();
let composed_columns = all_columns.and_then(join_columns.clone());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where are you using this ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure to understand, here the only difference from before is that the name of join columns which I change it from new_columns to join_columns


// If the join contraint is of type "USING" or "NATURAL", add a map to coalesce the duplicate columns
// If the join constraint is of type "USING" or "NATURAL", add a map to coalesce the duplicate columns
let relation = match &ast_join.join_operator {
ast::JoinOperator::Inner(ast::JoinConstraint::Using(v))
| ast::JoinOperator::LeftOuter(ast::JoinConstraint::Using(v))
| ast::JoinOperator::RightOuter(ast::JoinConstraint::Using(v))
| ast::JoinOperator::FullOuter(ast::JoinConstraint::Using(v)) => {
join.remove_duplicates_and_coalesce(
v.into_iter().map(|id| id.value.to_string()).collect(),
&new_columns
&join_columns,
preserve_input_names
)
},
ast::JoinOperator::Inner(ast::JoinConstraint::Natural)
Expand All @@ -307,16 +311,35 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a,
.into_iter()
.filter_map(|f| join.right().schema().field(f.name()).is_ok().then_some(f.name().to_string()))
.collect();
join.remove_duplicates_and_coalesce(v,&new_columns)
join.remove_duplicates_and_coalesce(v, &join_columns, preserve_input_names)
},
ast::JoinOperator::LeftSemi(_) => todo!(),
ast::JoinOperator::RightSemi(_) => todo!(),
ast::JoinOperator::LeftAnti(_) => todo!(),
ast::JoinOperator::RightAnti(_) => todo!(),
_ => Relation::from(join),
_ => join.remove_duplicates_and_coalesce(vec![], &join_columns, preserve_input_names),
};


let composed_columns = if preserve_input_names {
// relation fields are renamed to those of the join relation inputs
// if no name collision is generated.
let original_not_ambiguous_columns: Hierarchy<Identifier> = join_columns
.iter()
.map(|(key, value)| {
let original_col_name = key.last().unwrap().as_str();
let join_col_name = value.head().unwrap();
match join_columns.get(&[original_col_name.to_string()]) {
Some(_) => (Identifier::from(join_col_name), original_col_name.into()),
None => (Identifier::from(join_col_name), join_col_name.into()),
}
})
.collect();
composed_columns.and_then(original_not_ambiguous_columns)
} else {
composed_columns
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're relying on the previously defined composed column here ? It's a bit hard to read

};

// We should compose hierarchies
Ok(RelationWithColumns::new(Arc::new(relation), composed_columns))
},
);
Expand All @@ -327,11 +350,15 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a,
fn try_from_tables_with_joins(
&self,
tables_with_joins: &'a Vec<ast::TableWithJoins>,
preserve_input_names: bool
) -> Result<RelationWithColumns> {
// TODO consider more tables
// For now, only consider the first element
// It should eventually be cross joined as described in: https://www.postgresql.org/docs/current/queries-table-expressions.html
self.try_from_table_with_joins(&tables_with_joins[0])
self.try_from_table_with_joins(
&tables_with_joins[0],
preserve_input_names
)
}

/// Build a relation from the
Expand Down Expand Up @@ -528,7 +555,22 @@ impl<'a, T: QueryToRelationTranslator + Copy + Clone> VisitedQueryRelations<'a,
if qualify.is_some() {
return Err(Error::other("QUALIFY is not supported"));
}
let RelationWithColumns(from, columns) = self.try_from_tables_with_joins(from)?;

// If projection contains a Wildcard (SELECT *) the table with joins should
// preserver columns names.
let RelationWithColumns(from, columns) = self.try_from_tables_with_joins(
from,
projection.contains(
&ast::SelectItem::Wildcard(
ast::WildcardAdditionalOptions {
opt_exclude: None,
opt_except: None,
opt_rename: None,
opt_replace: None
}
)
)
)?;
let relation = self.try_from_select_items_selection_and_group_by(
&columns.filter_map(|i| Some(i.split_last().ok()?.0)),
projection,
Expand Down Expand Up @@ -1407,6 +1449,53 @@ mod tests {
.map(ToString::to_string);
}

#[test]
fn test_select_all_with_joins() {
let mut database = postgresql::test_database();
let relations = database.relations();

let query_str = r#"
WITH my_tab AS (SELECT * FROM user_table u JOIN order_table o USING (id))
SELECT * FROM my_tab WHERE id > 50;
"#;
let query = parse(query_str).unwrap();
let relation = Relation::try_from(QueryWithRelations::new(
&query,
&relations
))
.unwrap();
relation.display_dot().unwrap();
let query: &str = &ast::Query::from(&relation).to_string();
println!("{query}");
_ = database
.query(query)
.unwrap()
.iter()
.map(ToString::to_string);

let query_str = r#"
WITH my_tab AS (SELECT * FROM user_table u JOIN order_table o ON (u.id=o.user_id))
SELECT * FROM my_tab WHERE user_id > 50;
"#;
let query = parse(query_str).unwrap();
let relation = Relation::try_from(QueryWithRelations::new(
&query,
&relations
))
.unwrap();
// id becomes an ambiguous column since is present in both tables
assert!(relation.schema().field("id").is_err());
relation.display_dot().unwrap();
println!("relation = {relation}");
let query: &str = &ast::Query::from(&relation).to_string();
println!("{query}");
_ = database
.query(query)
.unwrap()
.iter()
.map(ToString::to_string);
}

#[test]
fn test_distinct_in_select() {
let query = parse("SELECT DISTINCT a, b FROM table_1;").unwrap();
Expand Down
Loading