Skip to content

Commit

Permalink
feat: support unnest with additional columns (#9400)
Browse files Browse the repository at this point in the history
* feat: support `unnest` with additional columns

* add test from issue

* add test to verify preserve_nulls

* update test

* fix name conflicts
  • Loading branch information
jonahgao authored Mar 1, 2024
1 parent a8a3c5d commit 10d5f2d
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 19 deletions.
42 changes: 25 additions & 17 deletions datafusion/sql/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ use crate::utils::{
resolve_columns, resolve_positions_to_exprs,
};

use datafusion_common::Column;
use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result};
use datafusion_common::{Column, UnnestOptions};
use datafusion_expr::expr::{Alias, Unnest};
use datafusion_expr::expr_rewriter::{
normalize_col, normalize_col_with_schemas_and_ambiguity_check,
Expand Down Expand Up @@ -282,30 +282,38 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
input: LogicalPlan,
select_exprs: Vec<Expr>,
) -> Result<LogicalPlan> {
let mut exprs_to_unnest = vec![];

for expr in select_exprs.iter() {
if let Expr::Unnest(Unnest { exprs }) = expr {
exprs_to_unnest.push(exprs[0].clone());
}
}
let mut unnest_columns = vec![];
// Map unnest expressions to their argument
let projection_exprs = select_exprs
.into_iter()
.map(|expr| {
if let Expr::Unnest(Unnest { ref exprs }) = expr {
let column_name = expr.display_name()?;
unnest_columns.push(column_name.clone());
// Add alias for the argument expression, to avoid naming conflicts with other expressions
// in the select list. For example: `select unnest(col1), col1 from t`.
Ok(exprs[0].clone().alias(column_name))
} else {
Ok(expr)
}
})
.collect::<Result<Vec<_>>>()?;

// Do the final projection
if exprs_to_unnest.is_empty() {
if unnest_columns.is_empty() {
LogicalPlanBuilder::from(input)
.project(select_exprs)?
.project(projection_exprs)?
.build()
} else {
if exprs_to_unnest.len() > 1 {
if unnest_columns.len() > 1 {
return not_impl_err!("Only support single unnest expression for now");
}

let expr = exprs_to_unnest[0].clone();
let column = expr.display_name()?;

let unnest_column = unnest_columns.pop().unwrap();
// Set preserve_nulls to false to ensure compatibility with DuckDB and PostgreSQL
let unnest_options = UnnestOptions::new().with_preserve_nulls(false);
LogicalPlanBuilder::from(input)
.project(vec![expr])?
.unnest_column(column)?
.project(projection_exprs)?
.unnest_column_with_options(unnest_column, unnest_options)?
.build()
}
}
Expand Down
44 changes: 42 additions & 2 deletions datafusion/sqllogictest/test_files/unnest.slt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ AS VALUES
([1,2,3], [7], 1),
([4,5], [8,9,10], 2),
([6], [11,12], 3),
([12], [null, 42, null], null)
([12], [null, 42, null], null),
-- null array to verify the `preserve_nulls` option
(null, null, 4)
;

## Basic unnest expression in select list
Expand Down Expand Up @@ -91,6 +93,44 @@ NULL
42
NULL

## Unnest with additional column
## Issue: https://github.com/apache/arrow-datafusion/issues/9349
query II
select unnest(column1), column3 from unnest_table;
----
1 1
2 1
3 1
4 2
5 2
6 3
12 NULL

query I?
select unnest(column1), column1 from unnest_table;
----
1 [1, 2, 3]
2 [1, 2, 3]
3 [1, 2, 3]
4 [4, 5]
5 [4, 5]
6 [6]
12 [12]

query ?II
select array_remove(column1, 4), unnest(column2), column3 * 10 from unnest_table;
----
[1, 2, 3] 7 10
[5] 8 20
[5] 9 20
[5] 10 20
[6] 11 30
[6] 12 30
[12] NULL NULL
[12] 42 NULL
[12] NULL NULL


## Unnest column with scalars
query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null
select unnest(column3) from unnest_table;
Expand Down Expand Up @@ -212,7 +252,7 @@ select * from unnest([1,2,(select sum(column3) from unnest_table)]);
----
1
2
6
10

statement ok
drop table unnest_table;

0 comments on commit 10d5f2d

Please sign in to comment.