Skip to content

Commit

Permalink
fix: Fix struct expansion and raise on exclude (pola-rs#17489)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored and Henry Harbeck committed Jul 8, 2024
1 parent 460e21b commit 04ee016
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 11 deletions.
24 changes: 15 additions & 9 deletions crates/polars-plan/src/dsl/struct_.rs
Original file line number Diff line number Diff line change
@@ -68,35 +68,41 @@ impl StructNameSpace {
.map_private(FunctionExpr::StructExpr(StructFunction::JsonEncode))
}

pub fn with_fields(self, fields: Vec<Expr>) -> Expr {
fn materialize_field(this: &Expr, field: Expr) -> Expr {
field.map_expr(|e| match e {
pub fn with_fields(self, fields: Vec<Expr>) -> PolarsResult<Expr> {
fn materialize_field(this: &Expr, field: Expr) -> PolarsResult<Expr> {
field.try_map_expr(|e| match e {
Expr::Field(names) => {
let this = this.clone().struct_();
if names.len() == 1 {
Ok(if names.len() == 1 {
this.field_by_name(names[0].as_ref())
} else {
this.field_by_names_impl(names)
}
})
},
_ => e,
Expr::Exclude(_, _) => {
polars_bail!(InvalidOperation: "'exclude' not allowed in 'field'")
},
_ => Ok(e),
})
}

let mut new_fields = Vec::with_capacity(fields.len());
new_fields.push(Default::default());

new_fields.extend(fields.into_iter().map(|e| materialize_field(&self.0, e)));
for e in fields.into_iter().map(|e| materialize_field(&self.0, e)) {
new_fields.push(e?)
}
new_fields[0] = self.0;
Expr::Function {
Ok(Expr::Function {
input: new_fields,
function: FunctionExpr::StructExpr(StructFunction::WithFields),
options: FunctionOptions {
collect_groups: ApplyOptions::ElementWise,
pass_name_to_apply: true,
allow_group_aware: false,
input_wildcard_expansion: true,
..Default::default()
},
}
})
}
}
11 changes: 9 additions & 2 deletions py-polars/src/expr/struct.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use pyo3::prelude::*;

use crate::error::PyPolarsErr;
use crate::expr::ToExprs;
use crate::PyExpr;

@@ -25,8 +26,14 @@ impl PyExpr {
self.inner.clone().struct_().json_encode().into()
}

fn struct_with_fields(&self, fields: Vec<PyExpr>) -> Self {
fn struct_with_fields(&self, fields: Vec<PyExpr>) -> PyResult<Self> {
let fields = fields.to_exprs();
self.inner.clone().struct_().with_fields(fields).into()
let e = self
.inner
.clone()
.struct_()
.with_fields(fields)
.map_err(PyPolarsErr::from)?;
Ok(e.into())
}
}
29 changes: 29 additions & 0 deletions py-polars/tests/unit/datatypes/test_struct.py
Original file line number Diff line number Diff line change
@@ -932,3 +932,32 @@ def test_struct_split_16536() -> None:

df = pl.concat([df, df, df, df], rechunk=False)
assert df.filter(pl.col("int") == 1).shape == (4, 3)


def test_struct_wildcard_expansion_and_exclude() -> None:
    """Exercise `struct.with_fields` with a wildcard input and with `exclude`.

    The first half pins the struct schema produced by `with_fields("*")`; the
    second half expects `InvalidOperationError` when `exclude` is applied to
    `pl.field(...)` inside `with_fields`.
    """
    frame = pl.DataFrame(
        {
            "id": [1, 2],
            "meta_data": [
                {"system_data": "to_remove", "user_data": "keep"},
                {"user_data": "keep_"},
            ],
        }
    )

    # ensure wildcard expansion is on input: the expected struct keeps its own
    # two fields and additionally carries "id" and "meta_data".
    expected = [
        pl.Field("system_data", pl.String),
        pl.Field("user_data", pl.String),
        pl.Field("id", pl.Int64),
        pl.Field(
            "meta_data", pl.Struct({"system_data": pl.String, "user_data": pl.String})
        ),
    ]
    expanded = frame.lazy().select(pl.col("meta_data").struct.with_fields("*"))
    schema = expanded.collect().schema
    assert schema["meta_data"].fields == expected  # type: ignore[attr-defined]

    # `exclude` on a struct field selection is expected to raise.
    with pytest.raises(pl.exceptions.InvalidOperationError):
        excluded = pl.field("*").exclude("user_data")
        frame.lazy().select(pl.col("meta_data").struct.with_fields(excluded)).collect()

0 comments on commit 04ee016

Please sign in to comment.