Skip to content

Commit

Permalink
fix: Fix oob of join with literals and empty table (#17047)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Jun 18, 2024
1 parent 2aec475 commit 7aa7854
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 7 deletions.
24 changes: 18 additions & 6 deletions crates/polars-ops/src/frame/join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,6 @@ pub trait DataFrameJoinOps: IntoDf {
S: AsRef<str>,
{
let df_left = self.to_df();
#[cfg(feature = "cross_join")]
if let JoinType::Cross = args.how {
return df_left.cross_join(other, args.suffix.as_deref(), None);
}
let selected_left = df_left.select_series(left_on)?;
let selected_right = other.select_series(right_on)?;
self._join_impl(other, selected_left, selected_right, args, true, false)
Expand All @@ -115,14 +111,30 @@ pub trait DataFrameJoinOps: IntoDf {
_verbose: bool,
) -> PolarsResult<DataFrame> {
let left_df = self.to_df();
let should_coalesce = args.coalesce.coalesce(&args.how);
assert_eq!(selected_left.len(), selected_right.len());

#[cfg(feature = "cross_join")]
if let JoinType::Cross = args.how {
return left_df.cross_join(other, args.suffix.as_deref(), args.slice);
}

// Empties, in place, every series of length 1 (i.e. a literal join key).
// Applied when a frame is empty: keeping the literal at length 1 could
// otherwise lead to an out-of-bounds access.
fn clear(columns: &mut [Series]) {
    columns
        .iter_mut()
        .filter(|col| col.len() == 1)
        .for_each(|col| *col = col.clear());
}
if left_df.is_empty() {
clear(&mut selected_left);
}
if other.is_empty() {
clear(&mut selected_right);
}

let should_coalesce = args.coalesce.coalesce(&args.how);
assert_eq!(selected_left.len(), selected_right.len());

#[cfg(feature = "chunked_ids")]
{
// a left join create chunked-ids
Expand Down
5 changes: 4 additions & 1 deletion crates/polars-pipe/src/executors/sinks/joins/row_values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ impl RowValues {

for phys_e in self.join_column_eval.iter() {
let s = phys_e.evaluate(chunk, &context.execution_state)?;
let s = s.to_physical_repr().rechunk();
let mut s = s.to_physical_repr().rechunk();
if chunk.data.is_empty() {
s = s.clear()
};
if determine_idx {
names.push(s.name().to_string());
}
Expand Down
22 changes: 22 additions & 0 deletions py-polars/tests/unit/operations/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,3 +973,25 @@ def test_join_lit_panic_11410() -> None:
assert symbols.join(dates, left_on=pl.lit(1), right_on=pl.lit(1)).drop(
"literal"
).collect().to_dict(as_series=False) == {"symbol": [4], "date": [1]}


def test_join_empty_literal_17027() -> None:
    """Join on a literal key when the right-hand frame has no rows.

    A left join must keep the single left row and an inner join must yield no
    rows, both for eager joins and for lazy joins collected with
    ``streaming=True``.
    """
    lhs = pl.DataFrame({"a": [1]})
    rhs_empty = pl.DataFrame(schema={"a": pl.Int64})

    # Eager joins.
    eager_left = lhs.join(rhs_empty, on=pl.lit(0), how="left")
    assert eager_left.height == 1
    eager_inner = lhs.join(rhs_empty, on=pl.lit(0), how="inner")
    assert eager_inner.height == 0

    # Lazy joins collected with streaming enabled.
    streamed_inner = lhs.lazy().join(rhs_empty.lazy(), on=pl.lit(0), how="inner")
    assert streamed_inner.collect(streaming=True).height == 0
    streamed_left = lhs.lazy().join(rhs_empty.lazy(), on=pl.lit(0), how="left")
    assert streamed_left.collect(streaming=True).height == 1

0 comments on commit 7aa7854

Please sign in to comment.