Skip to content

Commit

Permalink
fix(python): don't SO on align_frames (#9911)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Jul 15, 2023
1 parent e51326f commit cc0795f
Showing 1 changed file with 27 additions and 10 deletions.
37 changes: 27 additions & 10 deletions py-polars/polars/functions/eager.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,17 +347,34 @@ def align_frames(
# create aligned master frame (this is the most expensive part; afterwards
# we just subselect out the columns representing the component frames)
eager = isinstance(frames[0], pl.DataFrame)
alignment_frame: LazyFrame = (
reduce( # type: ignore[attr-defined]
lambda x, y: x.lazy().join( # type: ignore[arg-type, return-value]
y.lazy(), how=how, on=align_on, suffix=str(id(y))
),
frames,

# the lazy join chain can overflow the stack when there are many frames,
# so we branch on an arbitrarily chosen large number of frames
if len(frames) < 250:
# lazy variant
# this can overflow the stack (SO) when there are many frames
alignment_frame: LazyFrame = (
reduce( # type: ignore[attr-defined]
lambda x, y: x.lazy().join( # type: ignore[arg-type, return-value]
y.lazy(), how=how, on=align_on, suffix=str(id(y))
),
frames,
)
.sort(by=align_on, descending=descending)
.collect(no_optimization=True)
.lazy()
)
else:
# eager variant
# this doesn't overflow the stack
alignment_frame = (
reduce(
lambda x, y: x.join(y, how=how, on=align_on, suffix=str(id(y))),
frames,
)
.sort(by=align_on, descending=descending)
.lazy()
)
.sort(by=align_on, descending=descending)
.collect()
.lazy()
)

# select-out aligned components from the master frame
aligned_cols = set(alignment_frame.columns)
Expand Down

0 comments on commit cc0795f

Please sign in to comment.