Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(python)!: rollup breaking changes #5602

Merged
merged 1 commit into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 6 additions & 24 deletions py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@
_prepare_row_count_args,
_process_null_values,
_timedelta_to_pl_duration,
deprecated_alias,
format_path,
handle_projection_columns,
is_bool_sequence,
Expand Down Expand Up @@ -2465,7 +2464,7 @@ def filter(
return (
self.lazy()
.filter(predicate) # type: ignore[arg-type]
.collect(no_optimization=True, string_cache=False)
.collect(no_optimization=True)
)

def describe(self: DF) -> DF:
Expand Down Expand Up @@ -2668,11 +2667,7 @@ def sort(
"""
if not isinstance(by, str) and isinstance(by, (Sequence, pli.Expr)):
df = (
self.lazy()
.sort(by, reverse, nulls_last)
.collect(no_optimization=True, string_cache=False)
)
df = self.lazy().sort(by, reverse, nulls_last).collect(no_optimization=True)
return df
return self._from_pydf(self._df.sort(by, reverse, nulls_last))

Expand Down Expand Up @@ -2782,7 +2777,6 @@ def slice(self: DF, offset: int, length: int | None = None) -> DF:
length = self.height - offset + length
return self._from_pydf(self._df.slice(offset, length))

@deprecated_alias(length="n")
def limit(self: DF, n: int = 5) -> DF:
"""
Get the first `n` rows.
Expand Down Expand Up @@ -2818,7 +2812,6 @@ def limit(self: DF, n: int = 5) -> DF:
"""
return self.head(n)

@deprecated_alias(length="n")
def head(self: DF, n: int = 5) -> DF:
"""
Get the first `n` rows.
Expand Down Expand Up @@ -2854,7 +2847,6 @@ def head(self: DF, n: int = 5) -> DF:
"""
return self._from_pydf(self._df.head(n))

@deprecated_alias(length="n")
def tail(self: DF, n: int = 5) -> DF:
"""
Get the last `n` rows.
Expand Down Expand Up @@ -4098,11 +4090,7 @@ def with_column(self, column: pli.Series | pli.Expr) -> DataFrame:
└──────┴─────┘
"""
return (
self.lazy()
.with_column(column)
.collect(no_optimization=True, string_cache=False)
)
return self.lazy().with_column(column).collect(no_optimization=True)

def hstack(
self: DF,
Expand Down Expand Up @@ -4254,7 +4242,6 @@ def extend(self: DF, other: DF) -> DF:
self._df.extend(other._df)
return self

@deprecated_alias(name="columns")
def drop(self: DF, columns: str | Sequence[str]) -> DF:
"""
Remove column from DataFrame and return as new.
Expand Down Expand Up @@ -5234,7 +5221,7 @@ def shift_and_fill(self, periods: int, fill_value: int | str | float) -> DataFra
return (
self.lazy()
.shift_and_fill(periods, fill_value)
.collect(no_optimization=True, string_cache=False)
.collect(no_optimization=True)
)

def is_duplicated(self) -> pli.Series:
Expand Down Expand Up @@ -5411,10 +5398,7 @@ def select(
"""
return self._from_pydf(
self.lazy()
.select(exprs)
.collect(no_optimization=True, string_cache=False)
._df
self.lazy().select(exprs).collect(no_optimization=True)._df
)

def with_columns(
Expand Down Expand Up @@ -5490,9 +5474,7 @@ def with_columns(
if exprs is not None and not isinstance(exprs, Sequence):
exprs = [exprs]
return (
self.lazy()
.with_columns(exprs, **named_exprs)
.collect(no_optimization=True, string_cache=False)
self.lazy().with_columns(exprs, **named_exprs).collect(no_optimization=True)
)

@overload
Expand Down
10 changes: 5 additions & 5 deletions py-polars/polars/internals/dataframe/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def agg(self, aggs: pli.Expr | Sequence[pli.Expr]) -> pli.DataFrame:
.lazy()
.groupby(self.by, maintain_order=self.maintain_order)
.agg(aggs)
.collect(no_optimization=True, string_cache=False)
.collect(no_optimization=True)
)
return self._dataframe_class._from_pydf(df._df)

Expand Down Expand Up @@ -362,7 +362,7 @@ def head(self, n: int = 5) -> DF:
.lazy()
.groupby(self.by, self.maintain_order)
.head(n)
.collect(no_optimization=True, string_cache=False)
.collect(no_optimization=True)
)
return self._dataframe_class._from_pydf(df._df)

Expand Down Expand Up @@ -426,7 +426,7 @@ def tail(self, n: int = 5) -> DF:
.lazy()
.groupby(self.by, self.maintain_order)
.tail(n)
.collect(no_optimization=True, string_cache=False)
.collect(no_optimization=True)
)
return self._dataframe_class._from_pydf(df._df)

Expand Down Expand Up @@ -858,7 +858,7 @@ def agg(self, aggs: pli.Expr | Sequence[pli.Expr]) -> pli.DataFrame:
self.time_column, self.period, self.offset, self.closed, self.by
)
.agg(aggs)
.collect(no_optimization=True, string_cache=False)
.collect(no_optimization=True)
)


Expand Down Expand Up @@ -911,7 +911,7 @@ def agg(self, aggs: pli.Expr | Sequence[pli.Expr]) -> pli.DataFrame:
self.by,
)
.agg(aggs)
.collect(no_optimization=True, string_cache=False)
.collect(no_optimization=True)
)


Expand Down
34 changes: 16 additions & 18 deletions py-polars/polars/internals/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
_prepare_row_count_args,
_process_null_values,
_timedelta_to_pl_duration,
deprecated_alias,
format_path,
)

Expand Down Expand Up @@ -596,6 +597,7 @@ def describe_plan(self) -> str:
"""Create a string representation of the unoptimized query plan."""
return self._ldf.describe_plan()

@deprecated_alias(streaming="allow_streaming")
def describe_optimized_plan(
self,
type_coercion: bool = True,
Expand All @@ -604,7 +606,7 @@ def describe_optimized_plan(
simplify_expression: bool = True,
slice_pushdown: bool = True,
common_subplan_elimination: bool = True,
allow_streaming: bool = False,
streaming: bool = False,
) -> str:
"""Create a string representation of the optimized query plan."""
ldf = self._ldf.optimization_toggle(
Expand All @@ -614,14 +616,16 @@ def describe_optimized_plan(
simplify_expression,
slice_pushdown,
common_subplan_elimination,
allow_streaming,
streaming,
)

return ldf.describe_optimized_plan()

@deprecated_alias(streaming="allow_streaming")
def show_graph(
self,
optimized: bool = True,
*,
show: bool = True,
output_path: str | None = None,
raw_output: bool = False,
Expand All @@ -632,7 +636,7 @@ def show_graph(
simplify_expression: bool = True,
slice_pushdown: bool = True,
common_subplan_elimination: bool = True,
allow_streaming: bool = False,
streaming: bool = False,
) -> str | None:
"""
Show a plot of the query plan. Note that you should have graphviz installed.
Expand Down Expand Up @@ -663,7 +667,7 @@ def show_graph(
Will try to cache branching subplans that occur on self-joins or unions.
common_subplan_elimination
Will try to cache branching subplans that occur on self-joins or unions.
allow_streaming
streaming
Run parts of the query in a streaming fashion (this is in an alpha state)
"""
Expand All @@ -674,7 +678,7 @@ def show_graph(
simplify_expression,
slice_pushdown,
common_subplan_elimination,
allow_streaming,
streaming,
)

dot = _ldf.to_dot(optimized)
Expand Down Expand Up @@ -956,18 +960,18 @@ def profile(

return df, timings

@deprecated_alias(allow_streaming="streaming")
def collect(
self,
*,
type_coercion: bool = True,
predicate_pushdown: bool = True,
projection_pushdown: bool = True,
*,
simplify_expression: bool = True,
string_cache: bool = False,
no_optimization: bool = False,
slice_pushdown: bool = True,
common_subplan_elimination: bool = True,
allow_streaming: bool = False,
streaming: bool = False,
) -> pli.DataFrame:
"""
Collect into a DataFrame.
Expand All @@ -985,16 +989,13 @@ def collect(
Do projection pushdown optimization.
simplify_expression
Run simplify expressions optimization.
string_cache
This argument is deprecated. Please set the string cache globally.
The argument will be ignored
no_optimization
Turn off (certain) optimizations.
slice_pushdown
Slice pushdown optimization.
common_subplan_elimination
Will try to cache branching subplans that occur on self-joins or unions.
allow_streaming
streaming
Run parts of the query in a streaming fashion (this is in an alpha state)
Returns
Expand Down Expand Up @@ -1031,7 +1032,7 @@ def collect(
slice_pushdown = False
common_subplan_elimination = False

if allow_streaming:
if streaming:
common_subplan_elimination = False

ldf = self._ldf.optimization_toggle(
Expand All @@ -1041,18 +1042,18 @@ def collect(
simplify_expression,
slice_pushdown,
common_subplan_elimination,
allow_streaming,
streaming,
)
return pli.wrap_df(ldf.collect())

def fetch(
self,
n_rows: int = 500,
*,
type_coercion: bool = True,
predicate_pushdown: bool = True,
projection_pushdown: bool = True,
simplify_expression: bool = True,
string_cache: bool = False,
no_optimization: bool = False,
slice_pushdown: bool = True,
common_subplan_elimination: bool = True,
Expand Down Expand Up @@ -1081,9 +1082,6 @@ def fetch(
Run projection pushdown optimization.
simplify_expression
Run simplify expressions optimization.
string_cache
This argument is deprecated. Please set the string cache globally.
The argument will be ignored
no_optimization
Turn off optimizations.
slice_pushdown
Expand Down
4 changes: 0 additions & 4 deletions py-polars/polars/internals/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@
_datetime_to_pl_timestamp,
_time_to_pl_time,
accessor,
deprecated_alias,
is_bool_sequence,
is_int_sequence,
range_to_slice,
Expand Down Expand Up @@ -1607,7 +1606,6 @@ def cumprod(self, reverse: bool = False) -> Series:
"""

@deprecated_alias(num_elements="n")
def limit(self, n: int = 10) -> Series:
"""
Get the first `n` rows.
Expand Down Expand Up @@ -1746,7 +1744,6 @@ def filter(self, predicate: Series | list[bool]) -> Series:
predicate = Series("", predicate)
return wrap_s(self._s.filter(predicate._s))

@deprecated_alias(length="n")
def head(self, n: int = 10) -> Series:
"""
Get the first `n` rows.
Expand All @@ -1770,7 +1767,6 @@ def head(self, n: int = 10) -> Series:
"""
return self.to_frame().select(pli.col(self.name).head(n)).to_series()

@deprecated_alias(length="n")
def tail(self, n: int = 10) -> Series:
"""
Get the last `n` rows.
Expand Down
4 changes: 0 additions & 4 deletions py-polars/tests/unit/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,10 +679,6 @@ def test_extend() -> None:

def test_drop() -> None:
df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"], "c": [1, 2, 3]})
with pytest.deprecated_call():
df = df.drop(name="a") # type: ignore[call-arg]
assert df.shape == (3, 2)
df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"], "c": [1, 2, 3]})
df = df.drop(columns="a")
assert df.shape == (3, 2)
df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"], "c": [1, 2, 3]})
Expand Down
6 changes: 3 additions & 3 deletions py-polars/tests/unit/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,11 @@ def test_streaming_non_streaming_gb() -> None:
n = 100
df = pl.DataFrame({"a": np.random.randint(0, 20, n)})
q = df.lazy().groupby("a").agg(pl.count()).sort("a")
assert q.collect(allow_streaming=True).frame_equal(q.collect())
assert q.collect(streaming=True).frame_equal(q.collect())

q = df.lazy().with_column(pl.col("a").cast(pl.Utf8))
q = q.groupby("a").agg(pl.count()).sort("a")
assert q.collect(allow_streaming=True).frame_equal(q.collect())
assert q.collect(streaming=True).frame_equal(q.collect())
q = df.lazy().with_column(pl.col("a").alias("b"))
q = q.groupby(["a", "b"]).agg(pl.count()).sort("a")
assert q.collect(allow_streaming=True).frame_equal(q.collect())
assert q.collect(streaming=True).frame_equal(q.collect())
2 changes: 1 addition & 1 deletion py-polars/tests/unit/test_projections.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_unnest_columns_available() -> None:
def test_streaming_duplicate_cols_5537() -> None:
assert pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}).lazy().with_columns(
[(pl.col("a") * 2).alias("foo"), (pl.col("a") * 3)]
).collect(allow_streaming=True).to_dict(False) == {
).collect(streaming=True).to_dict(False) == {
"a": [3, 6, 9],
"b": [1, 2, 3],
"foo": [2, 4, 6],
Expand Down