Skip to content

Commit

Permalink
Use lazy sink instead of collecting data
Browse files Browse the repository at this point in the history
Fix #519.

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>
  • Loading branch information
astrojuanlu committed Mar 19, 2024
1 parent d5bc782 commit f8893b6
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,15 +207,15 @@ def _save(self, data: Union[pl.DataFrame, pl.LazyFrame]) -> None:
save_path = get_filepath_str(self._get_save_path(), self._protocol)

collected_data = None
if isinstance(data, pl.LazyFrame):
collected_data = data.collect()
if not isinstance(data, pl.LazyFrame):
collected_data = data.lazy()
else:
collected_data = data

# Note: polars does support writing partitioned parquet files;
# it leverages Arrow to do so, see e.g.
# https://pola-rs.github.io/polars/py-polars/html/reference/api/polars.DataFrame.write_parquet.html
save_method = getattr(collected_data, f"write_{self._file_format}", None)
save_method = getattr(collected_data, f"sink_{self._file_format}", None)
if save_method:
buf = BytesIO()
save_method(file=buf, **self._save_args)
Expand Down

0 comments on commit f8893b6

Please sign in to comment.