From f8893b647baab721e66dfc54c4413e04cf4b16eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Tue, 19 Mar 2024 14:21:31 +0100 Subject: [PATCH] Use lazy sink instead of collecting data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix #519. Signed-off-by: Juan Luis Cano Rodríguez --- kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py b/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py index a99e0d771..c8da63afc 100644 --- a/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py +++ b/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py @@ -207,15 +207,15 @@ def _save(self, data: Union[pl.DataFrame, pl.LazyFrame]) -> None: save_path = get_filepath_str(self._get_save_path(), self._protocol) collected_data = None - if isinstance(data, pl.LazyFrame): - collected_data = data.collect() + if not isinstance(data, pl.LazyFrame): + collected_data = data.lazy() else: collected_data = data # Note: polars does support writing partitioned parquet file # it is leveraging Arrow to do so, see e.g. # https://pola-rs.github.io/polars/py-polars/html/reference/api/polars.DataFrame.write_parquet.html - save_method = getattr(collected_data, f"write_{self._file_format}", None) + save_method = getattr(collected_data, f"sink_{self._file_format}", None) if save_method: buf = BytesIO() save_method(file=buf, **self._save_args)