From ff7d8d73cd147e68d46ddee19e52a320eca18fe1 Mon Sep 17 00:00:00 2001 From: noah ridge Date: Fri, 2 Aug 2024 14:53:38 -0600 Subject: [PATCH 1/2] update PandasExcelWriter to use pd.ExcelWriter --- hamilton/plugins/pandas_extensions.py | 28 +++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/hamilton/plugins/pandas_extensions.py b/hamilton/plugins/pandas_extensions.py index c8157574f..f4e92b3b6 100644 --- a/hamilton/plugins/pandas_extensions.py +++ b/hamilton/plugins/pandas_extensions.py @@ -1504,6 +1504,10 @@ class PandasExcelWriter(DataSaver): freeze_panes: Optional[Tuple[int, int]] = None storage_options: Optional[Dict[str, Any]] = None engine_kwargs: Optional[Dict[str, Any]] = None + mode: Optional[Literal["w", "a"]] = "w" + if_sheet_exists: Optional[Literal["error", "new", "replace", "overlay"]] = None + datetime_format: str = None + date_format: str = None @classmethod def applicable_types(cls) -> Collection[Type]: @@ -1513,6 +1517,17 @@ def _get_saving_kwargs(self) -> Dict[str, Any]: # Puts kwargs in a dict kwargs = dataclasses.asdict(self) + # Pass kwargs to ExcelWriter ONLY for kwargs which appear in both ExcelWriter and .to_excel() + writer_kwarg_names = [ + "date_format", + "datetime_format", + "if_sheet_exists", + "mode", + "engine_kwargs", + "engine", + "storage_options", + ] + # path corresponds to 'excel_writer' argument of pandas.DataFrame.to_excel, # but we send it separately del kwargs["path"] @@ -1521,11 +1536,20 @@ def _get_saving_kwargs(self) -> Dict[str, Any]: # For compatibility with pandas 2.0 we remove engine_kwargs from kwargs if it's empty. 
if kwargs["engine_kwargs"] is None: del kwargs["engine_kwargs"] + writer_kwarg_names.remove("engine_kwargs") - return kwargs + # separate kwargs for ExcelWriter and to_excel() invocation + writer_kwargs = {k: kwargs[k] for k in writer_kwarg_names} + to_excel_kwargs = {k: kwargs[k] for k in (kwargs.keys() - set(writer_kwarg_names))} + + return writer_kwargs, to_excel_kwargs def save_data(self, data: DATAFRAME_TYPE) -> Dict[str, Any]: - data.to_excel(self.path, **self._get_saving_kwargs()) + + writer_kwargs, to_excel_kwargs = self._get_saving_kwargs() + + with pd.ExcelWriter(self.path, **writer_kwargs) as writer: + data.to_excel(writer, **to_excel_kwargs) return utils.get_file_and_dataframe_metadata(self.path, data) @classmethod From 0b28001840fe5eed0d9ceb80cce077aa0133551a Mon Sep 17 00:00:00 2001 From: noah ridge Date: Mon, 5 Aug 2024 20:56:24 -0600 Subject: [PATCH 2/2] add link to pandas ExcelWriter docs --- hamilton/plugins/pandas_extensions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hamilton/plugins/pandas_extensions.py b/hamilton/plugins/pandas_extensions.py index f4e92b3b6..7ebafe602 100644 --- a/hamilton/plugins/pandas_extensions.py +++ b/hamilton/plugins/pandas_extensions.py @@ -1481,7 +1481,8 @@ def name(cls) -> str: @dataclasses.dataclass class PandasExcelWriter(DataSaver): """Class that handles saving Excel files with pandas. - Maps to https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_excel.html + Maps to https://pandas.pydata.org/docs/reference/api/pandas.ExcelWriter.html + Additional parameters passed to https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_excel.html """ path: Union[str, Path, BytesIO]