Skip to content

Commit

Permalink
Update PandasExcelWriter to use pd.ExcelWriter (#1078)
Browse files Browse the repository at this point in the history
Created to fix issue #946 

Changes to pandas_extensions.py in PandasExcelWriter to implement the pd.ExcelWriter and add appropriate keyword arguments to support this.

* update PandasExcelWriter to use pd.ExcelWriter

* add link to pandas ExcelWriter docs
  • Loading branch information
noahridge authored Aug 6, 2024
1 parent 04aff12 commit 7b84fdd
Showing 1 changed file with 28 additions and 3 deletions.
31 changes: 28 additions & 3 deletions hamilton/plugins/pandas_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1481,7 +1481,8 @@ def name(cls) -> str:
@dataclasses.dataclass
class PandasExcelWriter(DataSaver):
"""Class that handles saving Excel files with pandas.
Maps to https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_excel.html
Maps to https://pandas.pydata.org/docs/reference/api/pandas.ExcelWriter.html
Additional parameters passed to https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_excel.html
"""

path: Union[str, Path, BytesIO]
Expand All @@ -1504,6 +1505,10 @@ class PandasExcelWriter(DataSaver):
freeze_panes: Optional[Tuple[int, int]] = None
storage_options: Optional[Dict[str, Any]] = None
engine_kwargs: Optional[Dict[str, Any]] = None
mode: Optional[Literal["w", "a"]] = "w"
if_sheet_exists: Optional[Literal["error", "new", "replace", "overlay"]] = None
datetime_format: str = None
date_format: str = None

@classmethod
def applicable_types(cls) -> Collection[Type]:
Expand All @@ -1513,6 +1518,17 @@ def _get_saving_kwargs(self) -> Dict[str, Any]:
# Puts kwargs in a dict
kwargs = dataclasses.asdict(self)

# Names of the kwargs that are passed to pd.ExcelWriter; every other kwarg is passed to .to_excel()
writer_kwarg_names = [
"date_format",
"datetime_format",
"if_sheet_exists",
"mode",
"engine_kwargs",
"engine",
"storage_options",
]

# path corresponds to 'excel_writer' argument of pandas.DataFrame.to_excel,
# but we send it separately
del kwargs["path"]
Expand All @@ -1521,11 +1537,20 @@ def _get_saving_kwargs(self) -> Dict[str, Any]:
# For compatibility with pandas 2.0 we remove engine_kwargs from kwargs if it is None.
if kwargs["engine_kwargs"] is None:
del kwargs["engine_kwargs"]
writer_kwarg_names.remove("engine_kwargs")

return kwargs
# separate kwargs for ExcelWriter and to_excel() invocation
writer_kwargs = {k: kwargs[k] for k in writer_kwarg_names}
to_excel_kwargs = {k: kwargs[k] for k in (kwargs.keys() - set(writer_kwarg_names))}

return writer_kwargs, to_excel_kwargs

def save_data(self, data: DATAFRAME_TYPE) -> Dict[str, Any]:
data.to_excel(self.path, **self._get_saving_kwargs())

writer_kwargs, to_excel_kwargs = self._get_saving_kwargs()

with pd.ExcelWriter(self.path, **writer_kwargs) as writer:
data.to_excel(writer, **to_excel_kwargs)
return utils.get_file_and_dataframe_metadata(self.path, data)

@classmethod
Expand Down

0 comments on commit 7b84fdd

Please sign in to comment.