Skip to content

Commit

Permalink
fix: tz attribute check, it was checking dtype instead of dt (#2855)
Browse files Browse the repository at this point in the history

* fix: `tz` attribute check, it was checking `dtype` instead of `dt`

This will fix the issue reported here: #2410

* fix test_timezone_raw_values

* fix formatting

---------

Co-authored-by: Leon Luttenberger <luttenberger.leon@gmail.com>
• Loading branch information…
sanrodari and LeonLuttenberger authored Jun 25, 2024
1 parent aece852 commit f646ed3
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
6 changes: 5 additions & 1 deletion awswrangler/_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,17 @@ def _apply_timezone(df: pd.DataFrame, metadata: dict[str, Any]) -> pd.DataFrame:
if col_name in df.columns and c["pandas_type"] == "datetimetz":
column_metadata: dict[str, Any] = c["metadata"] if c.get("metadata") else {}
timezone_str: str | None = column_metadata.get("timezone")

if timezone_str:
timezone: datetime.tzinfo = pa.lib.string_to_tzinfo(timezone_str)
_logger.debug("applying timezone (%s) on column %s", timezone, col_name)
if hasattr(df[col_name].dtype, "tz") is False:

if hasattr(df[col_name].dt, "tz") is False or df[col_name].dt.tz is None:
df[col_name] = df[col_name].dt.tz_localize(tz="UTC")

if timezone is not None and timezone != pytz.UTC and hasattr(df[col_name].dt, "tz_convert"):
df[col_name] = df[col_name].dt.tz_convert(tz=timezone)

return df


Expand Down
4 changes: 4 additions & 0 deletions tests/unit/test_s3_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,13 +645,17 @@ def test_timezone_raw_values(path):
df["c3"] = pd.to_datetime(datetime(2011, 11, 4, 0, 5, 23, tzinfo=timezone(-timedelta(seconds=14400))))
df["c4"] = pd.to_datetime(datetime(2011, 11, 4, 0, 5, 23, tzinfo=timezone(timedelta(hours=-8))))
wr.s3.to_parquet(partition_cols=["par"], df=df, path=path, dataset=True, sanitize_columns=False)

df2 = wr.s3.read_parquet(path, dataset=True, use_threads=False, pyarrow_additional_kwargs={"ignore_metadata": True})

# Use pandas to read because of Modin "Internal Error: Internal and external indices on axis 1 do not match."
import pandas

df3 = pandas.read_parquet(path)

df2["par"] = df2["par"].astype("string")
df3["par"] = df3["par"].astype("string")

assert_pandas_equals(df2, df3)


Expand Down

0 comments on commit f646ed3

Please sign in to comment.