Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding myself to the "who is using" #389

Merged
merged 10 commits into from
Sep 9, 2020
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

> An [AWS Professional Service](https://aws.amazon.com/professional-services/) open source initiative | aws-proserve-opensource@amazon.com

[![Release](https://img.shields.io/badge/release-1.9.3-brightgreen.svg)](https://pypi.org/project/awswrangler/)
[![Python Version](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8-brightgreen.svg)](https://anaconda.org/conda-forge/awswrangler)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
Expand Down Expand Up @@ -134,4 +134,5 @@ Please send a PR with your company name and @githubhandle if you may.
* [OKRA Technologies](https://okra.ai) [[@JPFrancoia](https://github.com/JPFrancoia), [@schot](https://github.com/schot)]
* [Pier](https://www.pier.digital/) [[@flaviomax](https://github.com/flaviomax)]
* [Pismo](https://www.pismo.io/) [[@msantino](https://github.com/msantino)]
* [Serasa Experian](https://www.serasaexperian.com.br/) [[@andre-marcos-perez](https://github.com/andre-marcos-perez)]
* [Zillow](https://www.zillow.com/) [[@nicholas-miles]](https://github.com/nicholas-miles)
2 changes: 1 addition & 1 deletion awswrangler/__metadata__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@

__title__: str = "awswrangler"
__description__: str = "Pandas on AWS."
__version__: str = "1.9.3"
__license__: str = "Apache License 2.0"
7 changes: 4 additions & 3 deletions awswrangler/s3/_read_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,11 @@ def _apply_index(df: pd.DataFrame, metadata: Dict[str, Any]) -> pd.DataFrame:
def _apply_timezone(df: pd.DataFrame, metadata: Dict[str, Any]) -> pd.DataFrame:
    """Re-apply the original timezone info (from Parquet pandas metadata) to datetime columns.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose datetime columns may have lost their timezone on read.
    metadata : Dict[str, Any]
        The ``pandas`` metadata dict embedded in the Parquet file; each entry in
        ``metadata["columns"]`` describes one column (``field_name``,
        ``pandas_type`` and, for tz-aware columns, ``metadata["timezone"]``).

    Returns
    -------
    pd.DataFrame
        The same frame, mutated in place, with tz-aware dtypes restored.
    """
    for c in metadata["columns"]:
        if c["field_name"] in df.columns and c["pandas_type"] == "datetimetz":
            # Resolve the stored tz string (e.g. "UTC", "+04:00") via pyarrow so
            # offset-style zones are handled, not only IANA names.
            timezone: datetime.tzinfo = pa.lib.string_to_tzinfo(c["metadata"]["timezone"])
            _logger.debug("applying timezone (%s) on column %s", timezone, c["field_name"])
            # Parquet stores tz-aware values normalized to UTC; a naive dtype
            # (no ``tz`` attribute) therefore means "UTC, not yet localized".
            if hasattr(df[c["field_name"]].dtype, "tz") is False:
                df[c["field_name"]] = df[c["field_name"]].dt.tz_localize(tz="UTC")
            df[c["field_name"]] = df[c["field_name"]].dt.tz_convert(tz=timezone)
    return df


Expand Down
2 changes: 1 addition & 1 deletion tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


def test_metadata():
    """Sanity-check the static package metadata exposed at the top level."""
    assert wr.__version__ == "1.9.3"
    assert wr.__title__ == "awswrangler"
    assert wr.__description__ == "Pandas on AWS."
    assert wr.__license__ == "Apache License 2.0"
20 changes: 18 additions & 2 deletions tests/test_s3_parquet.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import itertools
import logging
import math
from datetime import datetime, timedelta, timezone

import boto3
import numpy as np
Expand Down Expand Up @@ -362,7 +362,7 @@ def test_timezone_file(path, use_threads):
assert df.equals(df2)


@pytest.mark.parametrize("use_threads", [True, False])
def test_timezone_file_columns(path, use_threads):
file_path = f"{path}0.parquet"
df = pd.DataFrame({"c0": [datetime.utcnow(), datetime.utcnow()], "c1": [1.1, 2.2]})
Expand All @@ -371,3 +371,19 @@ def test_timezone_file_columns(path, use_threads):
wr.s3.wait_objects_exist(paths=[file_path], use_threads=use_threads)
df2 = wr.s3.read_parquet(path, columns=["c1"], use_threads=use_threads)
assert df[["c1"]].equals(df2)


@pytest.mark.parametrize("use_threads", [True, False])
def test_timezone_raw_values(path, use_threads):
    """Round-trip a partitioned dataset with several fixed-offset timezones.

    Verifies that wr.s3.read_parquet produces the same frame as reading each
    written file directly with pandas, i.e. timezone info survives the
    write/read cycle for UTC, positive and negative UTC offsets.
    """
    df = pd.DataFrame({"c0": [1.1, 2.2], "par": ["a", "b"]})
    df["c1"] = pd.to_datetime(datetime.now(timezone.utc))
    df["c2"] = pd.to_datetime(datetime(2011, 11, 4, 0, 5, 23, tzinfo=timezone(timedelta(seconds=14400))))
    df["c3"] = pd.to_datetime(datetime(2011, 11, 4, 0, 5, 23, tzinfo=timezone(-timedelta(seconds=14400))))
    df["c4"] = pd.to_datetime(datetime(2011, 11, 4, 0, 5, 23, tzinfo=timezone(timedelta(hours=-8))))
    paths = wr.s3.to_parquet(partition_cols=["par"], df=df, path=path, dataset=True, sanitize_columns=False)["paths"]
    wr.s3.wait_objects_exist(paths, use_threads=use_threads)
    df2 = wr.s3.read_parquet(path, dataset=True, use_threads=use_threads)
    # Reference: read the written objects directly with pandas/pyarrow.
    df3 = pd.concat([pd.read_parquet(p) for p in paths], ignore_index=True)
    # Partition columns come back as categoricals/objects; normalize dtype
    # before comparing the two frames.
    df2["par"] = df2["par"].astype("string")
    df3["par"] = df3["par"].astype("string")
    assert df2.equals(df3)