Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python!): Consistently convert to given time zone in Series constructor #16828

Merged
5 commits were merged on Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 5 additions & 36 deletions py-polars/polars/_utils/construction/series.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import contextlib
import warnings
from datetime import date, datetime, time, timedelta
from decimal import Decimal as PyDecimal
from itertools import islice
Expand All @@ -24,7 +23,6 @@
is_simple_numpy_backed_pandas_series,
)
from polars._utils.various import (
find_stacklevel,
range_to_series,
)
from polars._utils.wrap import wrap_s
Expand Down Expand Up @@ -64,7 +62,6 @@
from polars.dependencies import numpy as np
from polars.dependencies import pandas as pd
from polars.dependencies import pyarrow as pa
from polars.exceptions import TimeZoneAwareConstructorWarning

with contextlib.suppress(ImportError): # Module not available when building docs
from polars.polars import PySeries
Expand Down Expand Up @@ -203,41 +200,13 @@ def sequence_to_pyseries(
s = wrap_s(py_series).dt.cast_time_unit(time_unit)

if (values_dtype == Date) & (dtype == Datetime):
return (
s.cast(Datetime(time_unit or "us"))
.dt.replace_time_zone(
time_zone,
ambiguous="raise" if strict else "null",
non_existent="raise" if strict else "null",
)
._s
)
result = s.cast(Datetime(time_unit or "us"))
if time_zone is not None:
result = result.dt.convert_time_zone(time_zone)
return result._s

if (dtype == Datetime) and (value.tzinfo is not None or time_zone is not None):
values_tz = str(value.tzinfo) if value.tzinfo is not None else None
dtype_tz = time_zone
if values_tz is not None and (dtype_tz is not None and dtype_tz != "UTC"):
msg = (
"time-zone-aware datetimes are converted to UTC"
"\n\nPlease either drop the time zone from the dtype, or set it to 'UTC'."
" To convert to a different time zone, please use `.dt.convert_time_zone`."
)
raise ValueError(msg)
if values_tz != "UTC" and dtype_tz is None:
warnings.warn(
"Constructing a Series with time-zone-aware "
"datetimes results in a Series with UTC time zone. "
"To silence this warning, you can filter "
"warnings of class TimeZoneAwareConstructorWarning, or "
"set 'UTC' as the time zone of your datatype.",
TimeZoneAwareConstructorWarning,
stacklevel=find_stacklevel(),
)
return s.dt.replace_time_zone(
dtype_tz or "UTC",
ambiguous="raise" if strict else "null",
non_existent="raise" if strict else "null",
)._s
return s.dt.convert_time_zone(time_zone or "UTC")._s
return s._s

elif (
Expand Down
4 changes: 0 additions & 4 deletions py-polars/polars/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,6 @@ class PolarsInefficientMapWarning(PolarsWarning): # type: ignore[misc]
"""Warning issued when a potentially slow `map_*` operation is performed."""


class TimeZoneAwareConstructorWarning(PolarsWarning): # type: ignore[misc]
"""Warning issued when constructing Series from non-UTC time-zone-aware inputs."""


class UnstableWarning(PolarsWarning): # type: ignore[misc]
"""Warning issued when unstable functionality is used."""

Expand Down
13 changes: 8 additions & 5 deletions py-polars/polars/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1330,17 +1330,20 @@ def datetime(

Examples
--------
>>> from datetime import datetime, date
>>> from datetime import datetime, date, timezone
>>> import polars.selectors as cs
>>> from zoneinfo import ZoneInfo
>>> tokyo_tz = ZoneInfo("Asia/Tokyo")
>>> utc_tz = timezone.utc
>>> df = pl.DataFrame(
... {
... "tstamp_tokyo": [
... datetime(1999, 7, 21, 5, 20, 16, 987654),
... datetime(2000, 5, 16, 6, 21, 21, 123465),
... datetime(1999, 7, 21, 5, 20, 16, 987654, tzinfo=tokyo_tz),
... datetime(2000, 5, 16, 6, 21, 21, 123465, tzinfo=tokyo_tz),
... ],
... "tstamp_utc": [
... datetime(2023, 4, 10, 12, 14, 16, 999000),
... datetime(2025, 8, 25, 14, 18, 22, 666000),
... datetime(2023, 4, 10, 12, 14, 16, 999000, tzinfo=utc_tz),
... datetime(2025, 8, 25, 14, 18, 22, 666000, tzinfo=utc_tz),
... ],
... "tstamp": [
... datetime(2000, 11, 20, 18, 12, 16, 600000),
Expand Down
23 changes: 8 additions & 15 deletions py-polars/tests/unit/constructors/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from polars._utils.construction.utils import try_get_type_hints
from polars.datatypes import PolarsDataType, numpy_char_code_to_dtype
from polars.dependencies import dataclasses, pydantic
from polars.exceptions import TimeZoneAwareConstructorWarning
from polars.testing import assert_frame_equal, assert_series_equal

if TYPE_CHECKING:
Expand Down Expand Up @@ -897,21 +896,15 @@ def test_init_1d_sequence() -> None:
[datetime(2020, 1, 1, tzinfo=timezone.utc)], schema={"ts": pl.Datetime("ms")}
)
assert df.schema == {"ts": pl.Datetime("ms", "UTC")}
with pytest.warns(
TimeZoneAwareConstructorWarning, match="Series with UTC time zone"
):
df = pl.DataFrame(
[datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=1)))],
schema={"ts": pl.Datetime("ms")},
)
df = pl.DataFrame(
[datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=1)))],
schema={"ts": pl.Datetime("ms")},
)
assert df.schema == {"ts": pl.Datetime("ms", "UTC")}
with pytest.warns(
TimeZoneAwareConstructorWarning, match="Series with UTC time zone"
):
df = pl.DataFrame(
[datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))],
schema={"ts": pl.Datetime("ms")},
)
df = pl.DataFrame(
[datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))],
schema={"ts": pl.Datetime("ms")},
)
assert df.schema == {"ts": pl.Datetime("ms", "UTC")}


Expand Down
16 changes: 10 additions & 6 deletions py-polars/tests/unit/constructors/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,23 +106,27 @@ def test_series_init_ambiguous_datetime() -> None:
value = datetime(2001, 10, 28, 2)
dtype = pl.Datetime(time_zone="Europe/Belgrade")

with pytest.raises(pl.ComputeError, match="ambiguous"):
pl.Series([value], dtype=dtype, strict=True)
result = pl.Series([value], dtype=dtype, strict=True)
expected = pl.Series([datetime(2001, 10, 28, 3)]).dt.replace_time_zone(
"Europe/Belgrade"
)
assert_series_equal(result, expected)

result = pl.Series([value], dtype=dtype, strict=False)
expected = pl.Series([None], dtype=dtype)
assert_series_equal(result, expected)


def test_series_init_nonexistent_datetime() -> None:
value = datetime(2024, 3, 31, 2, 30)
dtype = pl.Datetime(time_zone="Europe/Amsterdam")

with pytest.raises(pl.ComputeError, match="non-existent"):
pl.Series([value], dtype=dtype, strict=True)
result = pl.Series([value], dtype=dtype, strict=True)
expected = pl.Series([datetime(2024, 3, 31, 4, 30)]).dt.replace_time_zone(
"Europe/Amsterdam"
)
assert_series_equal(result, expected)

result = pl.Series([value], dtype=dtype, strict=False)
expected = pl.Series([None], dtype=dtype)
assert_series_equal(result, expected)


Expand Down
41 changes: 15 additions & 26 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import contextlib
import sys
import typing
from collections import OrderedDict
Expand All @@ -18,7 +17,6 @@
import polars.selectors as cs
from polars._utils.construction import iterable_to_pydf
from polars.datatypes import DTYPE_TEMPORAL_UNITS, INTEGER_DTYPES
from polars.exceptions import TimeZoneAwareConstructorWarning
from polars.testing import (
assert_frame_equal,
assert_frame_not_equal,
Expand Down Expand Up @@ -2427,7 +2425,10 @@ def test_init_datetimes_with_timezone() -> None:
},
):
result = pl.DataFrame( # type: ignore[arg-type]
data={"d1": [dtm], "d2": [dtm]},
data={
"d1": [dtm.replace(tzinfo=ZoneInfo(tz_us))],
"d2": [dtm.replace(tzinfo=ZoneInfo(tz_europe))],
},
**type_overrides,
)
expected = pl.DataFrame(
Expand All @@ -2446,25 +2447,22 @@ def test_init_datetimes_with_timezone() -> None:
"dtype_time_zone",
"expected_time_zone",
"expected_item",
"warn",
),
[
(None, "", None, None, datetime(2020, 1, 1), False),
(None, "", None, None, datetime(2020, 1, 1)),
(
timezone(timedelta(hours=-8)),
"-08:00",
"UTC",
"UTC",
datetime(2020, 1, 1, 8, tzinfo=timezone.utc),
False,
),
(
timezone(timedelta(hours=-8)),
"-08:00",
None,
"UTC",
datetime(2020, 1, 1, 8, tzinfo=timezone.utc),
True,
),
],
)
Expand All @@ -2474,19 +2472,11 @@ def test_init_vs_strptime_consistency(
dtype_time_zone: str | None,
expected_time_zone: str,
expected_item: datetime,
warn: bool,
) -> None:
msg = r"UTC time zone"
context_manager: contextlib.AbstractContextManager[pytest.WarningsRecorder | None]
if warn:
context_manager = pytest.warns(TimeZoneAwareConstructorWarning, match=msg)
else:
context_manager = contextlib.nullcontext()
with context_manager:
result_init = pl.Series(
[datetime(2020, 1, 1, tzinfo=tzinfo)],
dtype=pl.Datetime("us", dtype_time_zone),
)
result_init = pl.Series(
[datetime(2020, 1, 1, tzinfo=tzinfo)],
dtype=pl.Datetime("us", dtype_time_zone),
)
result_strptime = pl.Series([f"2020-01-01 00:00{offset}"]).str.strptime(
pl.Datetime("us", dtype_time_zone)
)
Expand All @@ -2495,13 +2485,12 @@ def test_init_vs_strptime_consistency(
assert_series_equal(result_init, result_strptime)


def test_init_vs_strptime_consistency_raises() -> None:
msg = "-aware datetimes are converted to UTC"
with pytest.raises(ValueError, match=msg):
pl.Series(
[datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=-8)))],
dtype=pl.Datetime("us", "US/Pacific"),
)
def test_init_vs_strptime_consistency_converts() -> None:
result = pl.Series(
[datetime(2020, 1, 1, tzinfo=timezone(timedelta(hours=-8)))],
dtype=pl.Datetime("us", "US/Pacific"),
).item()
assert result == datetime(2020, 1, 1, 0, 0, tzinfo=ZoneInfo(key="US/Pacific"))
result = (
pl.Series(["2020-01-01 00:00-08:00"])
.str.strptime(pl.Datetime("us", "US/Pacific"))
Expand Down
Loading