Skip to content

Commit

Permalink
fix(python): ensure cs.temporal() selector wildcards the Datetime time_zone match
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Jan 12, 2024
1 parent 6f44725 commit 138f672
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 3 deletions.
5 changes: 3 additions & 2 deletions py-polars/polars/datatypes/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,8 +412,9 @@ def __init__(
time_zone
Time zone string, as defined in zoneinfo (to see valid strings run
`import zoneinfo; zoneinfo.available_timezones()` for a full list).
When using to match dtypes, can use "*" to check for Datetime columns
that have any timezone.
When used to match dtypes, can set this to "*" to check for Datetime
columns that have any (non-null) timezone.
"""
if isinstance(time_zone, timezone):
time_zone = str(time_zone)
Expand Down
3 changes: 3 additions & 0 deletions py-polars/polars/datatypes/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
Datetime("ms"),
Datetime("us"),
Datetime("ns"),
Datetime("ms", "*"),
Datetime("us", "*"),
Datetime("ns", "*"),
]
)
DURATION_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
Expand Down
17 changes: 16 additions & 1 deletion py-polars/tests/unit/series/buffers/test_from_buffers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING

import pytest
from hypothesis import given
Expand All @@ -9,6 +10,20 @@
from polars.testing import assert_series_equal
from polars.testing.parametric import series

if TYPE_CHECKING:
from polars.type_aliases import PolarsDataType


def _dtype_attr(tp: PolarsDataType, attr: str) -> str:
    """
    Return attribute `attr` of `tp.dtype`, or "" when it cannot be resolved.

    An empty string is returned both when `tp` has no `dtype` attribute and
    when that `dtype` object has no attribute named `attr`.
    """
    # NOTE(review): the lookup goes through `tp.dtype`, not `tp` itself --
    # confirm this is the intended object to inspect for wildcard markers.
    inner = getattr(tp, "dtype", "")
    return getattr(inner, attr, "")


# Members of `pl.TEMPORAL_DTYPES` for which `_dtype_attr` reports "*" for
# neither "time_unit" nor "time_zone" (i.e. the wildcard entries filtered out).
TEMPORAL_EX_WILDCARDS: list[PolarsDataType] = [
    dtype
    for dtype in pl.TEMPORAL_DTYPES
    if all(_dtype_attr(dtype, name) != "*" for name in ("time_unit", "time_zone"))
]


@given(
s=series(
Expand All @@ -34,7 +49,7 @@ def test_series_from_buffers_numeric(s: pl.Series) -> None:
assert_series_equal(s, result)


@given(s=series(allowed_dtypes=pl.TEMPORAL_DTYPES, chunked=False))
@given(s=series(allowed_dtypes=TEMPORAL_EX_WILDCARDS, chunked=False))
def test_series_from_buffers_temporal_with_validity(s: pl.Series) -> None:
validity = s.is_not_null()
physical = pl.Int32 if s.dtype == pl.Date else pl.Int64
Expand Down
35 changes: 35 additions & 0 deletions py-polars/tests/unit/test_selectors.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from datetime import date, datetime
from typing import Any

import pytest
from zoneinfo import ZoneInfo

import polars as pl
import polars.selectors as cs
Expand Down Expand Up @@ -352,6 +354,39 @@ def test_selector_temporal(df: pl.DataFrame) -> None:
assert df.select(cs.date() | cs.time()).schema == {"ghi": pl.Time, "JJK": pl.Date}


def test_selector_temporal_13665() -> None:
    """
    Check `cs.temporal()` against a frame holding a tz-aware Datetime column.

    Presumably a regression test for issue 13665 (per the test name): the
    selection must contain both the "ns"/"UTC" Datetime column and the Date
    column, and neither of the integer columns.
    """
    utc = ZoneInfo(key="UTC")

    # Source frame: two integer columns, plus two temporal columns derived by
    # parsing the integer `date_id` values as "%Y%m%d" strings.
    base = pl.DataFrame(
        data={"date_id": [20231212, 20240111, 20240112], "value": [3, 7, 1]}
    )
    as_datetime = (
        pl.col("date_id")
        .cast(pl.Utf8)
        .str.to_datetime("%Y%m%d", time_zone="UTC", time_unit="ns")
        .dt.offset_by("12h")
    )
    as_date = pl.col("date_id").cast(pl.Utf8).str.to_date("%Y%m%d")
    xdf = base.with_columns(date_id_datetime=as_datetime, date_id_date=as_date)

    # Only the temporal columns are expected back, in their original order.
    expected = pl.DataFrame(
        data={
            "date_id_datetime": [
                datetime(2023, 12, 12, 12, 0, tzinfo=utc),
                datetime(2024, 1, 11, 12, 0, tzinfo=utc),
                datetime(2024, 1, 12, 12, 0, tzinfo=utc),
            ],
            "date_id_date": [
                date(2023, 12, 12),
                date(2024, 1, 11),
                date(2024, 1, 12),
            ],
        },
        schema={
            "date_id_datetime": pl.Datetime(time_unit="ns", time_zone="UTC"),
            "date_id_date": pl.Date,
        },
    )

    assert_frame_equal(xdf.select(cs.temporal()), expected)


def test_selector_expansion() -> None:
df = pl.DataFrame({name: [] for name in "abcde"})

Expand Down

0 comments on commit 138f672

Please sign in to comment.