From dc9766f9809a87279d2035dbb355ec89803d5c0b Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 28 Feb 2024 14:14:16 +0100 Subject: [PATCH 1/2] refactor(python): Use `datetime_to_int` util for AnyValue conversion --- py-polars/polars/utils/__init__.py | 6 ++--- py-polars/polars/utils/convert.py | 27 ++----------------- py-polars/src/conversion/any_value.rs | 34 ++++++------------------ py-polars/tests/unit/utils/test_utils.py | 2 ++ 4 files changed, 14 insertions(+), 55 deletions(-) diff --git a/py-polars/polars/utils/__init__.py b/py-polars/polars/utils/__init__.py index 324fbd9e1283..db6c1085bc14 100644 --- a/py-polars/polars/utils/__init__.py +++ b/py-polars/polars/utils/__init__.py @@ -5,9 +5,8 @@ """ from polars.utils._scan import _execute_from_rust from polars.utils.convert import ( - _datetime_for_any_value, - _datetime_for_any_value_windows, date_to_int, + datetime_to_int, time_to_int, timedelta_to_int, to_py_date, @@ -24,10 +23,9 @@ "no_default", # Required for Rust bindings "date_to_int", + "datetime_to_int", "time_to_int", "timedelta_to_int", - "_datetime_for_any_value", - "_datetime_for_any_value_windows", "_execute_from_rust", "_polars_warn", "to_py_date", diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 8359ea83e1a1..4e58f90a0658 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -98,7 +98,8 @@ def datetime_to_int(dt: datetime, time_unit: TimeUnit) -> int: if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) - seconds = _timestamp_in_seconds(dt) + td = dt - EPOCH_UTC + seconds = td.days * SECONDS_PER_DAY + td.seconds microseconds = dt.microsecond if time_unit == "us": @@ -111,11 +112,6 @@ def datetime_to_int(dt: datetime, time_unit: TimeUnit) -> int: _raise_invalid_time_unit(time_unit) -def _timestamp_in_seconds(dt: datetime) -> int: - td = dt - EPOCH_UTC - return td.days * SECONDS_PER_DAY + td.seconds - - def timedelta_to_int(td: timedelta, time_unit: 
TimeUnit) -> int: """Convert a Python timedelta object to an integer.""" seconds = td.days * SECONDS_PER_DAY + td.seconds @@ -243,25 +239,6 @@ def _create_decimal_with_prec( return Context(prec=precision).create_decimal -def _datetime_for_any_value(dt: datetime) -> tuple[int, int]: - """Used in PyO3 AnyValue conversion.""" - # returns (s, ms) - if dt.tzinfo is None: - return ( - _timestamp_in_seconds(dt.replace(tzinfo=timezone.utc)), - dt.microsecond, - ) - return (_timestamp_in_seconds(dt), dt.microsecond) - - -def _datetime_for_any_value_windows(dt: datetime) -> tuple[float, int]: - """Used in PyO3 AnyValue conversion.""" - if dt.tzinfo is None: - dt = _localize_datetime(dt, "UTC") - # returns (s, ms) - return (_timestamp_in_seconds(dt), dt.microsecond) - - def _raise_invalid_time_unit(time_unit: Any) -> NoReturn: msg = f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" raise ValueError(msg) diff --git a/py-polars/src/conversion/any_value.rs b/py-polars/src/conversion/any_value.rs index 1e327f2cf1f1..f78eac0a25d8 100644 --- a/py-polars/src/conversion/any_value.rs +++ b/py-polars/src/conversion/any_value.rs @@ -356,34 +356,16 @@ fn convert_date(ob: &PyAny) -> PyResult<Wrap<AnyValue>> { Ok(Wrap(AnyValue::Date(v))) }) } + fn convert_datetime(ob: &PyAny) -> PyResult<Wrap<AnyValue>> { Python::with_gil(|py| { - // windows - #[cfg(target_arch = "windows")] - let (seconds, microseconds) = { - let convert = UTILS - .getattr(py, intern!(py, "_datetime_for_any_value_windows")) - .unwrap(); - let out = convert.call1(py, (ob,)).unwrap(); - let out: (i64, i64) = out.extract(py).unwrap(); - out - }; - // unix - #[cfg(not(target_arch = "windows"))] - let (seconds, microseconds) = { - let convert = UTILS - .getattr(py, intern!(py, "_datetime_for_any_value")) - .unwrap(); - let out = convert.call1(py, (ob,)).unwrap(); - let out: (i64, i64) = out.extract(py).unwrap(); - out - }; - - // s to us - let mut v = seconds * 1_000_000; - v += microseconds; - - // choose "us" as that is python's
default unit + let date = UTILS + .as_ref(py) + .getattr(intern!(py, "datetime_to_int")) + .unwrap() + .call1((ob, intern!(py, "us"))) + .unwrap(); + let v = date.extract::<i64>().unwrap(); Ok(AnyValue::Datetime(v, TimeUnit::Microseconds, &None).into()) }) } diff --git a/py-polars/tests/unit/utils/test_utils.py b/py-polars/tests/unit/utils/test_utils.py index 4b716c1fa4f2..5244cbc7331b 100644 --- a/py-polars/tests/unit/utils/test_utils.py +++ b/py-polars/tests/unit/utils/test_utils.py @@ -87,6 +87,8 @@ def test_time_to_int(t: time, expected: int) -> None: (datetime(2121, 1, 1), "ns", 4_765_132_800_000_000_000), (datetime(2121, 1, 1), "us", 4_765_132_800_000_000), (datetime(2121, 1, 1), "ms", 4_765_132_800_000), + (datetime(1969, 12, 31, 23, 59, 59, 999999), "us", -1), + (datetime(1969, 12, 30, 23, 59, 59, 999999), "us", -86_400_000_001), (datetime.min, "ns", -62_135_596_800_000_000_000), (datetime.max, "ns", 253_402_300_799_999_999_000), (datetime.min, "ms", -62_135_596_800_000), From 47fbc209d7f00e49a8c47f3802b59cb880a4a7f8 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 28 Feb 2024 14:43:59 +0100 Subject: [PATCH 2/2] Add tests --- py-polars/tests/unit/utils/test_utils.py | 41 ++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/py-polars/tests/unit/utils/test_utils.py b/py-polars/tests/unit/utils/test_utils.py index 5244cbc7331b..352a1a3371e0 100644 --- a/py-polars/tests/unit/utils/test_utils.py +++ b/py-polars/tests/unit/utils/test_utils.py @@ -26,7 +26,11 @@ ) if TYPE_CHECKING: + from zoneinfo import ZoneInfo + from polars.type_aliases import TimeUnit +else: + from polars.utils.convert import string_to_zoneinfo as ZoneInfo @pytest.mark.parametrize( @@ -75,12 +79,24 @@ def test_date_to_int(d: date, expected: int) -> None: (time(20, 52, 10, 200), 75_130_000_200_000), (time.min, 0), (time.max, 86_399_999_999_000), + (time(12, 0, tzinfo=None), 43_200_000_000_000), + (time(12, 0, tzinfo=ZoneInfo("UTC")), 43_200_000_000_000), + (time(12, 0,
tzinfo=ZoneInfo("Asia/Shanghai")), 43_200_000_000_000), + (time(12, 0, tzinfo=ZoneInfo("US/Central")), 43_200_000_000_000), ], ) def test_time_to_int(t: time, expected: int) -> None: assert time_to_int(t) == expected +@pytest.mark.parametrize( + "tzinfo", [None, ZoneInfo("UTC"), ZoneInfo("Asia/Shanghai"), ZoneInfo("US/Central")] +) +def test_time_to_int_with_time_zone(tzinfo: Any) -> None: + t = time(12, 0, tzinfo=tzinfo) + assert time_to_int(t) == 43_200_000_000_000 + + @pytest.mark.parametrize( ("dt", "time_unit", "expected"), [ @@ -99,6 +115,31 @@ def test_datetime_to_int(dt: datetime, time_unit: TimeUnit, expected: int) -> No assert datetime_to_int(dt, time_unit) == expected +@pytest.mark.parametrize( + ("dt", "expected"), + [ + ( + datetime(2000, 1, 1, 12, 0, tzinfo=None), + 946_728_000_000_000, + ), + ( + datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("UTC")), + 946_728_000_000_000, + ), + ( + datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("Asia/Shanghai")), + 946_699_200_000_000, + ), + ( + datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("US/Central")), + 946_749_600_000_000, + ), + ], +) +def test_datetime_to_int_with_time_zone(dt: datetime, expected: int) -> None: + assert datetime_to_int(dt, "us") == expected + + @pytest.mark.parametrize( ("td", "time_unit", "expected"), [