From 0b500eb70a010e033f6447bf9205e8db8a14fa0c Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Fri, 17 Feb 2023 19:57:41 +0000 Subject: [PATCH 01/17] test(python): `median` & `mean` unit test parameters updated. Testing for different cases, including skewed case - when one of the functions would have worked but the other failed. --- .../tests/unit/namespaces/test_datetime.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index d4ec70d27566..fdb983119886 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -279,26 +279,32 @@ def test_weekday() -> None: @pytest.mark.parametrize( - ("values", "expected"), + ("values", "expected_median"), [ - ([datetime(1969, 12, 31), datetime(1970, 1, 2)], datetime(1970, 1, 1)), + ([], None), ([None, None], None), + ([date(2022, 1, 1)], date(2022, 1, 1)), + ([date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)], date(2022, 1, 2)), + ([date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)], date(2022, 1, 2)), ], - ids=["datetime_dates", "Nones"], + ids=["empty", "Nones", "single", "spread_even", "spread_skewed"], ) -def test_median(values: list[datetime | None], expected: datetime | None) -> None: - result = pl.Series(values).cast(pl.Datetime).dt.median() - assert result == expected +def test_median(values: list[date | None], expected_median: date | None) -> None: + result = pl.Series(values).cast(pl.Date).dt.median() + assert result == expected_median @pytest.mark.parametrize( - ("values", "expected"), + ("values", "expected_mean"), [ - ([datetime(1969, 12, 31), datetime(1970, 1, 2)], datetime(1970, 1, 1)), + ([], None), ([None, None], None), + ([date(2022, 1, 1)], date(2022, 1, 1)), + ([date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)], date(2022, 1, 2)), + ([date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)], date(2022, 10, 16)), ], - ids=["datetime_dates", "Nones"], + ids=["empty", "Nones", "single", "spread_even", "spread_skewed"], ) -def test_mean(values: list[datetime | None], expected: datetime | None) -> None: - result = pl.Series(values).cast(pl.Datetime).dt.mean() - assert result == expected +def test_mean(values: list[date | None], expected_mean: date | None) -> None: + result = pl.Series(values).cast(pl.Date).dt.mean() + assert result == expected_mean From eba94b3b79b7bf1cd1792b1e899abb2a0993326b Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Fri, 17 Feb 2023 20:35:48 +0000 Subject: [PATCH 02/17] test(python): applying `parametrize` to other date-time unit tests --- .../tests/unit/namespaces/test_datetime.py | 150 +++++++++--------- 1 file changed, 71 insertions(+), 79 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index fdb983119886..1701877ee168 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -1,7 +1,7 @@ from __future__ import annotations from datetime import date, datetime, time, timedelta -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import numpy as np import pytest @@ -71,88 +71,79 @@ def test_dt_datetimes() -> None: ) -def test_duration_extract_times() -> None: +@pytest.mark.parametrize("unit, expected", [ + ("days", [1]), + ("hours", [24]), + ("seconds", [3600 * 24]), + ("milliseconds", [3600 * 24 * int(1e3)]), + ("microseconds", [3600 * 24 * 
int(1e6)]), + ("nanoseconds", [3600 * 24 * int(1e9)]) +]) +def test_duration_extract_times(unit: str, expected: list[int]) -> None: a = pl.Series("a", [datetime(2021, 1, 1)]) b = pl.Series("b", [datetime(2021, 1, 2)]) - duration = b - a - expected = pl.Series("b", [1]) - assert_series_equal(duration.dt.days(), expected) - - expected = pl.Series("b", [24]) - assert_series_equal(duration.dt.hours(), expected) - - expected = pl.Series("b", [3600 * 24]) - assert_series_equal(duration.dt.seconds(), expected) + assert_series_equal(getattr(duration.dt, unit)(), pl.Series("b", expected)) - expected = pl.Series("b", [3600 * 24 * int(1e3)]) - assert_series_equal(duration.dt.milliseconds(), expected) - expected = pl.Series("b", [3600 * 24 * int(1e6)]) - assert_series_equal(duration.dt.microseconds(), expected) +@pytest.mark.parametrize( + "time_unit, every", + [ + ("ms", "1h"), + ("us", "1h0m0s"), + ("ns", timedelta(hours=1)), + ], + ids=["milliseconds", "microseconds", "nanoseconds"], +) +def test_truncate(time_unit: Literal["ms", "us", "ns"], every: str | timedelta) -> None: + start = datetime(2021, 1, 1) + stop = datetime(2021, 1, 2) - expected = pl.Series("b", [3600 * 24 * int(1e9)]) - assert_series_equal(duration.dt.nanoseconds(), expected) + s = pl.date_range( + start, stop, timedelta(minutes=30), name=f"dates[{time_unit}]", + time_unit=time_unit + ) + # can pass strings and time-deltas + out = s.dt.truncate(every) + assert out.dt[0] == start + assert out.dt[1] == start + assert out.dt[2] == start + timedelta(hours=1) + assert out.dt[3] == start + timedelta(hours=1) + # ... + assert out.dt[-3] == stop - timedelta(hours=1) + assert out.dt[-2] == stop - timedelta(hours=1) + assert out.dt[-1] == stop -def test_truncate() -> None: - start = datetime(2001, 1, 1) - stop = datetime(2001, 1, 2) - s1 = pl.date_range( - start, stop, timedelta(minutes=30), name="dates[ms]", time_unit="ms" - ) - s2 = pl.date_range( - start, stop, timedelta(minutes=30), name="dates[us]", time_unit="us" - ) - s3 = pl.date_range( - start, stop, timedelta(minutes=30), name="dates[ns]", time_unit="ns" - ) +@pytest.mark.parametrize( + "time_unit, every", + [ + ("ms", "1h"), + ("us", "1h0m0s"), + ("ns", timedelta(hours=1)), + ], + ids=["milliseconds", "microseconds", "nanoseconds"], +) +def test_round(time_unit: Literal["ms", "us", "ns"], every: str | timedelta) -> None: + start = datetime(2021, 1, 1) + stop = datetime(2021, 1, 2) - # can pass strings and timedeltas - for out in [ - s1.dt.truncate("1h"), - s2.dt.truncate("1h0m0s"), - s3.dt.truncate(timedelta(hours=1)), - ]: - assert out.dt[0] == start - assert out.dt[1] == start - assert out.dt[2] == start + timedelta(hours=1) - assert out.dt[3] == start + timedelta(hours=1) - # ... 
- assert out.dt[-3] == stop - timedelta(hours=1) - assert out.dt[-2] == stop - timedelta(hours=1) - assert out.dt[-1] == stop - - -def test_round() -> None: - start = datetime(2001, 1, 1) - stop = datetime(2001, 1, 2) - - s1 = pl.date_range( - start, stop, timedelta(minutes=30), name="dates[ms]", time_unit="ms" - ) - s2 = pl.date_range( - start, stop, timedelta(minutes=30), name="dates[us]", time_unit="us" - ) - s3 = pl.date_range( - start, stop, timedelta(minutes=30), name="dates[ns]", time_unit="ns" + s = pl.date_range( + start, stop, timedelta(minutes=30), name=f"dates[{time_unit}]", + time_unit=time_unit ) - # can pass strings and timedeltas - for out in [ - s1.dt.round("1h"), - s2.dt.round("1h0m0s"), - s3.dt.round(timedelta(hours=1)), - ]: - assert out.dt[0] == start - assert out.dt[1] == start + timedelta(hours=1) - assert out.dt[2] == start + timedelta(hours=1) - assert out.dt[3] == start + timedelta(hours=2) - # ... - assert out.dt[-3] == stop - timedelta(hours=1) - assert out.dt[-2] == stop - assert out.dt[-1] == stop + # can pass strings and time-deltas + out = s.dt.round(every) + assert out.dt[0] == start + assert out.dt[1] == start + timedelta(hours=1) + assert out.dt[2] == start + timedelta(hours=1) + assert out.dt[3] == start + timedelta(hours=2) + # ... + assert out.dt[-3] == stop - timedelta(hours=1) + assert out.dt[-2] == stop + assert out.dt[-1] == stop def test_cast_time_units() -> None: @@ -267,15 +258,16 @@ def test_year_empty_df() -> None: assert df.select(pl.col("date").dt.year()).dtypes == [pl.Int32] -def test_weekday() -> None: - # monday - s = pl.Series([datetime(2020, 1, 6)]) - - time_units: list[TimeUnit] = ["ns", "us", "ms"] - for tu in time_units: - assert s.dt.cast_time_unit(tu).dt.weekday()[0] == 1 +@pytest.mark.parametrize( + "time_unit", + ["ms", "us", "ns"], + ids=["milliseconds", "microseconds", "nanoseconds"], +) +def test_weekday(time_unit: Literal["ms", "us", "ns"]) -> None: + friday = pl.Series([datetime(2021, 1, 1)]) - assert s.cast(pl.Date).dt.weekday()[0] == 1 + assert friday.dt.cast_time_unit(time_unit).dt.weekday()[0] == 5 + assert friday.cast(pl.Date).dt.weekday()[0] == 5 @pytest.mark.parametrize( From 9cb794c26ffc3be681de54e86097c4be99a2dde3 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Fri, 17 Feb 2023 20:54:56 +0000 Subject: [PATCH 03/17] test(python): added `fixture`'s date-time unit tests --- .../tests/unit/namespaces/test_datetime.py | 82 ++++++++++++------- 1 file changed, 53 insertions(+), 29 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index 1701877ee168..149841f28034 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -14,6 +14,16 @@ from polars.internals.type_aliases import TimeUnit +@pytest.fixture() +def date_2022_01_01() -> date: + return date(2022, 1, 1) + + +@pytest.fixture() +def date_2022_01_02() -> date: + return date(2022, 1, 2) + + def test_dt_strftime() -> None: s = pl.Series("a", [10000, 20000, 30000], dtype=pl.Date) assert s.dtype == pl.Date @@ -71,23 +81,27 @@ def test_dt_datetimes() -> None: ) -@pytest.mark.parametrize("unit, expected", [ - ("days", [1]), - ("hours", [24]), - ("seconds", [3600 * 24]), - ("milliseconds", [3600 * 24 * int(1e3)]), - ("microseconds", [3600 * 24 * int(1e6)]), - ("nanoseconds", [3600 * 24 * int(1e9)]) -]) -def test_duration_extract_times(unit: str, expected: list[int]) -> None: - a = pl.Series("a", [datetime(2021, 1, 1)]) - b = pl.Series("b", 
[datetime(2021, 1, 2)]) - duration = b - a - assert_series_equal(getattr(duration.dt, unit)(), pl.Series("b", expected)) +@pytest.mark.parametrize( + ("unit", "expected"), + [ + ("days", [1]), + ("hours", [24]), + ("seconds", [3600 * 24]), + ("milliseconds", [3600 * 24 * int(1e3)]), + ("microseconds", [3600 * 24 * int(1e6)]), + ("nanoseconds", [3600 * 24 * int(1e9)]), + ], +) +def test_duration_extract_times( + unit: str, expected: list[int], date_2022_01_01: date, date_2022_01_02: date +) -> None: + duration = pl.Series([date_2022_01_02]) - pl.Series([date_2022_01_01]) + + assert_series_equal(getattr(duration.dt, unit)(), pl.Series(expected)) @pytest.mark.parametrize( - "time_unit, every", + ("time_unit", "every"), [ ("ms", "1h"), ("us", "1h0m0s"), @@ -95,13 +109,18 @@ def test_duration_extract_times(unit: str, expected: list[int]) -> None: ], ids=["milliseconds", "microseconds", "nanoseconds"], ) -def test_truncate(time_unit: Literal["ms", "us", "ns"], every: str | timedelta) -> None: - start = datetime(2021, 1, 1) - stop = datetime(2021, 1, 2) - +def test_truncate( + time_unit: Literal["ms", "us", "ns"], + every: str | timedelta, + start: date = date_2022_01_01, + stop: date = date_2022_01_02, +) -> None: s = pl.date_range( - start, stop, timedelta(minutes=30), name=f"dates[{time_unit}]", - time_unit=time_unit + start, + stop, + timedelta(minutes=30), + name=f"dates[{time_unit}]", + time_unit=time_unit, ) # can pass strings and time-deltas @@ -117,7 +136,7 @@ def test_truncate(time_unit: Literal["ms", "us", "ns"], every: str | timedelta) @pytest.mark.parametrize( - "time_unit, every", + ("time_unit", "every"), [ ("ms", "1h"), ("us", "1h0m0s"), @@ -125,13 +144,18 @@ def test_truncate(time_unit: Literal["ms", "us", "ns"], every: str | timedelta) ], ids=["milliseconds", "microseconds", "nanoseconds"], ) -def test_round(time_unit: Literal["ms", "us", "ns"], every: str | timedelta) -> None: - start = datetime(2021, 1, 1) - stop = datetime(2021, 1, 2) - +def test_round( + time_unit: Literal["ms", "us", "ns"], + every: str | timedelta, + start: date = date_2022_01_01, + stop: date = date_2022_01_02, +) -> None: s = pl.date_range( - start, stop, timedelta(minutes=30), name=f"dates[{time_unit}]", - time_unit=time_unit + start, + stop, + timedelta(minutes=30), + name=f"dates[{time_unit}]", + time_unit=time_unit, ) # can pass strings and time-deltas @@ -263,8 +287,8 @@ def test_year_empty_df() -> None: ["ms", "us", "ns"], ids=["milliseconds", "microseconds", "nanoseconds"], ) -def test_weekday(time_unit: Literal["ms", "us", "ns"]) -> None: - friday = pl.Series([datetime(2021, 1, 1)]) +def test_weekday(time_unit: Literal["ms", "us", "ns"], date_2022_01_01: date) -> None: + friday = pl.Series([date_2022_01_01]) assert friday.dt.cast_time_unit(time_unit).dt.weekday()[0] == 5 assert friday.cast(pl.Date).dt.weekday()[0] == 5 From 0adf77d293220f3e750a75c177b11879a99538a2 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Fri, 17 Feb 2023 22:12:28 +0000 Subject: [PATCH 04/17] test(python): added `series_of_dates` `fixture` and removed redundant `Series` name assignments. 
--- .../tests/unit/namespaces/test_datetime.py | 71 ++++++++++--------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index 149841f28034..3625a0f6141f 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -24,28 +24,39 @@ def date_2022_01_02() -> date: return date(2022, 1, 2) -def test_dt_strftime() -> None: - s = pl.Series("a", [10000, 20000, 30000], dtype=pl.Date) - assert s.dtype == pl.Date - expected = pl.Series("a", ["1997-05-19", "2024-10-04", "2052-02-20"]) - assert_series_equal(s.dt.strftime("%F"), expected) +@pytest.fixture() +def series_of_dates() -> pl.Series: + return pl.Series([10000, 20000, 30000], dtype=pl.Date) + +def test_dt_strftime(series_of_dates: pl.Series) -> None: + expected = pl.Series(["1997-05-19", "2024-10-04", "2052-02-20"]) -def test_dt_year_month_week_day_ordinal_day() -> None: - s = pl.Series("a", [10000, 20000, 30000], dtype=pl.Date) + assert series_of_dates.dtype == pl.Date + assert_series_equal(series_of_dates.dt.strftime("%F"), expected) - assert_series_equal(s.dt.year(), pl.Series("a", [1997, 2024, 2052], dtype=pl.Int32)) - assert_series_equal(s.dt.month(), pl.Series("a", [5, 10, 2], dtype=pl.UInt32)) - assert_series_equal(s.dt.weekday(), pl.Series("a", [1, 5, 2], dtype=pl.UInt32)) - assert_series_equal(s.dt.week(), pl.Series("a", [21, 40, 8], dtype=pl.UInt32)) - assert_series_equal(s.dt.day(), pl.Series("a", [19, 4, 20], dtype=pl.UInt32)) +def test_dt_year_month_week_day_ordinal_day( + series_of_dates: pl.Series, +) -> None: assert_series_equal( - s.dt.ordinal_day(), pl.Series("a", [139, 278, 51], dtype=pl.UInt32) + series_of_dates.dt.year(), pl.Series([1997, 2024, 2052], dtype=pl.Int32) + ) + assert_series_equal( + series_of_dates.dt.month(), pl.Series([5, 10, 2], dtype=pl.UInt32) + ) + assert_series_equal( + series_of_dates.dt.weekday(), pl.Series([1, 5, 2], dtype=pl.UInt32) + ) + assert_series_equal( + series_of_dates.dt.week(), pl.Series([21, 40, 8], dtype=pl.UInt32) + ) + assert_series_equal( + series_of_dates.dt.day(), pl.Series([19, 4, 20], dtype=pl.UInt32) + ) + assert_series_equal( + series_of_dates.dt.ordinal_day(), pl.Series([139, 278, 51], dtype=pl.UInt32) ) - - assert s.dt.median() == date(2024, 10, 4) - assert s.dt.mean() == date(2024, 10, 4) def test_dt_datetimes() -> None: @@ -53,31 +64,27 @@ def test_dt_datetimes() -> None: s = s.str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S.%9f") # hours, minutes, seconds, milliseconds, microseconds, and nanoseconds - assert_series_equal(s.dt.hour(), pl.Series("", [0, 3], dtype=pl.UInt32)) - assert_series_equal(s.dt.minute(), pl.Series("", [0, 20], dtype=pl.UInt32)) - assert_series_equal(s.dt.second(), pl.Series("", [0, 10], dtype=pl.UInt32)) - assert_series_equal(s.dt.millisecond(), pl.Series("", [0, 987], dtype=pl.UInt32)) - assert_series_equal(s.dt.microsecond(), pl.Series("", [0, 987654], dtype=pl.UInt32)) - assert_series_equal( - s.dt.nanosecond(), pl.Series("", [0, 987654321], dtype=pl.UInt32) - ) + assert_series_equal(s.dt.hour(), pl.Series([0, 3], dtype=pl.UInt32)) + assert_series_equal(s.dt.minute(), pl.Series([0, 20], dtype=pl.UInt32)) + assert_series_equal(s.dt.second(), pl.Series([0, 10], dtype=pl.UInt32)) + assert_series_equal(s.dt.millisecond(), pl.Series([0, 987], dtype=pl.UInt32)) + assert_series_equal(s.dt.microsecond(), pl.Series([0, 987654], dtype=pl.UInt32)) + assert_series_equal(s.dt.nanosecond(), 
pl.Series([0, 987654321], dtype=pl.UInt32)) # epoch methods - assert_series_equal( - s.dt.epoch(tu="d"), pl.Series("", [18262, 18294], dtype=pl.Int32) - ) + assert_series_equal(s.dt.epoch(tu="d"), pl.Series([18262, 18294], dtype=pl.Int32)) assert_series_equal( s.dt.epoch(tu="s"), - pl.Series("", [1_577_836_800, 1_580_613_610], dtype=pl.Int64), + pl.Series([1_577_836_800, 1_580_613_610], dtype=pl.Int64), ) assert_series_equal( s.dt.epoch(tu="ms"), - pl.Series("", [1_577_836_800_000, 1_580_613_610_000], dtype=pl.Int64), + pl.Series([1_577_836_800_000, 1_580_613_610_000], dtype=pl.Int64), ) # fractional seconds assert_series_equal( s.dt.second(fractional=True), - pl.Series("", [0.0, 10.987654321], dtype=pl.Float64), + pl.Series([0.0, 10.987654321], dtype=pl.Float64), ) @@ -171,7 +178,7 @@ def test_round( def test_cast_time_units() -> None: - dates = pl.Series("dates", [datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)]) + dates = pl.Series([datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)]) dates_in_ns = np.array([978307200000000000, 981022089000000000]) assert dates.dt.cast_time_unit("ns").cast(int).to_list() == list(dates_in_ns) @@ -184,7 +191,7 @@ def test_cast_time_units() -> None: def test_epoch() -> None: - dates = pl.Series("dates", [datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)]) + dates = pl.Series([datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)]) for unit in DTYPE_TEMPORAL_UNITS: assert_series_equal(dates.dt.epoch(unit), dates.dt.timestamp(unit)) From 9a5ddafeb677131b410194adfbd2efbfce95c572 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Fri, 17 Feb 2023 22:18:25 +0000 Subject: [PATCH 05/17] style(python): `test_date_offset` cleaner code & comments --- .../tests/unit/namespaces/test_datetime.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index 3625a0f6141f..f9e9ca7f982c 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -248,18 +248,22 @@ def test_quarter() -> None: def test_date_offset() -> None: - out = pl.DataFrame( - {"dates": pl.date_range(datetime(2000, 1, 1), datetime(2020, 1, 1), "1y")} - ).with_columns( - [ - pl.col("dates").dt.offset_by("1y").alias("date_plus_1y"), - pl.col("dates").dt.offset_by("-1y2mo").alias("date_min"), - ] - ) + df = pl.DataFrame({"dates": pl.date_range(datetime(2000, 1, 1), datetime(2020, 1, 1), "1y")}) + + # Add two new columns to the DataFrame using the offset_by() method + df = df.with_columns([ + df["dates"].dt.offset_by("1y").alias("date_plus_1y"), + df["dates"].dt.offset_by("-1y2mo").alias("date_min"), + ]) + + # Assert that the day of the month for all dates in the 'date_plus_1y' column is 1 + assert (df["date_plus_1y"].dt.day() == 1).all() + + # Assert that the day of the month for all dates in the 'date_min' column is 1 + assert (df["date_min"].dt.day() == 1).all() - assert (out["date_plus_1y"].dt.day() == 1).all() - assert (out["date_min"].dt.day() == 1).all() - assert out["date_min"].to_list() == [ + # Assert that the 'date_min' column contains the expected list of dates + expected_dates = [ datetime(1998, 11, 1, 0, 0), datetime(1999, 11, 1, 0, 0), datetime(2000, 11, 1, 0, 0), @@ -282,6 +286,7 @@ def test_date_offset() -> None: datetime(2017, 11, 1, 0, 0), datetime(2018, 11, 1, 0, 0), ] + assert df["date_min"].to_list() == expected_dates def test_year_empty_df() -> None: From 
2d82701790df537af4996dcc1666a2f8b0f99dea Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Fri, 17 Feb 2023 22:24:23 +0000 Subject: [PATCH 06/17] style(python): `test_date_time_combine` cleaner code & comments --- .../tests/unit/namespaces/test_datetime.py | 57 +++++++++++-------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index f9e9ca7f982c..17cfffd8e680 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -204,41 +204,52 @@ def test_epoch() -> None: def test_date_time_combine() -> None: - # test combining datetime/date and time (as expr/col and as literal) - df = pl.DataFrame( - { - "dtm": [ - datetime(2022, 12, 31, 10, 30, 45), - datetime(2023, 7, 5, 23, 59, 59), - ], - "dt": [date(2022, 10, 10), date(2022, 7, 5)], - "tm": [time(1, 2, 3, 456000), time(7, 8, 9, 101000)], - } - ).select( - [ - pl.col("dtm").dt.combine(pl.col("tm")).alias("d1"), - pl.col("dt").dt.combine(pl.col("tm")).alias("d2"), - pl.col("dt").dt.combine(time(4, 5, 6)).alias("d3"), - ] - ) - # if combining with datetime, the time component should be overwritten. - # if combining with date, should write both parts 'as-is' into the new datetime. - assert df.to_dict(False) == { + # Define a DataFrame with columns for datetime, date, and time + df = pl.DataFrame({ + "dtm": [ + datetime(2022, 12, 31, 10, 30, 45), + datetime(2023, 7, 5, 23, 59, 59), + ], + "dt": [ + date(2022, 10, 10), + date(2022, 7, 5), + ], + "tm": [ + time(1, 2, 3, 456000), + time(7, 8, 9, 101000), + ], + }) + + # Use the .select() method to combine datetime/date and time + df = df.select([ + pl.col("dtm").dt.combine(pl.col("tm")).alias("d1"), # Combine datetime and time + pl.col("dt").dt.combine(pl.col("tm")).alias("d2"), # Combine date and time + pl.col("dt").dt.combine(time(4, 5, 6)).alias("d3"), # Combine date and a specified time + ]) + + # Check that the new columns have the expected values and datatypes + expected_dict = { "d1": [ - datetime(2022, 12, 31, 1, 2, 3, 456000), + datetime(2022, 12, 31, 1, 2, 3, 456000), # Time component should be overwritten by `tm` datetime(2023, 7, 5, 7, 8, 9, 101000), ], "d2": [ datetime(2022, 10, 10, 1, 2, 3, 456000), datetime(2022, 7, 5, 7, 8, 9, 101000), ], - "d3": [datetime(2022, 10, 10, 4, 5, 6), datetime(2022, 7, 5, 4, 5, 6)], + "d3": [ + datetime(2022, 10, 10, 4, 5, 6), # New datetime should use specified time component + datetime(2022, 7, 5, 4, 5, 6), + ], } - assert df.schema == { + assert df.to_dict(False) == expected_dict + + expected_schema = { "d1": pl.Datetime("us"), "d2": pl.Datetime("us"), "d3": pl.Datetime("us"), } + assert df.schema == expected_schema def test_quarter() -> None: From f5c18863dd50ef2eae7d45b76add794ea42e2389 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Fri, 17 Feb 2023 22:24:57 +0000 Subject: [PATCH 07/17] style(python): `black` formatting --- .../tests/unit/namespaces/test_datetime.py | 70 ++++++++++++------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index 17cfffd8e680..b6f0043a73bc 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -11,7 +11,7 @@ from polars.testing import assert_series_equal if TYPE_CHECKING: - from polars.internals.type_aliases import TimeUnit + pass @pytest.fixture() @@ -205,32 +205,42 @@ 
def test_epoch() -> None: def test_date_time_combine() -> None: # Define a DataFrame with columns for datetime, date, and time - df = pl.DataFrame({ - "dtm": [ - datetime(2022, 12, 31, 10, 30, 45), - datetime(2023, 7, 5, 23, 59, 59), - ], - "dt": [ - date(2022, 10, 10), - date(2022, 7, 5), - ], - "tm": [ - time(1, 2, 3, 456000), - time(7, 8, 9, 101000), - ], - }) + df = pl.DataFrame( + { + "dtm": [ + datetime(2022, 12, 31, 10, 30, 45), + datetime(2023, 7, 5, 23, 59, 59), + ], + "dt": [ + date(2022, 10, 10), + date(2022, 7, 5), + ], + "tm": [ + time(1, 2, 3, 456000), + time(7, 8, 9, 101000), + ], + } + ) # Use the .select() method to combine datetime/date and time - df = df.select([ - pl.col("dtm").dt.combine(pl.col("tm")).alias("d1"), # Combine datetime and time - pl.col("dt").dt.combine(pl.col("tm")).alias("d2"), # Combine date and time - pl.col("dt").dt.combine(time(4, 5, 6)).alias("d3"), # Combine date and a specified time - ]) + df = df.select( + [ + pl.col("dtm") + .dt.combine(pl.col("tm")) + .alias("d1"), # Combine datetime and time + pl.col("dt").dt.combine(pl.col("tm")).alias("d2"), # Combine date and time + pl.col("dt") + .dt.combine(time(4, 5, 6)) + .alias("d3"), # Combine date and a specified time + ] + ) # Check that the new columns have the expected values and datatypes expected_dict = { "d1": [ - datetime(2022, 12, 31, 1, 2, 3, 456000), # Time component should be overwritten by `tm` + datetime( + 2022, 12, 31, 1, 2, 3, 456000 + ), # Time component should be overwritten by `tm` datetime(2023, 7, 5, 7, 8, 9, 101000), ], "d2": [ @@ -238,7 +248,9 @@ def test_date_time_combine() -> None: datetime(2022, 7, 5, 7, 8, 9, 101000), ], "d3": [ - datetime(2022, 10, 10, 4, 5, 6), # New datetime should use specified time component + datetime( + 2022, 10, 10, 4, 5, 6 + ), # New datetime should use specified time component datetime(2022, 7, 5, 4, 5, 6), ], } @@ -259,13 +271,17 @@ def test_quarter() -> None: def test_date_offset() -> None: - df = pl.DataFrame({"dates": pl.date_range(datetime(2000, 1, 1), datetime(2020, 1, 1), "1y")}) + df = pl.DataFrame( + {"dates": pl.date_range(datetime(2000, 1, 1), datetime(2020, 1, 1), "1y")} + ) # Add two new columns to the DataFrame using the offset_by() method - df = df.with_columns([ - df["dates"].dt.offset_by("1y").alias("date_plus_1y"), - df["dates"].dt.offset_by("-1y2mo").alias("date_min"), - ]) + df = df.with_columns( + [ + df["dates"].dt.offset_by("1y").alias("date_plus_1y"), + df["dates"].dt.offset_by("-1y2mo").alias("date_min"), + ] + ) # Assert that the day of the month for all dates in the 'date_plus_1y' column is 1 assert (df["date_plus_1y"].dt.day() == 1).all() From d94f493cbb5e9d8da941183a24eebdd7444934b1 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Fri, 17 Feb 2023 22:36:31 +0000 Subject: [PATCH 08/17] fix(python): correct use of fixtures --- .../tests/unit/namespaces/test_datetime.py | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index b6f0043a73bc..e9d86c84cd7e 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -11,17 +11,17 @@ from polars.testing import assert_series_equal if TYPE_CHECKING: - pass + from polars.internals.type_aliases import TimeUnit @pytest.fixture() -def date_2022_01_01() -> date: - return date(2022, 1, 1) +def date_2022_01_01() -> datetime: + return datetime(2022, 1, 1) @pytest.fixture() -def 
date_2022_01_02() -> date: - return date(2022, 1, 2) +def date_2022_01_02() -> datetime: + return datetime(2022, 1, 2) @pytest.fixture() @@ -100,7 +100,7 @@ def test_dt_datetimes() -> None: ], ) def test_duration_extract_times( - unit: str, expected: list[int], date_2022_01_01: date, date_2022_01_02: date + unit: str, expected: list[int], date_2022_01_01: datetime, date_2022_01_02: datetime ) -> None: duration = pl.Series([date_2022_01_02]) - pl.Series([date_2022_01_01]) @@ -119,9 +119,10 @@ def test_duration_extract_times( def test_truncate( time_unit: Literal["ms", "us", "ns"], every: str | timedelta, - start: date = date_2022_01_01, - stop: date = date_2022_01_02, + date_2022_01_01: datetime, + date_2022_01_02: datetime, ) -> None: + start, stop = date_2022_01_01, date_2022_01_02 s = pl.date_range( start, stop, @@ -154,9 +155,10 @@ def test_truncate( def test_round( time_unit: Literal["ms", "us", "ns"], every: str | timedelta, - start: date = date_2022_01_01, - stop: date = date_2022_01_02, + date_2022_01_01: datetime, + date_2022_01_02: datetime, ) -> None: + start, stop = date_2022_01_01, date_2022_01_02 s = pl.date_range( start, stop, @@ -326,8 +328,8 @@ def test_year_empty_df() -> None: ["ms", "us", "ns"], ids=["milliseconds", "microseconds", "nanoseconds"], ) -def test_weekday(time_unit: Literal["ms", "us", "ns"], date_2022_01_01: date) -> None: - friday = pl.Series([date_2022_01_01]) +def test_weekday(time_unit: Literal["ms", "us", "ns"]) -> None: + friday = pl.Series([datetime(2023, 2, 17)]) assert friday.dt.cast_time_unit(time_unit).dt.weekday()[0] == 5 assert friday.cast(pl.Date).dt.weekday()[0] == 5 From facc2f0de13305930706c14effb9aaa7e666bf0a Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Sat, 18 Feb 2023 15:08:02 +0000 Subject: [PATCH 09/17] test(python): `parametrize` on `test_dt_year_month_week_day_ordinal_day` --- .../tests/unit/namespaces/test_datetime.py | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index e9d86c84cd7e..f4b49b700033 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -36,27 +36,22 @@ def test_dt_strftime(series_of_dates: pl.Series) -> None: assert_series_equal(series_of_dates.dt.strftime("%F"), expected) +@pytest.mark.parametrize( + ("unit", "expected"), + [ + ("year", pl.Series("", [1997, 2024, 2052], pl.Int32)), + ("month", pl.Series("", [5, 10, 2], pl.UInt32)), + ("week", pl.Series("", [21, 40, 8], pl.UInt32)), + ("day", pl.Series("", [19, 4, 20], pl.UInt32)), + ("ordinal_day", pl.Series("", [139, 278, 51], pl.UInt32)), + ], +) def test_dt_year_month_week_day_ordinal_day( + unit: str, + expected: pl.Series, series_of_dates: pl.Series, ) -> None: - assert_series_equal( - series_of_dates.dt.year(), pl.Series([1997, 2024, 2052], dtype=pl.Int32) - ) - assert_series_equal( - series_of_dates.dt.month(), pl.Series([5, 10, 2], dtype=pl.UInt32) - ) - assert_series_equal( - series_of_dates.dt.weekday(), pl.Series([1, 5, 2], dtype=pl.UInt32) - ) - assert_series_equal( - series_of_dates.dt.week(), pl.Series([21, 40, 8], dtype=pl.UInt32) - ) - assert_series_equal( - series_of_dates.dt.day(), pl.Series([19, 4, 20], dtype=pl.UInt32) - ) - assert_series_equal( - series_of_dates.dt.ordinal_day(), pl.Series([139, 278, 51], dtype=pl.UInt32) - ) + assert_series_equal(getattr(series_of_dates.dt, unit)(), expected) def test_dt_datetimes() -> None: From 
97f0f74c559ae9416044141503be02697e93b795 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Sat, 18 Feb 2023 15:16:26 +0000 Subject: [PATCH 10/17] style(python): cleaner parameters for unit tests, better namings --- .../tests/unit/namespaces/test_datetime.py | 41 +++++++++++-------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index f4b49b700033..665a53628a7d 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -37,21 +37,22 @@ def test_dt_strftime(series_of_dates: pl.Series) -> None: @pytest.mark.parametrize( - ("unit", "expected"), + ("unit_attr", "expected"), [ - ("year", pl.Series("", [1997, 2024, 2052], pl.Int32)), - ("month", pl.Series("", [5, 10, 2], pl.UInt32)), - ("week", pl.Series("", [21, 40, 8], pl.UInt32)), - ("day", pl.Series("", [19, 4, 20], pl.UInt32)), - ("ordinal_day", pl.Series("", [139, 278, 51], pl.UInt32)), + ("year", pl.Series(values=[1997, 2024, 2052], dtype=pl.Int32)), + ("month", pl.Series(values=[5, 10, 2], dtype=pl.UInt32)), + ("week", pl.Series(values=[21, 40, 8], dtype=pl.UInt32)), + ("day", pl.Series(values=[19, 4, 20], dtype=pl.UInt32)), + ("ordinal_day", pl.Series(values=[139, 278, 51], dtype=pl.UInt32)), + ("day_of_year", pl.Series(values=[139, 278, 51], dtype=pl.UInt32)), ], ) -def test_dt_year_month_week_day_ordinal_day( - unit: str, +def test_dt_extract_year_month_week_day_ordinal_day( + unit_attr: str, expected: pl.Series, series_of_dates: pl.Series, ) -> None: - assert_series_equal(getattr(series_of_dates.dt, unit)(), expected) + assert_series_equal(getattr(series_of_dates.dt, unit_attr)(), expected) def test_dt_datetimes() -> None: @@ -84,22 +85,26 @@ def test_dt_datetimes() -> None: @pytest.mark.parametrize( - ("unit", "expected"), + ("unit_attr", "expected"), [ - ("days", [1]), - ("hours", [24]), - ("seconds", [3600 * 24]), - ("milliseconds", [3600 * 24 * int(1e3)]), - ("microseconds", [3600 * 24 * int(1e6)]), - ("nanoseconds", [3600 * 24 * int(1e9)]), + ("days", pl.Series([1])), + ("hours", pl.Series([24])), + ("minutes", pl.Series([24 * 60])), + ("seconds", pl.Series([3600 * 24])), + ("milliseconds", pl.Series([3600 * 24 * int(1e3)])), + ("microseconds", pl.Series([3600 * 24 * int(1e6)])), + ("nanoseconds", pl.Series([3600 * 24 * int(1e9)])), ], ) def test_duration_extract_times( - unit: str, expected: list[int], date_2022_01_01: datetime, date_2022_01_02: datetime + unit_attr: str, + expected: pl.Series, + date_2022_01_01: datetime, + date_2022_01_02: datetime ) -> None: duration = pl.Series([date_2022_01_02]) - pl.Series([date_2022_01_01]) - assert_series_equal(getattr(duration.dt, unit)(), pl.Series(expected)) + assert_series_equal(getattr(duration.dt, unit_attr)(), expected) @pytest.mark.parametrize( From e5e8f2cb0beee706744a2556b04122568f7310a9 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Sat, 18 Feb 2023 15:36:06 +0000 Subject: [PATCH 11/17] test(python): split `test_dt_datetimes` into `test_dt_strptime_extract_times`, `test_strptime_epoch`, and `test_strptime_fractional_seconds`. 
Applied `parametrize` where applicable --- .../tests/unit/namespaces/test_datetime.py | 63 +++++++++++++------ 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index 665a53628a7d..195c58475340 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -55,29 +55,52 @@ def test_dt_extract_year_month_week_day_ordinal_day( assert_series_equal(getattr(series_of_dates.dt, unit_attr)(), expected) -def test_dt_datetimes() -> None: +@pytest.mark.parametrize( + ("unit_attr", "expected"), + [ + ("hour", pl.Series(values=[0, 3], dtype=pl.UInt32)), + ("minute", pl.Series(values=[0, 20], dtype=pl.UInt32)), + ("second", pl.Series(values=[0, 10], dtype=pl.UInt32)), + ("millisecond", pl.Series(values=[0, 987], dtype=pl.UInt32)), + ("microsecond", pl.Series(values=[0, 987654], dtype=pl.UInt32)), + ("nanosecond", pl.Series(values=[0, 987654321], dtype=pl.UInt32)), + ], +) +def test_strptime_extract_times( + unit_attr: str, + expected: pl.Series, + series_of_dates: pl.Series, +) -> None: s = pl.Series(["2020-01-01 00:00:00.000000000", "2020-02-02 03:20:10.987654321"]) s = s.str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S.%9f") - # hours, minutes, seconds, milliseconds, microseconds, and nanoseconds - assert_series_equal(s.dt.hour(), pl.Series([0, 3], dtype=pl.UInt32)) - assert_series_equal(s.dt.minute(), pl.Series([0, 20], dtype=pl.UInt32)) - assert_series_equal(s.dt.second(), pl.Series([0, 10], dtype=pl.UInt32)) - assert_series_equal(s.dt.millisecond(), pl.Series([0, 987], dtype=pl.UInt32)) - assert_series_equal(s.dt.microsecond(), pl.Series([0, 987654], dtype=pl.UInt32)) - assert_series_equal(s.dt.nanosecond(), pl.Series([0, 987654321], dtype=pl.UInt32)) + assert_series_equal(getattr(s.dt, unit_attr)(), expected) + + +@pytest.mark.parametrize( + ("temporal_unit", "expected"), + [ + ("d", pl.Series(values=[18262, 18294], dtype=pl.Int32)), + ("s", pl.Series(values=[1_577_836_800, 1_580_613_610], dtype=pl.Int64)), + ( + "ms", + pl.Series(values=[1_577_836_800_000, 1_580_613_610_000], dtype=pl.Int64), + ), + ], +) +def test_strptime_epoch( + temporal_unit: Literal["d", "s", "ms"], expected: pl.Series +) -> None: + s = pl.Series(["2020-01-01 00:00:00.000000000", "2020-02-02 03:20:10.987654321"]) + s = s.str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S.%9f") + + assert_series_equal(s.dt.epoch(tu=temporal_unit), expected) + + +def test_strptime_fractional_seconds(): + s = pl.Series(["2020-01-01 00:00:00.000000000", "2020-02-02 03:20:10.987654321"]) + s = s.str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S.%9f") - # epoch methods - assert_series_equal(s.dt.epoch(tu="d"), pl.Series([18262, 18294], dtype=pl.Int32)) - assert_series_equal( - s.dt.epoch(tu="s"), - pl.Series([1_577_836_800, 1_580_613_610], dtype=pl.Int64), - ) - assert_series_equal( - s.dt.epoch(tu="ms"), - pl.Series([1_577_836_800_000, 1_580_613_610_000], dtype=pl.Int64), - ) - # fractional seconds assert_series_equal( s.dt.second(fractional=True), pl.Series([0.0, 10.987654321], dtype=pl.Float64), @@ -100,7 +123,7 @@ def test_duration_extract_times( unit_attr: str, expected: pl.Series, date_2022_01_01: datetime, - date_2022_01_02: datetime + date_2022_01_02: datetime, ) -> None: duration = pl.Series([date_2022_01_02]) - pl.Series([date_2022_01_01]) From 129fed7c2ca323a992f92c995a0d7cdc142c42e6 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Sat, 18 Feb 2023 15:45:49 +0000 
Subject: [PATCH 12/17] test(python): `series_of_str_dates` fixture added --- .../tests/unit/namespaces/test_datetime.py | 60 +++++++------------ 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index 195c58475340..5230bdfbb16f 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -25,15 +25,20 @@ def date_2022_01_02() -> datetime: @pytest.fixture() -def series_of_dates() -> pl.Series: +def series_of_int_dates() -> pl.Series: return pl.Series([10000, 20000, 30000], dtype=pl.Date) -def test_dt_strftime(series_of_dates: pl.Series) -> None: - expected = pl.Series(["1997-05-19", "2024-10-04", "2052-02-20"]) +@pytest.fixture() +def series_of_str_dates() -> pl.Series: + return pl.Series(["2020-01-01 00:00:00.000000000", "2020-02-02 03:20:10.987654321"]) + - assert series_of_dates.dtype == pl.Date - assert_series_equal(series_of_dates.dt.strftime("%F"), expected) +def test_dt_strftime(series_of_int_dates: pl.Series) -> None: + expected_str_dates = pl.Series(["1997-05-19", "2024-10-04", "2052-02-20"]) + + assert series_of_int_dates.dtype == pl.Date + assert_series_equal(series_of_int_dates.dt.strftime("%F"), expected_str_dates) @pytest.mark.parametrize( @@ -44,15 +49,14 @@ def test_dt_strftime(series_of_dates: pl.Series) -> None: ("week", pl.Series(values=[21, 40, 8], dtype=pl.UInt32)), ("day", pl.Series(values=[19, 4, 20], dtype=pl.UInt32)), ("ordinal_day", pl.Series(values=[139, 278, 51], dtype=pl.UInt32)), - ("day_of_year", pl.Series(values=[139, 278, 51], dtype=pl.UInt32)), ], ) def test_dt_extract_year_month_week_day_ordinal_day( unit_attr: str, expected: pl.Series, - series_of_dates: pl.Series, + series_of_int_dates: pl.Series, ) -> None: - assert_series_equal(getattr(series_of_dates.dt, unit_attr)(), expected) + assert_series_equal(getattr(series_of_int_dates.dt, unit_attr)(), expected) @pytest.mark.parametrize( @@ -69,10 +73,10 @@ def test_dt_extract_year_month_week_day_ordinal_day( def test_strptime_extract_times( unit_attr: str, expected: pl.Series, - series_of_dates: pl.Series, + series_of_int_dates: pl.Series, + series_of_str_dates: pl.Series, ) -> None: - s = pl.Series(["2020-01-01 00:00:00.000000000", "2020-02-02 03:20:10.987654321"]) - s = s.str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S.%9f") + s = series_of_str_dates.str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S.%9f") assert_series_equal(getattr(s.dt, unit_attr)(), expected) @@ -89,17 +93,17 @@ def test_strptime_extract_times( ], ) def test_strptime_epoch( - temporal_unit: Literal["d", "s", "ms"], expected: pl.Series + temporal_unit: Literal["d", "s", "ms"], + expected: pl.Series, + series_of_str_dates: pl.Series, ) -> None: - s = pl.Series(["2020-01-01 00:00:00.000000000", "2020-02-02 03:20:10.987654321"]) - s = s.str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S.%9f") + s = series_of_str_dates.str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S.%9f") assert_series_equal(s.dt.epoch(tu=temporal_unit), expected) -def test_strptime_fractional_seconds(): - s = pl.Series(["2020-01-01 00:00:00.000000000", "2020-02-02 03:20:10.987654321"]) - s = s.str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S.%9f") +def test_strptime_fractional_seconds(series_of_str_dates: pl.Series): + s = series_of_str_dates.str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S.%9f") assert_series_equal( s.dt.second(fractional=True), @@ -316,27 +320,7 @@ def test_date_offset() -> None: # 
Assert that the 'date_min' column contains the expected list of dates expected_dates = [ - datetime(1998, 11, 1, 0, 0), - datetime(1999, 11, 1, 0, 0), - datetime(2000, 11, 1, 0, 0), - datetime(2001, 11, 1, 0, 0), - datetime(2002, 11, 1, 0, 0), - datetime(2003, 11, 1, 0, 0), - datetime(2004, 11, 1, 0, 0), - datetime(2005, 11, 1, 0, 0), - datetime(2006, 11, 1, 0, 0), - datetime(2007, 11, 1, 0, 0), - datetime(2008, 11, 1, 0, 0), - datetime(2009, 11, 1, 0, 0), - datetime(2010, 11, 1, 0, 0), - datetime(2011, 11, 1, 0, 0), - datetime(2012, 11, 1, 0, 0), - datetime(2013, 11, 1, 0, 0), - datetime(2014, 11, 1, 0, 0), - datetime(2015, 11, 1, 0, 0), - datetime(2016, 11, 1, 0, 0), - datetime(2017, 11, 1, 0, 0), - datetime(2018, 11, 1, 0, 0), + datetime(year, 11, 1, 0, 0) for year in range(1998, 2019) ] assert df["date_min"].to_list() == expected_dates From 5f78f72fd4fa16b6e5bea6639fd4b50992d7b9b8 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Sat, 18 Feb 2023 15:52:00 +0000 Subject: [PATCH 13/17] test(python): `paramerize` for ``test_cast_time_units` unit test --- .../tests/unit/namespaces/test_datetime.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index 5230bdfbb16f..520ad8fea4a8 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -206,17 +206,22 @@ def test_round( assert out.dt[-1] == stop -def test_cast_time_units() -> None: +@pytest.mark.parametrize( + ("time_unit", "date_in_that_unit"), + [ + ("ns", [978307200000000000, 981022089000000000]), + ("us", [978307200000000, 981022089000000]), + ("ms", [978307200000, 981022089000]), + ], + ids=["nanoseconds", "microseconds", "milliseconds"], +) +def test_cast_time_units( + time_unit: Literal["ms", "us", "ns"], + date_in_that_unit: list[int], +) -> None: dates = pl.Series([datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)]) - dates_in_ns = np.array([978307200000000000, 981022089000000000]) - assert dates.dt.cast_time_unit("ns").cast(int).to_list() == list(dates_in_ns) - assert dates.dt.cast_time_unit("us").cast(int).to_list() == list( - dates_in_ns // 1_000 - ) - assert dates.dt.cast_time_unit("ms").cast(int).to_list() == list( - dates_in_ns // 1_000_000 - ) + assert dates.dt.cast_time_unit(time_unit).cast(int).to_list() == date_in_that_unit def test_epoch() -> None: From 99813f793589cd9c222ddee6d0fd517337dde6eb Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Sat, 18 Feb 2023 16:14:36 +0000 Subject: [PATCH 14/17] style(python): `black` formatting, cleaner comments --- .../tests/unit/namespaces/test_datetime.py | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index 520ad8fea4a8..3d43eb94aeb3 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -3,7 +3,6 @@ from datetime import date, datetime, time, timedelta from typing import TYPE_CHECKING, Literal -import numpy as np import pytest import polars as pl @@ -256,16 +255,12 @@ def test_date_time_combine() -> None: } ) - # Use the .select() method to combine datetime/date and time + # Combine datetime/date and time df = df.select( [ - pl.col("dtm") - .dt.combine(pl.col("tm")) - .alias("d1"), # Combine datetime and time - pl.col("dt").dt.combine(pl.col("tm")).alias("d2"), # Combine date and time 
- pl.col("dt") - .dt.combine(time(4, 5, 6)) - .alias("d3"), # Combine date and a specified time + pl.col("dtm").dt.combine(pl.col("tm")).alias("d1"), # datetime & time + pl.col("dt").dt.combine(pl.col("tm")).alias("d2"), # date & time + pl.col("dt").dt.combine(time(4, 5, 6)).alias("d3"), # date & specified time ] ) @@ -317,16 +312,12 @@ def test_date_offset() -> None: ] ) - # Assert that the day of the month for all dates in the 'date_plus_1y' column is 1 + # Assert that the day of the month for all the dates in new columns is 1 assert (df["date_plus_1y"].dt.day() == 1).all() - - # Assert that the day of the month for all dates in the 'date_min' column is 1 assert (df["date_min"].dt.day() == 1).all() # Assert that the 'date_min' column contains the expected list of dates - expected_dates = [ - datetime(year, 11, 1, 0, 0) for year in range(1998, 2019) - ] + expected_dates = [datetime(year, 11, 1, 0, 0) for year in range(1998, 2019)] assert df["date_min"].to_list() == expected_dates From 416e04e937770a131a669c48cd1974d98737a778 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Sat, 18 Feb 2023 16:45:54 +0000 Subject: [PATCH 15/17] style(python): cleaner comments & spacing & renamed `test_epoch` -> `test_epoch_matches_timestamp` --- .../tests/unit/namespaces/test_datetime.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index 3d43eb94aeb3..0804171b49e2 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -223,7 +223,7 @@ def test_cast_time_units( assert dates.dt.cast_time_unit(time_unit).cast(int).to_list() == date_in_that_unit -def test_epoch() -> None: +def test_epoch_matches_timestamp() -> None: dates = pl.Series([datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)]) for unit in DTYPE_TEMPORAL_UNITS: @@ -255,7 +255,7 @@ def test_date_time_combine() -> None: } ) - # Combine datetime/date and time + # Combine datetime/date with time df = df.select( [ pl.col("dtm").dt.combine(pl.col("tm")).alias("d1"), # datetime & time @@ -264,22 +264,18 @@ def test_date_time_combine() -> None: ] ) - # Check that the new columns have the expected values and datatypes + # Assert that the new columns have the expected values and datatypes expected_dict = { - "d1": [ - datetime( - 2022, 12, 31, 1, 2, 3, 456000 - ), # Time component should be overwritten by `tm` + "d1": [ # Time component should be overwritten by `tm` values + datetime(2022, 12, 31, 1, 2, 3, 456000), datetime(2023, 7, 5, 7, 8, 9, 101000), ], - "d2": [ + "d2": [ # Both date and time components combined "as-is" into new datetime datetime(2022, 10, 10, 1, 2, 3, 456000), datetime(2022, 7, 5, 7, 8, 9, 101000), ], - "d3": [ - datetime( - 2022, 10, 10, 4, 5, 6 - ), # New datetime should use specified time component + "d3": [ # New datetime should use specified time component + datetime(2022, 10, 10, 4, 5, 6), datetime(2022, 7, 5, 4, 5, 6), ], } From c9c47a09d35b048c8977c05d603d7aeaa7236b23 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Sat, 18 Feb 2023 17:13:46 +0000 Subject: [PATCH 16/17] fix(python): type annotations --- py-polars/tests/unit/namespaces/test_datetime.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index 0804171b49e2..ec8b3f6b9953 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ 
b/py-polars/tests/unit/namespaces/test_datetime.py @@ -1,7 +1,7 @@ from __future__ import annotations from datetime import date, datetime, time, timedelta -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING import pytest @@ -92,7 +92,7 @@ def test_strptime_extract_times( ], ) def test_strptime_epoch( - temporal_unit: Literal["d", "s", "ms"], + temporal_unit: TimeUnit, expected: pl.Series, series_of_str_dates: pl.Series, ) -> None: @@ -101,7 +101,7 @@ def test_strptime_epoch( assert_series_equal(s.dt.epoch(tu=temporal_unit), expected) -def test_strptime_fractional_seconds(series_of_str_dates: pl.Series): +def test_strptime_fractional_seconds(series_of_str_dates: pl.Series) -> None: s = series_of_str_dates.str.strptime(pl.Datetime, fmt="%Y-%m-%d %H:%M:%S.%9f") assert_series_equal( @@ -143,7 +143,7 @@ def test_duration_extract_times( ids=["milliseconds", "microseconds", "nanoseconds"], ) def test_truncate( - time_unit: Literal["ms", "us", "ns"], + time_unit: TimeUnit, every: str | timedelta, date_2022_01_01: datetime, date_2022_01_02: datetime, @@ -179,7 +179,7 @@ def test_truncate( ids=["milliseconds", "microseconds", "nanoseconds"], ) def test_round( - time_unit: Literal["ms", "us", "ns"], + time_unit: TimeUnit, every: str | timedelta, date_2022_01_01: datetime, date_2022_01_02: datetime, @@ -215,7 +215,7 @@ def test_round( ids=["nanoseconds", "microseconds", "milliseconds"], ) def test_cast_time_units( - time_unit: Literal["ms", "us", "ns"], + time_unit: TimeUnit, date_in_that_unit: list[int], ) -> None: dates = pl.Series([datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)]) @@ -327,7 +327,7 @@ def test_year_empty_df() -> None: ["ms", "us", "ns"], ids=["milliseconds", "microseconds", "nanoseconds"], ) -def test_weekday(time_unit: Literal["ms", "us", "ns"]) -> None: +def test_weekday(time_unit: TimeUnit) -> None: friday = pl.Series([datetime(2023, 2, 17)]) assert friday.dt.cast_time_unit(time_unit).dt.weekday()[0] == 5 From e14a38bba6e3de454ef73ed650bf35ccd2fc5804 Mon Sep 17 00:00:00 2001 From: "matthew.f" Date: Sun, 19 Feb 2023 15:01:56 +0000 Subject: [PATCH 17/17] feat(python): removed `date_2022_01_01` & `date_2022_01_02` fixtures --- .../tests/unit/namespaces/test_datetime.py | 22 +++---------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index ec8b3f6b9953..f66dd74c8550 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -13,16 +13,6 @@ from polars.internals.type_aliases import TimeUnit -@pytest.fixture() -def date_2022_01_01() -> datetime: - return datetime(2022, 1, 1) - - -@pytest.fixture() -def date_2022_01_02() -> datetime: - return datetime(2022, 1, 2) - - @pytest.fixture() def series_of_int_dates() -> pl.Series: return pl.Series([10000, 20000, 30000], dtype=pl.Date) @@ -125,10 +115,8 @@ def test_strptime_fractional_seconds(series_of_str_dates: pl.Series) -> None: def test_duration_extract_times( unit_attr: str, expected: pl.Series, - date_2022_01_01: datetime, - date_2022_01_02: datetime, ) -> None: - duration = pl.Series([date_2022_01_02]) - pl.Series([date_2022_01_01]) + duration = pl.Series([datetime(2022, 1, 2)]) - pl.Series([datetime(2022, 1, 1)]) assert_series_equal(getattr(duration.dt, unit_attr)(), expected) @@ -145,10 +133,8 @@ def test_duration_extract_times( def test_truncate( time_unit: TimeUnit, every: str | timedelta, - date_2022_01_01: datetime, - 
date_2022_01_02: datetime, ) -> None: - start, stop = date_2022_01_01, date_2022_01_02 + start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2) s = pl.date_range( start, stop, @@ -181,10 +167,8 @@ def test_truncate( def test_round( time_unit: TimeUnit, every: str | timedelta, - date_2022_01_01: datetime, - date_2022_01_02: datetime, ) -> None: - start, stop = date_2022_01_01, date_2022_01_02 + start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2) s = pl.date_range( start, stop,
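
For reference, the pattern this series converges on combines a module-level pytest fixture for shared input data with `pytest.mark.parametrize` plus `getattr` dispatch onto the `.dt` accessor. A minimal standalone sketch of that pattern, using hypothetical fixture values and a hypothetical test name rather than anything from the patches above, might look like:

import polars as pl
import pytest
from polars.testing import assert_series_equal


@pytest.fixture()
def series_of_dates() -> pl.Series:
    # Hypothetical fixture: three days since the epoch, cast to pl.Date
    # (1970-01-01, 1970-01-02, 1970-01-03).
    return pl.Series([0, 1, 2], dtype=pl.Date)


@pytest.mark.parametrize(
    ("unit_attr", "expected"),
    [
        ("year", pl.Series([1970, 1970, 1970], dtype=pl.Int32)),
        ("day", pl.Series([1, 2, 3], dtype=pl.UInt32)),
    ],
)
def test_dt_extract_sketch(
    unit_attr: str, expected: pl.Series, series_of_dates: pl.Series
) -> None:
    # getattr() dispatches to the .dt accessor method named by the parameter,
    # so a single test body covers every extraction unit listed above.
    assert_series_equal(getattr(series_of_dates.dt, unit_attr)(), expected)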