Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Consistently round half-way points down in dt.round #18245

Merged
merged 4 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions crates/polars-time/src/round.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ use polars_core::prelude::*;
use polars_utils::cache::FastFixedCache;

use crate::prelude::*;
use crate::truncate::fast_truncate;

#[inline(always)]
fn fast_round(t: i64, every: i64) -> i64 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you tag this with inline(always)?

fast_truncate(t + every / 2, every)
}

pub trait PolarsRound {
fn round(&self, every: &StringChunked, tz: Option<&Tz>) -> PolarsResult<Self>
Expand Down Expand Up @@ -35,11 +41,7 @@ impl PolarsRound for DatetimeChunked {
TimeUnit::Nanoseconds => every_parsed.duration_ns(),
};
return Ok(self
.apply_values(|t| {
// Round half-way values away from zero
let half_away = t.signum() * every / 2;
t + half_away - (t + half_away) % every
})
.apply_values(|t| fast_round(t, every))
.into_datetime(self.time_unit(), time_zone.clone()));
} else {
let w = Window::new(every_parsed, every_parsed, offset);
Expand Down
11 changes: 7 additions & 4 deletions crates/polars-time/src/truncate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ pub trait PolarsTruncate {
Self: Sized;
}

#[inline(always)]
pub(crate) fn fast_truncate(t: i64, every: i64) -> i64 {
let remainder = t % every;
t - (remainder + every * (remainder < 0) as i64)
}

impl PolarsTruncate for DatetimeChunked {
fn truncate(&self, tz: Option<&Tz>, every: &StringChunked) -> PolarsResult<Self> {
let time_zone = self.time_zone();
Expand All @@ -35,10 +41,7 @@ impl PolarsTruncate for DatetimeChunked {
TimeUnit::Nanoseconds => every_parsed.duration_ns(),
};
return Ok(self
.apply_values(|t| {
let remainder = t % every;
t - (remainder + every * (remainder < 0) as i64)
})
.apply_values(|t| fast_truncate(t, every))
.into_datetime(self.time_unit(), time_zone.clone()));
} else {
let w = Window::new(every_parsed, every_parsed, offset);
Expand Down
10 changes: 6 additions & 4 deletions py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,12 @@ def round(self, every: str | dt.timedelta | IntoExprColumn) -> Expr:
This functionality is considered **unstable**. It may be changed
at any point without it being considered a breaking change.

Each date/datetime in the first half of the interval
is mapped to the start of its bucket.
Each date/datetime in the second half of the interval
is mapped to the end of its bucket.
- Each date/datetime in the first half of the interval
is mapped to the start of its bucket.
- Each date/datetime in the second half of the interval
is mapped to the end of its bucket.
- Half-way points are mapped to the start of their bucket.

Ambiguous results are localised using the DST offset of the original timestamp -
for example, rounding `'2022-11-06 01:20:00 CST'` by `'1h'` results in
`'2022-11-06 01:00:00 CST'`, whereas rounding `'2022-11-06 01:20:00 CDT'` by
Expand Down
10 changes: 6 additions & 4 deletions py-polars/polars/series/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1764,10 +1764,12 @@ def round(self, every: str | dt.timedelta | IntoExprColumn) -> Series:
This functionality is considered **unstable**. It may be changed
at any point without it being considered a breaking change.

Each date/datetime in the first half of the interval is mapped to the start of
its bucket.
Each date/datetime in the second half of the interval is mapped to the end of
its bucket.
- Each date/datetime in the first half of the interval
is mapped to the start of its bucket.
- Each date/datetime in the second half of the interval
is mapped to the end of its bucket.
- Half-way points are mapped to the start of their bucket.

Ambiguous results are localized using the DST offset of the original timestamp -
for example, rounding `'2022-11-06 01:20:00 CST'` by `'1h'` results in
`'2022-11-06 01:00:00 CST'`, whereas rounding `'2022-11-06 01:20:00 CDT'` by
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,51 @@ def test_round_datetime_w_expression(time_unit: TimeUnit) -> None:
result = df.select(pl.col("a").dt.round(pl.col("b")))["a"]
assert result[0] == datetime(2020, 1, 1)
assert result[1] == datetime(2020, 1, 21)


@pytest.mark.parametrize(
("time_unit", "expected"),
[
("ms", 0),
("us", 0),
("ns", 0),
],
)
def test_round_negative_towards_epoch_18239(time_unit: TimeUnit, expected: int) -> None:
s = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit))
s = s.dt.offset_by(f"-1{time_unit}")
result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").item()
assert result == expected
result = (
s.dt.replace_time_zone("Europe/London")
.dt.round(f"2{time_unit}")
.dt.replace_time_zone(None)
.dt.timestamp(time_unit="ns")
.item()
)
assert result == expected


@pytest.mark.parametrize(
("time_unit", "expected"),
[
("ms", 2_000_000),
("us", 2_000),
("ns", 2),
],
)
def test_round_positive_away_from_epoch_18239(
time_unit: TimeUnit, expected: int
) -> None:
s = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit))
s = s.dt.offset_by(f"1{time_unit}")
result = s.dt.round(f"2{time_unit}").dt.timestamp(time_unit="ns").item()
assert result == expected
result = (
s.dt.replace_time_zone("Europe/London")
.dt.round(f"2{time_unit}")
.dt.replace_time_zone(None)
.dt.timestamp(time_unit="ns")
.item()
)
assert result == expected