Skip to content

Commit

Permalink
perf: make truncate 4x faster in simple cases (#16615)
Browse files: browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored May 31, 2024
1 parent 6e88f1d commit b85c5e0
Showing 1 changed file with 49 additions and 3 deletions.
52 changes: 49 additions & 3 deletions crates/polars-time/src/truncate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,62 @@ pub trait PolarsTruncate {

impl PolarsTruncate for DatetimeChunked {
fn truncate(&self, tz: Option<&Tz>, every: &StringChunked, offset: &str) -> PolarsResult<Self> {
let offset = Duration::parse(offset);
let offset: Duration = Duration::parse(offset);
let time_zone = self.time_zone();
let mut duration_cache_opt: Option<FastFixedCache<String, Duration>> = None;

// Let's check if we can use a fastpath...
if every.len() == 1 {
if let Some(every) = every.get(0) {
let every_parsed = Duration::parse(every);
if every_parsed.negative {
polars_bail!(ComputeError: "cannot truncate a Datetime to a negative duration")
}
if (time_zone.is_none() || time_zone.as_deref() == Some("UTC"))
&& (every_parsed.months() == 0 && every_parsed.weeks() == 0)
{
// ... yes we can! Weeks, months, and time zones require extra logic.
// But in this simple case, it's just simple integer arithmetic.
let every = match self.time_unit() {
TimeUnit::Milliseconds => every_parsed.duration_ms(),
TimeUnit::Microseconds => every_parsed.duration_us(),
TimeUnit::Nanoseconds => every_parsed.duration_ns(),
};
return Ok(self
.apply_values(|t| {
let remainder = t % every;
t - remainder + every * (remainder < 0) as i64
})
.into_datetime(self.time_unit(), time_zone.clone()));
} else {
// A sqrt(n) cache is not too small, not too large.
duration_cache_opt =
Some(FastFixedCache::new((every.len() as f64).sqrt() as usize));
duration_cache_opt
.as_mut()
.map(|cache| *cache.insert(every.to_string(), every_parsed));
}
}
}
let mut duration_cache = match duration_cache_opt {
Some(cache) => cache,
None => FastFixedCache::new((every.len() as f64).sqrt() as usize),
};

let func = match self.time_unit() {
TimeUnit::Nanoseconds => Window::truncate_ns,
TimeUnit::Microseconds => Window::truncate_us,
TimeUnit::Milliseconds => Window::truncate_ms,
};

// A sqrt(n) cache is not too small, not too large.
let mut duration_cache = FastFixedCache::new((every.len() as f64).sqrt() as usize);
// TODO: optimize the code below, so it does the following:
// - convert to naive
// - truncate all naively
// - localize, preserving the fold of the original datetime.
// The last step is the non-trivial one. But it should be worth it,
// and faster than the current approach of truncating everything
// as tz-aware.

let out = broadcast_try_binary_elementwise(self, every, |opt_timestamp, opt_every| match (
opt_timestamp,
opt_every,
Expand Down

0 comments on commit b85c5e0

Please sign in to comment.