Skip to content

Commit

Permalink
feat(rust, python): groupby_dynamic define start of first window (pol…
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored and zundertj committed Jan 7, 2023
1 parent 51a6ddb commit 451eebe
Show file tree
Hide file tree
Showing 16 changed files with 429 additions and 67 deletions.
3 changes: 2 additions & 1 deletion polars/polars-time/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ description = "Time related code for the polars dataframe library"
chrono = "0.4"
chrono-tz = { version = "0.6", optional = true }
lexical = { version = "6", default-features = false, features = ["std", "parse-floats", "parse-integers"] }
now = "0.1"
polars-arrow = { version = "0.25.1", path = "../polars-arrow", features = ["compute", "temporal"] }
polars-core = { version = "0.25.1", path = "../polars-core", default-features = false, features = ["private", "dtype-datetime", "dtype-duration", "dtype-time", "dtype-date"] }
polars-utils = { version = "0.25.1", path = "../polars-utils" }
Expand All @@ -25,7 +26,7 @@ dtype-duration = ["polars-core/dtype-duration", "polars-core/temporal"]
rolling_window = ["polars-core/rolling_window", "dtype-duration"]
private = []
fmt = ["polars-core/fmt"]
timezones = ["chrono-tz", "dtype-datetime"]
timezones = ["chrono-tz", "dtype-datetime", "polars-core/timezones"]

test = [
"dtype-date",
Expand Down
70 changes: 54 additions & 16 deletions polars/polars-time/src/groupby/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,22 @@ pub struct DynamicGroupOptions {
// add the boundaries to the dataframe
pub include_boundaries: bool,
pub closed_window: ClosedWindow,
pub start_by: StartBy,
}

impl Default for DynamicGroupOptions {
fn default() -> Self {
Self {
index_column: "".to_string(),
every: Duration::new(1),
period: Duration::new(1),
offset: Duration::new(1),
truncate: true,
include_boundaries: false,
closed_window: ClosedWindow::Left,
start_by: Default::default(),
}
}
}

#[derive(Clone, Debug, PartialEq, Eq)]
Expand Down Expand Up @@ -241,6 +257,7 @@ impl Wrap<&DataFrame> {
tu,
include_lower_bound,
include_upper_bound,
options.start_by,
);
update_bounds(lower, upper);
GroupsProxy::Slice {
Expand Down Expand Up @@ -271,6 +288,7 @@ impl Wrap<&DataFrame> {
tu,
include_lower_bound,
include_upper_bound,
options.start_by,
);

(lower, upper, update_subgroups_idx(&sub_groups, base_g))
Expand Down Expand Up @@ -300,6 +318,7 @@ impl Wrap<&DataFrame> {
tu,
include_lower_bound,
include_upper_bound,
options.start_by,
);
update_subgroups_idx(&sub_groups, base_g)
})
Expand Down Expand Up @@ -583,11 +602,15 @@ mod test {

#[test]
fn test_dynamic_groupby_window() {
let start = NaiveDate::from_ymd(2021, 12, 16)
.and_hms(0, 0, 0)
let start = NaiveDate::from_ymd_opt(2021, 12, 16)
.unwrap()
.and_hms_opt(0, 0, 0)
.unwrap()
.timestamp_millis();
let stop = NaiveDate::from_ymd(2021, 12, 16)
.and_hms(3, 0, 0)
let stop = NaiveDate::from_ymd_opt(2021, 12, 16)
.unwrap()
.and_hms_opt(3, 0, 0)
.unwrap()
.timestamp_millis();
let range = date_range_impl(
"date",
Expand All @@ -614,6 +637,7 @@ mod test {
truncate: true,
include_boundaries: true,
closed_window: ClosedWindow::Both,
start_by: Default::default(),
},
)
.unwrap();
Expand All @@ -630,11 +654,15 @@ mod test {
assert_eq!(g, &["a", "a", "a", "a", "b", "b"]);

let upper = out.column("_upper_boundary").unwrap().slice(0, 3);
let start = NaiveDate::from_ymd(2021, 12, 16)
.and_hms(1, 0, 0)
let start = NaiveDate::from_ymd_opt(2021, 12, 16)
.unwrap()
.and_hms_opt(1, 0, 0)
.unwrap()
.timestamp_millis();
let stop = NaiveDate::from_ymd(2021, 12, 16)
.and_hms(3, 0, 0)
let stop = NaiveDate::from_ymd_opt(2021, 12, 16)
.unwrap()
.and_hms_opt(3, 0, 0)
.unwrap()
.timestamp_millis();
let range = date_range_impl(
"_upper_boundary",
Expand All @@ -649,11 +677,15 @@ mod test {
assert_eq!(&upper, &range);

let upper = out.column("_lower_boundary").unwrap().slice(0, 3);
let start = NaiveDate::from_ymd(2021, 12, 16)
.and_hms(0, 0, 0)
let start = NaiveDate::from_ymd_opt(2021, 12, 16)
.unwrap()
.and_hms_opt(0, 0, 0)
.unwrap()
.timestamp_millis();
let stop = NaiveDate::from_ymd(2021, 12, 16)
.and_hms(2, 0, 0)
let stop = NaiveDate::from_ymd_opt(2021, 12, 16)
.unwrap()
.and_hms_opt(2, 0, 0)
.unwrap()
.timestamp_millis();
let range = date_range_impl(
"_lower_boundary",
Expand Down Expand Up @@ -695,17 +727,22 @@ mod test {
truncate: true,
include_boundaries: true,
closed_window: ClosedWindow::Both,
start_by: Default::default(),
},
);
}

#[test]
fn test_truncate_offset() {
let start = NaiveDate::from_ymd(2021, 3, 1)
.and_hms(12, 0, 0)
let start = NaiveDate::from_ymd_opt(2021, 3, 1)
.unwrap()
.and_hms_opt(12, 0, 0)
.unwrap()
.timestamp_millis();
let stop = NaiveDate::from_ymd(2021, 3, 7)
.and_hms(12, 0, 0)
let stop = NaiveDate::from_ymd_opt(2021, 3, 7)
.unwrap()
.and_hms_opt(12, 0, 0)
.unwrap()
.timestamp_millis();
let range = date_range_impl(
"date",
Expand All @@ -732,6 +769,7 @@ mod test {
truncate: true,
include_boundaries: true,
closed_window: ClosedWindow::Both,
start_by: Default::default(),
},
)
.unwrap();
Expand Down
18 changes: 17 additions & 1 deletion polars/polars-time/src/windows/groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,21 @@ pub enum ClosedWindow {
None,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum StartBy {
WindowBound,
DataPoint,
/// only useful if periods are weekly
Monday,
}

impl Default for StartBy {
fn default() -> Self {
Self::WindowBound
}
}

/// Based on the given `Window`, which has an
/// - every
/// - period
Expand All @@ -38,6 +53,7 @@ pub fn groupby_windows(
tu: TimeUnit,
include_lower_bound: bool,
include_upper_bound: bool,
start_by: StartBy,
) -> (GroupsSlice, Vec<i64>, Vec<i64>) {
let start = time[0];
let boundary = if time.len() > 1 {
Expand Down Expand Up @@ -76,7 +92,7 @@ pub fn groupby_windows(
};
let mut start_offset = 0;

for bi in window.get_overlapping_bounds_iter(boundary, tu) {
for bi in window.get_overlapping_bounds_iter(boundary, tu, start_by) {
let mut skip_window = false;
// find starting point of window
while start_offset < time.len() {
Expand Down
Loading

0 comments on commit 451eebe

Please sign in to comment.