Skip to content

Commit

Permalink
feat(rust, python): date_range expression (#5267)
Browse files Browse the repository at this point in the history
Co-authored-by: Thomas Aarholt <thomasaarholt@gmail.com>
  • Loading branch information
ritchie46 and thomasaarholt authored Oct 20, 2022
1 parent 698adf4 commit 28b55f3
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 6 deletions.
41 changes: 41 additions & 0 deletions polars/polars-lazy/polars-plan/src/dsl/function_expr/datetime.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use polars_core::utils::arrow::temporal_conversions::SECONDS_IN_DAY;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -25,6 +26,12 @@ pub enum TemporalFunction {
Round(String, String),
#[cfg(feature = "timezones")]
CastTimezone(TimeZone),
DateRange {
name: String,
every: Duration,
closed: ClosedWindow,
tz: Option<TimeZone>,
},
}

impl Display for TemporalFunction {
Expand All @@ -50,6 +57,7 @@ impl Display for TemporalFunction {
Round(..) => "round",
#[cfg(feature = "timezones")]
CastTimezone(_) => "cast_timezone",
DateRange { .. } => return write!(f, "date_range"),
};
write!(f, "dt.{}", s)
}
Expand Down Expand Up @@ -127,3 +135,36 @@ pub(super) fn cast_timezone(s: &Series, tz: &str) -> PolarsResult<Series> {
let ca = s.datetime()?;
ca.cast_time_zone(tz).map(|ca| ca.into_series())
}

pub(super) fn date_range_dispatch(
s: &[Series],
name: &str,
every: Duration,
closed: ClosedWindow,
tz: Option<TimeZone>,
) -> PolarsResult<Series> {
let start = &s[0];
let stop = &s[1];

match start.dtype() {
DataType::Date => {
let start = start.to_physical_repr();
let stop = stop.to_physical_repr();
// to milliseconds
let start = start.get(0).extract::<i64>().unwrap() * SECONDS_IN_DAY * 1000;
let stop = stop.get(0).extract::<i64>().unwrap() * SECONDS_IN_DAY * 1000;

date_range_impl(name, start, stop, every, closed, TimeUnit::Milliseconds, tz)
.cast(&DataType::Date)
}
DataType::Datetime(tu, _) => {
let start = start.to_physical_repr();
let stop = stop.to_physical_repr();
let start = start.get(0).extract::<i64>().unwrap();
let stop = stop.get(0).extract::<i64>().unwrap();

Ok(date_range_impl(name, start, stop, every, closed, *tu, tz).into_series())
}
_ => todo!(),
}
}
14 changes: 14 additions & 0 deletions polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,20 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
Round(every, offset) => map!(datetime::round, &every, &offset),
#[cfg(feature = "timezones")]
CastTimezone(tz) => map!(datetime::cast_timezone, &tz),
DateRange {
name,
every,
closed,
tz,
} => {
map_as_slice!(
datetime::date_range_dispatch,
name.as_ref(),
every,
closed,
tz.clone()
)
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ impl FunctionExpr {
}
})
}
DateRange { .. } => return super_type(),
};
with_dtype(dtype)
}
Expand Down
29 changes: 28 additions & 1 deletion polars/polars-lazy/polars-plan/src/dsl/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
use std::ops::{BitAnd, BitOr};

use polars_core::export::arrow::temporal_conversions::NANOSECONDS;
use polars_core::prelude::*;
use polars_core::utils::arrow::temporal_conversions::SECONDS_IN_DAY;
#[cfg(feature = "rank")]
use polars_core::utils::coalesce_nulls_series;
Expand Down Expand Up @@ -981,3 +980,31 @@ pub fn coalesce(exprs: &[Expr]) -> Expr {
},
}
}

/// Create a date range from a `start` and `stop` expression.
#[cfg(feature = "temporal")]
pub fn date_range(
name: String,
start: Expr,
end: Expr,
every: Duration,
closed: ClosedWindow,
tz: Option<TimeZone>,
) -> Expr {
let input = vec![start, end];

Expr::Function {
input,
function: FunctionExpr::TemporalExpr(TemporalFunction::DateRange {
name,
every,
closed,
tz,
}),
options: FunctionOptions {
collect_groups: ApplyOptions::ApplyGroups,
cast_to_supertypes: true,
..Default::default()
},
}
}
1 change: 1 addition & 0 deletions polars/polars-lazy/src/dsl/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//! Function on multiple expressions.
//!
use polars_core::prelude::*;
pub use polars_plan::dsl::functions::*;
use rayon::prelude::*;

use crate::prelude::*;
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-time/src/windows/groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize};

use crate::prelude::*;

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ClosedWindow {
Left,
Expand Down
2 changes: 1 addition & 1 deletion polars/tests/it/lazy/expressions/expand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use super::*;
fn test_expand_datetimes_3042() -> PolarsResult<()> {
let low = NaiveDate::from_ymd(2020, 1, 1).and_hms(0, 0, 0);
let high = NaiveDate::from_ymd(2020, 2, 1).and_hms(0, 0, 0);
let date_range = date_range(
let date_range = polars_time::date_range(
"dt1",
low,
high,
Expand Down
2 changes: 1 addition & 1 deletion polars/tests/it/lazy/groupby_dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use super::*;
fn test_groupby_dynamic_week_bounds() -> PolarsResult<()> {
let start = NaiveDate::from_ymd(2022, 2, 1).and_hms(0, 0, 0);
let stop = NaiveDate::from_ymd(2022, 2, 14).and_hms(0, 0, 0);
let range = date_range(
let range = polars_time::date_range(
"dt",
start,
stop,
Expand Down
86 changes: 84 additions & 2 deletions py-polars/polars/internals/functions.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,26 @@
from __future__ import annotations

import sys
import typing
from datetime import date, datetime, timedelta
from typing import TYPE_CHECKING, Sequence, overload

from polars import internals as pli
from polars.datatypes import Categorical, Date, Float64, PolarsDataType
from polars.utils import _datetime_to_pl_timestamp, _timedelta_to_pl_duration

if sys.version_info >= (3, 10):
from typing import Literal
else:
from typing_extensions import Literal


try:
from polars.polars import concat_df as _concat_df
from polars.polars import concat_lf as _concat_lf
from polars.polars import concat_series as _concat_series
from polars.polars import py_date_range as _py_date_range
from polars.polars import py_date_range_lazy as _py_date_range_lazy
from polars.polars import py_diag_concat_df as _diag_concat_df
from polars.polars import py_hor_concat_df as _hor_concat_df

Expand Down Expand Up @@ -174,15 +183,78 @@ def _interval_granularity(interval: str) -> str:
return interval[-2:].lstrip("0123456789")


@overload
def date_range(
low: pli.Expr,
high: date | datetime | pli.Expr | str,
interval: str | timedelta,
*,
lazy: Literal[False] = ...,
closed: ClosedWindow = "both",
name: str | None = None,
time_unit: TimeUnit | None = None,
time_zone: str | None = None,
) -> pli.Expr:
...


@overload
def date_range(
low: date | datetime | pli.Expr | str,
high: pli.Expr,
interval: str | timedelta,
*,
lazy: Literal[False] = ...,
closed: ClosedWindow = "both",
name: str | None = None,
time_unit: TimeUnit | None = None,
time_zone: str | None = None,
) -> pli.Expr:
...


@overload
def date_range(
low: date | datetime,
high: date | datetime,
low: date | datetime | str,
high: date | datetime | str,
interval: str | timedelta,
*,
lazy: Literal[False] = ...,
closed: ClosedWindow = "both",
name: str | None = None,
time_unit: TimeUnit | None = None,
time_zone: str | None = None,
) -> pli.Series:
...


@overload
def date_range(
low: date | datetime | pli.Expr | str,
high: date | datetime | pli.Expr | str,
interval: str | timedelta,
*,
lazy: Literal[True],
closed: ClosedWindow = "both",
name: str | None = None,
time_unit: TimeUnit | None = None,
time_zone: str | None = None,
) -> pli.Expr:
...


@typing.no_type_check
def date_range(
low: date | datetime | pli.Expr | str,
high: date | datetime | pli.Expr | str,
interval: str | timedelta,
*,
lazy: bool = False,
closed: ClosedWindow = "both",
name: str | None = None,
time_unit: TimeUnit | None = None,
time_zone: str | None = None,
) -> pli.Series | pli.Expr:
"""
Create a range of type `Datetime` (or `Date`).
Expand All @@ -196,6 +268,8 @@ def date_range(
Interval periods. It can be a python timedelta object, like
``timedelta(days=10)``, or a polars duration string, such as ``3d12h4m25s``
representing 3 days, 12 hours, 4 minutes, and 25 seconds.
lazy:
Return an expression.
closed : {'both', 'left', 'right', 'none'}
Define whether the temporal window interval is closed or not.
name
Expand All @@ -205,6 +279,7 @@ def date_range(
time_zone:
Optional timezone
Notes
-----
If both ``low`` and ``high`` are passed as date types (not datetime), and the
Expand Down Expand Up @@ -256,6 +331,13 @@ def date_range(
elif " " in interval:
interval = interval.replace(" ", "")

if isinstance(low, pli.Expr) or isinstance(high, pli.Expr) or lazy:
low = pli.expr_to_lit_or_expr(low, str_to_lit=True)
high = pli.expr_to_lit_or_expr(high, str_to_lit=True)
return pli.wrap_expr(
_py_date_range_lazy(low, high, interval, closed, name, time_zone)
)

low, low_is_date = _ensure_datetime(low)
high, high_is_date = _ensure_datetime(high)

Expand Down
16 changes: 16 additions & 0 deletions py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,21 @@ fn py_date_range(
.into()
}

#[pyfunction]
fn py_date_range_lazy(
start: PyExpr,
end: PyExpr,
every: &str,
closed: Wrap<ClosedWindow>,
name: String,
tz: Option<TimeZone>,
) -> PyExpr {
let start = start.inner;
let end = end.inner;
let every = Duration::parse(every);
polars::lazy::dsl::functions::date_range(name, start, end, every, closed.0, tz).into()
}

#[pyfunction]
fn min_exprs(exprs: Vec<PyExpr>) -> PyExpr {
let exprs = exprs.to_exprs();
Expand Down Expand Up @@ -580,6 +595,7 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(py_datetime)).unwrap();
m.add_wrapped(wrap_pyfunction!(py_duration)).unwrap();
m.add_wrapped(wrap_pyfunction!(py_date_range)).unwrap();
m.add_wrapped(wrap_pyfunction!(py_date_range_lazy)).unwrap();
m.add_wrapped(wrap_pyfunction!(sum_exprs)).unwrap();
m.add_wrapped(wrap_pyfunction!(min_exprs)).unwrap();
m.add_wrapped(wrap_pyfunction!(max_exprs)).unwrap();
Expand Down

0 comments on commit 28b55f3

Please sign in to comment.