Skip to content

Commit

Permalink
feat(rust,python): add .list.any() and .list.all() (pola-rs#9573)
Browse files Browse the repository at this point in the history
Co-authored-by: ritchie <ritchie46@gmail.com>
  • Loading branch information
2 people authored and c-peters committed Jul 14, 2023
1 parent 3cc9704 commit ed58c7a
Show file tree
Hide file tree
Showing 18 changed files with 276 additions and 1 deletion.
1 change: 1 addition & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ coalesce = ["polars-lazy/coalesce"]
streaming = ["polars-lazy/streaming"]
fused = ["polars-ops/fused", "polars-lazy/fused"]
list_sets = ["polars-lazy/list_sets"]
list_any_all = ["polars-lazy/list_any_all"]

test = [
"lazy",
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ serde = [
]
fused = ["polars-plan/fused", "polars-ops/fused"]
list_sets = ["polars-plan/list_sets", "polars-ops/list_sets"]
list_any_all = ["polars-ops/list_any_all", "polars-plan/list_any_all"]

binary_encoding = ["polars-plan/binary_encoding"]

Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ propagate_nans = ["polars-ops/propagate_nans"]
coalesce = []
fused = []
list_sets = ["polars-ops/list_sets"]
list_any_all = ["polars-ops/list_any_all"]

bigidx = ["polars-arrow/bigidx", "polars-core/bigidx", "polars-utils/bigidx"]

Expand Down
19 changes: 18 additions & 1 deletion polars/polars-lazy/polars-plan/src/dsl/function_expr/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ pub enum ListFunction {
Sum,
#[cfg(feature = "list_sets")]
SetOperation(SetOperation),
#[cfg(feature = "list_any_all")]
Any,
#[cfg(feature = "list_any_all")]
All,
}

impl Display for ListFunction {
Expand All @@ -37,6 +41,10 @@ impl Display for ListFunction {
Sum => "sum",
#[cfg(feature = "list_sets")]
SetOperation(s) => return write!(f, "{s}"),
#[cfg(feature = "list_any_all")]
Any => "any",
#[cfg(feature = "list_any_all")]
All => "all",
};
write!(f, "{name}")
}
Expand Down Expand Up @@ -250,6 +258,15 @@ pub(super) fn sum(s: &Series) -> PolarsResult<Series> {
pub(super) fn set_operation(s: &[Series], set_type: SetOperation) -> PolarsResult<Series> {
let s0 = &s[0];
let s1 = &s[1];

Ok(list_set_operation(s0.list()?, s1.list()?, set_type).into_series())
}

/// Expression-level entry point for `list.any`.
///
/// Views `s` as a list column and forwards to its `lst_any` method. Any error
/// raised by `s.list()` or by `lst_any` is propagated to the caller.
#[cfg(feature = "list_any_all")]
pub(super) fn lst_any(s: &Series) -> PolarsResult<Series> {
    let lists = s.list()?;
    lists.lst_any()
}

/// Expression-level entry point for `list.all`.
///
/// Views `s` as a list column and forwards to its `lst_all` method. Any error
/// raised by `s.list()` or by `lst_all` is propagated to the caller.
#[cfg(feature = "list_any_all")]
pub(super) fn lst_all(s: &Series) -> PolarsResult<Series> {
    let lists = s.list()?;
    lists.lst_all()
}
4 changes: 4 additions & 0 deletions polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,10 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
Sum => map!(list::sum),
#[cfg(feature = "list_sets")]
SetOperation(s) => map_as_slice!(list::set_operation, s),
#[cfg(feature = "list_any_all")]
Any => map!(list::lst_any),
#[cfg(feature = "list_any_all")]
All => map!(list::lst_all),
}
}
#[cfg(feature = "dtype-array")]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ impl FunctionExpr {
Sum => mapper.nested_sum_type(),
#[cfg(feature = "list_sets")]
SetOperation(_) => mapper.with_same_dtype(),
#[cfg(feature = "list_any_all")]
Any => mapper.with_dtype(DataType::Boolean),
#[cfg(feature = "list_any_all")]
All => mapper.with_dtype(DataType::Boolean),
}
}
#[cfg(feature = "dtype-array")]
Expand Down
14 changes: 14 additions & 0 deletions polars/polars-lazy/polars-plan/src/dsl/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,20 @@ use crate::prelude::*;
pub struct ListNameSpace(pub Expr);

impl ListNameSpace {
/// Build an expression that applies `ListFunction::Any` to this list
/// expression; it is displayed as `list.any`.
#[cfg(feature = "list_any_all")]
pub fn any(self) -> Expr {
    let Self(expr) = self;
    let function = FunctionExpr::ListExpr(ListFunction::Any);
    expr.apply_private(function).with_fmt("list.any")
}

/// Build an expression that applies `ListFunction::All` to this list
/// expression; it is displayed as `list.all`.
#[cfg(feature = "list_any_all")]
pub fn all(self) -> Expr {
    let Self(expr) = self;
    let function = FunctionExpr::ListExpr(ListFunction::All);
    expr.apply_private(function).with_fmt("list.all")
}

/// Get lengths of the arrays in the List type.
pub fn lengths(self) -> Expr {
let function = |s: Series| {
Expand Down
1 change: 1 addition & 0 deletions polars/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,4 @@ asof_join = ["polars-core/asof_join"]
semi_anti_join = ["polars-core/semi_anti_join"]
list_take = []
list_sets = []
list_any_all = []
58 changes: 58 additions & 0 deletions polars/polars-ops/src/chunked_array/list/any_all.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use arrow::array::{BooleanArray, ListArray};

use super::*;

/// Reduce every sub-list of a boolean list array to a single boolean value.
///
/// `op` is called once per row with the slice of child values that belongs to
/// that row (as delimited by the list offsets); its result becomes the output
/// value for the row. The validity bitmap of `arr` is attached unchanged to
/// the output array.
///
/// `is_all` tells the function that `op` is an "all" reduction, which enables
/// a shortcut: when the complete child array already passes
/// `arrow::compute::boolean::all`, every row is reported as `true` without
/// slicing per row.
///
/// # Errors
///
/// Returns a `ComputeError` when the child values are not of boolean type.
fn list_all_any<F>(arr: &ListArray<i64>, op: F, is_all: bool) -> PolarsResult<ArrayRef>
where
    F: Fn(&BooleanArray) -> bool,
{
    let offsets = arr.offsets().as_slice();
    let values = arr.values();

    polars_ensure!(values.data_type() == &ArrowDataType::Boolean, ComputeError: "expected boolean elements in list");

    // The data type was verified above, so the downcast cannot fail.
    let values = values.as_any().downcast_ref::<BooleanArray>().unwrap();
    let validity = arr.validity().cloned();

    // Fast path for "all" only. `is_all` is tested first so that an "any"
    // reduction does not pay for a scan over all child values whose result
    // it would never use.
    if is_all && arrow::compute::boolean::all(values) {
        return Ok(BooleanChunked::full("", true, arr.len()).chunks()[0]
            .clone()
            .with_validity(validity));
    }

    // `start` is carried across iterations: each offset after the first marks
    // the end of the current row and the start of the next one.
    let mut start = offsets[0] as usize;
    let iter = offsets[1..].iter().map(|&end| {
        let end = end as usize;
        let len = end - start;
        // TODO!
        // we can speed this up if the boolean array doesn't have nulls
        // Then we can work directly on the byte slice.
        let val = values.clone().sliced_unchecked(start, len);
        start = end;
        op(&val)
    });

    Ok(Box::new(
        BooleanArray::from_trusted_len_values_iter(iter).with_validity(validity),
    ))
}

/// Compute, for every chunk of `ca`, the per-row "all" reduction and collect
/// the results into a boolean `Series` that keeps the name of `ca`.
///
/// Stops at, and returns, the first error produced by `list_all_any`.
pub(super) fn list_all(ca: &ListChunked) -> PolarsResult<Series> {
    let mut chunks = Vec::new();
    for arr in ca.downcast_iter() {
        chunks.push(list_all_any(arr, arrow::compute::boolean::all, true)?);
    }

    // SAFETY: every chunk was produced by `list_all_any`, which only returns
    // boolean arrays, matching the `BooleanChunked` they are wrapped in.
    let out = unsafe { BooleanChunked::from_chunks(ca.name(), chunks) };
    Ok(out.into_series())
}
/// Compute, for every chunk of `ca`, the per-row "any" reduction and collect
/// the results into a boolean `Series` that keeps the name of `ca`.
///
/// Stops at, and returns, the first error produced by `list_all_any`.
pub(super) fn list_any(ca: &ListChunked) -> PolarsResult<Series> {
    let mut chunks = Vec::new();
    for arr in ca.downcast_iter() {
        chunks.push(list_all_any(arr, arrow::compute::boolean::any, false)?);
    }

    // SAFETY: every chunk was produced by `list_all_any`, which only returns
    // boolean arrays, matching the `BooleanChunked` they are wrapped in.
    let out = unsafe { BooleanChunked::from_chunks(ca.name(), chunks) };
    Ok(out.into_series())
}
2 changes: 2 additions & 0 deletions polars/polars-ops/src/chunked_array/list/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use polars_core::prelude::*;

#[cfg(feature = "list_any_all")]
mod any_all;
mod count;
#[cfg(feature = "hash")]
pub(crate) mod hash;
Expand Down
14 changes: 14 additions & 0 deletions polars/polars-ops/src/chunked_array/list/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ use polars_core::series::ops::NullBehavior;
use polars_core::utils::{try_get_supertype, CustomIterTools};

use super::*;
#[cfg(feature = "list_any_all")]
use crate::chunked_array::list::any_all::*;
use crate::chunked_array::list::min_max::{list_max_function, list_min_function};
use crate::chunked_array::list::sum_mean::sum_with_nulls;
use crate::prelude::list::sum_mean::{mean_list_numerical, sum_list_numerical};
Expand Down Expand Up @@ -114,6 +116,18 @@ pub trait ListNameSpaceImpl: AsList {
list_max_function(self.as_list())
}

/// Run the `list_all` kernel on this list column.
#[cfg(feature = "list_any_all")]
fn lst_all(&self) -> PolarsResult<Series> {
    list_all(self.as_list())
}

/// Run the `list_any` kernel on this list column.
#[cfg(feature = "list_any_all")]
fn lst_any(&self) -> PolarsResult<Series> {
    list_any(self.as_list())
}

fn lst_min(&self) -> Series {
list_min_function(self.as_list())
}
Expand Down
2 changes: 2 additions & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ list_take = ["polars/list_take"]
list_count = ["polars/list_count"]
binary_encoding = ["polars/binary_encoding"]
list_sets = ["polars-lazy/list_sets"]
list_any_all = ["polars/list_any_all"]

all = [
"json",
Expand Down Expand Up @@ -103,6 +104,7 @@ all = [
"list_take",
"list_count",
"list_sets",
"list_any_all",
]

# we cannot conditionally activate simd
Expand Down
2 changes: 2 additions & 0 deletions py-polars/docs/source/reference/expressions/list.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ The following methods are available under the `expr.list` attribute.
:toctree: api/
:template: autosummary/accessor_method.rst

Expr.list.all
Expr.list.any
Expr.list.arg_max
Expr.list.arg_min
Expr.list.concat
Expand Down
2 changes: 2 additions & 0 deletions py-polars/docs/source/reference/series/list.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ The following methods are available under the `Series.list` attribute.
:toctree: api/
:template: autosummary/accessor_method.rst

Series.list.all
Series.list.any
Series.list.arg_max
Series.list.arg_min
Series.list.concat
Expand Down
54 changes: 54 additions & 0 deletions py-polars/polars/expr/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,60 @@ def __init__(self, expr: Expr):
def __getitem__(self, item: int) -> Expr:
return self.get(item)

def all(self) -> Expr:
    """
    Evaluate whether all boolean values in a list are true.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"a": [[True, True], [False, True], [False, False], [None], [], None]}
    ... )
    >>> df.select(pl.col("a").list.all())
    shape: (6, 1)
    ┌───────┐
    │ a     │
    │ ---   │
    │ bool  │
    ╞═══════╡
    │ true  │
    │ false │
    │ false │
    │ false │
    │ true  │
    │ null  │
    └───────┘

    """
    # Delegate to the native expression and wrap the result back into `Expr`.
    pyexpr = self._pyexpr.list_all()
    return wrap_expr(pyexpr)

def any(self) -> Expr:
    """
    Evaluate whether any boolean value in a list is true.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"a": [[True, True], [False, True], [False, False], [None], [], None]}
    ... )
    >>> df.select(pl.col("a").list.any())
    shape: (6, 1)
    ┌───────┐
    │ a     │
    │ ---   │
    │ bool  │
    ╞═══════╡
    │ true  │
    │ true  │
    │ false │
    │ false │
    │ false │
    │ null  │
    └───────┘

    """
    # Delegate to the native expression and wrap the result back into `Expr`.
    pyexpr = self._pyexpr.list_any()
    return wrap_expr(pyexpr)

def lengths(self) -> Expr:
"""
Get the length of the arrays as UInt32.
Expand Down
52 changes: 52 additions & 0 deletions py-polars/polars/series/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,58 @@ class ListNameSpace:
def __init__(self, series: Series):
self._s: PySeries = series._s

def all(self) -> Series:
    """
    Evaluate whether all boolean values in a list are true.

    Returns
    -------
    Series
        One boolean per list.

    Examples
    --------
    The example below uses the equivalent expression on a DataFrame column.

    >>> df = pl.DataFrame(
    ...     {"a": [[True, True], [False, True], [False, False], [None], [], None]}
    ... )
    >>> df.select(pl.col("a").list.all())
    shape: (6, 1)
    ┌───────┐
    │ a     │
    │ ---   │
    │ bool  │
    ╞═══════╡
    │ true  │
    │ false │
    │ false │
    │ false │
    │ true  │
    │ null  │
    └───────┘

    """
    # NOTE(review): there is intentionally no body here; presumably the call
    # is dispatched to the expression implementation by machinery outside
    # this view - confirm against the class decorator.

def any(self) -> Series:
    """
    Evaluate whether any boolean value in a list is true.

    Returns
    -------
    Series
        One boolean per list.

    Examples
    --------
    The example below uses the equivalent expression on a DataFrame column.

    >>> df = pl.DataFrame(
    ...     {"a": [[True, True], [False, True], [False, False], [None], [], None]}
    ... )
    >>> df.select(pl.col("a").list.any())
    shape: (6, 1)
    ┌───────┐
    │ a     │
    │ ---   │
    │ bool  │
    ╞═══════╡
    │ true  │
    │ true  │
    │ false │
    │ false │
    │ false │
    │ null  │
    └───────┘

    """
    # NOTE(review): there is intentionally no body here; presumably the call
    # is dispatched to the expression implementation by machinery outside
    # this view - confirm against the class decorator.

def lengths(self) -> Series:
"""
Get the length of the arrays as UInt32.
Expand Down
10 changes: 10 additions & 0 deletions py-polars/src/expr/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ use crate::PyExpr;

#[pymethods]
impl PyExpr {
/// Python binding for `Expr.list.all`: applies `list().all()` to a clone of
/// the wrapped expression.
#[cfg(feature = "list_any_all")]
fn list_all(&self) -> Self {
    let expr = self.inner.clone();
    expr.list().all().into()
}

/// Python binding for `Expr.list.any`: applies `list().any()` to a clone of
/// the wrapped expression.
#[cfg(feature = "list_any_all")]
fn list_any(&self) -> Self {
    let expr = self.inner.clone();
    expr.list().any().into()
}

fn list_arg_max(&self) -> Self {
self.inner.clone().list().arg_max().into()
}
Expand Down
Loading

0 comments on commit ed58c7a

Please sign in to comment.