diff --git a/.codespellrc b/.codespellrc index ae8edd6352b7..854014b6e168 100644 --- a/.codespellrc +++ b/.codespellrc @@ -1,6 +1,6 @@ [codespell] # local codespell matches `./docs`, pre-commit codespell matches `docs` skip = *.lock,.direnv,.git,./docs/_freeze,./docs/_output/**,./docs/_inv/**,docs/_freeze/**,*.svg,*.css,*.html,*.js,ibis/backends/tests/tpc/queries/duckdb/ds/44.sql -ignore-regex = \b(i[if]f|I[IF]F|AFE)\b +ignore-regex = \b(i[if]f|I[IF]F|AFE|alls)\b builtin = clear,rare,names ignore-words-list = tim,notin,ang diff --git a/docs/_quarto.yml b/docs/_quarto.yml index e8fad272b63a..2a166be65512 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -361,6 +361,8 @@ quartodoc: package: ibis.expr.types.numeric - name: BooleanValue package: ibis.expr.types.logical + - name: BooleanColumn + package: ibis.expr.types.logical - name: and_ dynamic: true signature_name: full diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index 2b8d6bc28d30..991d7a3a032f 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -1350,3 +1350,43 @@ def execute_timestamp_range(op, **kw): def execute_drop_columns(op, **kw): parent = translate(op.parent, **kw) return parent.drop(op.columns_to_drop) + + +@translate.register(ops.ArraySum) +def execute_array_agg(op, **kw): + arg = translate(op.arg, **kw) + # workaround polars annoying sum([]) == 0 behavior + # + # the polars behavior is consistent with math, but inconsistent + # with every other query engine ever built. 
+ no_nulls = arg.list.drop_nulls() + return pl.when(no_nulls.list.len() == 0).then(None).otherwise(no_nulls.list.sum()) + + +@translate.register(ops.ArrayMean) +def execute_array_mean(op, **kw): + return translate(op.arg, **kw).list.mean() + + +@translate.register(ops.ArrayMin) +def execute_array_min(op, **kw): + return translate(op.arg, **kw).list.min() + + +@translate.register(ops.ArrayMax) +def execute_array_max(op, **kw): + return translate(op.arg, **kw).list.max() + + +@translate.register(ops.ArrayAny) +def execute_array_any(op, **kw): + arg = translate(op.arg, **kw) + no_nulls = arg.list.drop_nulls() + return pl.when(no_nulls.list.len() == 0).then(None).otherwise(no_nulls.list.any()) + + +@translate.register(ops.ArrayAll) +def execute_array_all(op, **kw): + arg = translate(op.arg, **kw) + no_nulls = arg.list.drop_nulls() + return pl.when(no_nulls.list.len() == 0).then(None).otherwise(no_nulls.list.all()) diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 1e90a4eada02..d49e9efdae25 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -76,6 +76,70 @@ "returns": "ARRAY", "source": """return Array(count).fill(value).flat();""", }, + "ibis_udfs.public.array_sum": { + "inputs": {"array": "ARRAY"}, + "returns": "DOUBLE", + "source": """\ +let total = 0.0; +let allNull = true; + +for (val of array) { + if (val !== null) { + total += val; + allNull = false; + } +} + +return !allNull ? total : null;""", + }, + "ibis_udfs.public.array_avg": { + "inputs": {"array": "ARRAY"}, + "returns": "DOUBLE", + "source": """\ +let count = 0; +let total = 0.0; + +for (val of array) { + if (val !== null) { + total += val; + ++count; + } +} + +return count !== 0 ? 
total / count : null;""", + }, + "ibis_udfs.public.array_any": { + "inputs": {"array": "ARRAY"}, + "returns": "BOOLEAN", + "source": """\ +let count = 0; + +for (val of array) { + if (val === true) { + return true; + } else if (val === false) { + ++count; + } +} + +return count !== 0 ? false : null;""", + }, + "ibis_udfs.public.array_all": { + "inputs": {"array": "ARRAY"}, + "returns": "BOOLEAN", + "source": """\ +let count = 0; + +for (val of array) { + if (val === false) { + return false; + } else if (val === true) { + ++count; + } +} + +return count !== 0 ? true : null;""", + }, } diff --git a/ibis/backends/sql/compilers/bigquery.py b/ibis/backends/sql/compilers/bigquery.py index 6ab1a1a311b0..e9cb87e2e4ec 100644 --- a/ibis/backends/sql/compilers/bigquery.py +++ b/ibis/backends/sql/compilers/bigquery.py @@ -774,3 +774,29 @@ def visit_TimestampBucket(self, op, *, arg, interval, offset): origin = self.f.anon[f"{funcname}_add"](origin, offset) return func(arg, interval, origin) + + def _array_reduction(self, *, arg, reduction): + name = sg.to_identifier(util.gen_name(f"bq_arr_{reduction}")) + return ( + sg.select(self.f[reduction](name)) + .from_(self._unnest(arg, as_=name)) + .subquery() + ) + + def visit_ArrayMin(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="min") + + def visit_ArrayMax(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="max") + + def visit_ArraySum(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="sum") + + def visit_ArrayMean(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="avg") + + def visit_ArrayAny(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="logical_or") + + def visit_ArrayAll(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="logical_and") diff --git a/ibis/backends/sql/compilers/clickhouse.py b/ibis/backends/sql/compilers/clickhouse.py index 31bdb11bf44b..2ad60977d2f9 100644 --- 
a/ibis/backends/sql/compilers/clickhouse.py +++ b/ibis/backends/sql/compilers/clickhouse.py @@ -737,3 +737,24 @@ def _cleanup_names( value.as_(self._gen_valid_name(name), quoted=quoted, copy=False) for name, value in exprs.items() ) + + def _array_reduction(self, arg): + x = sg.to_identifier("x", quoted=self.quoted) + not_null = sge.Lambda(this=x.is_(sg.not_(NULL)), expressions=[x]) + return self.f.arrayFilter(not_null, arg) + + def visit_ArrayMin(self, op, *, arg): + return self.f.arrayReduce("min", self._array_reduction(arg)) + + visit_ArrayAll = visit_ArrayMin + + def visit_ArrayMax(self, op, *, arg): + return self.f.arrayReduce("max", self._array_reduction(arg)) + + visit_ArrayAny = visit_ArrayMax + + def visit_ArraySum(self, op, *, arg): + return self.f.arrayReduce("sum", self._array_reduction(arg)) + + def visit_ArrayMean(self, op, *, arg): + return self.f.arrayReduce("avg", self._array_reduction(arg)) diff --git a/ibis/backends/sql/compilers/duckdb.py b/ibis/backends/sql/compilers/duckdb.py index fe8f53e2d3a1..77806461b74a 100644 --- a/ibis/backends/sql/compilers/duckdb.py +++ b/ibis/backends/sql/compilers/duckdb.py @@ -48,6 +48,12 @@ class DuckDBCompiler(SQLGlotCompiler): SIMPLE_OPS = { ops.Arbitrary: "any_value", ops.ArrayPosition: "list_indexof", + ops.ArrayMin: "list_min", + ops.ArrayMax: "list_max", + ops.ArrayAny: "list_bool_or", + ops.ArrayAll: "list_bool_and", + ops.ArraySum: "list_sum", + ops.ArrayMean: "list_avg", ops.BitAnd: "bit_and", ops.BitOr: "bit_or", ops.BitXor: "bit_xor", diff --git a/ibis/backends/sql/compilers/postgres.py b/ibis/backends/sql/compilers/postgres.py index 9f8050c4a62d..78338c9a9905 100644 --- a/ibis/backends/sql/compilers/postgres.py +++ b/ibis/backends/sql/compilers/postgres.py @@ -670,3 +670,33 @@ def visit_TableUnnest( join_type="CROSS" if not keep_empty else "LEFT", ) ) + + def _unnest(self, expression, *, as_): + alias = sge.TableAlias(columns=[sg.to_identifier(as_)]) + return sge.Unnest(expressions=[expression], 
alias=alias) + + def _array_reduction(self, *, arg, reduction): + name = sg.to_identifier(gen_name(f"pg_arr_{reduction}")) + return ( + sg.select(self.f[reduction](name)) + .from_(self._unnest(arg, as_=name)) + .subquery() + ) + + def visit_ArrayMin(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="min") + + def visit_ArrayMax(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="max") + + def visit_ArraySum(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="sum") + + def visit_ArrayMean(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="avg") + + def visit_ArrayAny(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="bool_or") + + def visit_ArrayAll(self, op, *, arg): + return self._array_reduction(arg=arg, reduction="bool_and") diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index 7bbcc486059c..c91d5ab4d26a 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -2,6 +2,7 @@ import calendar import itertools +import operator import re import sqlglot as sg @@ -68,6 +69,10 @@ class PySparkCompiler(SQLGlotCompiler): ops.ArrayRemove: "array_remove", ops.ArraySort: "array_sort", ops.ArrayUnion: "array_union", + ops.ArrayMin: "array_min", + ops.ArrayMax: "array_max", + ops.ArrayAll: "array_min", + ops.ArrayAny: "array_max", ops.EndsWith: "endswith", ops.Hash: "hash", ops.Log10: "log10", @@ -589,3 +594,42 @@ def _format_window_interval(self, expression): this = expression.this.this # avoid quoting the interval as a string literal return f"{this}{unit}" + + def _array_reduction(self, *, dtype, arg, output): + quoted = self.quoted + dot = lambda a, f: sge.Dot.build((a, sge.to_identifier(f, quoted=quoted))) + state_dtype = dt.Struct({"sum": dtype, "count": dt.int64}) + initial_state = self.cast( + sge.Struct.from_arg_list([sge.convert(0), sge.convert(0)]), state_dtype + ) + + s = 
sg.to_identifier("s", quoted=quoted) + x = sg.to_identifier("x", quoted=quoted) + + s_sum = dot(s, "sum") + s_count = dot(s, "count") + + input_fn_body = self.cast( + sge.Struct.from_arg_list( + [ + self.f.coalesce(x, 0) + s_sum, + s_count + self.if_(x.is_(sg.not_(NULL)), 1, 0), + ] + ), + state_dtype, + ) + input_fn = sge.Lambda(this=input_fn_body, expressions=[s, x]) + + output_fn_body = self.if_(s_count > 0, output(s_sum, s_count), NULL) + return self.f.aggregate( + arg, + initial_state, + input_fn, + sge.Lambda(this=output_fn_body, expressions=[s]), + ) + + def visit_ArraySum(self, op, *, arg): + return self._array_reduction(dtype=op.dtype, arg=arg, output=lambda sum, _: sum) + + def visit_ArrayMean(self, op, *, arg): + return self._array_reduction(dtype=op.dtype, arg=arg, output=operator.truediv) diff --git a/ibis/backends/sql/compilers/snowflake.py b/ibis/backends/sql/compilers/snowflake.py index c68c677682f3..c651d9c9d816 100644 --- a/ibis/backends/sql/compilers/snowflake.py +++ b/ibis/backends/sql/compilers/snowflake.py @@ -727,3 +727,21 @@ def visit_TableUnnest( .from_(parent) .join(unnest, join_type="CROSS" if not keep_empty else "LEFT") ) + + def visit_ArrayMin(self, op, *, arg): + return self.cast(self.f.array_min(self.f.array_compact(arg)), op.dtype) + + def visit_ArrayMax(self, op, *, arg): + return self.cast(self.f.array_max(self.f.array_compact(arg)), op.dtype) + + def visit_ArrayAny(self, op, *, arg): + return self.f.udf.array_any(arg) + + def visit_ArrayAll(self, op, *, arg): + return self.f.udf.array_all(arg) + + def visit_ArraySum(self, op, *, arg): + return self.cast(self.f.udf.array_sum(arg), op.dtype) + + def visit_ArrayMean(self, op, *, arg): + return self.cast(self.f.udf.array_avg(arg), op.dtype) diff --git a/ibis/backends/sql/compilers/trino.py b/ibis/backends/sql/compilers/trino.py index f15d57ee7104..6d87c7199fca 100644 --- a/ibis/backends/sql/compilers/trino.py +++ b/ibis/backends/sql/compilers/trino.py @@ -1,6 +1,7 @@ from __future__ 
import annotations import math +import operator from functools import partial, reduce import sqlglot as sg @@ -10,7 +11,14 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.sql.compilers.base import FALSE, NULL, STAR, AggGen, SQLGlotCompiler +from ibis.backends.sql.compilers.base import ( + FALSE, + NULL, + STAR, + TRUE, + AggGen, + SQLGlotCompiler, +) from ibis.backends.sql.datatypes import TrinoType from ibis.backends.sql.dialects import Trino from ibis.backends.sql.rewrites import ( @@ -564,7 +572,72 @@ def visit_TableUnnest( .from_(parent) .join( unnest, - on=None if not keep_empty else sge.convert(True), + on=None if not keep_empty else TRUE, join_type="CROSS" if not keep_empty else "LEFT", ) ) + + def visit_ArrayAny(self, op, *, arg): + x = sg.to_identifier("x", quoted=self.quoted) + identity = sge.Lambda(this=x, expressions=[x]) + is_not_null = sge.Lambda(this=x.is_(sg.not_(NULL)), expressions=[x]) + return self.f.any_match( + self.f.nullif(self.f.filter(arg, is_not_null), self.f.array()), identity + ) + + def visit_ArrayAll(self, op, *, arg): + x = sg.to_identifier("x", quoted=self.quoted) + identity = sge.Lambda(this=x, expressions=[x]) + is_not_null = sge.Lambda(this=x.is_(sg.not_(NULL)), expressions=[x]) + return self.f.all_match( + self.f.nullif(self.f.filter(arg, is_not_null), self.f.array()), identity + ) + + def visit_ArrayMin(self, op, *, arg): + x = sg.to_identifier("x", quoted=self.quoted) + func = sge.Lambda(this=x.is_(sg.not_(NULL)), expressions=[x]) + return self.f.array_min(self.f.filter(arg, func)) + + def visit_ArrayMax(self, op, *, arg): + x = sg.to_identifier("x", quoted=self.quoted) + func = sge.Lambda(this=x.is_(sg.not_(NULL)), expressions=[x]) + return self.f.array_max(self.f.filter(arg, func)) + + def visit_ArraySumAgg(self, op, *, arg, output): + quoted = self.quoted + dot = lambda a, f: sge.Dot.build((a, sge.to_identifier(f, quoted=quoted))) + state_dtype = 
dt.Struct({"sum": op.dtype, "count": dt.int64}) + initial_state = self.cast( + sge.Struct.from_arg_list([sge.convert(0), sge.convert(0)]), state_dtype + ) + + s = sg.to_identifier("s", quoted=quoted) + x = sg.to_identifier("x", quoted=quoted) + + s_sum = dot(s, "sum") + s_count = dot(s, "count") + + input_fn_body = self.cast( + sge.Struct.from_arg_list( + [ + self.f.coalesce(x, 0) + s_sum, + s_count + self.if_(x.is_(sg.not_(NULL)), 1, 0), + ] + ), + state_dtype, + ) + input_fn = sge.Lambda(this=input_fn_body, expressions=[s, x]) + + output_fn_body = self.if_(s_count > 0, output(s_sum, s_count), NULL) + return self.f.reduce( + arg, + initial_state, + input_fn, + sge.Lambda(this=output_fn_body, expressions=[s]), + ) + + def visit_ArraySum(self, op, *, arg): + return self.visit_ArraySumAgg(op, arg=arg, output=lambda sum, _: sum) + + def visit_ArrayMean(self, op, *, arg): + return self.visit_ArraySumAgg(op, arg=arg, output=operator.truediv) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index bd99a0c43a77..db69c27b5a33 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -1,8 +1,9 @@ from __future__ import annotations -import functools +import statistics from collections import Counter from datetime import datetime +from functools import partial import numpy as np import pandas as pd @@ -453,11 +454,7 @@ def test_array_slice(backend, start, stop): ) @pytest.mark.parametrize( "func", - [ - lambda x: x + 1, - functools.partial(lambda x, y: x + y, y=1), - ibis._ + 1, - ], + [lambda x: x + 1, partial(lambda x, y: x + y, y=1), ibis._ + 1], ids=["lambda", "partial", "deferred"], ) @pytest.mark.notimpl( @@ -515,11 +512,7 @@ def test_array_map(con, input, output, func): ) @pytest.mark.parametrize( "predicate", - [ - lambda x: x > 1, - functools.partial(lambda x, y: x > y, y=1), - ibis._ > 1, - ], + [lambda x: x > 1, partial(lambda x, y: x > y, y=1), ibis._ > 1], ids=["lambda", "partial", "deferred"], ) 
def test_array_filter(con, input, output, predicate): @@ -1482,3 +1475,95 @@ def test_table_unnest_array_of_struct_of_array(con): result = con.execute(expr) expected = pd.DataFrame({"x": [1, 1, 1, 2, 2, 2, 3, 3, 3]}) tm.assert_frame_equal(result, expected) + + +notimpl_aggs = pytest.mark.notimpl( + ["datafusion", "flink", "pandas", "dask"], raises=com.OperationNotDefinedError +) + + +def _agg_with_nulls(agg, x): + if x is None: + return None + x = [y for y in x if not pd.isna(y)] + if not x: + return None + return agg(x) + + +@pytest.mark.parametrize( + ("agg", "baseline_func"), + [ + (ir.ArrayValue.sums, lambda x: _agg_with_nulls(sum, x)), + (ir.ArrayValue.mins, lambda x: _agg_with_nulls(min, x)), + (ir.ArrayValue.maxs, lambda x: _agg_with_nulls(max, x)), + (ir.ArrayValue.means, lambda x: _agg_with_nulls(statistics.mean, x)), + ], + ids=["sums", "mins", "maxs", "means"], +) +@notimpl_aggs +@pytest.mark.parametrize( + "data", + [ + param( + [[None, 6], [None]], + id="nulls", + marks=[ + pytest.mark.notyet( + ["bigquery"], + raises=GoogleBadRequest, + reason="bigquery doesn't allow arrays with nulls", + ) + ], + ), + param([[1, 2, 3], [6], [], None], id="no-nulls"), + ], +) +def test_array_agg_numeric(con, data, agg, baseline_func): + t = ibis.memtable({"x": data, "id": range(len(data))}) + t = t.mutate(y=agg(t.x)) + assert t.y.type().is_numeric() + # sort so debugging is easier + df = con.to_pandas(t.order_by("id")) + result = df.y.tolist() + result = [x if pd.notna(x) else None for x in result] + expected = [baseline_func(x) for x in df.x] + assert result == expected + + +@pytest.mark.parametrize( + ("agg", "baseline_func"), + [ + (ir.ArrayValue.anys, partial(_agg_with_nulls, any)), + (ir.ArrayValue.alls, partial(_agg_with_nulls, all)), + ], + ids=["anys", "alls"], +) +@pytest.mark.parametrize( + "data", + [ + param( + [[True, None], [False, None], [None]], + marks=[ + pytest.mark.notyet( + ["bigquery"], + raises=GoogleBadRequest, + reason="bigquery doesn't allow 
arrays with nulls", + ) + ], + id="nulls", + ), + param([[True, False], [True], [False], [], None], id="no-nulls"), + ], +) +@notimpl_aggs +def test_array_agg_bool(con, data, agg, baseline_func): + t = ibis.memtable({"x": data, "id": range(len(data))}) + t = t.mutate(y=agg(t.x)) + assert t.y.type().is_boolean() + # sort so debugging is easier + df = con.to_pandas(t.order_by("id")) + result = df.y.tolist() + result = [x if pd.notna(x) else None for x in result] + expected = [baseline_func(x) for x in df.x] + assert result == expected diff --git a/ibis/expr/operations/arrays.py b/ibis/expr/operations/arrays.py index a297b9322067..191cfff526cd 100644 --- a/ibis/expr/operations/arrays.py +++ b/ibis/expr/operations/arrays.py @@ -266,3 +266,60 @@ class TimestampRange(Range): start: Value[dt.Timestamp] stop: Value[dt.Timestamp] step: Value[dt.Interval] + + +class ArrayAgg(Value): + arg: Value[dt.Array] + shape = rlz.shape_like("args") + + @attribute + def dtype(self) -> dt.DataType: + return self.arg.dtype.value_type + + +@public +class ArrayMin(ArrayAgg): + """Compute the minimum value of an array.""" + + +@public +class ArrayMax(ArrayAgg): + """Compute the maximum value of an array.""" + + +# in duckdb summing an array of ints leads to an int, but for other backends +# it might lead to a float?? 
+@public +class ArraySum(ArrayAgg): + """Compute the sum of an array.""" + + arg: Value[dt.Array[dt.Numeric]] + + +@public +class ArrayMean(ArrayAgg): + """Compute the average of an array.""" + + arg: Value[dt.Array[dt.Numeric]] + + @attribute + def dtype(self) -> dt.DataType: + dtype = self.arg.dtype.value_type + if dtype.is_floating() or dtype.is_integer(): + return dt.float64 + # do nothing for decimal types + return dtype + + +@public +class ArrayAny(ArrayAgg): + """Compute whether any array element is true.""" + + arg: Value[dt.Array[dt.Boolean]] + + +@public +class ArrayAll(ArrayAgg): + """Compute whether all array elements are true.""" + + arg: Value[dt.Array[dt.Boolean]] diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index 092341984ecd..edb439ec0e30 100644 --- a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -1059,6 +1059,243 @@ def flatten(self) -> ir.ArrayValue: """ return ops.ArrayFlatten(self).to_expr() + def anys(self) -> ir.BooleanValue: + """Return whether any element in the array is true. + + Returns NULL if the array is empty or contains only NULLs. + + See Also + -------- + [`BooleanColumn.any`](./expression-numeric.qmd#ibis.expr.types.logical.BooleanColumn.any) + + Returns + ------- + BooleanValue + Whether any element in the array is true + + Examples + -------- + >>> import ibis + >>> ibis.options.interactive = True + >>> t = ibis.memtable( + ... { + ... "arr": [ + ... [True, False], + ... [False], + ... [True], + ... [None, False], + ... [None, True], + ... [None], + ... [], + ... None, + ... ] + ... } + ... 
) + >>> t.mutate(x=t.arr.anys()) + ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓ + ┃ arr ┃ x ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩ + │ array │ boolean │ + ├──────────────────────┼─────────┤ + │ [True, False] │ True │ + │ [False] │ False │ + │ [True] │ True │ + │ [None, False] │ False │ + │ [None, True] │ True │ + │ [None] │ NULL │ + │ [] │ NULL │ + │ NULL │ NULL │ + └──────────────────────┴─────────┘ + """ + return ops.ArrayAny(self).to_expr() + + def alls(self) -> ir.BooleanValue: + """Return whether all elements (ignoring nulls) in the array are true. + + Returns NULL if the array is empty or contains only NULLs. + + See Also + -------- + [`BooleanColumn.all`](./expression-numeric.qmd#ibis.expr.types.logical.BooleanColumn.all) + + Returns + ------- + BooleanValue + Whether all elements (ignoring nulls) in the array are true. + + Examples + -------- + >>> import ibis + >>> ibis.options.interactive = True + >>> t = ibis.memtable( + ... { + ... "id": range(8), + ... "arr": [ + ... [True, False], + ... [False], + ... [True], + ... [None, False], + ... [None, True], + ... [None], + ... [], + ... None, + ... ], + ... } + ... ) + >>> t.mutate(x=t.arr.alls()).order_by("id") + ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓ + ┃ id ┃ arr ┃ x ┃ + ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩ + │ int64 │ array │ boolean │ + ├───────┼──────────────────────┼─────────┤ + │ 0 │ [True, False] │ False │ + │ 1 │ [False] │ False │ + │ 2 │ [True] │ True │ + │ 3 │ [None, False] │ False │ + │ 4 │ [None, True] │ True │ + │ 5 │ [None] │ NULL │ + │ 6 │ [] │ NULL │ + │ 7 │ NULL │ NULL │ + └───────┴──────────────────────┴─────────┘ + """ + return ops.ArrayAll(self).to_expr() + + def mins(self) -> ir.NumericValue: + """Return the minimum value in the array. + + Returns NULL if the array is empty or contains only NULLs. 
+ + See Also + -------- + [`Column.min`](./expression-generic.qmd#ibis.expr.types.generic.Column.min) + + Returns + ------- + Value + Minimum value in the array + + Examples + -------- + >>> import ibis + >>> ibis.options.interactive = True + >>> t = ibis.memtable({"arr": [[1, 2, 3], [None, 6], [None], [], None]}) + >>> t.mutate(x=t.arr.mins()) + ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓ + ┃ arr ┃ x ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩ + │ array │ int64 │ + ├──────────────────────┼───────┤ + │ [1, 2, ... +1] │ 1 │ + │ [None, 6] │ 6 │ + │ [None] │ NULL │ + │ [] │ NULL │ + │ NULL │ NULL │ + └──────────────────────┴───────┘ + """ + return ops.ArrayMin(self).to_expr() + + def maxs(self) -> ir.NumericValue: + """Return the maximum value in the array. + + Returns NULL if the array is empty or contains only NULLs. + + See Also + -------- + [`Column.max`](./expression-generic.qmd#ibis.expr.types.generic.Column.max) + + Returns + ------- + Value + Maximum value in the array + + Examples + -------- + >>> import ibis + >>> ibis.options.interactive = True + >>> t = ibis.memtable({"arr": [[1, 2, 3], [None, 6], [None], [], None]}) + >>> t.mutate(x=t.arr.maxs()) + ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓ + ┃ arr ┃ x ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩ + │ array │ int64 │ + ├──────────────────────┼───────┤ + │ [1, 2, ... +1] │ 3 │ + │ [None, 6] │ 6 │ + │ [None] │ NULL │ + │ [] │ NULL │ + │ NULL │ NULL │ + └──────────────────────┴───────┘ + """ + return ops.ArrayMax(self).to_expr() + + def sums(self) -> ir.NumericValue: + """Return the sum of the values in the array. + + Returns NULL if the array is empty or contains only NULLs. 
+ + See Also + -------- + [`NumericColumn.sum`](./expression-numeric.qmd#ibis.expr.types.numeric.NumericColumn.sum) + + Returns + ------- + Value + Sum of the values in the array + + Examples + -------- + >>> import ibis + >>> ibis.options.interactive = True + >>> t = ibis.memtable({"arr": [[1, 2, 3], [None, 6], [None], [], None]}) + >>> t.mutate(x=t.arr.sums()) + ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓ + ┃ arr ┃ x ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩ + │ array │ int64 │ + ├──────────────────────┼───────┤ + │ [1, 2, ... +1] │ 6 │ + │ [None, 6] │ 6 │ + │ [None] │ NULL │ + │ [] │ NULL │ + │ NULL │ NULL │ + └──────────────────────┴───────┘ + """ + return ops.ArraySum(self).to_expr() + + def means(self) -> ir.FloatingValue: + """Return the mean of the values in the array. + + Returns NULL if the array is empty or contains only NULLs. + + See Also + -------- + [`NumericColumn.mean`](./expression-numeric.qmd#ibis.expr.types.numeric.NumericColumn.mean) + + Returns + ------- + Value + Mean of the values in the array + + Examples + -------- + >>> import ibis + >>> ibis.options.interactive = True + >>> t = ibis.memtable({"arr": [[1, 2, 3], [None, 6], [None], [], None]}) + >>> t.mutate(x=t.arr.means()) + ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓ + ┃ arr ┃ x ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩ + │ array │ float64 │ + ├──────────────────────┼─────────┤ + │ [1, 2, ... +1] │ 2.0 │ + │ [None, 6] │ 6.0 │ + │ [None] │ NULL │ + │ [] │ NULL │ + │ NULL │ NULL │ + └──────────────────────┴─────────┘ + """ + return ops.ArrayMean(self).to_expr() + @public class ArrayScalar(Scalar, ArrayValue):