From 899dce1b92d08987b3cfbcb29b31729f0328df5f Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 22 Jan 2024 05:43:43 -0500 Subject: [PATCH] fix(datatypes): ensure that array construction supports literals and infers their shape from its inputs (#8049) We were previously returning `ArrayColumn` from `ibis.array` when any inputs were expressions regardless of their shape. This PR renames `ArrayColumn` -> `Array` and uses the input arguments shapes to determine the output array shape. Fixes #8022. --------- Co-authored-by: Nick Crews --- ibis/backends/bigquery/registry.py | 4 +- ibis/backends/clickhouse/compiler/values.py | 6 +- ibis/backends/dask/execution/arrays.py | 22 ++++++- ibis/backends/datafusion/compiler/values.py | 6 +- ibis/backends/duckdb/registry.py | 4 +- ibis/backends/pandas/execution/arrays.py | 39 ++++++++---- ibis/backends/pandas/execution/generic.py | 22 +++++++ ibis/backends/polars/compiler.py | 4 +- ibis/backends/postgres/registry.py | 2 +- ibis/backends/pyspark/compiler.py | 4 +- ibis/backends/snowflake/registry.py | 4 +- ibis/backends/tests/test_array.py | 67 ++++++++++++-------- ibis/backends/tests/test_map.py | 2 +- ibis/backends/tests/test_param.py | 4 +- ibis/backends/tests/test_sql.py | 9 +-- ibis/backends/trino/registry.py | 6 +- ibis/expr/operations/arrays.py | 10 +-- ibis/expr/tests/test_format.py | 2 +- ibis/expr/types/arrays.py | 68 ++++++--------------- 19 files changed, 159 insertions(+), 126 deletions(-) diff --git a/ibis/backends/bigquery/registry.py b/ibis/backends/bigquery/registry.py index 7911f9b87555..7afc889d9ac8 100644 --- a/ibis/backends/bigquery/registry.py +++ b/ibis/backends/bigquery/registry.py @@ -129,7 +129,7 @@ def _array_concat(translator, op): def _array_column(translator, op): - return "[{}]".format(", ".join(map(translator.translate, op.cols))) + return "[{}]".format(", ".join(map(translator.translate, op.exprs))) def _array_index(translator, op): @@ -912,7 +912,7 @@ def _timestamp_range(translator, op): ops.StructColumn: _struct_column, ops.ArrayCollect: _array_agg, ops.ArrayConcat: _array_concat, - ops.ArrayColumn: _array_column, + ops.Array: _array_column, ops.ArrayIndex: _array_index, ops.ArrayLength: unary("ARRAY_LENGTH"), ops.ArrayRepeat: _array_repeat, diff --git a/ibis/backends/clickhouse/compiler/values.py b/ibis/backends/clickhouse/compiler/values.py index 3a3b479dbfb1..8486a6421a09 100644 --- a/ibis/backends/clickhouse/compiler/values.py +++ b/ibis/backends/clickhouse/compiler/values.py @@ -554,9 +554,9 @@ def _translate(op, *, arg, where, **_): return _translate -@translate_val.register(ops.ArrayColumn) -def _array_column(op, *, cols, **_): - return F.array(*cols) +@translate_val.register(ops.Array) +def _array_column(op, *, exprs, **_): + return F.array(*exprs) @translate_val.register(ops.StructColumn) diff --git a/ibis/backends/dask/execution/arrays.py b/ibis/backends/dask/execution/arrays.py index 85e94fbde13e..ef3dd7889298 100644 --- a/ibis/backends/dask/execution/arrays.py +++ b/ibis/backends/dask/execution/arrays.py @@ -6,6 +6,7 @@ import dask.dataframe as dd import dask.dataframe.groupby as ddgb import numpy as np +import pandas as pd import ibis.expr.operations as ops from ibis.backends.dask.core import execute @@ -34,10 +35,25 @@ ) -@execute_node.register(ops.ArrayColumn, tuple) +@execute_node.register(ops.Array, tuple) def execute_array_column(op, cols, **kwargs): - cols = [execute(arg, **kwargs) for arg in cols] - df = dd.concat(cols, axis=1) + vals = [execute(arg, **kwargs) for arg in cols] + + length = next((len(v) for v in vals if isinstance(v, dd.Series)), None) + if length is None: + return vals + + n_partitions = next((v.npartitions for v in vals if isinstance(v, dd.Series)), None) + + def ensure_series(v): + if isinstance(v, dd.Series): + return v + else: + return dd.from_pandas(pd.Series([v] * length), npartitions=n_partitions) + + # dd.concat() can only handle array-likes. + # If we're given a scalar, we need to broadcast it as a Series. + df = dd.concat([ensure_series(v) for v in vals], axis=1) return df.apply( lambda row: np.array(row, dtype=object), axis=1, meta=(None, "object") ) diff --git a/ibis/backends/datafusion/compiler/values.py b/ibis/backends/datafusion/compiler/values.py index 33cfe78f353c..60f75d66ac0f 100644 --- a/ibis/backends/datafusion/compiler/values.py +++ b/ibis/backends/datafusion/compiler/values.py @@ -733,9 +733,9 @@ def _not_null(op, *, arg, **_): return sg.not_(arg.is_(NULL)) -@translate_val.register(ops.ArrayColumn) -def array_column(op, *, cols, **_): - return F.make_array(*cols) +@translate_val.register(ops.Array) +def array_column(op, *, exprs, **_): + return F.make_array(*exprs) @translate_val.register(ops.ArrayRepeat) diff --git a/ibis/backends/duckdb/registry.py b/ibis/backends/duckdb/registry.py index df5d423f9682..49b23b978e0d 100644 --- a/ibis/backends/duckdb/registry.py +++ b/ibis/backends/duckdb/registry.py @@ -399,9 +399,9 @@ def _array_remove(t, op): operation_registry.update( { - ops.ArrayColumn: ( + ops.Array: ( lambda t, op: sa.cast( - sa.func.list_value(*map(t.translate, op.cols)), + sa.func.list_value(*map(t.translate, op.exprs)), t.get_sqla_type(op.dtype), ) ), diff --git a/ibis/backends/pandas/execution/arrays.py b/ibis/backends/pandas/execution/arrays.py index 8c5119610898..20461f022241 100644 --- a/ibis/backends/pandas/execution/arrays.py +++ b/ibis/backends/pandas/execution/arrays.py @@ -17,10 +17,23 @@ from collections.abc import Collection -@execute_node.register(ops.ArrayColumn, tuple) -def execute_array_column(op, cols, **kwargs): - cols = [execute(arg, **kwargs) for arg in cols] - df = pd.concat(cols, axis=1) +@execute_node.register(ops.Array, tuple) +def execute_array(op, cols, **kwargs): + vals = [execute(arg, **kwargs) for arg in cols] + length = next((len(v) for v in vals if isinstance(v, pd.Series)), None) + + if length is None: + return vals + + def ensure_series(v): + if isinstance(v, pd.Series): + return v + else: + return pd.Series(v, index=range(length)) + + # pd.concat() can only handle array-likes. + # If we're given a scalar, we need to broadcast it as a Series. + df = pd.concat([ensure_series(v) for v in vals], axis=1) return df.apply(lambda row: np.array(row, dtype=object), axis=1) @@ -29,7 +42,7 @@ def execute_array_length(op, data, **kwargs): return data.apply(len) -@execute_node.register(ops.ArrayLength, np.ndarray) +@execute_node.register(ops.ArrayLength, (list, np.ndarray)) def execute_array_length_scalar(op, data, **kwargs): return len(data) @@ -39,7 +52,7 @@ def execute_array_slice(op, data, start, stop, **kwargs): return data.apply(operator.itemgetter(slice(start, stop))) -@execute_node.register(ops.ArraySlice, np.ndarray, int, (int, type(None))) +@execute_node.register(ops.ArraySlice, (list, np.ndarray), int, (int, type(None))) def execute_array_slice_scalar(op, data, start, stop, **kwargs): return data[start:stop] @@ -53,7 +66,7 @@ def execute_array_index(op, data, index, **kwargs): ) -@execute_node.register(ops.ArrayIndex, np.ndarray, int) +@execute_node.register(ops.ArrayIndex, (list, np.ndarray), int) def execute_array_index_scalar(op, data, index, **kwargs): try: return data[index] @@ -61,7 +74,7 @@ def execute_array_index_scalar(op, data, index, **kwargs): return None -@execute_node.register(ops.ArrayContains, np.ndarray, object) +@execute_node.register(ops.ArrayContains, (list, np.ndarray), object) def execute_node_contains_value_array(op, haystack, needle, **kwargs): return needle in haystack @@ -91,7 +104,7 @@ def execute_array_concat_series(op, first, second, *args, **kwargs): @execute_node.register( - ops.ArrayConcat, np.ndarray, pd.Series, [(pd.Series, np.ndarray)] + ops.ArrayConcat, (list, np.ndarray), pd.Series, [(pd.Series, list, np.ndarray)] ) def execute_array_concat_mixed_left(op, left, right, *args, **kwargs): # ArrayConcat given a column (pd.Series) and a scalar (np.ndarray). @@ -102,7 +115,7 @@ def execute_array_concat_mixed_left(op, left, right, *args, **kwargs): @execute_node.register( - ops.ArrayConcat, pd.Series, np.ndarray, [(pd.Series, np.ndarray)] + ops.ArrayConcat, pd.Series, (list, np.ndarray), [(pd.Series, list, np.ndarray)] ) def execute_array_concat_mixed_right(op, left, right, *args, **kwargs): # Broadcast `right` to the length of `left` @@ -110,7 +123,9 @@ def execute_array_concat_mixed_right(op, left, right, *args, **kwargs): return _concat_iterables_to_series(left, right) -@execute_node.register(ops.ArrayConcat, np.ndarray, np.ndarray, [np.ndarray]) +@execute_node.register( + ops.ArrayConcat, (list, np.ndarray), (list, np.ndarray), [(list, np.ndarray)] +) def execute_array_concat_scalar(op, left, right, *args, **kwargs): return np.concatenate([left, right, *args]) @@ -122,7 +137,7 @@ def execute_array_repeat(op, data, n, **kwargs): return pd.Series(np.tile(arr, n) for arr in data) -@execute_node.register(ops.ArrayRepeat, np.ndarray, int) +@execute_node.register(ops.ArrayRepeat, (list, np.ndarray), int) def execute_array_repeat_scalar(op, data, n, **kwargs): # Negative n will be treated as 0 (repeat will produce empty array) return np.tile(data, max(n, 0)) diff --git a/ibis/backends/pandas/execution/generic.py b/ibis/backends/pandas/execution/generic.py index 2e30dd33c6b4..7c8b53cc2f79 100644 --- a/ibis/backends/pandas/execution/generic.py +++ b/ibis/backends/pandas/execution/generic.py @@ -145,6 +145,28 @@ def cast_to_array(array, numpy_type=numpy_type): return data.map(cast_to_array) +@execute_node.register(ops.Cast, list, dt.Array) +def execute_cast_list_array(op, data, type, **kwargs): + value_type = type.value_type + numpy_type = constants.IBIS_TYPE_TO_PANDAS_TYPE.get(value_type, None) + if numpy_type is None: + raise ValueError( + "Array value type must be a primitive type " + "(e.g., number, string, or timestamp)" + ) + + def cast_to_array(array, numpy_type=numpy_type): + elems = [ + el if el is None else np.array(el, dtype=numpy_type).item() for el in array + ] + try: + return np.array(elems, dtype=numpy_type) + except TypeError: + return np.array(elems) + + return cast_to_array(data) + + @execute_node.register(ops.Cast, pd.Series, dt.Timestamp) def execute_cast_series_timestamp(op, data, type, **kwargs): arg = op.arg diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index 3d2168e613b5..a9927cd6ad1a 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -888,9 +888,9 @@ def array_concat(op, **kw): return result -@translate.register(ops.ArrayColumn) +@translate.register(ops.Array) def array_column(op, **kw): - cols = [translate(col, **kw) for col in op.cols] + cols = [translate(col, **kw) for col in op.exprs] return pl.concat_list(cols) diff --git a/ibis/backends/postgres/registry.py b/ibis/backends/postgres/registry.py index 961fb61741c5..1cc3e028a47e 100644 --- a/ibis/backends/postgres/registry.py +++ b/ibis/backends/postgres/registry.py @@ -750,7 +750,7 @@ def _range(t, op): # array operations ops.ArrayLength: unary(sa.func.cardinality), ops.ArrayCollect: reduction(sa.func.array_agg), - ops.ArrayColumn: (lambda t, op: pg.array(list(map(t.translate, op.cols)))), + ops.Array: (lambda t, op: pg.array(list(map(t.translate, op.exprs)))), ops.ArraySlice: _array_slice( index_converter=_neg_idx_to_pos, array_length=sa.func.cardinality, diff --git a/ibis/backends/pyspark/compiler.py b/ibis/backends/pyspark/compiler.py index 73387c99db06..47003a437fb4 100644 --- a/ibis/backends/pyspark/compiler.py +++ b/ibis/backends/pyspark/compiler.py @@ -1634,9 +1634,9 @@ def compile_interval_from_integer(t, op, **kwargs): # -------------------------- Array Operations ---------------------------- -@compiles(ops.ArrayColumn) +@compiles(ops.Array) def compile_array_column(t, op, **kwargs): - cols = [t.translate(col, **kwargs) for col in op.cols] + cols = [t.translate(col, **kwargs) for col in op.exprs] return F.array(cols) diff --git a/ibis/backends/snowflake/registry.py b/ibis/backends/snowflake/registry.py index 3b8b2926c7b1..7e97330d4481 100644 --- a/ibis/backends/snowflake/registry.py +++ b/ibis/backends/snowflake/registry.py @@ -457,9 +457,7 @@ def _timestamp_range(t, op): ops.ArrayConcat: varargs( lambda *args: functools.reduce(sa.func.array_cat, args) ), - ops.ArrayColumn: lambda t, op: sa.func.array_construct( - *map(t.translate, op.cols) - ), + ops.Array: lambda t, op: sa.func.array_construct(*map(t.translate, op.exprs)), ops.ArraySlice: _array_slice, ops.ArrayCollect: reduction( lambda arg: sa.func.array_agg( diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 8f1e2d39ed00..985d0911371b 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -1,6 +1,5 @@ from __future__ import annotations -import contextlib import functools from datetime import datetime @@ -15,13 +14,13 @@ import ibis import ibis.common.exceptions as com +import ibis.expr.datashape as ds import ibis.expr.datatypes as dt import ibis.expr.types as ir from ibis.backends.tests.errors import ( ClickHouseDatabaseError, GoogleBadRequest, PolarsComputeError, - Py4JJavaError, PySparkAnalysisException, ) @@ -42,29 +41,21 @@ @pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError) def test_array_column(backend, alltypes, df): - expr = ibis.array([alltypes["double_col"], alltypes["double_col"]]) + expr = ibis.array( + [alltypes["double_col"], alltypes["double_col"], 5.0, ibis.literal(6.0)] + ) assert isinstance(expr, ir.ArrayColumn) result = expr.execute() expected = df.apply( - lambda row: [row["double_col"], row["double_col"]], + lambda row: [row["double_col"], row["double_col"], 5.0, 6.0], axis=1, ) backend.assert_series_equal(result, expected, check_names=False) -ARRAY_BACKEND_TYPES = { - "clickhouse": "Array(Float64)", - "snowflake": "ARRAY", - "trino": "array(double)", - "bigquery": "ARRAY", - "duckdb": "DOUBLE[]", - "postgres": "numeric[]", - "flink": "ARRAY NOT NULL", -} - - -def test_array_scalar(con, backend): +@pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError) +def test_array_scalar(con): expr = ibis.array([1.0, 2.0, 3.0]) assert isinstance(expr, ir.ArrayScalar) @@ -73,10 +64,6 @@ def test_array_scalar(con, backend): assert np.array_equal(result, expected) - with contextlib.suppress(com.OperationNotDefinedError): - backend_name = backend.name() - assert con.execute(expr.typeof()) == ARRAY_BACKEND_TYPES[backend_name] - @pytest.mark.notimpl(["polars", "flink"], raises=com.OperationNotDefinedError) def test_array_repeat(con): @@ -327,7 +314,7 @@ def test_unnest_default_name(backend): array_types = backend.array_types df = array_types.execute() expr = ( - array_types.x.cast("!array") + ibis.array([1], type="!array") + array_types.x.cast("!array") + ibis.array([1]).cast("!array") ).unnest() assert expr.get_name().startswith("ArrayConcat(") @@ -1050,15 +1037,16 @@ def test_timestamp_range_zero_step(con, start, stop, step, tzinfo): assert list(result) == [] -@pytest.mark.notimpl(["flink"], raises=Py4JJavaError) -@pytest.mark.notimpl(["datafusion"], raises=Exception) +@pytest.mark.notimpl( + ["flink"], raises=AssertionError, reason="arrays not yet implemented" +) def test_repr_timestamp_array(con, monkeypatch): monkeypatch.setattr(ibis.options, "interactive", True) monkeypatch.setattr(ibis.options, "default_backend", con) assert ibis.options.interactive is True assert ibis.options.default_backend is con expr = ibis.array(pd.date_range("2010-01-01", "2010-01-03", freq="D").tolist()) - assert repr(expr) + assert "No translation rule" not in repr(expr) @pytest.mark.notyet( @@ -1070,3 +1058,34 @@ def test_unnest_range(con): result = con.execute(expr) expected = pd.DataFrame({"x": np.array([0, 1], dtype="int8"), "y": [1.0, 1.0]}) tm.assert_frame_equal(result, expected) + + +@pytest.mark.notyet(["flink"], raises=com.OperationNotDefinedError) +@pytest.mark.parametrize( + ("input", "expected"), + [ + param([1, ibis.literal(2)], [1, 2], id="int-int"), + param([1.0, ibis.literal(2)], [1.0, 2.0], id="float-int"), + param([1.0, ibis.literal(2.0)], [1.0, 2.0], id="float-float"), + param([1, ibis.literal(2.0)], [1.0, 2.0], id="int-float"), + param([ibis.literal(1), ibis.literal(2.0)], [1.0, 2.0], id="int-float-exprs"), + param( + [[1], ibis.literal([2])], + [[1], [2]], + id="array", + marks=[ + pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), + pytest.mark.broken( + ["polars"], + reason="expression input not supported with nested arrays", + raises=TypeError, + ), + ], + ), + ], +) +def test_array_literal_with_exprs(con, input, expected): + expr = ibis.array(input) + assert expr.op().shape == ds.scalar + result = list(con.execute(expr)) + assert result == expected diff --git a/ibis/backends/tests/test_map.py b/ibis/backends/tests/test_map.py index 19ec3e71fa21..8e9e97e2528d 100644 --- a/ibis/backends/tests/test_map.py +++ b/ibis/backends/tests/test_map.py @@ -236,7 +236,7 @@ def test_map_construct_dict(con, keys, values): @pytest.mark.notimpl( ["flink"], raises=exc.OperationNotDefinedError, - reason="No translation rule for ", + reason="No translation rule for ", ) def test_map_construct_array_column(con, alltypes, df): expr = ibis.map(ibis.array([alltypes.string_col]), ibis.array([alltypes.int_col])) diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index 452536197320..03b3cfddff05 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -60,9 +60,7 @@ def test_timestamp_accepts_date_literals(alltypes): assert expr.compile(params=params) is not None -@pytest.mark.notimpl( - ["dask", "impala", "pandas", "pyspark", "druid", "oracle", "exasol"] -) +@pytest.mark.notimpl(["impala", "pyspark", "druid", "oracle", "exasol"]) @pytest.mark.never( ["mysql", "sqlite", "mssql"], reason="backend will never implement array types" ) diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 21b91984f478..b635ca8e1d09 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -18,15 +18,10 @@ ibis.array([1]), marks=[ pytest.mark.never( - ["mysql", "mssql", "oracle"], - raises=sa.exc.CompileError, + ["mysql", "mssql", "oracle", "impala", "sqlite"], + raises=exc.OperationNotDefinedError, reason="arrays not supported in the backend", ), - pytest.mark.notyet( - ["impala", "sqlite"], - raises=NotImplementedError, - reason="backends hasn't implemented array literals", - ), ], id="array_literal", ) diff --git a/ibis/backends/trino/registry.py b/ibis/backends/trino/registry.py index 74bd99a7f69a..5c5f45658e3d 100644 --- a/ibis/backends/trino/registry.py +++ b/ibis/backends/trino/registry.py @@ -43,7 +43,7 @@ class make_array(FunctionElement): pass -@compiles(make_array, "trino") +@compiles(make_array, "default") def compile_make_array(element, compiler, **kw): return f"ARRAY[{compiler.process(element.clauses, **kw)}]" @@ -117,7 +117,7 @@ def _group_concat(t, op): def _array_column(t, op): args = ", ".join( str(t.translate(arg).compile(compile_kwargs={"literal_binds": True})) - for arg in op.cols + for arg in op.exprs ) return sa.literal_column(f"ARRAY[{args}]", type_=t.get_sqla_type(op.dtype)) @@ -431,7 +431,7 @@ def _range(t, op): ops.ArrayIndex: fixed_arity( lambda arg, index: sa.func.element_at(arg, index + 1), 2 ), - ops.ArrayColumn: _array_column, + ops.Array: _array_column, ops.ArrayRepeat: fixed_arity( lambda arg, times: sa.func.flatten(sa.func.repeat(arg, times)), 2 ), diff --git a/ibis/expr/operations/arrays.py b/ibis/expr/operations/arrays.py index c0e65b36272e..68ee711a2da6 100644 --- a/ibis/expr/operations/arrays.py +++ b/ibis/expr/operations/arrays.py @@ -13,14 +13,16 @@ @public -class ArrayColumn(Value): - cols: VarTuple[Value] +class Array(Value): + exprs: VarTuple[Value] - shape = ds.columnar + @attribute + def shape(self): + return rlz.highest_precedence_shape(self.exprs) @attribute def dtype(self): - return dt.Array(rlz.highest_precedence_dtype(self.cols)) + return dt.Array(rlz.highest_precedence_dtype(self.exprs)) @public diff --git a/ibis/expr/tests/test_format.py b/ibis/expr/tests/test_format.py index 86de3ecdeaeb..6ee6dbc42514 100644 --- a/ibis/expr/tests/test_format.py +++ b/ibis/expr/tests/test_format.py @@ -382,7 +382,7 @@ def test_format_literal(literal, typ, output): def test_format_dummy_table(snapshot): - t = ops.DummyTable([ibis.array([1], type="array").name("foo")]).to_expr() + t = ops.DummyTable([ibis.array([1]).cast("array").name("foo")]).to_expr() result = fmt(t) assert "DummyTable" in result diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index 8258b4278ecf..7cc58afe2ccf 100644 --- a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -1017,82 +1017,50 @@ def __getitem__(self, index: int | ir.IntegerValue | slice) -> ir.Column: @public @deferrable -def array(values: Iterable[V], type: str | dt.DataType | None = None) -> ArrayValue: +def array(values: Iterable[V]) -> ArrayValue: """Create an array expression. - If the input expressions are all column expressions, then the output will - be an `ArrayColumn`. The input columns will be concatenated row-wise to - produce each array in the output array column. Each array will have length - _n_, where _n_ is the number of input columns. All input columns should be - of the same datatype. - - If the input expressions are Python literals, then the output will be a - single `ArrayScalar` of length _n_, where _n_ is the number of input - values. This is equivalent to - - ```python - values = [1, 2, 3] - ibis.literal(values) - ``` - Parameters ---------- values An iterable of Ibis expressions or a list of Python literals - type - An instance of `ibis.expr.datatypes.DataType` or a string indicating - the ibis type of `value`. Returns ------- ArrayValue - An array column (if the inputs are column expressions), or an array - scalar (if the inputs are Python literals) Examples -------- - Create an array column from column expressions + Create an array from scalar values >>> import ibis >>> ibis.options.interactive = True + >>> ibis.array([1.0, None]) + [1.0, None] + + Create an array from column and scalar expressions + >>> t = ibis.memtable({"a": [1, 2, 3], "b": [4, 5, 6]}) - >>> ibis.array([t.a, t.b]) + >>> ibis.array([t.a, 42, ibis.literal(None)]) ┏━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ ArrayColumn() ┃ + ┃ Array() ┃ ┡━━━━━━━━━━━━━━━━━━━━━━┩ │ array │ ├──────────────────────┤ - │ [1, 4] │ - │ [2, 5] │ - │ [3, 6] │ + │ [1, 42, ... +1] │ + │ [2, 42, ... +1] │ + │ [3, 42, ... +1] │ └──────────────────────┘ - Create an array scalar from Python literals - - >>> ibis.array([1.0, 2.0, 3.0]) - [1.0, 2.0, ... +1] - - Mixing scalar and column expressions is allowed - - >>> ibis.array([t.a, 42]) + >>> ibis.array([t.a, 42 + ibis.literal(5)]) ┏━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ ArrayColumn() ┃ + ┃ Array() ┃ ┡━━━━━━━━━━━━━━━━━━━━━━┩ │ array │ ├──────────────────────┤ - │ [1, 42] │ - │ [2, 42] │ - │ [3, 42] │ + │ [1, 47] │ + │ [2, 47] │ + │ [3, 47] │ └──────────────────────┘ """ - if any(isinstance(value, Value) for value in values): - return ops.ArrayColumn(values).to_expr() - else: - try: - return literal(list(values), type=type) - except com.IbisTypeError as e: - raise com.IbisTypeError( - "Could not create an array scalar from the values provided " - "to `array`. Ensure that all input values have the same " - "Python type, or can be casted to a single Python type." - ) from e + return ops.Array(tuple(values)).to_expr()