deps: migrate to ibis-framework >= "7.1.0" #53

Merged
merged 24 commits into main from b301459557-ibis-prerelease
Dec 15, 2023
Commits (24)
8929fef
deps: migrate to `ibis-framework >= "7.0.0"`
tswast Sep 22, 2023
a4e995d
use dtype instead of output_dtype in custom ops
tswast Sep 22, 2023
8fd8fd4
adjust type annotations
tswast Sep 22, 2023
ba3c5c4
Merge remote-tracking branch 'origin/main' into b301459557-ibis-prere…
tswast Oct 13, 2023
f1ce176
Merge branch 'main' into b301459557-ibis-prerelease
tswast Nov 2, 2023
14fb7c8
Update noxfile.py
tswast Nov 13, 2023
ea8d3e5
Merge branch 'main' into b301459557-ibis-prerelease
tswast Nov 13, 2023
191df69
Merge remote-tracking branch 'origin/main' into b301459557-ibis-prere…
tswast Nov 13, 2023
b182453
update type annotations
tswast Nov 13, 2023
174e735
Merge remote-tracking branch 'origin/main' into b301459557-ibis-prere…
tswast Nov 27, 2023
0ab94cc
fix for deferred values
tswast Nov 27, 2023
4931923
fix prerelease
tswast Nov 27, 2023
9df0816
minimum 7.1.0
tswast Nov 27, 2023
71f4889
mypy
tswast Nov 27, 2023
6c64ec5
revert presubmit changes
tswast Nov 27, 2023
e37571c
fix minimum sqlglot
tswast Nov 27, 2023
fad36a8
fix custom op
tswast Nov 28, 2023
5770502
Merge remote-tracking branch 'origin/main' into b301459557-ibis-prere…
tswast Dec 14, 2023
c318b18
hack InMemoryTable formatter back in
tswast Dec 14, 2023
d3304b2
use ops module to avoid breaking changes if ops move around
tswast Dec 15, 2023
33bd2e0
workaround nullscalar issue
tswast Dec 15, 2023
c373dc0
update usage of percent_rank to explicitly order by the value
tswast Dec 15, 2023
41364b5
disable ibis prerelease tests for now
tswast Dec 15, 2023
7a8784c
fix unit_prerelease
tswast Dec 15, 2023
25 changes: 22 additions & 3 deletions bigframes/core/compile/compiled.py
@@ -21,6 +21,7 @@

import ibis
import ibis.backends.bigquery as ibis_bigquery
import ibis.common.deferred # type: ignore
import ibis.expr.datatypes as ibis_dtypes
import ibis.expr.types as ibis_types
import pandas
@@ -62,7 +63,16 @@ def __init__(
self._columns = tuple(columns)
# To allow for more efficient lookup by column name, create a
# dictionary mapping names to column values.
self._column_names = {column.get_name(): column for column in self._columns}
self._column_names = {

Review comment:
Apparently you are adding support for Deferred objects. Currently we have a couple of objects interpretable as a value expression besides the expressions themselves, e.g. column names or selectors, in addition to deferred objects. The issue is that ibis doesn't provide a clear, concise, and backward-compatible API function to bind those objects to a table. We have various protected functions and methods used in a way that is hard to follow.

Instead of providing a stable API for deferred, wouldn't it be better to have a stable function which converts any sort of ibis object to an expression (given a table) if possible, something like ibis.bind(obj, table)?

Asking because we have something similar under preparation, but could be a good idea to have a public variant of that: https://github.com/ibis-project/ibis/pull/7580/files#diff-4400549b2eaa4ca3a3107f13fa78210e37277659acfe2a1eb4129cfbfaa66531R105-R134

Author reply (tswast):

Instead of providing a stable API for deferred, wouldn't it be better to have a stable function which converts any sort of ibis object to an expression (given a table) if possible, something like ibis.bind(obj, table)?

Yes, we'd definitely use something like that here.

Honestly, I'm not 100% sure how we are ending up with deferred objects in the first place. It seems to be related to how we're building some of our windowed functions, but in all of the cases I know of there is an underlying table. I think we may just be hitting this bug: ibis-project/ibis#7631
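
For reference, here is a minimal sketch of how a Deferred gets bound to a table — the table and column names are made up, but the resolve() call is the same one the dict comprehension below relies on:

import ibis

t = ibis.table({"a": "int64"}, name="t")  # hypothetical table

expr = ibis._.a + 1        # a Deferred; it has no name until it is bound to a table
col = expr.resolve(t)      # bind it against the table -> a real column expression
print(col.get_name())      # now .get_name() works, which is what the mapping needs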

(
column.resolve(table)
# TODO(https://github.com/ibis-project/ibis/issues/7613): use
# public API to refer to Deferred type.
if isinstance(column, ibis.common.deferred.Deferred)
else column
).get_name(): column
for column in self._columns
}

@property
def columns(self) -> typing.Tuple[ibis_types.Value, ...]:
@@ -643,7 +653,16 @@ def __init__(

# To allow for more efficient lookup by column name, create a
# dictionary mapping names to column values.
self._column_names = {column.get_name(): column for column in self._columns}
self._column_names = {
(
column.resolve(table)
# TODO(https://github.com/ibis-project/ibis/issues/7613): use
# public API to refer to Deferred type.
if isinstance(column, ibis.common.deferred.Deferred)
else column
).get_name(): column
for column in self._columns
}
self._hidden_ordering_column_names = {
column.get_name(): column for column in self._hidden_ordering_columns
}
@@ -860,7 +879,7 @@ def project_window_op(
case_statement = ibis.case()
for clause in clauses:
case_statement = case_statement.when(clause[0], clause[1])
case_statement = case_statement.else_(window_op).end()
case_statement = case_statement.else_(window_op).end() # type: ignore
window_op = case_statement

result = self._set_or_replace_by_id(output_name or column_name, window_op)
6 changes: 5 additions & 1 deletion bigframes/core/reshape/__init__.py
@@ -18,6 +18,7 @@

import bigframes.constants as constants
import bigframes.core as core
import bigframes.core.ordering as order
import bigframes.core.utils as utils
import bigframes.dataframe
import bigframes.operations as ops
@@ -145,7 +146,10 @@ def qcut(
block, result = block.apply_window_op(
x._value_column,
agg_ops.QcutOp(q),
window_spec=core.WindowSpec(grouping_keys=(nullity_id,)),
window_spec=core.WindowSpec(
grouping_keys=(nullity_id,),
ordering=(order.OrderingColumnReference(x._value_column),),
),
)
block, result = block.apply_binary_op(
result, nullity_id, ops.partial_arg3(ops.where_op, None), result_label=label
70 changes: 59 additions & 11 deletions bigframes/operations/__init__.py
@@ -18,6 +18,7 @@
import typing

import ibis
import ibis.common.annotations
import ibis.common.exceptions
import ibis.expr.datatypes as ibis_dtypes
import ibis.expr.operations.generic
@@ -352,14 +353,23 @@ def _as_ibis(self, x: ibis_types.Value):
str_val = typing.cast(ibis_types.StringValue, x)

# SQL pad operations will truncate, we do not want to truncate though.
pad_length = ibis.greatest(str_val.length(), self._length)
pad_length = typing.cast(
ibis_types.IntegerValue, ibis.greatest(str_val.length(), self._length)
)
if self._side == "left":
return str_val.lpad(pad_length, self._fillchar)
elif self._side == "right":
return str_val.rpad(pad_length, self._fillchar)
else: # side == both
# Pad more on right side if can't pad both sides equally
lpad_amount = ((pad_length - str_val.length()) // 2) + str_val.length()
lpad_amount = typing.cast(
ibis_types.IntegerValue,
(
(pad_length - str_val.length())
// typing.cast(ibis_types.NumericValue, ibis.literal(2))
)
+ str_val.length(),
)
return str_val.lpad(lpad_amount, self._fillchar).rpad(
pad_length, self._fillchar
)
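
A quick worked example of the padding arithmetic above, with illustrative values (not taken from the PR):

# Suppose self._length == 5, self._fillchar == " ", and the input is "ab" (length 2).
# pad_length  = greatest(2, 5)      -> 5
# lpad_amount = (5 - 2) // 2 + 2    -> 3
# lpad("ab", 3, " ")   -> " ab"     (one space added on the left)
# rpad(" ab", 5, " ")  -> " ab  "   (two spaces added on the right)
# The leftover space lands on the right, as the comment above intends.
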
@@ -722,10 +732,29 @@ def ne_op(
return x != y


def _null_or_value(value: ibis_types.Value, where_value: ibis_types.BooleanValue):
return ibis.where(
where_value,
value,
ibis.null(),
)


def and_op(
x: ibis_types.Value,
y: ibis_types.Value,
):
# Workaround issue https://github.com/ibis-project/ibis/issues/7775 by
# implementing three-valued logic ourselves. For AND, when we encounter a
# NULL value, we only know when the result is FALSE, otherwise the result
# is unknown (NULL). See: truth table at
# https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic#AND,_OR
if isinstance(x, ibis_types.NullScalar):
return _null_or_value(y, y == ibis.literal(False))

if isinstance(y, ibis_types.NullScalar):
return _null_or_value(x, x == ibis.literal(False))

return typing.cast(ibis_types.BooleanValue, x) & typing.cast(
ibis_types.BooleanValue, y
)
@@ -735,6 +764,17 @@ def or_op(
x: ibis_types.Value,
y: ibis_types.Value,
):
# Workaround issue https://github.com/ibis-project/ibis/issues/7775 by
# implementing three-valued logic ourselves. For OR, when we encounter a
# NULL value, we only know when the result is TRUE, otherwise the result
# is unknown (NULL). See: truth table at
# https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic#AND,_OR
if isinstance(x, ibis_types.NullScalar):
return _null_or_value(y, y == ibis.literal(True))

if isinstance(y, ibis_types.NullScalar):
return _null_or_value(x, x == ibis.literal(True))

return typing.cast(ibis_types.BooleanValue, x) | typing.cast(
ibis_types.BooleanValue, y
)
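
To make the NULL handling above concrete, here is a rough sketch of what these branches compute when one operand is a NullScalar (the table and column are invented):

import ibis

t = ibis.table({"flag": "boolean"}, name="t")  # hypothetical

# AND with a NULL operand: only FALSE on the other side gives a known result.
#   NULL AND FALSE -> FALSE,  NULL AND TRUE -> NULL
null_and_flag = ibis.where(t.flag == ibis.literal(False), t.flag, ibis.null())

# OR with a NULL operand: only TRUE on the other side gives a known result.
#   NULL OR TRUE -> TRUE,  NULL OR FALSE -> NULL
null_or_flag = ibis.where(t.flag == ibis.literal(True), t.flag, ibis.null())
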
@@ -746,10 +786,16 @@ def add_op(
y: ibis_types.Value,
):
if isinstance(x, ibis_types.NullScalar) or isinstance(x, ibis_types.NullScalar):
return
return typing.cast(ibis_types.NumericValue, x) + typing.cast(
ibis_types.NumericValue, y
)
return ibis.null()
try:
# Could be string concatenation or numeric addition.
return x + y # type: ignore
except ibis.common.annotations.SignatureValidationError as exc:
left_type = bigframes.dtypes.ibis_dtype_to_bigframes_dtype(x.type())
right_type = bigframes.dtypes.ibis_dtype_to_bigframes_dtype(y.type())
raise TypeError(
f"Cannot add {repr(left_type)} and {repr(right_type)}. {constants.FEEDBACK_LINK}"
) from exc
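
A hedged usage sketch of what the new error path covers (the table and column names are placeholders):

import ibis

t = ibis.table({"s": "string", "i": "int64"}, name="t")  # hypothetical

t.i + t.i  # numeric addition goes through `+`
t.s + t.s  # so does string concatenation
# t.s + t.i would fail ibis's signature validation; add_op catches that
# and re-raises it as a TypeError naming the BigQuery DataFrames dtypes.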


@short_circuit_nulls()
@@ -1047,7 +1093,7 @@ def where_op(
replacement: ibis_types.Value,
) -> ibis_types.Value:
"""Returns x if y is true, otherwise returns z."""
return ibis.case().when(condition, original).else_(replacement).end()
return ibis.case().when(condition, original).else_(replacement).end() # type: ignore


def clip_op(
@@ -1060,7 +1106,7 @@
not isinstance(upper, ibis_types.NullScalar)
):
return (
ibis.case()
ibis.case() # type: ignore
.when(upper.isnull() | (original > upper), upper)
.else_(original)
.end()
@@ -1069,7 +1115,7 @@
upper, ibis_types.NullScalar
):
return (
ibis.case()
ibis.case() # type: ignore
.when(lower.isnull() | (original < lower), lower)
.else_(original)
.end()
@@ -1079,9 +1125,11 @@
):
return original
else:
# Note: Pandas has unchanged behavior when upper bound and lower bound are flipped. This implementation requires that lower_bound < upper_bound
# Note: Pandas has unchanged behavior when upper bound and lower bound
# are flipped.
# This implementation requires that lower_bound < upper_bound.
return (
ibis.case()
ibis.case() # type: ignore
.when(lower.isnull() | (original < lower), lower)
.when(upper.isnull() | (original > upper), upper)
.else_(original)
8 changes: 4 additions & 4 deletions bigframes/operations/aggregations.py
@@ -74,7 +74,7 @@ def _as_ibis(
# Will be null if all inputs are null. Pandas defaults to zero sum though.
bq_sum = _apply_window_if_present(column.sum(), window)
return (
ibis.case().when(bq_sum.isnull(), ibis_types.literal(0)).else_(bq_sum).end()
ibis.case().when(bq_sum.isnull(), ibis_types.literal(0)).else_(bq_sum).end() # type: ignore
)


@@ -167,7 +167,7 @@ def _as_ibis(
.else_(magnitude * pow(-1, negative_count_parity))
.end()
)
return float_result.cast(column.type())
return float_result.cast(column.type()) # type: ignore


class MaxOp(AggregateOp):
@@ -290,7 +290,7 @@ def _as_ibis(
dtypes.literal_to_ibis_scalar(bucket_n, force_dtype=Int64Dtype()),
)
out = out.else_(None)
return out.end()
return out.end() # type: ignore

@property
def skips_nulls(self):
@@ -482,7 +482,7 @@ def _map_to_literal(
original: ibis_types.Value, literal: ibis_types.Scalar
) -> ibis_types.Column:
# Hack required to perform aggregations on literals in ibis, even though bigquery will let you directly aggregate literals (eg. 'SELECT COUNT(1) from table1')
return ibis.ifelse(original.isnull(), literal, literal)
return ibis.ifelse(original.isnull(), literal, literal) # type: ignore


sum_op = SumOp()
9 changes: 3 additions & 6 deletions bigframes/remote_function.py
@@ -535,17 +535,14 @@ def remote_function_node(
"""Creates an Ibis node representing a remote function call."""

fields = {
name: rlz.value(type_) if type_ else rlz.any
name: rlz.ValueOf(None if type_ == "ANY TYPE" else type_)

Review comment:
Actually, we are using almost the same code internally; it could be worth exposing it in a more developer-friendly way once we understand the actual requirements. We can open an issue on our side to track this.

Author reply (tswast):
True. I was hoping to upstream this sooner, but now that I see https://ibis-project.org/reference/scalar-udfs#ibis.expr.operations.udf.scalar.builtin maybe I can migrate to that? I'll try that in a separate PR.

Author follow-up (tswast):
Started on this here: #277

I found a few issues already with UDF in the BQ backend. I'll get those documented soon.
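
For the record, a rough sketch of the builtin-UDF route mentioned above — the routine name and signature are made up, and this assumes the decorator can be pointed at an already-deployed BigQuery routine:

import ibis
from ibis import udf

@udf.scalar.builtin
def my_remote_add(x: int, y: int) -> int:  # hypothetical routine that already exists in BigQuery
    ...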

for name, type_ in zip(
ibis_signature.parameter_names, ibis_signature.input_types
)
}

try:
fields["output_type"] = rlz.shape_like("args", dtype=ibis_signature.output_type) # type: ignore
except TypeError:
fields["output_dtype"] = property(lambda _: ibis_signature.output_type)
fields["output_shape"] = rlz.shape_like("args")
fields["dtype"] = ibis_signature.output_type # type: ignore
fields["shape"] = rlz.shape_like("args")

node = type(routine_ref_to_string_for_query(routine_ref), (ops.ValueOp,), fields) # type: ignore

7 changes: 4 additions & 3 deletions bigframes/session/__init__.py
@@ -79,9 +79,9 @@
import bigframes.session.clients
import bigframes.version

# Even though the ibis.backends.bigquery.registry import is unused, it's needed
# Even though the ibis.backends.bigquery import is unused, it's needed
# to register new and replacement ops with the Ibis BigQuery backend.
import third_party.bigframes_vendored.ibis.backends.bigquery.registry # noqa
import third_party.bigframes_vendored.ibis.backends.bigquery # noqa
import third_party.bigframes_vendored.ibis.expr.operations as vendored_ibis_ops
import third_party.bigframes_vendored.pandas.io.gbq as third_party_pandas_gbq
import third_party.bigframes_vendored.pandas.io.parquet as third_party_pandas_parquet
@@ -873,8 +873,9 @@ def _read_pandas(
total_ordering_columns=frozenset([ordering_col]),
integer_encoding=IntegerEncoding(True, is_sequential=True),
)
table_expression = self.ibis_client.table(
table_expression = self.ibis_client.table( # type: ignore
load_table_destination.table_id,
# TODO: use "dataset_id" as the "schema"
database=f"{load_table_destination.project}.{load_table_destination.dataset_id}",
)

3 changes: 3 additions & 0 deletions mypy.ini
@@ -24,5 +24,8 @@ ignore_missing_imports = True
[mypy-pyarrow]
ignore_missing_imports = True

[mypy-ibis.*]
ignore_missing_imports = True

[mypy-ipywidgets]
ignore_missing_imports = True
26 changes: 11 additions & 15 deletions noxfile.py
@@ -524,23 +524,19 @@ def prerelease(session: nox.sessions.Session, tests_path):
)
already_installed.add("pandas")

# TODO(shobs):
# Commit https://github.com/ibis-project/ibis/commit/c20ba7feab6bdea6c299721310e04dbc10551cc2
# introduced breaking change that removed the following:
# ibis.expr.rules.column
# ibis.expr.rules.value
# ibis.expr.rules.any
# Let's exclude ibis head from prerelease install list for now. Instead, use
# a working ibis-framework version resolved via setup.by (currently resolves
# to version 6.2.0 due to version requirement "6.2.0,<7.0.0dev").
# We should enable the head back once bigframes support a version that
# includes the above commit.
# Ibis has introduced breaking changes. Let's exclude ibis head
# from prerelease install list for now. We should enable the head back
# once bigframes supports the version at HEAD.
# session.install(
# "--upgrade",
# "-e", # Use -e so that py.typed file is included.
# "git+https://github.com/ibis-project/ibis.git#egg=ibis-framework",
# "--upgrade",
# "-e", # Use -e so that py.typed file is included.
# "git+https://github.com/ibis-project/ibis.git@7.x.x#egg=ibis-framework",
# )
session.install("--no-deps", "ibis-framework==6.2.0")
session.install(
"--upgrade",
# "--pre",
"ibis-framework>=7.1.0,<8.0.0dev",
)
already_installed.add("ibis-framework")

# Workaround https://github.com/googleapis/python-db-dtypes-pandas/issues/178
2 changes: 1 addition & 1 deletion setup.py
@@ -43,8 +43,8 @@
"google-cloud-iam >=2.12.1",
"google-cloud-resource-manager >=1.10.3",
"google-cloud-storage >=2.0.0",
"ibis-framework[bigquery] >=7.1.0,<8.0.0dev",
# TODO: Relax upper bound once we have fixed `system_prerelease` tests.
"ibis-framework[bigquery] >=6.2.0,<7.0.0dev",
"pandas >=1.5.0,<2.1.4",
"pydata-google-auth >=1.8.2",
"requests >=2.27.1",
4 changes: 2 additions & 2 deletions testing/constraints-3.9.txt
@@ -45,7 +45,7 @@ greenlet==2.0.2
grpc-google-iam-v1==0.12.6
grpcio==1.53.0
grpcio-status==1.48.2
ibis-framework==6.2.0
ibis-framework==7.1.0
humanize==4.6.0
identify==2.5.22
idna==3.4
@@ -107,7 +107,7 @@ scikit-learn==1.2.2
SecretStorage==3.3.3
six==1.16.0
SQLAlchemy==1.4.0
sqlglot==10.6.4
sqlglot==18.12.0
tomli==2.0.1
toolz==0.12.0
tqdm==4.65.0
13 changes: 9 additions & 4 deletions tests/system/small/test_ibis.py
@@ -23,11 +23,16 @@
def test_approximate_quantiles(session: bigframes.Session, scalars_table_id: str):
num_bins = 3
ibis_client = session.ibis_client
_, dataset, table_id = scalars_table_id.split(".")
ibis_table: ibis_types.Table = ibis_client.table(table_id, database=dataset)
project, dataset, table_id = scalars_table_id.split(".")
ibis_table: ibis_types.Table = ibis_client.table( # type: ignore
table_id,
schema=dataset,
database=project,
)
ibis_column: ibis_types.NumericColumn = ibis_table["int64_col"]
quantiles: ibis_types.ArrayScalar = vendored_ibis_ops.ApproximateMultiQuantile( # type: ignore
ibis_column, num_bins=num_bins
quantiles: ibis_types.ArrayScalar = vendored_ibis_ops.ApproximateMultiQuantile(
ibis_column, # type: ignore
num_bins=num_bins, # type: ignore
).to_expr()
value = quantiles[1]
num_edges = quantiles.length()
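
In ibis >= 7.x the BigQuery backend appears to split the table reference into database (the project) and schema (the dataset); with placeholder identifiers, the call above corresponds to:

ibis_client.table(
    "my_table",
    schema="my_dataset",    # BigQuery dataset
    database="my_project",  # GCP project
)
# i.e. the table `my_project.my_dataset.my_table`
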
2 changes: 1 addition & 1 deletion tests/unit/resources.py
@@ -79,7 +79,7 @@ def create_dataframe(
# might not actually be used. Mock out the global session, too.
monkeypatch.setattr(bigframes.core.global_session, "_global_session", session)
bigframes.options.bigquery._session_started = True
return bigframes.dataframe.DataFrame({}, session=session)
return bigframes.dataframe.DataFrame({"col": []}, session=session)


def create_pandas_session(tables: Dict[str, pandas.DataFrame]) -> bigframes.Session: