ibis-project · gforsyth · Jul 29, 2024 · Jul 28, 2024
diff --git a/docs/contribute/02_workflow.qmd b/docs/contribute/02_workflow.qmd
@@ -95,12 +95,12 @@ all ordinary and edge cases.
 Pytest markers can be used to assert that a test should fail or raise a specific error.
 We use a number of pytest markers in ibis:
 
-- `pytest.mark.notimpl`: the backend can do a thing, we haven't mapped the op
-- `pytest.mark.notyet`: the backend cannot do a thing, but might in the future
-- `pytest.mark.never`: the backend will never support this / pass this test (common example
-here is a test running on sqlite that relies on strong typing)
-- `pytest.mark.broken`: this test broke and it's demonstrably unrelated to the PR I'm working
-on and fixing it shouldn't block this PR from going in (but we should fix it up pronto)
+- `pytest.mark.notimpl`: We can implement, fix, or work around this on the ibis side, but haven't yet.
+- `pytest.mark.notyet`: This requires the backend to implement/fix something.
+  We can't/won't do it on the ibis side.
+- `pytest.mark.never`: The backend will never support this / pass this test.
+  We shouldn't expect to ever be able to fix this.
+  A common example here is a test running on sqlite that relies on strong typing.
 
 Refrain from using a generic marker like `pytest.mark.xfail`.
 

diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py
@@ -370,22 +370,6 @@ def _filter_none_from_raises(kwargs):
             kwargs = _filter_none_from_raises(kwargs)
             item.add_marker(pytest.mark.xfail(**kwargs))
 
-    # Something has been exposed as broken by a new test and it shouldn't be
-    # imperative for a contributor to fix it just because they happened to
-    # bring it to attention  -- USE SPARINGLY
-    for marker in item.iter_markers(name="broken"):
-        if backend in marker.args[0]:
-            if (
-                item.location[0] in FIlES_WITH_STRICT_EXCEPTION_CHECK
-                and "raises" not in marker.kwargs.keys()
-            ):
-                raise ValueError("broken requires a raises")
-
-            kwargs = marker.kwargs.copy()
-            kwargs.setdefault("reason", f"Feature is failing on {backend}")
-            kwargs = _filter_none_from_raises(kwargs)
-            item.add_marker(pytest.mark.xfail(**kwargs))
-
     for marker in item.iter_markers(name="xfail_version"):
         kwargs = marker.kwargs.copy()
         kwargs = _filter_none_from_raises(kwargs)

diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py
@@ -280,7 +280,7 @@ def mean_and_std(v):
             lambda t, where: t.bool_col[where].any(),
             id="any",
             marks=[
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["druid"],
                     raises=AttributeError,
                     reason="'IntegerColumn' object has no attribute 'any'",
@@ -292,7 +292,7 @@ def mean_and_std(v):
             lambda t, where: ~t.bool_col[where].any(),
             id="notany",
             marks=[
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["druid"],
                     raises=AttributeError,
                     reason="'IntegerColumn' object has no attribute 'notany'",
@@ -305,7 +305,7 @@ def mean_and_std(v):
             lambda t, where: ~t.bool_col[where].any(),
             id="any_negate",
             marks=[
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["druid"],
                     raises=AttributeError,
                     reason="'IntegerColumn' object has no attribute 'any'",
@@ -318,7 +318,7 @@ def mean_and_std(v):
             lambda t, where: t.bool_col[where].all(),
             id="all",
             marks=[
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["druid"],
                     raises=AttributeError,
                     reason="'IntegerColumn' object has no attribute 'all'",
@@ -330,7 +330,7 @@ def mean_and_std(v):
             lambda t, where: ~t.bool_col[where].all(),
             id="notall",
             marks=[
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["druid"],
                     raises=AttributeError,
                     reason="'IntegerColumn' object has no attribute 'notall'",
@@ -343,7 +343,7 @@ def mean_and_std(v):
             lambda t, where: ~t.bool_col[where].all(),
             id="all_negate",
             marks=[
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["druid"],
                     raises=AttributeError,
                     reason="'IntegerColumn' object has no attribute 'all'",
@@ -361,7 +361,7 @@ def mean_and_std(v):
             lambda t, where: (t.int_col > 0)[where].sum(),
             id="bool_sum",
             marks=[
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["oracle"],
                     raises=OracleDatabaseError,
                     reason="ORA-02000: missing AS keyword",
@@ -538,7 +538,7 @@ def mean_and_std(v):
                     ["impala", "mysql", "sqlite", "mssql", "druid", "oracle", "exasol"],
                     raises=com.OperationNotDefinedError,
                 ),
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["dask"],
                     raises=(AttributeError, TypeError),
                     reason=(
@@ -809,12 +809,12 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond):
                     reason="backend implements approximate quantiles",
                     raises=com.OperationNotDefinedError,
                 ),
-                pytest.mark.broken(
+                pytest.mark.never(
                     ["pyspark"],
                     reason="backend implements approximate quantiles",
                     raises=AssertionError,
                 ),
-                pytest.mark.broken(
+                pytest.mark.never(
                     ["dask"],
                     reason="backend implements approximate quantiles",
                     raises=AssertionError,
@@ -824,7 +824,7 @@ def test_count_distinct_star(alltypes, df, ibis_cond, pandas_cond):
                     reason="backend doesn't implement approximate quantiles yet",
                     raises=com.OperationNotDefinedError,
                 ),
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["risingwave"],
                     reason="Invalid input syntax: direct arg in `percentile_cont` must be castable to float64",
                     raises=PsycoPg2InternalError,
@@ -1132,7 +1132,7 @@ def test_median(alltypes, df):
     raises=ClickHouseDatabaseError,
     reason="doesn't support median of strings",
 )
-@pytest.mark.broken(
+@pytest.mark.notyet(
     ["pyspark"], raises=AssertionError, reason="pyspark returns null for string median"
 )
 @pytest.mark.notimpl(["dask"], raises=(AssertionError, NotImplementedError, TypeError))
@@ -1301,7 +1301,7 @@ def test_topk_op(alltypes, df):
         )
     ],
 )
-@pytest.mark.broken(
+@pytest.mark.notyet(
     ["druid"], raises=PyDruidProgrammingError, reason="Java NullPointerException"
 )
 @pytest.mark.notimpl(

diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py
@@ -420,7 +420,7 @@ def test_array_slice(backend, start, stop):
 @pytest.mark.notimpl(
     ["datafusion", "flink", "polars", "sqlite"], raises=com.OperationNotDefinedError
 )
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["risingwave"],
     raises=PsycoPg2InternalError,
     reason="TODO(Kexiang): seems a bug",
@@ -460,7 +460,7 @@ def test_array_slice(backend, start, stop):
     ],
     ids=["lambda", "partial", "deferred"],
 )
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["risingwave"],
     raises=PsycoPg2InternalError,
     reason="TODO(Kexiang): seems a bug",
@@ -542,7 +542,7 @@ def test_array_filter(con, input, output, predicate):
             "x",
             1,
             marks=[
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["flink"],
                     raises=Py4JJavaError,
                     reason="unknown; NPE during execution",
@@ -574,7 +574,7 @@ def test_array_contains(backend, con, col, value):
                     raises=Py4JJavaError,
                     reason="SQL validation failed; Flink does not support ARRAY[]",  # https://issues.apache.org/jira/browse/FLINK-20578
                 ),
-                pytest.mark.broken(
+                pytest.mark.notyet(
                     ["datafusion"],
                     raises=Exception,
                     reason="Internal error: start_from index out of bounds",
@@ -647,7 +647,7 @@ def test_array_remove(con, a):
     raises=(AssertionError, GoogleBadRequest),
     reason="bigquery doesn't support null elements in arrays",
 )
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["risingwave"], raises=AssertionError, reason="TODO(Kexiang): seems a bug"
 )
 @pytest.mark.notyet(
@@ -681,7 +681,7 @@ def test_array_unique(con, input, expected):
     ["flink", "polars"],
     raises=com.OperationNotDefinedError,
 )
-@pytest.mark.broken(
+@pytest.mark.notyet(
     ["risingwave"],
     raises=AssertionError,
     reason="Refer to https://github.com/risingwavelabs/risingwave/issues/14735",
@@ -752,7 +752,7 @@ def test_array_union(con, a, b, expected_array):
 @pytest.mark.notimpl(
     ["sqlite"], raises=com.UnsupportedBackendType, reason="Unsupported type: Array..."
 )
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["risingwave"],
     raises=AssertionError,
     reason="TODO(Kexiang): seems a bug",
@@ -793,7 +793,7 @@ def test_array_intersect(con, data):
 @pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
 @pytest.mark.notimpl(["risingwave"], raises=PsycoPg2InternalError)
 @pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError)
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError
 )
 def test_unnest_struct(con):
@@ -813,10 +813,10 @@ def test_unnest_struct(con):
 @pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
 @pytest.mark.notimpl(["risingwave"], raises=PsycoPg2InternalError)
 @pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError)
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError
 )
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["flink"], reason="flink unnests a and b as separate columns", raises=Py4JJavaError
 )
 def test_unnest_struct_with_multiple_fields(con):
@@ -914,7 +914,7 @@ def test_zip_null(con, fn):
     reason="pyspark doesn't seem to support field selection on explode",
     raises=PySparkAnalysisException,
 )
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError
 )
 @pytest.mark.notyet(
@@ -1002,7 +1002,7 @@ def flatten_data():
                     reason="Arrays are never nullable",
                     raises=AssertionError,
                 ),
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["polars"],
                     raises=TypeError,
                     reason="comparison of nested arrays doesn't work in pandas testing module",
@@ -1220,8 +1220,10 @@ def swap(token):
             "-1h",
             id="neg_inner",
             marks=[
-                pytest.mark.broken(
-                    ["polars"], raises=AssertionError, reason="returns an empty array"
+                pytest.mark.notimpl(
+                    ["polars"],
+                    raises=(AssertionError, TypeError),
+                    reason="returns an empty array",
                 ),
                 pytest.mark.notimpl(
                     ["risingwave"],
@@ -1329,8 +1331,10 @@ def test_repr_timestamp_array(con, monkeypatch):
     ["datafusion", "flink", "polars"],
     raises=com.OperationNotDefinedError,
 )
-@pytest.mark.broken(["pandas"], raises=ValueError, reason="reindex on duplicate values")
-@pytest.mark.broken(
+@pytest.mark.notimpl(
+    ["pandas"], raises=ValueError, reason="reindex on duplicate values"
+)
+@pytest.mark.notimpl(
     ["dask"], raises=AssertionError, reason="DataFrame.index are different"
 )
 def test_unnest_range(con):
@@ -1367,7 +1371,7 @@ def test_array_literal_with_exprs(con, input, expected):
     ["datafusion", "postgres", "pandas", "polars", "risingwave", "dask", "flink"],
     raises=com.OperationNotDefinedError,
 )
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["trino"],
     raises=TrinoUserError,
     reason="sqlglot generates code that assumes there's only at most two fields to unpack from a struct",

diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py
@@ -120,7 +120,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op
 @pytest.mark.parametrize(
     ("direction", "op"), [("backward", operator.ge), ("forward", operator.le)]
 )
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["clickhouse"], raises=AssertionError, reason="`time` is truncated to seconds"
 )
 @pytest.mark.notyet(

diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py
@@ -134,7 +134,7 @@ def test_create_table(backend, con, temp_table, func, sch):
                     reason="Can't rename temp tables",
                     raises=ValueError,
                 ),
-                pytest.mark.broken(
+                pytest.mark.notimpl(
                     ["bigquery"],
                     reason="tables created with temp=True cause a 404 on retrieval",
                 ),
@@ -155,8 +155,8 @@ def test_create_table(backend, con, temp_table, func, sch):
                     ["pyspark", "trino", "exasol", "risingwave"],
                     reason="No support for temp tables",
                 ),
-                pytest.mark.broken(["mssql"], reason="Incorrect temp table syntax"),
-                pytest.mark.broken(
+                pytest.mark.notimpl(["mssql"], reason="Incorrect temp table syntax"),
+                pytest.mark.notimpl(
                     ["bigquery"],
                     reason="tables created with temp=True cause a 404 on retrieval",
                 ),
@@ -293,7 +293,7 @@ def test_create_table_from_schema(con, new_schema, temp_table):
     assert result == new_table.schema()
 
 
-@mark.broken(
+@mark.notimpl(
     ["oracle"],
     reason="oracle temp tables aren't cleaned up on reconnect -- they need to "
     "be switched from using atexit to weakref.finalize",
@@ -1315,10 +1315,10 @@ def test_set_backend_url(url, monkeypatch):
 @pytest.mark.notimpl(
     ["snowflake"], reason="scale not implemented in ibis's snowflake backend"
 )
-@pytest.mark.broken(
+@pytest.mark.never(
     ["oracle"], reason="oracle doesn't allow DESCRIBE outside of its CLI"
 )
-@pytest.mark.broken(["druid"], reason="dialect is broken")
+@pytest.mark.notimpl(["druid"], reason="dialect is broken")
 @pytest.mark.notimpl(
     ["flink"],
     raises=com.IbisError,
@@ -1345,7 +1345,7 @@ def gen_test_name(con: BaseBackend):
     con.drop_table(name, force=True)
 
 
-@mark.broken(
+@mark.notimpl(
     ["druid"], raises=NotImplementedError, reason="generated SQL fails to parse"
 )
 @mark.notimpl(["impala"], reason="impala doesn't support memtable")

diff --git a/ibis/backends/tests/test_conditionals.py b/ibis/backends/tests/test_conditionals.py
@@ -86,7 +86,7 @@ def test_value_cases_scalar(con, inp, exp):
         assert result == exp
 
 
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     "exasol",
     reason="the int64 RBI column is .to_pandas()ed to an object column, which is incomparable to ints",
     raises=AssertionError,
@@ -117,7 +117,7 @@ def test_ibis_cases_scalar():
     assert result == "five"
 
 
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["sqlite", "exasol"],
     reason="the int64 RBI column is .to_pandas()ed to an object column, which is incomparable to 5",
     raises=TypeError,
@@ -142,7 +142,7 @@ def test_ibis_cases_column(batting):
     assert Counter(result) == Counter(expected)
 
 
-@pytest.mark.broken("clickhouse", reason="special case this and returns 'oops'")
+@pytest.mark.notimpl("clickhouse", reason="special case this and returns 'oops'")
 def test_value_cases_null(con):
     """CASE x WHEN NULL never gets hit"""
     e = ibis.literal(5).nullif(5).case().when(None, "oops").else_("expected").end()

diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py
@@ -171,7 +171,7 @@ def test_column_pyarrow_batch_chunk_size(awards_players):
 
 
 @pytest.mark.notimpl(["pandas", "dask"])
-@pytest.mark.broken(
+@pytest.mark.notimpl(
     ["sqlite"],
     raises=pa.ArrowException,
     reason="Test data has empty strings in columns typed as int64",