From b8d2b552cd23480cc7f042a60b00be2b95288eb9 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 8 Feb 2024 08:04:57 -0500 Subject: [PATCH] refactor(docs): inline code and remove `gen_matrix.py` --- docs/.gitignore | 1 - docs/support_matrix.qmd | 54 ++++++++++++++++++++++------ gen_matrix.py | 45 ----------------------- ibis/backends/flink/__init__.py | 5 +++ ibis/backends/flink/compiler/core.py | 3 ++ 5 files changed, 51 insertions(+), 57 deletions(-) delete mode 100644 gen_matrix.py diff --git a/docs/.gitignore b/docs/.gitignore index c6821696481f..3c9a23d5e780 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -14,7 +14,6 @@ diamonds.json *.ndjson reference/ objects.json -*support_matrix.csv # generated notebooks and files *.ipynb diff --git a/docs/support_matrix.qmd b/docs/support_matrix.qmd index d805ad99274d..48ec4d07bc4e 100644 --- a/docs/support_matrix.qmd +++ b/docs/support_matrix.qmd @@ -7,18 +7,49 @@ hide: ```{python} #| echo: false -!python ../gen_matrix.py -``` +from pathlib import Path -```{python} -#| echo: false import pandas as pd -support_matrix = pd.read_csv("./backends/raw_support_matrix.csv") -support_matrix = support_matrix.assign( - Category=support_matrix.Operation.map(lambda op: op.rsplit(".", 1)[0].rsplit(".", 1)[-1]), - Operation=support_matrix.Operation.map(lambda op: op.rsplit(".", 1)[-1]), -).set_index(["Category", "Operation"]) +import ibis +import ibis.expr.operations as ops + + +def get_backends(exclude=()): + entry_points = sorted(ep.name for ep in ibis.util.backend_entry_points()) + return [ + (backend, getattr(ibis, backend)) + for backend in entry_points + if backend not in exclude + ] + + +def get_leaf_classes(op): + for child_class in op.__subclasses__(): + if not child_class.__subclasses__(): + yield child_class + else: + yield from get_leaf_classes(child_class) + + +public_ops = frozenset(get_leaf_classes(ops.Value)) +support = {"Operation": [f"{op.__module__}.{op.__name__}" for op in public_ops]} +support.update( + (name, list(map(backend.has_operation, public_ops))) + for name, backend in get_backends() +) + +support_matrix = ( + pd.DataFrame(support) + .assign(splits=lambda df: df.Operation.str.findall("[a-zA-Z_][a-zA-Z_0-9]*")) + .assign( + Category=lambda df: df.splits.str[-2], + Operation=lambda df: df.splits.str[-1], + ) + .drop(["splits"], axis=1) + .set_index(["Category", "Operation"]) + .sort_index() +) all_visible_ops_count = len(support_matrix) coverage = pd.Index( support_matrix.sum() @@ -70,15 +101,16 @@ dict( #| content: valuebox #| title: "Number of SQL backends" import importlib -from ibis.backends.base.sql import BaseSQLBackend +from ibis.backends.base.sqlglot import SQLGlotBackend sql_backends = sum( issubclass( importlib.import_module(f"ibis.backends.{entry_point.name}").Backend, - BaseSQLBackend + SQLGlotBackend ) for entry_point in ibis.util.backend_entry_points() ) +assert sql_backends > 0 dict(value=sql_backends, color="green", icon="database") ``` diff --git a/gen_matrix.py b/gen_matrix.py deleted file mode 100644 index 9f9745cb7239..000000000000 --- a/gen_matrix.py +++ /dev/null @@ -1,45 +0,0 @@ -from __future__ import annotations - -from pathlib import Path - -import pandas as pd - -import ibis -import ibis.expr.operations as ops - - -def get_backends(exclude=()): - entry_points = sorted(ep.name for ep in ibis.util.backend_entry_points()) - return [ - (backend, getattr(ibis, backend)) - for backend in entry_points - if backend not in exclude - ] - - -def get_leaf_classes(op): - for child_class in op.__subclasses__(): - if not child_class.__subclasses__(): - yield child_class - else: - yield from get_leaf_classes(child_class) - - -def main(): - public_ops = frozenset(get_leaf_classes(ops.Value)) - support = {"operation": [f"{op.__module__}.{op.__name__}" for op in public_ops]} - support.update( - (name, list(map(backend.has_operation, public_ops))) - for name, backend in get_backends() - ) - - df = pd.DataFrame(support).set_index("operation").sort_index() - - with Path(ibis.__file__).parents[1].joinpath( - "docs", "backends", "raw_support_matrix.csv" - ).open(mode="w") as f: - df.to_csv(f, index_label="Operation") - - -if __name__ == "__main__": - main() diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index 2ae904f505ba..679c7680de62 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -44,6 +44,11 @@ class Backend(BaseBackend, CanCreateDatabase, NoUrl): supports_temporary_tables = True supports_python_udfs = True + @property + def dialect(self): + # TODO: remove when ported to sqlglot + return self.compiler.dialect + def do_connect(self, table_env: TableEnvironment) -> None: """Create a Flink `Backend` for use with Ibis. diff --git a/ibis/backends/flink/compiler/core.py b/ibis/backends/flink/compiler/core.py index f5d4c3774800..be187ebdb664 100644 --- a/ibis/backends/flink/compiler/core.py +++ b/ibis/backends/flink/compiler/core.py @@ -14,6 +14,7 @@ TableSetFormatter, ) from ibis.backends.base.sql.registry import quote_identifier +from ibis.backends.base.sqlglot.dialects import Flink from ibis.backends.flink.translator import FlinkExprTranslator @@ -96,6 +97,8 @@ class FlinkCompiler(Compiler): cheap_in_memory_tables = True + dialect = Flink + @classmethod def to_sql(cls, node, context=None, params=None): if isinstance(node, ir.Expr):