diff --git a/ibis/backends/base/sql/registry/helpers.py b/ibis/backends/base/sql/registry/helpers.py index 16de96728d19..c3d56f9e20d4 100644 --- a/ibis/backends/base/sql/registry/helpers.py +++ b/ibis/backends/base/sql/registry/helpers.py @@ -14,9 +14,11 @@ def format_call(translator, func, *args): return "{}({})".format(func, ", ".join(formatted_args)) -def quote_identifier(name, quotechar="`", force=False): +def quote_identifier( + name, quotechar="`", force=False, base_identifiers=identifiers.base_identifiers +): """Add quotes to the `name` identifier if needed.""" - if force or name.count(" ") or name in identifiers.base_identifiers: + if force or name.count(" ") or name in base_identifiers: return f"{quotechar}{name}{quotechar}" else: return name diff --git a/ibis/backends/base/sql/registry/identifiers.py b/ibis/backends/base/sql/registry/identifiers.py index 06c67ef4b926..1129a3176698 100644 --- a/ibis/backends/base/sql/registry/identifiers.py +++ b/ibis/backends/base/sql/registry/identifiers.py @@ -1,21 +1,5 @@ from __future__ import annotations -# Copyright 2014 Cloudera Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Base identifiers - base_identifiers = [ "add", "aggregate", diff --git a/ibis/backends/flink/compiler/core.py b/ibis/backends/flink/compiler/core.py index 6be22d372888..dc3d78d964d3 100644 --- a/ibis/backends/flink/compiler/core.py +++ b/ibis/backends/flink/compiler/core.py @@ -14,10 +14,14 @@ TableSetFormatter, ) from ibis.backends.base.sql.registry import quote_identifier +from ibis.backends.flink import identifiers from ibis.backends.flink.translator import FlinkExprTranslator class FlinkTableSetFormatter(TableSetFormatter): + def _quote_identifier(self, name): + return quote_identifier(name, base_identifiers=identifiers.base_identifiers) + def _format_in_memory_table(self, op): names = op.schema.names raw_rows = [] @@ -121,7 +125,7 @@ def _tumble_window_params( filter( None, [ - f"TABLE {quote_identifier(op.table.name)}", + f"TABLE {formatter._quote_identifier(op.table.name)}", f"DESCRIPTOR({formatter._translate(op.time_col)})", formatter._translate(op.window_size), formatter._translate(op.offset) if op.offset else None, @@ -136,7 +140,7 @@ def _hop_window_params(op: ops.HopWindowingTVF, formatter: TableSetFormatter) -> filter( None, [ - f"TABLE {quote_identifier(op.table.name)}", + f"TABLE {formatter._quote_identifier(op.table.name)}", f"DESCRIPTOR({formatter._translate(op.time_col)})", formatter._translate(op.window_slide), formatter._translate(op.window_size), @@ -154,7 +158,7 @@ def _cumulate_window_params( filter( None, [ - f"TABLE {quote_identifier(op.table.name)}", + f"TABLE {formatter._quote_identifier(op.table.name)}", f"DESCRIPTOR({formatter._translate(op.time_col)})", formatter._translate(op.window_step), formatter._translate(op.window_size), diff --git a/ibis/backends/flink/identifiers.py b/ibis/backends/flink/identifiers.py new file mode 100644 index 000000000000..0535fb445b3d --- /dev/null +++ b/ibis/backends/flink/identifiers.py @@ -0,0 +1,560 @@ +from __future__ import annotations + +# https://nightlies.apache.org/flink/flink-docs-release-1.18/docs/dev/table/sql/overview/#reserved-keywords +base_identifiers = [ + "a", + "abs", + "absolute", + "action", + "ada", + "add", + "admin", + "after", + "all", + "allocate", + "allow", + "alter", + "always", + "and", + "analyze", + "any", + "are", + "array", + "as", + "asc", + "asensitive", + "assertion", + "assignment", + "asymmetric", + "at", + "atomic", + "attribute", + "attributes", + "authorization", + "avg", + "before", + "begin", + "bernoulli", + "between", + "bigint", + "binary", + "bit", + "blob", + "boolean", + "both", + "breadth", + "by", + "bytes", + "c", + "call", + "called", + "cardinality", + "cascade", + "cascaded", + "case", + "cast", + "catalog", + "catalog_name", + "ceil", + "ceiling", + "century", + "chain", + "char", + "character", + "characteristics", + "characters", + "character_length", + "character_set_catalog", + "character_set_name", + "character_set_schema", + "char_length", + "check", + "class_origin", + "clob", + "close", + "coalesce", + "cobol", + "collate", + "collation", + "collation_catalog", + "collation_name", + "collation_schema", + "collect", + "column", + "columns", + "column_name", + "command_function", + "command_function_code", + "commit", + "committed", + "condition", + "condition_number", + "connect", + "connection", + "connection_name", + "constraint", + "constraints", + "constraint_catalog", + "constraint_name", + "constraint_schema", + "constructor", + "contains", + "continue", + "convert", + "corr", + "corresponding", + "count", + "covar_pop", + "covar_samp", + "create", + "cross", + "cube", + "cume_dist", + "current", + "current_catalog", + "current_date", + "current_default_transform_group", + "current_path", + "current_role", + "current_schema", + "current_time", + "current_timestamp", + "current_transform_group_for_type", + "current_user", + "cursor", + "cursor_name", + "cycle", + "data", + "database", + "date", + "datetime_interval_code", + "datetime_interval_precision", + "day", + "deallocate", + "dec", + "decade", + "decimal", + "declare", + "default", + "defaults", + "deferrable", + "deferred", + "defined", + "definer", + "degree", + "delete", + "dense_rank", + "depth", + "deref", + "derived", + "desc", + "describe", + "description", + "descriptor", + "deterministic", + "diagnostics", + "disallow", + "disconnect", + "dispatch", + "distinct", + "domain", + "double", + "dow", + "doy", + "drop", + "dynamic", + "dynamic_function", + "dynamic_function_code", + "each", + "element", + "else", + "end", + "end-exec", + "epoch", + "equals", + "escape", + "every", + "except", + "exception", + "exclude", + "excluding", + "exec", + "execute", + "exists", + "exp", + "explain", + "extend", + "external", + "extract", + "false", + "fetch", + "filter", + "final", + "first", + "first_value", + "float", + "floor", + "following", + "for", + "foreign", + "fortran", + "found", + "frac_second", + "free", + "from", + "full", + "function", + "fusion", + "g", + "general", + "generated", + "get", + "global", + "go", + "goto", + "grant", + "granted", + "group", + "grouping", + "having", + "hierarchy", + "hold", + "hour", + "identity", + "immediate", + "implementation", + "import", + "in", + "including", + "increment", + "indicator", + "initially", + "inner", + "inout", + "input", + "insensitive", + "insert", + "instance", + "instantiable", + "int", + "integer", + "intersect", + "intersection", + "interval", + "into", + "invoker", + "is", + "isolation", + "java", + "join", + "k", + "key", + "key_member", + "key_type", + "label", + "language", + "large", + "last", + "last_value", + "lateral", + "leading", + "left", + "length", + "level", + "library", + "like", + "limit", + "ln", + "local", + "localtime", + "localtimestamp", + "locator", + "lower", + "m", + "map", + "match", + "matched", + "max", + "maxvalue", + "member", + "merge", + "message_length", + "message_octet_length", + "message_text", + "method", + "microsecond", + "millennium", + "min", + "minute", + "minvalue", + "mod", + "modifies", + "module", + "modules", + "month", + "more", + "multiset", + "mumps", + "name", + "names", + "national", + "natural", + "nchar", + "nclob", + "nesting", + "new", + "next", + "no", + "none", + "normalize", + "normalized", + "not", + "null", + "nullable", + "nullif", + "nulls", + "number", + "numeric", + "object", + "octets", + "octet_length", + "of", + "offset", + "old", + "on", + "only", + "open", + "option", + "options", + "or", + "order", + "ordering", + "ordinality", + "others", + "out", + "outer", + "output", + "over", + "overlaps", + "overlay", + "overriding", + "pad", + "parameter", + "parameter_mode", + "parameter_name", + "parameter_ordinal_position", + "parameter_specific_catalog", + "parameter_specific_name", + "parameter_specific_schema", + "partial", + "partition", + "pascal", + "passthrough", + "path", + "percentile_cont", + "percentile_disc", + "percent_rank", + "placing", + "plan", + "pli", + "position", + "power", + "preceding", + "precision", + "prepare", + "preserve", + "primary", + "prior", + "privileges", + "procedure", + "public", + "quarter", + "range", + "rank", + "raw", + "read", + "reads", + "real", + "recursive", + "ref", + "references", + "referencing", + "regr_avgx", + "regr_avgy", + "regr_count", + "regr_intercept", + "regr_r2", + "regr_slope", + "regr_sxx", + "regr_sxy", + "regr_syy", + "relative", + "release", + "repeatable", + "reset", + "restart", + "restrict", + "result", + "return", + "returned_cardinality", + "returned_length", + "returned_octet_length", + "returned_sqlstate", + "returns", + "revoke", + "right", + "role", + "rollback", + "rollup", + "routine", + "routine_catalog", + "routine_name", + "routine_schema", + "row", + "rows", + "row_count", + "row_number", + "savepoint", + "scale", + "schema", + "schema_name", + "scope", + "scope_catalogs", + "scope_name", + "scope_schema", + "scroll", + "search", + "second", + "section", + "security", + "select", + "self", + "sensitive", + "sequence", + "serializable", + "server", + "server_name", + "session", + "session_user", + "set", + "sets", + "similar", + "simple", + "size", + "smallint", + "some", + "source", + "space", + "specific", + "specifictype", + "specific_name", + "sql", + "sqlexception", + "sqlstate", + "sqlwarning", + "sql_tsi_day", + "sql_tsi_frac_second", + "sql_tsi_hour", + "sql_tsi_microsecond", + "sql_tsi_minute", + "sql_tsi_month", + "sql_tsi_quarter", + "sql_tsi_second", + "sql_tsi_week", + "sql_tsi_year", + "sqrt", + "start", + "state", + "statement", + "static", + "statistics", + "stddev_pop", + "stddev_samp", + "stream", + "string", + "structure", + "style", + "subclass_origin", + "submultiset", + "substitute", + "substring", + "sum", + "symmetric", + "system", + "system_user", + "table", + "tablesample", + "table_name", + "temporary", + "then", + "ties", + "time", + "timestamp", + "timestampadd", + "timestampdiff", + "timezone_hour", + "timezone_minute", + "tinyint", + "to", + "top_level_count", + "trailing", + "transaction", + "transactions_active", + "transactions_committed", + "transactions_rolled_back", + "transform", + "transforms", + "translate", + "translation", + "treat", + "trigger", + "trigger_catalog", + "trigger_name", + "trigger_schema", + "trim", + "true", + "type", + "uescape", + "unbounded", + "uncommitted", + "under", + "union", + "unique", + "unknown", + "unnamed", + "unnest", + "update", + "upper", + "upsert", + "usage", + "user", + "user_defined_type_catalog", + "user_defined_type_code", + "user_defined_type_name", + "user_defined_type_schema", + "using", + "value", + "values", + "varbinary", + "varchar", + "varying", + "var_pop", + "var_samp", + "version", + "view", + "week", + "when", + "whenever", + "where", + "width_bucket", + "window", + "with", + "within", + "without", + "work", + "wrapper", + "write", + "xml", + "year", + "zone", +] diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 89f856c8cc37..27bddbda68e4 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -1471,7 +1471,6 @@ def test_agg_name_in_output_column(alltypes): assert "max" in df.columns[1].lower() -@pytest.mark.notimpl(["flink"], "WIP", raises=Py4JError) def test_grouped_case(backend, con): table = ibis.memtable({"key": [1, 1, 2, 2], "value": [10, 30, 20, 40]}) diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index b815ccc35788..04ded077a515 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -1215,7 +1215,7 @@ def test_first_last(backend): raises=com.OperationNotDefinedError, reason="not support by the backend", ) -@pytest.mark.notyet(["flink"], raises=Py4JJavaError, reason="not supported by Flink") +@pytest.mark.broken(["flink"], raises=Py4JJavaError, reason="bug in Flink") def test_range_expression_bounds(backend): t = ibis.memtable( { diff --git a/pyproject.toml b/pyproject.toml index a4ac63034d3d..347350ff367b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -376,7 +376,7 @@ show_deps = true [tool.codespell] # local codespell matches `./docs`, pre-commit codespell matches `docs` skip = "*.lock,.direnv,.git,./docs/_freeze,docs/_freeze/**,*.svg,*.css,*.html,*.js" -ignore-regex = '\b(i[if]f|I[IF]F|AFE)\b' +ignore-regex = '\b(i[if]f|I[IF]F|AFE|inout)\b' builtin = "clear,rare,names" [tool.ruff]