fix(bigquery): calculated column cannot orderby in BigQuery (#17196)

* fix(bigquery): calculated column cannot orderby in BigQuery * typo * add ut * fix lint
apache · Oct 22, 2021 · bedb8f4 · bedb8f4
1 parent 2ba046f
commit bedb8f4
Show file tree

Hide file tree

Showing 5 changed files with 81 additions and 0 deletions.
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
@@ -1300,6 +1300,13 @@ def get_sqla_query(  # pylint: disable=too-many-arguments,too-many-locals,too-ma
                 # if engine does not allow using SELECT alias in ORDER BY
                 # revert to the underlying column
                 col = col.element
+
+            if (
+                db_engine_spec.allows_alias_in_select
+                and db_engine_spec.allows_hidden_cc_in_orderby
+                and col.name in [select_col.name for select_col in select_exprs]
+            ):
+                col = literal_column(col.name)
             direction = asc if ascending else desc
             qry = qry.order_by(direction(col))
 

diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
@@ -289,6 +289,12 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
     # if TRUE, then it doesn't have to.
     allows_hidden_ordeby_agg = True
 
+    # Whether ORDER BY clause can use sql caculated expression
+    # if True, use alias of select column for `order by`
+    # the True is safely for most database
+    # But for backward compatibility, False by default
+    allows_hidden_cc_in_orderby = False
+
     force_column_alias_quotes = False
     arraysize = 0
     max_column_name_length = 0

diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py
@@ -99,6 +99,8 @@ class BigQueryEngineSpec(BaseEngineSpec):
     # same cursor, so we need to run all statements at once
     run_multiple_statements_as_one = True
 
+    allows_hidden_cc_in_orderby = True
+
     """
     https://www.python.org/dev/peps/pep-0249/#arraysize
     raw_connections bypass the pybigquery query execution context and deal with

diff --git a/tests/integration_tests/db_engine_specs/base_engine_spec_tests.py b/tests/integration_tests/db_engine_specs/base_engine_spec_tests.py
@@ -19,6 +19,7 @@
 
 import pytest
 
+from superset.connectors.sqla.models import TableColumn
 from superset.db_engine_specs import get_engine_specs
 from superset.db_engine_specs.base import (
     BaseEngineSpec,
@@ -34,6 +35,7 @@
 from tests.integration_tests.db_engine_specs.base_tests import TestDbEngineSpec
 from tests.integration_tests.test_app import app
 
+from ..fixtures.birth_names_dashboard import load_birth_names_dashboard_with_slices
 from ..fixtures.energy_dashboard import load_energy_table_with_slice
 from ..fixtures.pyodbcRow import Row
 
@@ -273,6 +275,38 @@ def test_pyodbc_rows_to_tuples_passthrough(self):
         result = BaseEngineSpec.pyodbc_rows_to_tuples(data)
         self.assertListEqual(result, data)
 
+    @mock.patch("superset.models.core.Database.db_engine_spec", BaseEngineSpec)
+    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
+    def test_calculated_column_in_order_by_base_engine_spec(self):
+        table = self.get_table(name="birth_names")
+        TableColumn(
+            column_name="gender_cc",
+            type="VARCHAR(255)",
+            table=table,
+            expression="""
+            case
+              when gender=true then "male"
+              else "female"
+            end
+            """,
+        )
+
+        table.database.sqlalchemy_uri = "sqlite://"
+        query_obj = {
+            "groupby": ["gender_cc"],
+            "is_timeseries": False,
+            "filter": [],
+            "orderby": [["gender_cc", True]],
+        }
+        sql = table.get_query_str(query_obj)
+        assert (
+            """ORDER BY case
+             when gender=true then "male"
+             else "female"
+         end ASC;"""
+            in sql
+        )
+
 
 def test_is_readonly():
     def is_readonly(sql: str) -> bool:

diff --git a/tests/integration_tests/db_engine_specs/bigquery_tests.py b/tests/integration_tests/db_engine_specs/bigquery_tests.py
@@ -17,14 +17,19 @@
 import sys
 import unittest.mock as mock
 
+import pytest
 from pandas import DataFrame
 from sqlalchemy import column
 
+from superset.connectors.sqla.models import TableColumn
 from superset.db_engine_specs.base import BaseEngineSpec
 from superset.db_engine_specs.bigquery import BigQueryEngineSpec
 from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
 from superset.sql_parse import Table
 from tests.integration_tests.db_engine_specs.base_tests import TestDbEngineSpec
+from tests.integration_tests.fixtures.birth_names_dashboard import (
+    load_birth_names_dashboard_with_slices,
+)
 
 
 class TestBigQueryDbEngineSpec(TestDbEngineSpec):
@@ -333,3 +338,30 @@ def test_extract_errors(self):
                 },
             )
         ]
+
+    @mock.patch("superset.models.core.Database.db_engine_spec", BigQueryEngineSpec)
+    @mock.patch("pybigquery._helpers.create_bigquery_client", mock.Mock)
+    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
+    def test_calculated_column_in_order_by(self):
+        table = self.get_table(name="birth_names")
+        TableColumn(
+            column_name="gender_cc",
+            type="VARCHAR(255)",
+            table=table,
+            expression="""
+            case
+              when gender=true then "male"
+              else "female"
+            end
+            """,
+        )
+
+        table.database.sqlalchemy_uri = "bigquery://"
+        query_obj = {
+            "groupby": ["gender_cc"],
+            "is_timeseries": False,
+            "filter": [],
+            "orderby": [["gender_cc", True]],
+        }
+        sql = table.get_query_str(query_obj)
+        assert "ORDER BY gender_cc ASC" in sql