refactor(mssql): support case-sensitive collations

This updates a few internal table and view calls to use the case-sensitive names which will allows us to (hopefully) support both case-sensitive and case-insensitive collations. The correct casing for the tables and views we use often (and the corresponding columns): Info schema tables: - INFORMATION_SCHEMA.COLUMNS - INFORMATION_SCHEMA.SCHEMATA - INFORMATION_SCHEMA.TABLES Temp table location: tempdb.dbo Catalogs: sys.databases Databases: sys.schemas I also had to swap out a stored procedure that _really_ didn't like being used with the case-sensitive collation, but I think the replacement is nicer anyway (we can push down some column selection to the engine).
ibis-project · Jul 29, 2024 · 6ad5c15 · 6ad5c15
1 parent 916267f
commit 6ad5c15
Show file tree

Hide file tree

Showing 3 changed files with 57 additions and 15 deletions.
diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py
@@ -54,6 +54,25 @@ def datetimeoffset_to_datetime(value):
     )
 
 
+# For testing we use the collation "Latin1_General_100_BIN2_UTF8"
+# which is case-sensitive and supports UTF8.
+# This allows us to (hopefully) support both case-sensitive and case-insensitive
+# collations.
+# It DOES mean, though, that we need to be correct in our usage of case when
+# referring to system tables and views.
+# So, the correct casing for the tables and views we use often (and the
+# corresponding columns):
+#
+#
+# Info schema tables:
+# - INFORMATION_SCHEMA.COLUMNS
+# - INFORMATION_SCHEMA.SCHEMATA
+# - INFORMATION_SCHEMA.TABLES
+# Temp table location: tempdb.dbo
+# Catalogs: sys.databases
+# Databases: sys.schemas
+
+
 class Backend(SQLBackend, CanCreateCatalog, CanCreateDatabase, CanCreateSchema, NoUrl):
     name = "mssql"
     compiler = MSSQLCompiler()
@@ -171,8 +190,8 @@ def get_schema(
             )
             .from_(
                 sg.table(
-                    "columns",
-                    db="information_schema",
+                    "COLUMNS",
+                    db="INFORMATION_SCHEMA",
                     catalog=catalog or self.current_catalog,
                 )
             )
@@ -212,23 +231,33 @@ def get_schema(
         return sch.Schema(mapping)
 
     def _get_schema_using_query(self, query: str) -> sch.Schema:
+        # Docs describing usage of dm_exec_describe_first_result_set
+        # https://learn.microsoft.com/en-us/sql/relational-databases/system-dynamic-management-views/sys-dm-exec-describe-first-result-set-transact-sql?view=sql-server-ver16
         tsql = sge.convert(str(query)).sql(self.dialect)
-        query = f"EXEC sp_describe_first_result_set @tsql = N{tsql}"
+
+        # For some reason when using "Latin1_General_100_BIN2_UTF8"
+        # the stored procedure `sp_describe_first_result_set` starts throwing errors about DLL loading.
+        # This "dynamic management function" uses the same algorithm and allows
+        # us to pre-filter the columns we want back.
+        # The syntax is:
+        # `sys.dm_exec_describe_first_result_set(@tsql, @params, @include_browse_information)`
+        query = f"""SELECT name,
+                        is_nullable AS nullable,
+                        system_type_name,
+                        precision,
+                        scale
+                    FROM
+                        sys.dm_exec_describe_first_result_set({tsql}, NULL, 0)"""
         with self._safe_raw_sql(query) as cur:
             rows = cur.fetchall()
 
         schema = {}
         for (
-            _,
-            _,
             name,
             nullable,
-            _,
             system_type_name,
-            _,
             precision,
             scale,
-            *_,
         ) in sorted(rows, key=itemgetter(1)):
             newtyp = self.compiler.type_mapper.from_string(
                 system_type_name, nullable=nullable
@@ -463,8 +492,8 @@ def list_tables(
             sg.select("table_name")
             .from_(
                 sg.table(
-                    "tables",
-                    db="information_schema",
+                    "TABLES",
+                    db="INFORMATION_SCHEMA",
                     catalog=catalog if catalog is not None else self.current_catalog,
                 )
             )
@@ -486,8 +515,8 @@ def list_databases(
     ) -> list[str]:
         query = sg.select(C.schema_name).from_(
             sg.table(
-                "schemata",
-                db="information_schema",
+                "SCHEMATA",
+                db="INFORMATION_SCHEMA",
                 catalog=catalog or self.current_catalog,
             )
         )

diff --git a/ibis/backends/mssql/tests/test_client.py b/ibis/backends/mssql/tests/test_client.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import pytest
+from pytest import param
 
 import ibis
 import ibis.expr.datatypes as dt
@@ -35,11 +36,23 @@
     ("DATETIME", dt.Timestamp(scale=3)),
     # Characters strings
     ("CHAR", dt.string),
-    ("TEXT", dt.string),
+    param(
+        "TEXT",
+        dt.string,
+        marks=pytest.mark.notyet(
+            ["mssql"], reason="Not supported by UTF-8 aware collations"
+        ),
+    ),
     ("VARCHAR", dt.string),
     # Unicode character strings
     ("NCHAR", dt.string),
-    ("NTEXT", dt.string),
+    param(
+        "NTEXT",
+        dt.string,
+        marks=pytest.mark.notyet(
+            ["mssql"], reason="Not supported by UTF-8 aware collations"
+        ),
+    ),
     ("NVARCHAR", dt.string),
     # Binary strings
     ("BINARY", dt.binary),

diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py
@@ -644,7 +644,7 @@ def test_order_by_nulls(con, op, nulls_first, expected):
 
 @pytest.mark.notimpl(["druid"])
 @pytest.mark.never(
-    ["exasol", "mssql", "mysql"],
+    ["exasol", "mysql"],
     raises=AssertionError,
     reason="someone decided a long time ago that 'A' = 'a' is true in these systems",
 )