Skip to content

Commit

Permalink
refactor(mssql): support case-sensitive collations
Browse files Browse the repository at this point in the history
This updates a few internal table and view calls to use the
case-sensitive names which will allows us to (hopefully) support both
case-sensitive and case-insensitive collations.

The correct casing for the tables and views we use often (and the
corresponding columns):

Info schema tables:
- INFORMATION_SCHEMA.COLUMNS
- INFORMATION_SCHEMA.SCHEMATA
- INFORMATION_SCHEMA.TABLES
Temp table location: tempdb.dbo
Catalogs: sys.databases
Databases: sys.schemas

I also had to swap out a stored procedure that _really_ didn't like
being used with the case-sensitive collation, but I think the
replacement is nicer anyway (we can push down some column selection to
the engine).
  • Loading branch information
gforsyth committed Jul 29, 2024
1 parent 916267f commit 6ad5c15
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 15 deletions.
53 changes: 41 additions & 12 deletions ibis/backends/mssql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,25 @@ def datetimeoffset_to_datetime(value):
)


# For testing we use the collation "Latin1_General_100_BIN2_UTF8"
# which is case-sensitive and supports UTF8.
# This allows us to (hopefully) support both case-sensitive and case-insensitive
# collations.
# It DOES mean, though, that we need to be correct in our usage of case when
# referring to system tables and views.
# So, the correct casing for the tables and views we use often (and the
# corresponding columns):
#
#
# Info schema tables:
# - INFORMATION_SCHEMA.COLUMNS
# - INFORMATION_SCHEMA.SCHEMATA
# - INFORMATION_SCHEMA.TABLES
# Temp table location: tempdb.dbo
# Catalogs: sys.databases
# Databases: sys.schemas


class Backend(SQLBackend, CanCreateCatalog, CanCreateDatabase, CanCreateSchema, NoUrl):
name = "mssql"
compiler = MSSQLCompiler()
Expand Down Expand Up @@ -171,8 +190,8 @@ def get_schema(
)
.from_(
sg.table(
"columns",
db="information_schema",
"COLUMNS",
db="INFORMATION_SCHEMA",
catalog=catalog or self.current_catalog,
)
)
Expand Down Expand Up @@ -212,23 +231,33 @@ def get_schema(
return sch.Schema(mapping)

def _get_schema_using_query(self, query: str) -> sch.Schema:
# Docs describing usage of dm_exec_describe_first_result_set
# https://learn.microsoft.com/en-us/sql/relational-databases/system-dynamic-management-views/sys-dm-exec-describe-first-result-set-transact-sql?view=sql-server-ver16
tsql = sge.convert(str(query)).sql(self.dialect)
query = f"EXEC sp_describe_first_result_set @tsql = N{tsql}"

# For some reason when using "Latin1_General_100_BIN2_UTF8"
# the stored procedure `sp_describe_first_result_set` starts throwing errors about DLL loading.
# This "dynamic management function" uses the same algorithm and allows
# us to pre-filter the columns we want back.
# The syntax is:
# `sys.dm_exec_describe_first_result_set(@tsql, @params, @include_browse_information)`
query = f"""SELECT name,
is_nullable AS nullable,
system_type_name,
precision,
scale
FROM
sys.dm_exec_describe_first_result_set({tsql}, NULL, 0)"""
with self._safe_raw_sql(query) as cur:
rows = cur.fetchall()

schema = {}
for (
_,
_,
name,
nullable,
_,
system_type_name,
_,
precision,
scale,
*_,
) in sorted(rows, key=itemgetter(1)):
newtyp = self.compiler.type_mapper.from_string(
system_type_name, nullable=nullable
Expand Down Expand Up @@ -463,8 +492,8 @@ def list_tables(
sg.select("table_name")
.from_(
sg.table(
"tables",
db="information_schema",
"TABLES",
db="INFORMATION_SCHEMA",
catalog=catalog if catalog is not None else self.current_catalog,
)
)
Expand All @@ -486,8 +515,8 @@ def list_databases(
) -> list[str]:
query = sg.select(C.schema_name).from_(
sg.table(
"schemata",
db="information_schema",
"SCHEMATA",
db="INFORMATION_SCHEMA",
catalog=catalog or self.current_catalog,
)
)
Expand Down
17 changes: 15 additions & 2 deletions ibis/backends/mssql/tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import pytest
from pytest import param

import ibis
import ibis.expr.datatypes as dt
Expand Down Expand Up @@ -35,11 +36,23 @@
("DATETIME", dt.Timestamp(scale=3)),
# Characters strings
("CHAR", dt.string),
("TEXT", dt.string),
param(
"TEXT",
dt.string,
marks=pytest.mark.notyet(
["mssql"], reason="Not supported by UTF-8 aware collations"
),
),
("VARCHAR", dt.string),
# Unicode character strings
("NCHAR", dt.string),
("NTEXT", dt.string),
param(
"NTEXT",
dt.string,
marks=pytest.mark.notyet(
["mssql"], reason="Not supported by UTF-8 aware collations"
),
),
("NVARCHAR", dt.string),
# Binary strings
("BINARY", dt.binary),
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ def test_order_by_nulls(con, op, nulls_first, expected):

@pytest.mark.notimpl(["druid"])
@pytest.mark.never(
["exasol", "mssql", "mysql"],
["exasol", "mysql"],
raises=AssertionError,
reason="someone decided a long time ago that 'A' = 'a' is true in these systems",
)
Expand Down

0 comments on commit 6ad5c15

Please sign in to comment.