diff --git a/ci/schema/mssql.sql b/ci/schema/mssql.sql index d03927df717e..992b88608dfb 100644 --- a/ci/schema/mssql.sql +++ b/ci/schema/mssql.sql @@ -1,6 +1,6 @@ -DROP TABLE IF EXISTS diamonds; +DROP TABLE IF EXISTS ibis_testing.dbo.diamonds; -CREATE TABLE diamonds ( +CREATE TABLE ibis_testing.dbo.diamonds ( carat FLOAT, cut VARCHAR(MAX), color VARCHAR(MAX), @@ -17,13 +17,13 @@ CREATE TABLE diamonds ( -- /data is a volume mount to the ibis testing data -- used for snappy test data loading -- DataFrame.to_sql is unusably slow for loading CSVs -BULK INSERT diamonds +BULK INSERT ibis_testing.dbo.diamonds FROM '/data/diamonds.csv' WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2) -DROP TABLE IF EXISTS astronauts; +DROP TABLE IF EXISTS ibis_testing.dbo.astronauts; -CREATE TABLE astronauts ( +CREATE TABLE ibis_testing.dbo.astronauts ( "id" BIGINT, "number" BIGINT, "nationwide_number" BIGINT, @@ -50,13 +50,13 @@ CREATE TABLE astronauts ( "total_eva_hrs" DOUBLE PRECISION ); -BULK INSERT astronauts +BULK INSERT ibis_testing.dbo.astronauts FROM '/data/astronauts.csv' WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2) -DROP TABLE IF EXISTS batting; +DROP TABLE IF EXISTS ibis_testing.dbo.batting; -CREATE TABLE batting ( +CREATE TABLE ibis_testing.dbo.batting ( "playerID" VARCHAR(MAX), "yearID" BIGINT, stint BIGINT, @@ -81,13 +81,13 @@ CREATE TABLE batting ( "GIDP" BIGINT ); -BULK INSERT batting +BULK INSERT ibis_testing.dbo.batting FROM '/data/batting.csv' WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2) -DROP TABLE IF EXISTS awards_players; +DROP TABLE IF EXISTS ibis_testing.dbo.awards_players; -CREATE TABLE awards_players ( +CREATE TABLE ibis_testing.dbo.awards_players ( "playerID" VARCHAR(MAX), "awardID" VARCHAR(MAX), "yearID" BIGINT, @@ -96,13 +96,13 @@ CREATE TABLE awards_players ( notes VARCHAR(MAX) ); -BULK INSERT awards_players +BULK INSERT ibis_testing.dbo.awards_players FROM '/data/awards_players.csv' WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2) -DROP TABLE IF EXISTS functional_alltypes; +DROP TABLE IF EXISTS ibis_testing.dbo.functional_alltypes; -CREATE TABLE functional_alltypes ( +CREATE TABLE ibis_testing.dbo.functional_alltypes ( id INTEGER, bool_col BIT, tinyint_col SMALLINT, @@ -118,21 +118,21 @@ CREATE TABLE functional_alltypes ( month INTEGER ); -BULK INSERT functional_alltypes +BULK INSERT ibis_testing.dbo.functional_alltypes FROM '/data/functional_alltypes.csv' WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2) -DROP TABLE IF EXISTS win; +DROP TABLE IF EXISTS ibis_testing.dbo.win; -CREATE TABLE win (g VARCHAR(MAX), x BIGINT NOT NULL, y BIGINT); -INSERT INTO win VALUES +CREATE TABLE ibis_testing.dbo.win (g VARCHAR(MAX), x BIGINT NOT NULL, y BIGINT); +INSERT INTO ibis_testing.dbo.win VALUES ('a', 0, 3), ('a', 1, 2), ('a', 2, 0), ('a', 3, 1), ('a', 4, 1); -DROP TABLE IF EXISTS topk; +DROP TABLE IF EXISTS ibis_testing.dbo.topk; -CREATE TABLE topk (x BIGINT); -INSERT INTO topk VALUES (1), (1), (NULL); +CREATE TABLE ibis_testing.dbo.topk (x BIGINT); +INSERT INTO ibis_testing.dbo.topk VALUES (1), (1), (NULL); diff --git a/compose.yaml b/compose.yaml index 9833c57a6d0a..6cd5eb36eefc 100644 --- a/compose.yaml +++ b/compose.yaml @@ -63,12 +63,17 @@ services: environment: MSSQL_SA_PASSWORD: 1bis_Testing! ACCEPT_EULA: "Y" + # The default collation in MSSQL isS QL_Latin1_General_CP1_CI_AS + # where the CI stands for Case Insensitive. + # We use a case-sensitive collation for testing so that we don't + # break users with case-sensitive collations. + MSSQL_COLLATION: Latin1_General_100_BIN2_UTF8 healthcheck: interval: 1s retries: 20 test: - CMD-SHELL - - $(find / -name sqlcmd -type f -executable) -C -S localhost -U sa -P "$$MSSQL_SA_PASSWORD" -Q "IF DB_ID('ibis_testing') IS NULL BEGIN CREATE DATABASE [ibis_testing] END" + - $(find /opt -name sqlcmd -type f -executable) -C -S localhost -U sa -P "$$MSSQL_SA_PASSWORD" -Q "SELECT 1" ports: - 1433:1433 volumes: diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py index 1af80abf145e..34218ca69dc1 100644 --- a/ibis/backends/mssql/__init__.py +++ b/ibis/backends/mssql/__init__.py @@ -54,6 +54,25 @@ def datetimeoffset_to_datetime(value): ) +# For testing we use the collation "Latin1_General_100_BIN2_UTF8" +# which is case-sensitive and supports UTF8. +# This allows us to (hopefully) support both case-sensitive and case-insensitive +# collations. +# It DOES mean, though, that we need to be correct in our usage of case when +# referring to system tables and views. +# So, the correct casing for the tables and views we use often (and the +# corresponding columns): +# +# +# Info schema tables: +# - INFORMATION_SCHEMA.COLUMNS +# - INFORMATION_SCHEMA.SCHEMATA +# - INFORMATION_SCHEMA.TABLES +# Temp table location: tempdb.dbo +# Catalogs: sys.databases +# Databases: sys.schemas + + class Backend(SQLBackend, CanCreateCatalog, CanCreateDatabase, CanCreateSchema, NoUrl): name = "mssql" compiler = MSSQLCompiler() @@ -112,11 +131,15 @@ def do_connect( if user is None and password is None: kwargs.setdefault("Trusted_Connection", "yes") + if database is not None: + # passing database=None tries to interpolate "None" into the + # connection string and use it as a database + kwargs["database"] = database + self.con = pyodbc.connect( user=user, server=f"{host},{port}", password=password, - database=database, driver=driver, **kwargs, ) @@ -171,8 +194,8 @@ def get_schema( ) .from_( sg.table( - "columns", - db="information_schema", + "COLUMNS", + db="INFORMATION_SCHEMA", catalog=catalog or self.current_catalog, ) ) @@ -212,23 +235,33 @@ def get_schema( return sch.Schema(mapping) def _get_schema_using_query(self, query: str) -> sch.Schema: + # Docs describing usage of dm_exec_describe_first_result_set + # https://learn.microsoft.com/en-us/sql/relational-databases/system-dynamic-management-views/sys-dm-exec-describe-first-result-set-transact-sql?view=sql-server-ver16 tsql = sge.convert(str(query)).sql(self.dialect) - query = f"EXEC sp_describe_first_result_set @tsql = N{tsql}" + + # For some reason when using "Latin1_General_100_BIN2_UTF8" + # the stored procedure `sp_describe_first_result_set` starts throwing errors about DLL loading. + # This "dynamic management function" uses the same algorithm and allows + # us to pre-filter the columns we want back. + # The syntax is: + # `sys.dm_exec_describe_first_result_set(@tsql, @params, @include_browse_information)` + query = f"""SELECT name, + is_nullable AS nullable, + system_type_name, + precision, + scale + FROM + sys.dm_exec_describe_first_result_set({tsql}, NULL, 0)""" with self._safe_raw_sql(query) as cur: rows = cur.fetchall() schema = {} for ( - _, - _, name, nullable, - _, system_type_name, - _, precision, scale, - *_, ) in sorted(rows, key=itemgetter(1)): newtyp = self.compiler.type_mapper.from_string( system_type_name, nullable=nullable @@ -463,8 +496,8 @@ def list_tables( sg.select("table_name") .from_( sg.table( - "tables", - db="information_schema", + "TABLES", + db="INFORMATION_SCHEMA", catalog=catalog if catalog is not None else self.current_catalog, ) ) @@ -486,8 +519,8 @@ def list_databases( ) -> list[str]: query = sg.select(C.schema_name).from_( sg.table( - "schemata", - db="information_schema", + "SCHEMATA", + db="INFORMATION_SCHEMA", catalog=catalog or self.current_catalog, ) ) diff --git a/ibis/backends/mssql/tests/conftest.py b/ibis/backends/mssql/tests/conftest.py index 3ead551fc69d..4dd86ed760da 100644 --- a/ibis/backends/mssql/tests/conftest.py +++ b/ibis/backends/mssql/tests/conftest.py @@ -1,7 +1,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import pytest @@ -35,13 +35,23 @@ class TestConf(ServiceBackendTest): def test_files(self) -> Iterable[Path]: return self.data_dir.joinpath("csv").glob("*.csv") + def postload(self, **kw: Any): + self.connection = self.connect(database=IBIS_TEST_MSSQL_DB, **kw) + + def _load_data(self, *, database: str = IBIS_TEST_MSSQL_DB, **_): + with self.connection._safe_raw_sql( + "IF DB_ID('ibis_testing') is NULL BEGIN CREATE DATABASE [ibis_testing] END" + ): + pass + + super()._load_data(database=database, **_) + @staticmethod def connect(*, tmpdir, worker_id, **kw): return ibis.mssql.connect( host=MSSQL_HOST, user=MSSQL_USER, password=MSSQL_PASS, - database=IBIS_TEST_MSSQL_DB, port=MSSQL_PORT, driver=MSSQL_PYODBC_DRIVER, autocommit=True, diff --git a/ibis/backends/mssql/tests/test_client.py b/ibis/backends/mssql/tests/test_client.py index f3604e4b6776..7048ac0415a5 100644 --- a/ibis/backends/mssql/tests/test_client.py +++ b/ibis/backends/mssql/tests/test_client.py @@ -1,6 +1,7 @@ from __future__ import annotations import pytest +from pytest import param import ibis import ibis.expr.datatypes as dt @@ -35,11 +36,23 @@ ("DATETIME", dt.Timestamp(scale=3)), # Characters strings ("CHAR", dt.string), - ("TEXT", dt.string), + param( + "TEXT", + dt.string, + marks=pytest.mark.notyet( + ["mssql"], reason="Not supported by UTF-8 aware collations" + ), + ), ("VARCHAR", dt.string), # Unicode character strings ("NCHAR", dt.string), - ("NTEXT", dt.string), + param( + "NTEXT", + dt.string, + marks=pytest.mark.notyet( + ["mssql"], reason="Not supported by UTF-8 aware collations" + ), + ), ("NVARCHAR", dt.string), # Binary strings ("BINARY", dt.binary), diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 258725f3f628..4f9b8e9dd1be 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -644,7 +644,7 @@ def test_order_by_nulls(con, op, nulls_first, expected): @pytest.mark.notimpl(["druid"]) @pytest.mark.never( - ["exasol", "mssql", "mysql"], + ["exasol", "mysql"], raises=AssertionError, reason="someone decided a long time ago that 'A' = 'a' is true in these systems", )