From e3d02bdbb74366f1c46a3f44da2e31a0bd134cde Mon Sep 17 00:00:00 2001
From: Jim Crist-Harif
Date: Wed, 14 Aug 2024 13:21:16 -0500
Subject: [PATCH] feat(impala): add `tbl_properties` to `create_table` (#9839)

`TBLPROPERTIES` can be set on table creation. Exposing this lets us
un-xfail a few tests by setting the required `TBLPROPERTIES` to make
them work.
---
 ibis/backends/impala/__init__.py             |  5 ++++
 ibis/backends/impala/ddl.py                  | 11 +++++++++
 ibis/backends/impala/tests/test_partition.py | 25 ++++++++++++++------
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/ibis/backends/impala/__init__.py b/ibis/backends/impala/__init__.py
index e6d8a3e4fc18..2baa4cbd7e8f 100644
--- a/ibis/backends/impala/__init__.py
+++ b/ibis/backends/impala/__init__.py
@@ -471,6 +471,7 @@ def create_table(
         format="parquet",
         location=None,
         partition=None,
+        tbl_properties: Mapping[str, Any] | None = None,
         like_parquet=None,
     ) -> ir.Table:
         """Create a new table using an Ibis table expression or in-memory data.
@@ -501,6 +502,8 @@ def create_table(
         partition
             Must pass a schema to use this. Cannot partition from an
             expression.
+        tbl_properties
+            Table properties to set on table creation.
         like_parquet
             Can specify instead of a schema

@@ -534,6 +537,7 @@ def create_table(
                     format=format,
                     external=True if location is not None else external,
                     partition=partition,
+                    tbl_properties=tbl_properties,
                     path=location,
                 )
             )
@@ -549,6 +553,7 @@ def create_table(
                     external=external,
                     path=location,
                     partition=partition,
+                    tbl_properties=tbl_properties,
                 )
             )
         return self.table(name, database=database or self.current_database)
diff --git a/ibis/backends/impala/ddl.py b/ibis/backends/impala/ddl.py
index 0f8749af6c7f..39a47ef25de3 100644
--- a/ibis/backends/impala/ddl.py
+++ b/ibis/backends/impala/ddl.py
@@ -105,6 +105,13 @@ def _create_line(self):
     def _location(self):
         return f"LOCATION '{self.path}'" if self.path else None

+    def _tbl_properties(self):
+        return (
+            self.format_tblproperties(self.tbl_properties)
+            if self.tbl_properties
+            else None
+        )
+
     def _storage(self):
         # By the time we're here, we have a valid format
         return f"STORED AS {self.format}"
@@ -152,6 +159,7 @@ def _pieces(self):

         yield self._storage()
         yield self._location()
+        yield self._tbl_properties()


 class AlterTable(ImpalaBase, DDL):
@@ -258,6 +266,7 @@ def __init__(
         can_exist=False,
         path=None,
         partition=None,
+        tbl_properties=None,
     ):
         super().__init__(
             table_name,
@@ -267,6 +276,7 @@ def __init__(
             can_exist=can_exist,
             path=path,
             partition=partition,
+            tbl_properties=tbl_properties,
         )
         self.select = select

@@ -275,6 +285,7 @@ def _pieces(self):
         yield self._partitioned_by()
         yield self._storage()
         yield self._location()
+        yield self._tbl_properties()
         yield "AS"
         yield self.select

diff --git a/ibis/backends/impala/tests/test_partition.py b/ibis/backends/impala/tests/test_partition.py
index f11e82c5d9a3..a44ff8921364 100644
--- a/ibis/backends/impala/tests/test_partition.py
+++ b/ibis/backends/impala/tests/test_partition.py
@@ -5,7 +5,6 @@
 import pandas as pd
 import pandas.testing as tm
 import pytest
-from impala.error import HiveServer2Error

 import ibis
 from ibis import util
@@ -142,10 +141,14 @@ def test_create_partitioned_table_from_expr(con, alltypes, tmp_parted):
     tm.assert_frame_equal(result, expected)


-@pytest.mark.xfail(raises=HiveServer2Error)
 def test_add_drop_partition_no_location(con, temp_table):
     schema = ibis.schema([("foo", "string"), ("year", "int32"), ("month", "int16")])
-    con.create_table(temp_table, schema=schema, partition=["year", "month"])
+    con.create_table(
+        temp_table,
+        schema=schema,
+        partition=["year", "month"],
+        tbl_properties={"transactional": "false"},
+    )
     table = con.table(temp_table)

     part = {"year": 2007, "month": 4}
@@ -159,10 +162,14 @@ def test_add_drop_partition_no_location(con, temp_table):
     assert len(table.partitions()) == 1


-@pytest.mark.xfail(raises=HiveServer2Error)
 def test_add_drop_partition_owned_by_impala(con, temp_table):
     schema = ibis.schema([("foo", "string"), ("year", "int32"), ("month", "int16")])
-    con.create_table(temp_table, schema=schema, partition=["year", "month"])
+    con.create_table(
+        temp_table,
+        schema=schema,
+        partition=["year", "month"],
+        tbl_properties={"transactional": "false"},
+    )

     table = con.table(temp_table)

@@ -181,10 +188,14 @@ def test_add_drop_partition_owned_by_impala(con, temp_table):
     assert len(table.partitions()) == 1


-@pytest.mark.xfail(raises=HiveServer2Error)
 def test_add_drop_partition_hive_bug(con, temp_table):
     schema = ibis.schema([("foo", "string"), ("year", "int32"), ("month", "int16")])
-    con.create_table(temp_table, schema=schema, partition=["year", "month"])
+    con.create_table(
+        temp_table,
+        schema=schema,
+        partition=["year", "month"],
+        tbl_properties={"transactional": "false"},
+    )

     table = con.table(temp_table)
