From 92fb8299cb09463d032a4e76232fbc71d8188124 Mon Sep 17 00:00:00 2001 From: Omkar P <45419097+omkar-foss@users.noreply.github.com> Date: Wed, 31 Jul 2024 01:16:04 +0530 Subject: [PATCH 1/2] feat(python): add DeltaTable.is_deltatable static method (#2662) This adds a static method `is_deltatable(path, opts)` to the `DeltaTable` Python class, which returns `True` if able to locate a delta table at specified `path` and `False` otherwise. It does so by reusing the Rust internal `is_delta_table_location()` via the `DeltaTableBuilder`. Additionally, this also adds documentation of the usage with examples for the `DeltaTable.is_deltatable()` method. --- crates/core/src/table/builder.rs | 6 ++++++ docs/usage/loading-table.md | 27 +++++++++++++++++++++++++++ python/deltalake/_internal.pyi | 4 ++++ python/deltalake/table.py | 15 +++++++++++++++ python/docs/source/usage.rst | 26 ++++++++++++++++++++++++++ python/src/lib.rs | 20 ++++++++++++++++++++ python/tests/test_table_read.py | 26 ++++++++++++++++++++++++++ 7 files changed, 124 insertions(+) diff --git a/crates/core/src/table/builder.rs b/crates/core/src/table/builder.rs index b421a6199b..22f795e9e9 100644 --- a/crates/core/src/table/builder.rs +++ b/crates/core/src/table/builder.rs @@ -330,6 +330,12 @@ impl DeltaTableBuilder { } Ok(table) } + + /// Check if the [`DeltaTable`] exists + pub async fn verify_deltatable_existence(self) -> DeltaResult<bool> { + let table = self.build()?; + table.log_store.is_delta_table_location().await + } } enum UriType { diff --git a/docs/usage/loading-table.md b/docs/usage/loading-table.md index 5c5b8b49c3..70be33b5fc 100644 --- a/docs/usage/loading-table.md +++ b/docs/usage/loading-table.md @@ -59,6 +59,33 @@ For AWS Glue catalog, use AWS environment variables to authenticate. {'id': [5, 7, 9, 5, 6, 7, 8, 9]} ``` +## Verify Table Existence + +You can check whether or not a Delta table exists at a particular path by using +the `DeltaTable.is_deltatable()` method. 
+ +```python +from deltalake import DeltaTable + +table_path = "<path/to/valid/table>" +DeltaTable.is_deltatable(table_path) +# True + +invalid_table_path = "<path/to/nonexistent/table>" +DeltaTable.is_deltatable(invalid_table_path) +# False + +bucket_table_path = "<path/to/valid/table/in/bucket>" +storage_options = { + "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY", + ... +} +DeltaTable.is_deltatable(bucket_table_path, storage_options) +# True +``` + + ## Custom Storage Backends While delta always needs its internal storage backend to work and be diff --git a/python/deltalake/_internal.pyi b/python/deltalake/_internal.pyi index 3bfe017eb0..23ed7e7ffa 100644 --- a/python/deltalake/_internal.pyi +++ b/python/deltalake/_internal.pyi @@ -34,6 +34,10 @@ class RawDeltaTable: data_catalog_id: Optional[str] = None, catalog_options: Optional[Dict[str, str]] = None, ) -> str: ... + @staticmethod + def is_deltatable( + table_uri: str, storage_options: Optional[Dict[str, str]] + ) -> bool: ... def table_uri(self) -> str: ... def version(self) -> int: ... def get_latest_version(self) -> int: ... diff --git a/python/deltalake/table.py b/python/deltalake/table.py index 4c3c540639..9aa48ef6e2 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -359,6 +359,21 @@ def from_data_catalog( table_uri=table_uri, version=version, log_buffer_size=log_buffer_size ) + @staticmethod + def is_deltatable( + table_uri: str, storage_options: Optional[Dict[str, str]] = None + ) -> bool: + """ + Returns True if a Delta Table exists at specified path. + Returns False otherwise. 
+ + Args: + table_uri: the path of the DeltaTable + storage_options: a dictionary of the options to use for the + storage backend + """ + return RawDeltaTable.is_deltatable(table_uri, storage_options) + @classmethod def create( cls, diff --git a/python/docs/source/usage.rst b/python/docs/source/usage.rst index baa26f275c..753c1470ec 100644 --- a/python/docs/source/usage.rst +++ b/python/docs/source/usage.rst @@ -90,6 +90,32 @@ For Databricks Unity Catalog authentication, use environment variables: .. _`azure options`: https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants .. _`gcs options`: https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants +Verify Table Existence +~~~~~~~~~~~~~~~~~~~~~~ + +You can check whether or not a Delta table exists at a particular path by using +the :meth:`DeltaTable.is_deltatable()` method. + +.. code-block:: python + from deltalake import DeltaTable + + table_path = "<path/to/valid/table>" + DeltaTable.is_deltatable(table_path) + # True + + invalid_table_path = "<path/to/nonexistent/table>" + DeltaTable.is_deltatable(invalid_table_path) + # False + + bucket_table_path = "<path/to/valid/table/in/bucket>" + storage_options = { + "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY", + ... 
+ } + DeltaTable.is_deltatable(bucket_table_path, storage_options) + # True + Custom Storage Backends ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/python/src/lib.rs b/python/src/lib.rs index 6784ae8735..3ec9b285f7 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -136,6 +136,26 @@ impl RawDeltaTable { }) } + #[pyo3(signature = (table_uri, storage_options = None))] + #[staticmethod] + pub fn is_deltatable( + table_uri: &str, + storage_options: Option<HashMap<String, String>>, + ) -> PyResult<bool> { + let mut builder = deltalake::DeltaTableBuilder::from_uri(table_uri); + if let Some(storage_options) = storage_options { + builder = builder.with_storage_options(storage_options) + } + let res = rt() + .block_on(builder.verify_deltatable_existence()) + .map_err(PythonError::from); + match res { + Ok(true) => Ok(true), + Ok(false) => Ok(false), + Err(err) => Err(err)?, + } + } + pub fn table_uri(&self) -> PyResult<String> { Ok(self._table.table_uri()) } diff --git a/python/tests/test_table_read.py b/python/tests/test_table_read.py index efe2385b6c..49e6974f38 100644 --- a/python/tests/test_table_read.py +++ b/python/tests/test_table_read.py @@ -1,6 +1,7 @@ import os from datetime import date, datetime, timezone from pathlib import Path +from random import random from threading import Barrier, Thread from types import SimpleNamespace from typing import Any, List, Tuple @@ -798,3 +799,28 @@ def test_read_table_last_checkpoint_not_updated(): dt = DeltaTable("../crates/test/tests/data/table_failed_last_checkpoint_update") assert dt.version() == 3 + + +def test_is_deltatable_valid_path(): + table_path = "../crates/test/tests/data/simple_table" + assert DeltaTable.is_deltatable(table_path) + + +def test_is_deltatable_invalid_path(): + # Nonce ensures that the table_path always remains an invalid table path. 
+ nonce = int(random() * 10000) + table_path = "../crates/test/tests/data/simple_table_invalid_%s" % nonce + assert not DeltaTable.is_deltatable(table_path) + + +def test_is_deltatable_with_storage_opts(): + table_path = "../crates/test/tests/data/simple_table" + storage_options = { + "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY", + "AWS_ALLOW_HTTP": "true", + "AWS_S3_ALLOW_UNSAFE_RENAME": "true", + "AWS_S3_LOCKING_PROVIDER": "dynamodb", + "DELTA_DYNAMO_TABLE_NAME": "custom_table_name", + } + assert DeltaTable.is_deltatable(table_path, storage_options=storage_options) From bf2462ea7e53e77157e513f5d0421b5893548910 Mon Sep 17 00:00:00 2001 From: Omkar P <45419097+omkar-foss@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:53:40 +0530 Subject: [PATCH 2/2] Move fn to DeltaTable, replace match with Ok --- crates/core/src/table/builder.rs | 6 ------ crates/core/src/table/mod.rs | 5 +++++ python/src/lib.rs | 16 ++++++++-------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/crates/core/src/table/builder.rs b/crates/core/src/table/builder.rs index 22f795e9e9..b421a6199b 100644 --- a/crates/core/src/table/builder.rs +++ b/crates/core/src/table/builder.rs @@ -330,12 +330,6 @@ impl DeltaTableBuilder { } Ok(table) } - - /// Check if the [`DeltaTable`] exists - pub async fn verify_deltatable_existence(self) -> DeltaResult<bool> { - let table = self.build()?; - table.log_store.is_delta_table_location().await - } } enum UriType { diff --git a/crates/core/src/table/mod.rs b/crates/core/src/table/mod.rs index 969f470bfb..10ca7bd770 100644 --- a/crates/core/src/table/mod.rs +++ b/crates/core/src/table/mod.rs @@ -287,6 +287,11 @@ impl DeltaTable { self.log_store.object_store() } + /// Check if the [`DeltaTable`] exists + pub async fn verify_deltatable_existence(&self) -> DeltaResult<bool> { + self.log_store.is_delta_table_location().await + } + /// The URI of the underlying data pub fn table_uri(&self) -> String { 
self.log_store.root_uri() diff --git a/python/src/lib.rs b/python/src/lib.rs index 3ec9b285f7..c90d116c37 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -146,14 +146,14 @@ impl RawDeltaTable { if let Some(storage_options) = storage_options { builder = builder.with_storage_options(storage_options) } - let res = rt() - .block_on(builder.verify_deltatable_existence()) - .map_err(PythonError::from); - match res { - Ok(true) => Ok(true), - Ok(false) => Ok(false), - Err(err) => Err(err)?, - } + Ok(rt() + .block_on(async { + match builder.build() { + Ok(table) => table.verify_deltatable_existence().await, + Err(err) => Err(err), + } + }) + .map_err(PythonError::from)?) } pub fn table_uri(&self) -> PyResult<String> {