diff --git a/crates/core/src/table/mod.rs b/crates/core/src/table/mod.rs index 969f470bfb..10ca7bd770 100644 --- a/crates/core/src/table/mod.rs +++ b/crates/core/src/table/mod.rs @@ -287,6 +287,11 @@ impl DeltaTable { self.log_store.object_store() } + /// Check if the [`DeltaTable`] exists + pub async fn verify_deltatable_existence(&self) -> DeltaResult<bool> { + self.log_store.is_delta_table_location().await + } + /// The URI of the underlying data pub fn table_uri(&self) -> String { self.log_store.root_uri() diff --git a/docs/usage/loading-table.md b/docs/usage/loading-table.md index 5c5b8b49c3..70be33b5fc 100644 --- a/docs/usage/loading-table.md +++ b/docs/usage/loading-table.md @@ -59,6 +59,33 @@ For AWS Glue catalog, use AWS environment variables to authenticate. {'id': [5, 7, 9, 5, 6, 7, 8, 9]} ``` +## Verify Table Existence + +You can check whether or not a Delta table exists at a particular path by using +the `DeltaTable.is_deltatable()` method. + +```python +from deltalake import DeltaTable + +table_path = "<path/to/valid/table>" +DeltaTable.is_deltatable(table_path) +# True + +invalid_table_path = "<path/to/nonexistent/table>" +DeltaTable.is_deltatable(invalid_table_path) +# False + +bucket_table_path = "<path/to/valid/table/in/bucket>" +storage_options = { + "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY", + ... +} +DeltaTable.is_deltatable(bucket_table_path, storage_options) +# True +``` + + ## Custom Storage Backends While delta always needs its internal storage backend to work and be diff --git a/python/deltalake/_internal.pyi b/python/deltalake/_internal.pyi index 3bfe017eb0..23ed7e7ffa 100644 --- a/python/deltalake/_internal.pyi +++ b/python/deltalake/_internal.pyi @@ -34,6 +34,10 @@ class RawDeltaTable: data_catalog_id: Optional[str] = None, catalog_options: Optional[Dict[str, str]] = None, ) -> str: ... + @staticmethod + def is_deltatable( + table_uri: str, storage_options: Optional[Dict[str, str]] + ) -> bool: ... def table_uri(self) -> str: ... def version(self) -> int: ...
def get_latest_version(self) -> int: ... diff --git a/python/deltalake/table.py b/python/deltalake/table.py index 4c3c540639..9aa48ef6e2 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -359,6 +359,21 @@ def from_data_catalog( table_uri=table_uri, version=version, log_buffer_size=log_buffer_size ) + @staticmethod + def is_deltatable( + table_uri: str, storage_options: Optional[Dict[str, str]] = None + ) -> bool: + """ + Returns True if a Delta Table exists at the specified path. + Returns False otherwise. + + Args: + table_uri: the path of the DeltaTable + storage_options: a dictionary of the options to use for the + storage backend + """ + return RawDeltaTable.is_deltatable(table_uri, storage_options) + @classmethod def create( cls, diff --git a/python/docs/source/usage.rst b/python/docs/source/usage.rst index baa26f275c..753c1470ec 100644 --- a/python/docs/source/usage.rst +++ b/python/docs/source/usage.rst @@ -90,6 +90,33 @@ For Databricks Unity Catalog authentication, use environment variables: .. _`azure options`: https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants .. _`gcs options`: https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants +Verify Table Existence +~~~~~~~~~~~~~~~~~~~~~~ + +You can check whether or not a Delta table exists at a particular path by using +the :meth:`DeltaTable.is_deltatable()` method. + +.. code-block:: python + + from deltalake import DeltaTable + + table_path = "<path/to/valid/table>" + DeltaTable.is_deltatable(table_path) + # True + + invalid_table_path = "<path/to/nonexistent/table>" + DeltaTable.is_deltatable(invalid_table_path) + # False + + bucket_table_path = "<path/to/valid/table/in/bucket>" + storage_options = { + "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY", + ...
+ } + DeltaTable.is_deltatable(bucket_table_path, storage_options) + # True + Custom Storage Backends ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/python/src/lib.rs b/python/src/lib.rs index 6784ae8735..c90d116c37 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -136,6 +136,26 @@ impl RawDeltaTable { }) } + #[pyo3(signature = (table_uri, storage_options = None))] + #[staticmethod] + pub fn is_deltatable( + table_uri: &str, + storage_options: Option<HashMap<String, String>>, + ) -> PyResult<bool> { + let mut builder = deltalake::DeltaTableBuilder::from_uri(table_uri); + if let Some(storage_options) = storage_options { + builder = builder.with_storage_options(storage_options) + } + Ok(rt() + .block_on(async { + match builder.build() { + Ok(table) => table.verify_deltatable_existence().await, + Err(err) => Err(err), + } + }) + .map_err(PythonError::from)?) + } + pub fn table_uri(&self) -> PyResult<String> { Ok(self._table.table_uri()) } diff --git a/python/tests/test_table_read.py b/python/tests/test_table_read.py index efe2385b6c..49e6974f38 100644 --- a/python/tests/test_table_read.py +++ b/python/tests/test_table_read.py @@ -1,6 +1,7 @@ import os from datetime import date, datetime, timezone from pathlib import Path +from random import random from threading import Barrier, Thread from types import SimpleNamespace from typing import Any, List, Tuple @@ -798,3 +799,28 @@ def test_read_table_last_checkpoint_not_updated(): dt = DeltaTable("../crates/test/tests/data/table_failed_last_checkpoint_update") assert dt.version() == 3 + + +def test_is_deltatable_valid_path(): + table_path = "../crates/test/tests/data/simple_table" + assert DeltaTable.is_deltatable(table_path) + + +def test_is_deltatable_invalid_path(): + # Nonce ensures that the table_path always remains an invalid table path.
+ nonce = int(random() * 10000) + table_path = "../crates/test/tests/data/simple_table_invalid_%s" % nonce + assert not DeltaTable.is_deltatable(table_path) + + +def test_is_deltatable_with_storage_opts(): + table_path = "../crates/test/tests/data/simple_table" + storage_options = { + "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY", + "AWS_ALLOW_HTTP": "true", + "AWS_S3_ALLOW_UNSAFE_RENAME": "true", + "AWS_S3_LOCKING_PROVIDER": "dynamodb", + "DELTA_DYNAMO_TABLE_NAME": "custom_table_name", + } + assert DeltaTable.is_deltatable(table_path, storage_options=storage_options)