From 5c5432efa2948e61037e8ee5df4f1f52fb072165 Mon Sep 17 00:00:00 2001 From: Omkar P <45419097+omkar-foss@users.noreply.github.com> Date: Mon, 29 Jul 2024 18:39:11 +0530 Subject: [PATCH] feat(python): add DeltaTable.is_deltatable static method (#2662) This adds a static method is_deltatable(path, opts) to the DeltaTable class, which returns True if delta-rs is able to load a DeltaTable instance from the specified path and False otherwise. Additionally, this also adds documentation of the usage with examples for the DeltaTable.is_deltatable() method. --- docs/usage/loading-table.md | 27 +++++++++++++++++++++++++++ python/deltalake/table.py | 25 ++++++++++++++++++++++--- python/docs/source/usage.rst | 26 ++++++++++++++++++++++++++ python/tests/test_table_read.py | 26 ++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 3 deletions(-) diff --git a/docs/usage/loading-table.md b/docs/usage/loading-table.md index 5c5b8b49c3..70be33b5fc 100644 --- a/docs/usage/loading-table.md +++ b/docs/usage/loading-table.md @@ -59,6 +59,33 @@ For AWS Glue catalog, use AWS environment variables to authenticate. {'id': [5, 7, 9, 5, 6, 7, 8, 9]} ``` +## Verify Table Existence + +You can check whether or not a Delta table exists at a particular path by using +the `DeltaTable.is_deltatable()` method. + +```python +from deltalake import DeltaTable + +table_path = "" +DeltaTable.is_deltatable(table_path) +# True + +invalid_table_path = "" +DeltaTable.is_deltatable(invalid_table_path) +# False + +bucket_table_path = "" +storage_options = { + "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY", + ... 
+} +DeltaTable.is_deltatable(bucket_table_path, storage_options) +# True +``` + + ## Custom Storage Backends While delta always needs its internal storage backend to work and be diff --git a/python/deltalake/table.py b/python/deltalake/table.py index 4c3c540639..1c50b3bfb9 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -44,7 +44,7 @@ from deltalake._internal import create_deltalake as _create_deltalake from deltalake._util import encode_partition_value from deltalake.data_catalog import DataCatalog -from deltalake.exceptions import DeltaProtocolError +from deltalake.exceptions import DeltaProtocolError, TableNotFoundError from deltalake.fs import DeltaStorageHandler from deltalake.schema import Schema as DeltaSchema @@ -186,7 +186,7 @@ def __init__( if compression_level is not None and compression is None: raise ValueError( - """Providing a compression level without the compression type is not possible, + """Providing a compression level without the compression type is not possible, please provide the compression as well.""" ) if isinstance(compression, str): @@ -359,6 +359,25 @@ def from_data_catalog( table_uri=table_uri, version=version, log_buffer_size=log_buffer_size ) + @staticmethod + def is_deltatable( + table_uri: Union[str, Path], storage_options: Optional[Dict[str, str]] = None + ) -> bool: + """ + Returns True if a Delta Table exists at specified path. + Returns False otherwise. + + Args: + table_uri: the path of the DeltaTable + storage_options: a dictionary of the options to use for the + storage backend + """ + try: + dt = DeltaTable(table_uri, storage_options=storage_options) + return dt is not None + except TableNotFoundError: + return False + @classmethod def create( cls, @@ -1816,7 +1835,7 @@ def add_constraint( """ if len(constraints.keys()) > 1: raise ValueError( - """add_constraints is limited to a single constraint addition at once for now. + """add_constraints is limited to a single constraint addition at once for now. 
Please execute add_constraints multiple times with each time a different constraint.""" ) diff --git a/python/docs/source/usage.rst b/python/docs/source/usage.rst index baa26f275c..753c1470ec 100644 --- a/python/docs/source/usage.rst +++ b/python/docs/source/usage.rst @@ -90,6 +90,32 @@ For Databricks Unity Catalog authentication, use environment variables: .. _`azure options`: https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants .. _`gcs options`: https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants +Verify Table Existence +~~~~~~~~~~~~~~~~~~~~~~ + +You can check whether or not a Delta table exists at a particular path by using +the :meth:`DeltaTable.is_deltatable()` method. + +.. code-block:: python + from deltalake import DeltaTable + + table_path = "" + DeltaTable.is_deltatable(table_path) + # True + + invalid_table_path = "" + DeltaTable.is_deltatable(invalid_table_path) + # False + + bucket_table_path = "" + storage_options = { + "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY", + ... 
+ } + DeltaTable.is_deltatable(bucket_table_path, storage_options) + # True + Custom Storage Backends ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/python/tests/test_table_read.py b/python/tests/test_table_read.py index efe2385b6c..49e6974f38 100644 --- a/python/tests/test_table_read.py +++ b/python/tests/test_table_read.py @@ -1,6 +1,7 @@ import os from datetime import date, datetime, timezone from pathlib import Path +from random import random from threading import Barrier, Thread from types import SimpleNamespace from typing import Any, List, Tuple @@ -798,3 +799,28 @@ def test_read_table_last_checkpoint_not_updated(): dt = DeltaTable("../crates/test/tests/data/table_failed_last_checkpoint_update") assert dt.version() == 3 + + +def test_is_deltatable_valid_path(): + table_path = "../crates/test/tests/data/simple_table" + assert DeltaTable.is_deltatable(table_path) + + +def test_is_deltatable_invalid_path(): + # Nonce ensures that the table_path always remains an invalid table path. + nonce = int(random() * 10000) + table_path = "../crates/test/tests/data/simple_table_invalid_%s" % nonce + assert not DeltaTable.is_deltatable(table_path) + + +def test_is_deltatable_with_storage_opts(): + table_path = "../crates/test/tests/data/simple_table" + storage_options = { + "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY", + "AWS_ALLOW_HTTP": "true", + "AWS_S3_ALLOW_UNSAFE_RENAME": "true", + "AWS_S3_LOCKING_PROVIDER": "dynamodb", + "DELTA_DYNAMO_TABLE_NAME": "custom_table_name", + } + assert DeltaTable.is_deltatable(table_path, storage_options=storage_options)