diff --git a/kedro-datasets/kedro_datasets/databricks/_base_table_dataset.py b/kedro-datasets/kedro_datasets/databricks/_base_table_dataset.py
index a31496775..7a850c7a4 100644
--- a/kedro-datasets/kedro_datasets/databricks/_base_table_dataset.py
+++ b/kedro-datasets/kedro_datasets/databricks/_base_table_dataset.py
@@ -11,9 +11,9 @@
 import pandas as pd
 from kedro.io.core import (
     AbstractVersionedDataset,
+    DatasetError,
     Version,
-    VersionNotFoundError,
-    DatasetError
+    VersionNotFoundError
 )
 from pyspark.sql import DataFrame
 from pyspark.sql.readwriter import DataFrameWriter
@@ -84,7 +84,7 @@ def _validate_table(self):
             DatasetError: If the table name does not conform to naming constraints.
         """
         if not re.fullmatch(self._NAMING_REGEX, self.table):
-            raise DatasetError("table does not conform to naming")
+            raise DatasetError("Table does not conform to naming")
 
     def _validate_database(self):
         """Validates database name.
@@ -93,7 +93,7 @@ def _validate_database(self):
             DatasetError: If the dataset name does not conform to naming constraints.
         """
         if not re.fullmatch(self._NAMING_REGEX, self.database):
-            raise DatasetError("database does not conform to naming")
+            raise DatasetError("Database does not conform to naming")
 
     def _validate_catalog(self):
         """Validates catalog name.
@@ -103,7 +103,7 @@ def _validate_catalog(self):
         """
         if self.catalog:
             if not re.fullmatch(self._NAMING_REGEX, self.catalog):
-                raise DatasetError("catalog does not conform to naming")
+                raise DatasetError("Catalog does not conform to naming")
 
     def _validate_write_mode(self):
         """Validates the write mode.
diff --git a/kedro-datasets/kedro_datasets/databricks/external_table_dataset.py b/kedro-datasets/kedro_datasets/databricks/external_table_dataset.py
index 8b6f858b9..453d538d9 100644
--- a/kedro-datasets/kedro_datasets/databricks/external_table_dataset.py
+++ b/kedro-datasets/kedro_datasets/databricks/external_table_dataset.py
@@ -8,6 +8,9 @@
 from typing import Any
 
 import pandas as pd
+from kedro.io.core import (
+    DatasetError
+)
 
 from kedro_datasets.databricks._base_table_dataset import BaseTable, BaseTableDataset
 
@@ -19,6 +22,18 @@
 class ExternalTable(BaseTable):
     """Stores the definition of an external table."""
 
+    def _validate_existence_of_table(self) -> None:
+        """Validates that a location is provided if the table does not exist.
+
+        Raises:
+            DatasetError: If the table does not exist and no location is provided.
+        """
+        if not self.exists() and not self.location:
+            raise DatasetError(
+                "If the external table does not exist, the `location` parameter must be provided. "
+                "This should be a valid path in an external location that has already been created."
+            )
+
 
 class ExternalTableDataset(BaseTableDataset):
     """``ExternalTableDataset`` loads and saves data into external tables in Databricks.