From 8f938edd92319728f27750453a8f1c5558f18304 Mon Sep 17 00:00:00 2001 From: ion-elgreco <15728914+ion-elgreco@users.noreply.github.com> Date: Sun, 28 Jan 2024 14:01:21 +0100 Subject: [PATCH] drop custom filesystem --- python/deltalake/writer.py | 12 +----------- python/tests/test_benchmark.py | 13 ------------- python/tests/test_fs.py | 14 -------------- 3 files changed, 1 insertion(+), 38 deletions(-) diff --git a/python/deltalake/writer.py b/python/deltalake/writer.py index 173153c97a..45a35b64b6 100644 --- a/python/deltalake/writer.py +++ b/python/deltalake/writer.py @@ -84,7 +84,6 @@ def write_deltalake( *, schema: Optional[Union[pa.Schema, DeltaSchema]] = ..., partition_by: Optional[Union[List[str], str]] = ..., - filesystem: Optional[pa_fs.FileSystem] = None, mode: Literal["error", "append", "overwrite", "ignore"] = ..., file_options: Optional[ds.ParquetFileWriteOptions] = ..., max_partitions: Optional[int] = ..., @@ -147,7 +146,6 @@ def write_deltalake( *, schema: Optional[Union[pa.Schema, DeltaSchema]] = None, partition_by: Optional[Union[List[str], str]] = None, - filesystem: Optional[pa_fs.FileSystem] = None, mode: Literal["error", "append", "overwrite", "ignore"] = "error", file_options: Optional[ds.ParquetFileWriteOptions] = None, max_partitions: Optional[int] = None, @@ -185,9 +183,6 @@ def write_deltalake( schema: Optional schema to write. partition_by: List of columns to partition the table by. Only required when creating a new table. - filesystem: Optional filesystem to pass to PyArrow. If not provided will - be inferred from uri. The file system has to be rooted in the table root. - Use the pyarrow.fs.SubTreeFileSystem, to adopt the root of pyarrow file systems. mode: How to handle existing data. Default is to error if table already exists. If 'append', will add new data. If 'overwrite', will replace table with new data. @@ -216,7 +211,7 @@ def write_deltalake( description: User-provided description for this table. configuration: A map containing configuration options for the metadata action. overwrite_schema: If True, allows updating the schema of the table. - storage_options: options passed to the native delta filesystem. Unused if 'filesystem' is defined. + storage_options: options passed to the native delta filesystem. predicate: When using `Overwrite` mode, replace data that matches a predicate. Only used in rust engine. partition_filters: the partition filters that will be used for partition overwrite. Only used in pyarrow engine. large_dtypes: If True, the data schema is kept in large_dtypes, has no effect on pandas dataframe input. @@ -295,11 +290,6 @@ def write_deltalake( elif engine == "pyarrow": # We need to write against the latest table version - if filesystem is not None: - raise NotImplementedError( - "Filesystem support is not yet implemented. #570" - ) - filesystem = pa_fs.PyFileSystem(DeltaStorageHandler(table_uri, storage_options)) if table: # already exists diff --git a/python/tests/test_benchmark.py b/python/tests/test_benchmark.py index d7299ca684..bfcdd3f310 100644 --- a/python/tests/test_benchmark.py +++ b/python/tests/test_benchmark.py @@ -35,19 +35,6 @@ def test_benchmark_write(benchmark, sample_table, tmp_path, engine): assert dt.to_pyarrow_table().sort_by("i") == sample_table -# TODO: support wrapping PyArrow filesystems -# @pytest.mark.benchmark( -# group="write" -# ) -# def test_benchmark_write_pyarrow(benchmark, sample_table, tmp_path): -# fs = pa_fs.SubTreeFileSystem(str(tmp_path), pa_fs.LocalFileSystem()) - -# benchmark(write_deltalake, str(tmp_path), sample_table, mode="overwrite", filesystem=fs) - -# dt = DeltaTable(str(tmp_path)) -# assert dt.to_pyarrow_table(filesystem=fs).sort_by("i") == sample_table - - @pytest.mark.benchmark(group="read") def test_benchmark_read(benchmark, sample_table, tmp_path): write_deltalake(str(tmp_path), sample_table) diff --git a/python/tests/test_fs.py b/python/tests/test_fs.py index 8ece647d4c..b4973572e9 100644 --- a/python/tests/test_fs.py +++ b/python/tests/test_fs.py @@ -159,20 +159,6 @@ def test_roundtrip_s3_direct(s3_localstack_creds, sample_data: pa.Table): table = dt.to_pyarrow_table() assert table == sample_data - # TODO: Refactor so DeltaTable can be instantiated with a storage backend - # Can provide S3Filesystem from pyarrow - # pa_s3fs = S3FileSystem( - # access_key=s3_localstack_creds["AWS_ACCESS_KEY_ID"], - # secret_key=s3_localstack_creds["AWS_SECRET_ACCESS_KEY"], - # endpoint_override=s3_localstack_creds["AWS_ENDPOINT_URL"], - # scheme="http", - # ) - - # write_deltalake(table_path, sample_data, filesystem=pa_s3fs, mode="overwrite") - # assert dt.version() == 2 - # table = dt.to_pyarrow_table() - # assert table == sample_data - @pytest.mark.azure @pytest.mark.integration