From 26a426f325256e260a15521d5097efffd2f1ceb1 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Wed, 7 Dec 2022 14:41:40 +0100 Subject: [PATCH] ARROW-18123: [Python] Fix writing files with multi-byte characters in file name (#14764) Will close [ARROW-18123](https://issues.apache.org/jira/browse/ARROW-18123) Authored-by: Miles Granger Signed-off-by: Joris Van den Bossche --- python/pyarrow/fs.py | 3 ++- python/pyarrow/tests/parquet/test_basic.py | 15 ++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index c6f44ccbb5997..6633d95b5b2b5 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -187,7 +187,8 @@ def _resolve_filesystem_and_path( # neither an URI nor a locally existing path, so assume that # local path was given and propagate a nicer file not found error # instead of a more confusing scheme parsing error - if "empty scheme" not in str(e): + if "empty scheme" not in str(e) \ + and "Cannot parse URI" not in str(e): raise else: path = filesystem.normalize_path(path) diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index 05321a937b540..004bbd8d77f35 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -289,21 +289,26 @@ def test_fspath(tempdir, use_legacy_dataset): @pytest.mark.parametrize("filesystem", [ None, fs.LocalFileSystem(), LocalFileSystem._get_instance() ]) -def test_relative_paths(tempdir, use_legacy_dataset, filesystem): +@pytest.mark.parametrize("name", ("data.parquet", "例.parquet")) +def test_relative_paths(tempdir, use_legacy_dataset, filesystem, name): # reading and writing from relative paths table = pa.table({"a": [1, 2, 3]}) + path = tempdir / name # reading - pq.write_table(table, str(tempdir / "data.parquet")) + pq.write_table(table, str(path)) with util.change_cwd(tempdir): - result = pq.read_table("data.parquet", filesystem=filesystem, + result = 
pq.read_table(name, filesystem=filesystem, use_legacy_dataset=use_legacy_dataset) assert result.equals(table) + path.unlink() + assert not path.exists() + # writing with util.change_cwd(tempdir): - pq.write_table(table, "data2.parquet", filesystem=filesystem) - result = pq.read_table(tempdir / "data2.parquet") + pq.write_table(table, name, filesystem=filesystem) + result = pq.read_table(path) assert result.equals(table)