-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ARROW-16719: [Python] Add path/URI + filesystem handling to parquet.read_metadata #13629
Changes from 13 commits
02a7f09
0abe1df
4a18fe2
25dde91
3efca03
9580c5a
7d3b42f
970ac49
bc3ec3e
67f444e
26199da
c855deb
d5ebecb
4d8e0fa
c4dba6e
105efa9
6b7bab1
5a63f4b
f9e36d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,12 +18,15 @@ | |
import datetime | ||
import decimal | ||
from collections import OrderedDict | ||
import os | ||
|
||
import numpy as np | ||
import pytest | ||
|
||
import pyarrow as pa | ||
from pyarrow.tests.parquet.common import _check_roundtrip, make_sample_file | ||
from pyarrow.filesystem import LocalFileSystem, FileSystem | ||
from pyarrow.tests import util | ||
|
||
try: | ||
import pyarrow.parquet as pq | ||
|
@@ -533,6 +536,44 @@ def test_metadata_exceeds_message_size(): | |
metadata = pq.read_metadata(pa.BufferReader(buf)) | ||
|
||
|
||
def test_metadata_schema_filesystem(tmpdir): | ||
table = pa.table({"a": [1, 2, 3]}) | ||
|
||
# URI writing to local file. | ||
fname = "data.parquet" | ||
file_path = 'file:///' + os.path.join(str(tmpdir), fname) | ||
|
||
pq.write_table(table, file_path) | ||
|
||
# Get expected `metadata` from path. | ||
metadata = pq.read_metadata(tmpdir / fname) | ||
schema = table.schema | ||
|
||
assert pq.read_metadata(file_path).equals(metadata) | ||
assert pq.read_metadata( | ||
fname, filesystem=f'file:///{tmpdir}').equals(metadata) | ||
|
||
assert pq.read_schema(file_path).equals(schema) | ||
assert pq.read_schema(fname, filesystem=f'file:///{tmpdir}').equals(schema) | ||
|
||
with util.change_cwd(tmpdir): | ||
# Pass `filesystem` arg | ||
assert pq.read_metadata( | ||
fname, filesystem=LocalFileSystem()).equals(metadata) | ||
assert pq.read_metadata( | ||
fname, filesystem=LocalFileSystem.get_instance()).equals(metadata) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The new `LocalFileSystem` (from `pyarrow.fs`) has no `get_instance()` method. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks! I was away from keyboard for the week. Will fix it! |
||
|
||
assert pq.read_schema( | ||
fname, filesystem=LocalFileSystem()).equals(schema) | ||
assert pq.read_schema( | ||
fname, filesystem=LocalFileSystem.get_instance()).equals(schema) | ||
|
||
err_msg = ('`filesystem` argument must be a FileSystem' | ||
' instance or a valid file system URI') | ||
with pytest.raises(TypeError, match=err_msg): | ||
pq.read_metadata(fname, filesystem=FileSystem()) | ||
|
||
|
||
def test_metadata_equals(): | ||
table = pa.table({"a": [1, 2, 3]}) | ||
with pa.BufferOutputStream() as out: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for only noticing this late, but we should import from `pyarrow.fs` here to use the new filesystems in the tests (`pyarrow.filesystem` is deprecated).