Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: cover more fsspec backends #1015

Merged
merged 31 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
2869480
use paramiko instead of sshfs
lobis Nov 2, 2023
b7ca2c1
use specified port
lobis Nov 2, 2023
631b4bd
test default handler behaviour
lobis Nov 2, 2023
f8e298f
default to fsspec instead of error if scheme not found
lobis Nov 2, 2023
84c1b94
attempt to close socket
lobis Nov 2, 2023
e56e337
fix ci
lobis Nov 2, 2023
075434d
Revert "fix ci"
lobis Nov 2, 2023
3786573
broader exception
lobis Nov 2, 2023
108ff79
also handle socket exception
lobis Nov 2, 2023
c49aedb
get user robust
lobis Nov 2, 2023
35f5d8c
enable github test with skip if api limit is hit (so we sometimes tes…
lobis Nov 2, 2023
d8ca663
add memory filesystem test
lobis Nov 2, 2023
c38fb86
add zip and tar tests
lobis Nov 2, 2023
24b350a
Merge branch 'fsspec-ssh-paramiko' into fsspec-tests
lobis Nov 3, 2023
fb3f99b
fix memory test
lobis Nov 3, 2023
242dd16
Merge branch 'main' into fsspec-tests
lobis Nov 3, 2023
c31c888
zip/tar tests
lobis Nov 3, 2023
fea35b8
Merge remote-tracking branch 'origin/fsspec-tests' into fsspec-tests
lobis Nov 3, 2023
9d55aa5
rename to reading
lobis Nov 7, 2023
ae57e81
check if test works
lobis Nov 7, 2023
51902e0
missing import
lobis Nov 7, 2023
3d0a37a
remove parent dirs
lobis Nov 7, 2023
bd93a7f
fix zip tar tests
lobis Nov 7, 2023
2520553
skip github if api limits hit
lobis Nov 7, 2023
cb994a6
attempt to fix windows paths
lobis Nov 7, 2023
469c3cb
use more complex uri with object in zip test
lobis Nov 7, 2023
e472a37
debug
lobis Nov 7, 2023
8a4cc84
add new test case to object url split
lobis Nov 7, 2023
b1e1f5d
add new failing test case: TODO make it work
lobis Nov 7, 2023
2cddc78
revert debug changes
lobis Nov 7, 2023
52c8db6
working in new test case
lobis Nov 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions src/uproot/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,17 +311,26 @@ def file_object_path_split(path: str) -> tuple[str, str | None]:
path: str = regularize_path(path)
path = path.strip()

if "://" not in path:
# assume it's a local file path
def _split_path(path: str) -> list[str]:
parts = path.split(":")
if pathlib.PureWindowsPath(path).drive:
# Windows absolute path
assert len(parts) >= 2, f"could not split object from windows path {path}"
parts = [parts[0] + ":" + parts[1]] + parts[2:]
return parts

if "://" not in path:
# assume it's a local file path
parts = _split_path(path)
elif _uri_scheme.match(path):
# if not a local path, attempt to match a URI scheme
parsed_url = urlparse(path)
parts = parsed_url.path.split(":")
parsed_url_path = parsed_url.path
if parsed_url_path.startswith("//"):
# This can be a leftover from url chaining in fsspec
# TODO: replace this with str.removeprefix once Python 3.8 is dropped
parsed_url_path = parsed_url_path[2:]
parts = _split_path(parsed_url_path)
else:
# invalid scheme
scheme = path.split("://")[0]
Expand Down
84 changes: 74 additions & 10 deletions tests/test_0692_fsspec.py → tests/test_0692_fsspec_reading.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/main/LICENSE

import pytest
import requests

import uproot
import uproot.source.fsspec

import skhep_testdata
import queue
import subprocess
import fsspec
import os


def test_open_fsspec_http(server):
Expand All @@ -22,16 +25,18 @@ def test_open_fsspec_http(server):


@pytest.mark.network
@pytest.mark.skip(
reason="skipping due to GitHub API rate limitations - this should work fine - see https://github.com/scikit-hep/uproot5/pull/973 for details"
)
def test_open_fsspec_github():
with uproot.open(
"github://scikit-hep:scikit-hep-testdata@v0.4.33/src/skhep_testdata/data/uproot-issue121.root",
handler=uproot.source.fsspec.FSSpecSource,
) as f:
data = f["Events/MET_pt"].array(library="np")
assert len(data) == 40
try:
with uproot.open(
"github://scikit-hep:scikit-hep-testdata@v0.4.33/src/skhep_testdata/data/uproot-issue121.root"
) as f:
data = f["Events/MET_pt"].array(library="np")
assert len(data) == 40
except requests.exceptions.HTTPError as e:
if e.response.status_code == 403:
pytest.skip("GitHub API limit has been reached")
else:
raise e


def test_open_fsspec_local():
Expand Down Expand Up @@ -140,3 +145,62 @@ def test_fsspec_chunks(server):

chunk_data_sum = {sum(chunk.raw_data) for chunk in chunks}
assert chunk_data_sum == {3967, 413, 10985}, "Chunk data does not match"


def test_fsspec_memory():
    """Read a ROOT file back through fsspec's in-memory filesystem.

    The bytes of the ``uproot-issue121.root`` test file are copied into a
    freshly cleared ``memory`` filesystem, then opened through a
    ``memory://`` URL; the ``Events/MET_pt`` branch must hold 40 entries.
    """
    memory_path = "skhep_testdata/uproot-issue121.root"

    # Load the raw bytes of the reference file from local disk.
    with open(skhep_testdata.data_path("uproot-issue121.root"), "rb") as source:
        payload = source.read()

    # Empty the in-memory store first, then create and fill the target file.
    memory_fs = fsspec.filesystem(protocol="memory")
    memory_fs.store.clear()
    memory_fs.touch(memory_path)
    with memory_fs.open(memory_path, "wb") as sink:
        sink.write(payload)

    # Open the copy through its memory:// URL and check the branch length.
    with uproot.open(f"memory://{memory_path}") as root_file:
        met_pt = root_file["Events/MET_pt"].array(library="np")
        assert len(met_pt) == 40


def test_fsspec_tar(tmp_path):
    """Read a ROOT file stored as a member of a tar archive.

    The ``uproot-issue121.root`` test file is packed into a tar archive
    under ``tmp_path`` and opened through a chained
    ``tar://<member>::file://<archive>`` URL; the ``Events/MET_pt`` branch
    must hold 40 entries.
    """
    import io
    import tarfile

    filename = "uproot-issue121.root"
    with open(skhep_testdata.data_path("uproot-issue121.root"), "rb") as source:
        payload = source.read()

    # Describe the archive member up front; its size must match the payload.
    archive_path = os.path.join(tmp_path, filename + ".tar")
    member = tarfile.TarInfo(name=filename)
    member.size = len(payload)
    with tarfile.open(archive_path, mode="w") as archive:
        archive.addfile(member, fileobj=io.BytesIO(payload))

    # Open the member through the chained URL and check the branch length.
    chained_url = f"tar://{filename}::file://{archive_path}"
    with uproot.open(chained_url) as root_file:
        met_pt = root_file["Events/MET_pt"].array(library="np")
        assert len(met_pt) == 40


def test_fsspec_zip(tmp_path):
    """Read a branch from a ROOT file stored inside a zip archive.

    The ``uproot-issue121.root`` test file is written into a zip archive
    under ``tmp_path``. The URL passed to ``uproot.open`` chains
    ``zip://<member>::file://<archive>`` and appends ``:Events/MET_pt``,
    so the object yielded is used directly as the branch; it must hold
    40 entries.
    """
    import zipfile

    filename = "uproot-issue121.root"
    with open(skhep_testdata.data_path("uproot-issue121.root"), "rb") as source:
        payload = source.read()

    # Store the ROOT file as the only member of a new zip archive.
    archive_path = os.path.join(tmp_path, filename + ".zip")
    with zipfile.ZipFile(archive_path, mode="w") as archive:
        archive.writestr(filename, data=payload)

    # The trailing ":Events/MET_pt" selects the object inside the ROOT file.
    chained_url = f"zip://{filename}::file://{archive_path}:Events/MET_pt"
    with uproot.open(chained_url) as branch:
        met_pt = branch.array(library="np")
        assert len(met_pt) == 40
14 changes: 14 additions & 0 deletions tests/test_0976_path_object_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@
None,
),
),
(
"zip://uproot-issue121.root::file:///tmp/pytest-of-runner/pytest-0/test_fsspec_zip0/uproot-issue121.root.zip:Events/MET_pt",
(
"zip://uproot-issue121.root::file:///tmp/pytest-of-runner/pytest-0/test_fsspec_zip0/uproot-issue121.root.zip",
"Events/MET_pt",
),
),
(
r"zip://uproot-issue121.root::file://C:\Users\runneradmin\AppData\Local\Temp\pytest-of-runneradmin\pytest-0\test_fsspec_zip0\uproot-issue121.root.zip:Events/MET_pt",
(
r"zip://uproot-issue121.root::file://C:\Users\runneradmin\AppData\Local\Temp\pytest-of-runneradmin\pytest-0\test_fsspec_zip0\uproot-issue121.root.zip",
"Events/MET_pt",
),
),
],
)
def test_url_split(input_value, expected_output):
Expand Down
Loading