From ec610127b058dd094fb7058fdaae078f701c383f Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio <35803280+lobis@users.noreply.github.com> Date: Tue, 14 Nov 2023 16:23:39 -0600 Subject: [PATCH 1/6] feat: add fsspec as required dependency (#1021) * fsspec requirements * simplify fsspec import * use loop property * correctly create schemes list --- pyproject.toml | 2 +- src/uproot/_util.py | 11 ++--------- src/uproot/source/fsspec.py | 7 +++---- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 23c34eb93..ecb0389a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "awkward>=2.4.6", "importlib-metadata;python_version<\"3.8\"", "numpy", + "fsspec", "packaging", "typing_extensions>=4.1.0; python_version < \"3.11\"" ] @@ -61,7 +62,6 @@ test = [ "lz4", "minio", "aiohttp; python_version<\"3.12\"", # asyncio not available - "fsspec", "fsspec-xrootd", "s3fs; python_version<\"3.12\"", # asyncio not available "paramiko", diff --git a/src/uproot/_util.py b/src/uproot/_util.py index 61c7f1f51..25c34d0bb 100644 --- a/src/uproot/_util.py +++ b/src/uproot/_util.py @@ -19,6 +19,7 @@ from collections.abc import Iterable from urllib.parse import unquote, urlparse +import fsspec import numpy import packaging.version @@ -290,15 +291,7 @@ def regularize_path(path): _windows_absolute_path_pattern_slash = re.compile(r"^[\\/][A-Za-z]:[\\/]") _remote_schemes = ["root", "s3", "http", "https"] -_schemes = ["file", *_remote_schemes] - -try: - # TODO: remove this try/except when fsspec becomes a required dependency - import fsspec - - _schemes = list({*_schemes, *fsspec.available_protocols()}) -except ImportError: - pass +_schemes = list({*_remote_schemes, *fsspec.available_protocols()}) _uri_scheme = re.compile("^(" + "|".join([re.escape(x) for x in _schemes]) + ")://") _uri_scheme_chain = re.compile( diff --git a/src/uproot/source/fsspec.py b/src/uproot/source/fsspec.py index 2cb874575..e52c5b9b1 100644 --- a/src/uproot/source/fsspec.py +++ b/src/uproot/source/fsspec.py @@ -6,6 +6,9 @@ import concurrent.futures import queue +import fsspec +import fsspec.asyn + import uproot import uproot.source.chunk import uproot.source.futures @@ -24,8 +27,6 @@ class FSSpecSource(uproot.source.chunk.Source): """ def __init__(self, file_path: str, **options): - import fsspec.core - options = dict(uproot.reading.open.defaults, **options) storage_options = { k: v @@ -191,8 +192,6 @@ def closed(self) -> bool: class FSSpecLoopExecutor(uproot.source.futures.Executor): @property def loop(self) -> asyncio.AbstractEventLoop: - import fsspec.asyn - return fsspec.asyn.get_loop() def submit(self, coroutine) -> concurrent.futures.Future: From 91ef6cc0bc696f47a169c856615e3620a1b78445 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio <35803280+lobis@users.noreply.github.com> Date: Wed, 15 Nov 2023 16:33:41 -0600 Subject: [PATCH 2/6] feat: set fsspec as default source (#1023) * feat: add fsspec as required dependency (#1021) * fsspec requirements * simplify fsspec import * use loop property * correctly create schemes list * remove deprecated handlers from docs * simplify source selection * return object source * pickle executor * rename test * test more handlers * option to check writeable file-like object * rename test * explicitly set handler * fix s3 source * rename test * Revert "fix s3 source" This reverts commit e76fdbb1e3011a2ad3bd9ea8cb4634b43caa6af6. * sesparate PR for s3 fix (https://github.com/scikit-hep/uproot5/pull/1024) * strip file:// * rename test * rename tests * add aiohttp skip * attempt to parse windows paths * test ci * Revert "test ci" This reverts commit 4c1c8a5bdc3cbbab540c125807b1139a51e9c455. * rename test * remove fsspec from test * remove *_handler options * update defaults * do not override default s3 * do not use fsspec for multiprocessing * rename test * fix not selecting object source * missing import * normalize doc * remove helper * never return None as source * remove unnecessary xrootd source default override since fsspec is default now * rename test * add empty class to pass old pickle test --- src/uproot/__init__.py | 2 +- src/uproot/_dask.py | 5 - src/uproot/_util.py | 161 +++--------------- src/uproot/behaviors/TBranch.py | 10 -- src/uproot/reading.py | 71 +++----- src/uproot/source/fsspec.py | 5 +- ...ations.py => test_0016_interpretations.py} | 0 ...t_0017_multi_basket_multi_branch_fetch.py} | 1 + ... => test_0066_fix_http_fallback_freeze.py} | 2 +- ...th-http.py => test_0088_read_with_http.py} | 4 + ....py => test_0099_read_from_file_object.py} | 12 +- ...st_0173_empty_and_multiprocessing_bugs.py} | 7 +- ...st_0220_contiguous_byte_ranges_in_http.py} | 3 +- tests/test_0302_pickle.py | 4 +- ....py => test_0325_fix_windows_file_uris.py} | 0 ...st_0420_pyroot_uproot_interoperability.py} | 0 ...-dynamic-classes-cant-be-abc-subclasses.py | 17 -- ..._dynamic_classes_cant_be_abc_subclasses.py | 13 ++ 18 files changed, 85 insertions(+), 232 deletions(-) rename tests/{test_0016-interpretations.py => test_0016_interpretations.py} (100%) rename tests/{test_0017-multi-basket-multi-branch-fetch.py => test_0017_multi_basket_multi_branch_fetch.py} (99%) rename tests/{test_0066-fix-http-fallback-freeze.py => test_0066_fix_http_fallback_freeze.py} (90%) rename tests/{test_0088-read-with-http.py => test_0088_read_with_http.py} (92%) rename tests/{test_0099-read-from-file-object.py => test_0099_read_from_file_object.py} (60%) rename tests/{test_0173-empty-and-multiprocessing-bugs.py => test_0173_empty_and_multiprocessing_bugs.py} (87%) rename tests/{test_0220-contiguous-byte-ranges-in-http.py => test_0220_contiguous_byte_ranges_in_http.py} (94%) rename tests/{test_0325-fix-windows-file-uris.py => test_0325_fix_windows_file_uris.py} (100%) rename tests/{test_0420-pyroot-uproot-interoperability.py => test_0420_pyroot_uproot_interoperability.py} (100%) delete mode 100644 tests/test_0520-dynamic-classes-cant-be-abc-subclasses.py create mode 100644 tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py diff --git a/src/uproot/__init__.py b/src/uproot/__init__.py index 8777b4122..2d2de7786 100644 --- a/src/uproot/__init__.py +++ b/src/uproot/__init__.py @@ -72,7 +72,6 @@ isort:skip_file """ - from uproot.version import __version__ import uproot.const import uproot.extras @@ -92,6 +91,7 @@ from uproot.source.xrootd import MultithreadedXRootDSource from uproot.source.s3 import S3Source from uproot.source.object import ObjectSource +from uproot.source.fsspec import FSSpecSource from uproot.source.cursor import Cursor from uproot.source.futures import TrivialExecutor from uproot.source.futures import ThreadPoolExecutor diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index ae8ee8b69..cabc0faad 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -134,11 +134,6 @@ def dask( Options (type; default): * handler (:doc:`uproot.source.chunk.Source` class; None) - * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) diff --git a/src/uproot/_util.py b/src/uproot/_util.py index 25c34d0bb..28ac3665e 100644 --- a/src/uproot/_util.py +++ b/src/uproot/_util.py @@ -369,63 +369,30 @@ def file_path_to_source_class(file_path, options): Use a file path to get the :doc:`uproot.source.chunk.Source` class that would read it. Returns a tuple of (class, file_path) where the class is a subclass of :doc:`uproot.source.chunk.Source`. - - The "handler" option is the preferred way to specify a custom source class. - The "*_handler" options are for backwards compatibility and will override the "handler" option if set. """ + import uproot.source.chunk file_path = regularize_path(file_path) - out = options["handler"] - if out is not None: - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - f"'handler' is not a class object inheriting from Source: {out!r}" - ) - # check if "object_handler" is set - if ( - options["object_handler"] is not None - or options["file_handler"] is not None - or options["xrootd_handler"] is not None - or options["s3_handler"] is not None - or options["http_handler"] is not None + source_cls = options["handler"] + if source_cls is not None: + if not ( + isinstance(source_cls, type) + and issubclass(source_cls, uproot.source.chunk.Source) ): - # These options will override the "handler" option for backwards compatibility - warnings.warn( - """In version 5.2.0, the '*_handler' argument ('http_handler`, 's3_handler', etc.) will be removed from 'uproot.open'. Use 'handler' instead.""", - stacklevel=1, + raise TypeError( + f"'handler' is not a class object inheriting from Source: {source_cls!r}" ) - else: - return out, file_path + return source_cls, file_path if ( not isinstance(file_path, str) and hasattr(file_path, "read") and hasattr(file_path, "seek") ): - out = options["object_handler"] - if out is None: - out = uproot.source.object.ObjectSource - else: - warnings.warn( - f"""In version 5.2.0, the 'object_handler' argument will be removed from 'uproot.open'. Use -uproot.open(..., handler={out!r}) -instead. - -To raise these warnings as errors (and get stack traces to find out where they're called), run -import warnings -warnings.filterwarnings("error", module="uproot.*") -after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", - DeprecationWarning, - stacklevel=1, - ) - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - f"'object_handler' is not a class object inheriting from Source: {out!r}" - ) - - return out, file_path + source_cls = uproot.source.object.ObjectSource + return source_cls, file_path windows_absolute_path = None if win and _windows_absolute_path_pattern.match(file_path) is not None: @@ -457,107 +424,27 @@ def file_path_to_source_class(file_path, options): else: file_path = windows_absolute_path - out = options["file_handler"] - if out is None: - out = uproot.source.file.MemmapSource - else: - warnings.warn( - f"""In version 5.2.0, the 'file_handler' argument will be removed from 'uproot.open'. Use - uproot.open(..., handler={out!r} - instead. - - To raise these warnings as errors (and get stack traces to find out where they're called), run - import warnings - warnings.filterwarnings("error", module="uproot.*") - after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", - DeprecationWarning, - stacklevel=1, - ) + # uproot.source.file.MemmapSource + source_cls = uproot.source.fsspec.FSSpecSource - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - "'file_handler' is not a class object inheriting from Source: " - + repr(out) - ) - return out, os.path.expanduser(file_path) + return source_cls, os.path.expanduser(file_path) elif scheme == "root": - out = options["xrootd_handler"] - if out is None: - out = uproot.source.xrootd.XRootDSource - else: - warnings.warn( - f"""In version 5.2.0, the 'xrootd_handler' argument will be removed from 'uproot.open'. Use - uproot.open(..., handler={out!r} - instead. - - To raise these warnings as errors (and get stack traces to find out where they're called), run - import warnings - warnings.filterwarnings("error", module="uproot.*") - after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", - DeprecationWarning, - stacklevel=1, - ) - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - "'xrootd_handler' is not a class object inheriting from Source: " - + repr(out) - ) - return out, file_path + # uproot.source.xrootd.XRootDSource + source_cls = uproot.source.fsspec.FSSpecSource + return source_cls, file_path elif scheme == "s3": - out = options["s3_handler"] - if out is None: - out = uproot.source.s3.S3Source - else: - warnings.warn( - f"""In version 5.2.0, the 's3_handler' argument will be removed from 'uproot.open'. Use -uproot.open(..., handler={out!r} -instead. - -To raise these warnings as errors (and get stack traces to find out where they're called), run -import warnings -warnings.filterwarnings("error", module="uproot.*") -after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", - DeprecationWarning, - stacklevel=1, - ) - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - "'s3' is not a class object inheriting from Source: " + repr(out) - ) - return out, file_path + # https://github.com/scikit-hep/uproot5/pull/1012 + source_cls = uproot.source.s3.S3Source + return source_cls, file_path elif scheme in ("http", "https"): - out = options["http_handler"] - if out is None: - out = uproot.source.http.HTTPSource - else: - warnings.warn( - f"""In version 5.2.0, the 'http_handler' argument will be removed from 'uproot.open'. Use -uproot.open(..., handler={out!r} -instead. - -To raise these warnings as errors (and get stack traces to find out where they're called), run -import warnings -warnings.filterwarnings("error", module="uproot.*") -after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", - DeprecationWarning, - stacklevel=1, - ) - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - "'http_handler' is not a class object inheriting from Source: " - + repr(out) - ) - return out, file_path - - else: - # try to use fsspec before raising an error - if scheme in _schemes: - return uproot.source.fsspec.FSSpecSource, file_path + # uproot.source.http.HTTPSource + source_cls = uproot.source.fsspec.FSSpecSource + return source_cls, file_path - raise ValueError(f"URI scheme not recognized: {file_path}") + return uproot.source.fsspec.FSSpecSource, file_path if isinstance(__builtins__, dict): diff --git a/src/uproot/behaviors/TBranch.py b/src/uproot/behaviors/TBranch.py index 0d3372e09..047b81b3d 100644 --- a/src/uproot/behaviors/TBranch.py +++ b/src/uproot/behaviors/TBranch.py @@ -153,11 +153,6 @@ def iterate( Options (type; default): * handler (:doc:`uproot.source.chunk.Source` class; None) - * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) @@ -327,11 +322,6 @@ def concatenate( Options (type; default): * handler (:doc:`uproot.source.chunk.Source` class; None) - * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) diff --git a/src/uproot/reading.py b/src/uproot/reading.py index fabd6344d..5957be19e 100644 --- a/src/uproot/reading.py +++ b/src/uproot/reading.py @@ -11,11 +11,11 @@ import struct import sys import uuid -import warnings from collections.abc import Mapping, MutableMapping import uproot import uproot.behaviors.TBranch +import uproot.source.fsspec from uproot._util import no_filter @@ -42,7 +42,7 @@ def open( ``"rel/file.root:tdirectory/ttree"``, ``Path("rel:/file.root")``, ``Path("/abs/path:stuff.root")`` object_cache (None, MutableMapping, or int): Cache of objects drawn - from ROOT directories (e.g histograms, TTrees, other directories); + from ROOT directories (e.g. histograms, TTrees, other directories); if None, do not use a cache; if an int, create a new cache of this size. array_cache (None, MutableMapping, or memory size): Cache of arrays @@ -77,11 +77,6 @@ def open( Options (type; default): * handler (:doc:`uproot.source.chunk.Source` class; None) - * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) @@ -156,43 +151,22 @@ def open( return file.root_directory[object_path] +open.defaults = { + "handler": None, + "timeout": 30, + "max_num_elements": None, + "num_workers": 1, + "use_threads": sys.platform != "emscripten", + "num_fallback_workers": 10, + "begin_chunk_size": 403, # the smallest a ROOT file can be + "minimal_ttree_metadata": True, +} + + class _OpenDefaults(dict): - def __getitem__(self, where): - if where == "xrootd_handler" and where not in self: - # See https://github.com/scikit-hep/uproot5/issues/294 - if uproot.extras.older_xrootd("5.2.0"): - message = ( - f"XRootD {uproot.extras.xrootd_version()} is not fully supported; " - """either upgrade to 5.2.0+ or set - - open.defaults["xrootd_handler"] = uproot.MultithreadedXRootDSource -""" - ) - warnings.warn(message, FutureWarning, stacklevel=1) - - # The key should still be set, regardless of whether we see the warning. - self["xrootd_handler"] = uproot.source.xrootd.XRootDSource - - return dict.__getitem__(self, where) - - -open.defaults = _OpenDefaults( - { - "handler": None, # To be updated to fsspec source - "file_handler": None, # Deprecated - "s3_handler": None, # Deprecated - "http_handler": None, # Deprecated - "object_handler": None, # Deprecated - "xrootd_handler": None, # Deprecated - "timeout": 30, - "max_num_elements": None, - "num_workers": 1, - "use_threads": sys.platform != "emscripten", - "num_fallback_workers": 10, - "begin_chunk_size": 403, # the smallest a ROOT file can be - "minimal_ttree_metadata": True, - } -) + def __init__(self): + raise NotImplementedError # kept for backwards compatibility + must_be_attached = [ "TROOT", @@ -536,11 +510,6 @@ class ReadOnlyFile(CommonFileMethods): Options (type; default): * handler (:doc:`uproot.source.chunk.Source` class; None) - * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) @@ -571,7 +540,7 @@ def __init__( self.decompression_executor = decompression_executor self.interpretation_executor = interpretation_executor - self._options = _OpenDefaults(open.defaults) + self._options = open.defaults.copy() self._options.update(options) for option in ["begin_chunk_size"]: self._options[option] = uproot._util.memory_size(self._options[option]) @@ -581,10 +550,10 @@ def __init__( self.hook_before_create_source() - Source, file_path = uproot._util.file_path_to_source_class( + source_cls, file_path = uproot._util.file_path_to_source_class( file_path, self._options ) - self._source = Source(file_path, **self._options) + self._source = source_cls(file_path, **self._options) self.hook_before_get_chunks() diff --git a/src/uproot/source/fsspec.py b/src/uproot/source/fsspec.py index e52c5b9b1..1809711a5 100644 --- a/src/uproot/source/fsspec.py +++ b/src/uproot/source/fsspec.py @@ -34,7 +34,7 @@ def __init__(self, file_path: str, **options): if k not in uproot.reading.open.defaults.keys() } - self._executor = FSSpecLoopExecutor() + self._open() self._fs, self._file_path = fsspec.core.url_to_fs(file_path, **storage_options) @@ -48,6 +48,9 @@ def __init__(self, file_path: str, **options): self._num_requested_bytes = 0 self.__enter__() + def _open(self): + self._executor = FSSpecLoopExecutor() + def __repr__(self): path = repr(self._file_path) if len(self._file_path) > 10: diff --git a/tests/test_0016-interpretations.py b/tests/test_0016_interpretations.py similarity index 100% rename from tests/test_0016-interpretations.py rename to tests/test_0016_interpretations.py diff --git a/tests/test_0017-multi-basket-multi-branch-fetch.py b/tests/test_0017_multi_basket_multi_branch_fetch.py similarity index 99% rename from tests/test_0017-multi-basket-multi-branch-fetch.py rename to tests/test_0017_multi_basket_multi_branch_fetch.py index b42f6db4b..1ed316f21 100644 --- a/tests/test_0017-multi-basket-multi-branch-fetch.py +++ b/tests/test_0017_multi_basket_multi_branch_fetch.py @@ -310,6 +310,7 @@ def test_cache(): skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"), object_cache=100, array_cache="100 MB", + handler=uproot.source.file.MemmapSource, ) as f: assert f.cache_key == "db4be408-93ad-11ea-9027-d201a8c0beef:/" assert f["sample"].cache_key == "db4be408-93ad-11ea-9027-d201a8c0beef:/sample;1" diff --git a/tests/test_0066-fix-http-fallback-freeze.py b/tests/test_0066_fix_http_fallback_freeze.py similarity index 90% rename from tests/test_0066-fix-http-fallback-freeze.py rename to tests/test_0066_fix_http_fallback_freeze.py index 632af99bf..4b6ff26e2 100644 --- a/tests/test_0066-fix-http-fallback-freeze.py +++ b/tests/test_0066_fix_http_fallback_freeze.py @@ -1,6 +1,5 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE -import numpy import pytest import uproot @@ -8,6 +7,7 @@ @pytest.mark.network def test(): + pytest.importorskip("aiohttp") with uproot.open( {"http://scikit-hep.org/uproot3/examples/HZZ.root": "events"} ) as t: diff --git a/tests/test_0088-read-with-http.py b/tests/test_0088_read_with_http.py similarity index 92% rename from tests/test_0088-read-with-http.py rename to tests/test_0088_read_with_http.py index 7ba760429..446200595 100644 --- a/tests/test_0088-read-with-http.py +++ b/tests/test_0088_read_with_http.py @@ -9,6 +9,8 @@ @pytest.mark.network def test_issue176(): + pytest.importorskip("aiohttp") + with uproot.open( "https://starterkit.web.cern.ch/starterkit/data/advanced-python-2019/dalitzdata.root" ) as f: @@ -18,6 +20,8 @@ def test_issue176(): @pytest.mark.network def test_issue176_again(): + pytest.importorskip("aiohttp") + with uproot.open( "https://starterkit.web.cern.ch/starterkit/data/advanced-python-2019/dalitzdata.root" ) as f: diff --git a/tests/test_0099-read-from-file-object.py b/tests/test_0099_read_from_file_object.py similarity index 60% rename from tests/test_0099-read-from-file-object.py rename to tests/test_0099_read_from_file_object.py index f7a9faf02..730b2560b 100644 --- a/tests/test_0099-read-from-file-object.py +++ b/tests/test_0099_read_from_file_object.py @@ -4,11 +4,19 @@ import skhep_testdata import uproot +import uproot.source.fsspec +import uproot.source.object -def test(): +@pytest.mark.parametrize( + "handler", + [None, uproot.source.object.ObjectSource], +) +def test_read_from_file_like_object(handler): with open(skhep_testdata.data_path("uproot-Zmumu.root"), "rb") as f: - assert uproot.open({f: "events"})["px1"].array(library="np")[:10].tolist() == [ + assert uproot.open({f: "events"}, handler=handler)["px1"].array(library="np")[ + :10 + ].tolist() == [ -41.1952876442, 35.1180497674, 35.1180497674, diff --git a/tests/test_0173-empty-and-multiprocessing-bugs.py b/tests/test_0173_empty_and_multiprocessing_bugs.py similarity index 87% rename from tests/test_0173-empty-and-multiprocessing-bugs.py rename to tests/test_0173_empty_and_multiprocessing_bugs.py index 8280cadf4..dd5a0efaf 100644 --- a/tests/test_0173-empty-and-multiprocessing-bugs.py +++ b/tests/test_0173_empty_and_multiprocessing_bugs.py @@ -1,7 +1,6 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE import multiprocessing -import sys import pytest import skhep_testdata @@ -17,8 +16,8 @@ def test_empty(): assert t["z"].array(library="np").tolist() == [] -def readone(filename): - with uproot.open(filename) as f: +def read_one(filename): + with uproot.open(filename, handler=uproot.source.file.MemmapSource) as f: f.decompression_executor = uproot.ThreadPoolExecutor() t = f["events"] b = t["px1"] @@ -28,7 +27,7 @@ def readone(filename): def test_multiprocessing(): with multiprocessing.Pool(1) as pool: out = pool.map( - readone, + read_one, [ skhep_testdata.data_path("uproot-Zmumu.root"), skhep_testdata.data_path("uproot-Zmumu-zlib.root"), diff --git a/tests/test_0220-contiguous-byte-ranges-in-http.py b/tests/test_0220_contiguous_byte_ranges_in_http.py similarity index 94% rename from tests/test_0220-contiguous-byte-ranges-in-http.py rename to tests/test_0220_contiguous_byte_ranges_in_http.py index 3db77022d..26205206f 100644 --- a/tests/test_0220-contiguous-byte-ranges-in-http.py +++ b/tests/test_0220_contiguous_byte_ranges_in_http.py @@ -1,6 +1,5 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE -import numpy import pytest import uproot @@ -8,6 +7,8 @@ @pytest.mark.network def test(): + pytest.importorskip("aiohttp") + with uproot.open( "https://starterkit.web.cern.ch/starterkit/data/advanced-python-2019/RD_distribution.root:tree" ) as f: diff --git a/tests/test_0302_pickle.py b/tests/test_0302_pickle.py index 1177c7088..ae746482b 100644 --- a/tests/test_0302_pickle.py +++ b/tests/test_0302_pickle.py @@ -1,8 +1,6 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE -import os import pickle -import sys import pytest import skhep_testdata @@ -34,6 +32,8 @@ def test_pickle_roundtrip_mmap(): @pytest.mark.network def test_pickle_roundtrip_http(): + pytest.importorskip("aiohttp") + with uproot.open("https://scikit-hep.org/uproot3/examples/Zmumu.root") as f: pkl = pickle.dumps(f["events"]) diff --git a/tests/test_0325-fix-windows-file-uris.py b/tests/test_0325_fix_windows_file_uris.py similarity index 100% rename from tests/test_0325-fix-windows-file-uris.py rename to tests/test_0325_fix_windows_file_uris.py diff --git a/tests/test_0420-pyroot-uproot-interoperability.py b/tests/test_0420_pyroot_uproot_interoperability.py similarity index 100% rename from tests/test_0420-pyroot-uproot-interoperability.py rename to tests/test_0420_pyroot_uproot_interoperability.py diff --git a/tests/test_0520-dynamic-classes-cant-be-abc-subclasses.py b/tests/test_0520-dynamic-classes-cant-be-abc-subclasses.py deleted file mode 100644 index 0ac90a524..000000000 --- a/tests/test_0520-dynamic-classes-cant-be-abc-subclasses.py +++ /dev/null @@ -1,17 +0,0 @@ -# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE - -import pickle -import sys - -import numpy as np -import pytest -import skhep_testdata - - -@pytest.mark.skipif( - sys.version_info < (3, 7), - reason="Dynamic types depend on module __getattr__, a Python 3.7+ feature.", -) -def test(): - with open("tests/samples/h_dynamic.pkl", "rb") as f: - assert len(list(pickle.load(f).axis(0))) == 100 diff --git a/tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py b/tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py new file mode 100644 index 000000000..1942589ac --- /dev/null +++ b/tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py @@ -0,0 +1,13 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE + +import pickle +import os + +import pytest + +test_directory = os.path.dirname(os.path.realpath(__file__)) + + +def test_pickle(): + with open(os.path.join(test_directory, "samples/h_dynamic.pkl"), "rb") as f: + assert len(list(pickle.load(f).axis(0))) == 100 From 5d9bd315bdf0b5e939b662cdc0d25957af7b83a4 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio Date: Thu, 16 Nov 2023 14:41:53 -0600 Subject: [PATCH 3/6] set version to 5.2.0rc1 (release candidate) --- src/uproot/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/version.py b/src/uproot/version.py index a3cf39315..b23837ac2 100644 --- a/src/uproot/version.py +++ b/src/uproot/version.py @@ -12,7 +12,7 @@ import re -__version__ = "5.1.2" +__version__ = "5.2.0rc1" version = __version__ version_info = tuple(re.split(r"[-\.]", __version__)) From f86bd1aa8a7c102f57256d2a3c56df10a56d2a23 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio Date: Thu, 16 Nov 2023 14:05:11 -0600 Subject: [PATCH 4/6] set s3fs as default for s3 --- src/uproot/_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uproot/_util.py b/src/uproot/_util.py index 28ac3665e..1a86c4207 100644 --- a/src/uproot/_util.py +++ b/src/uproot/_util.py @@ -435,8 +435,8 @@ def file_path_to_source_class(file_path, options): return source_cls, file_path elif scheme == "s3": - # https://github.com/scikit-hep/uproot5/pull/1012 - source_cls = uproot.source.s3.S3Source + # uproot.source.s3.S3Source + source_cls = uproot.source.fsspec.FSSpecSource return source_cls, file_path elif scheme in ("http", "https"): From c9f48b0c6a3073cbd25e56c8ae8bbabe5b483977 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio Date: Thu, 16 Nov 2023 16:02:39 -0600 Subject: [PATCH 5/6] test different handlers --- tests/test_0302_pickle.py | 40 +++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/tests/test_0302_pickle.py b/tests/test_0302_pickle.py index ae746482b..c33fb40f6 100644 --- a/tests/test_0302_pickle.py +++ b/tests/test_0302_pickle.py @@ -7,11 +7,18 @@ import uproot -pytest.importorskip("awkward") - -def test_pickle_roundtrip_mmap(): - with uproot.open(skhep_testdata.data_path("uproot-small-dy-withoffsets.root")) as f: +@pytest.mark.parametrize( + "handler", + [ + uproot.source.file.MemmapSource, + uproot.source.fsspec.FSSpecSource, + ], +) +def test_pickle_roundtrip_local(handler): + with uproot.open( + skhep_testdata.data_path("uproot-small-dy-withoffsets.root"), handler=handler + ) as f: pkl = pickle.dumps(f["tree"]) branch = pickle.loads(pkl)["Muon_pt"] @@ -30,11 +37,20 @@ def test_pickle_roundtrip_mmap(): ] +@pytest.mark.parametrize( + "handler", + [ + uproot.source.http.HTTPSource, + uproot.source.fsspec.FSSpecSource, + ], +) @pytest.mark.network -def test_pickle_roundtrip_http(): +def test_pickle_roundtrip_http(handler): pytest.importorskip("aiohttp") - with uproot.open("https://scikit-hep.org/uproot3/examples/Zmumu.root") as f: + with uproot.open( + "https://scikit-hep.org/uproot3/examples/Zmumu.root", handler=handler + ) as f: pkl = pickle.dumps(f["events"]) tree = pickle.loads(pkl) @@ -53,12 +69,20 @@ def test_pickle_roundtrip_http(): ] +@pytest.mark.parametrize( + "handler", + [ + uproot.source.xrootd.XRootDSource, + uproot.source.fsspec.FSSpecSource, + ], +) @pytest.mark.network @pytest.mark.xrootd -def test_pickle_roundtrip_xrootd(): +def test_pickle_roundtrip_xrootd(handler): pytest.importorskip("XRootD") with uproot.open( - "root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/Run2012B_DoubleMuParked.root" + "root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/Run2012B_DoubleMuParked.root", + handler=handler, ) as f: pkl = pickle.dumps(f["Events"]) From 738bec3fb4f640ea37b29643ca1cd5d105e58945 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio Date: Thu, 16 Nov 2023 17:06:31 -0600 Subject: [PATCH 6/6] correct serialization of fsspec source --- src/uproot/source/fsspec.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/uproot/source/fsspec.py b/src/uproot/source/fsspec.py index 1809711a5..3c6bb5ec7 100644 --- a/src/uproot/source/fsspec.py +++ b/src/uproot/source/fsspec.py @@ -34,14 +34,13 @@ def __init__(self, file_path: str, **options): if k not in uproot.reading.open.defaults.keys() } - self._open() - self._fs, self._file_path = fsspec.core.url_to_fs(file_path, **storage_options) # What should we do when there is a chain of filesystems? self._async_impl = self._fs.async_impl - self._file = self._fs.open(self._file_path) + self._open() + self._fh = None self._num_requests = 0 self._num_requested_chunks = 0 @@ -50,6 +49,7 @@ def __init__(self, file_path: str, **options): def _open(self): self._executor = FSSpecLoopExecutor() + self._file = self._fs.open(self._file_path) def __repr__(self): path = repr(self._file_path) @@ -60,6 +60,8 @@ def __repr__(self): def __getstate__(self): state = dict(self.__dict__) state.pop("_executor") + state.pop("_file") + state.pop("_fh") return state def __setstate__(self, state):