From 8f37e6f467301c6a78c7c14320614437f0d93f44 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio <35803280+lobis@users.noreply.github.com> Date: Wed, 15 Nov 2023 16:33:41 -0600 Subject: [PATCH] feat: set fsspec as default source (#1023) * feat: add fsspec as required dependency (#1021) * fsspec requirements * simplify fsspec import * use loop property * correctly create schemes list * remove deprecated handlers from docs * simplify source selection * return object source * pickle executor * rename test * test more handlers * option to check writeable file-like object * rename test * explicitly set handler * fix s3 source * rename test * Revert "fix s3 source" This reverts commit e76fdbb1e3011a2ad3bd9ea8cb4634b43caa6af6. * sesparate PR for s3 fix (https://github.com/scikit-hep/uproot5/pull/1024) * strip file:// * rename test * rename tests * add aiohttp skip * attempt to parse windows paths * test ci * Revert "test ci" This reverts commit 4c1c8a5bdc3cbbab540c125807b1139a51e9c455. * rename test * remove fsspec from test * remove *_handler options * update defaults * do not override default s3 * do not use fsspec for multiprocessing * rename test * fix not selecting object source * missing import * normalize doc * remove helper * never return None as source * remove unnecessary xrootd source default override since fsspec is default now * rename test * add empty class to pass old pickle test --- src/uproot/__init__.py | 2 +- src/uproot/_dask.py | 5 - src/uproot/_util.py | 161 +++--------------- src/uproot/behaviors/TBranch.py | 10 -- src/uproot/reading.py | 71 +++----- ...st_0017_multi_basket_multi_branch_fetch.py | 1 + tests/test_0066_fix_http_fallback_freeze.py | 2 +- tests/test_0088_read_with_http.py | 4 + tests/test_0099_read_from_file_object.py | 12 +- ...est_0173_empty_and_multiprocessing_bugs.py | 7 +- ...est_0220_contiguous_byte_ranges_in_http.py | 3 +- tests/test_0302_pickle.py | 6 +- ..._dynamic_classes_cant_be_abc_subclasses.py | 3 +- 13 files changed, 70 insertions(+), 217 deletions(-) diff --git a/src/uproot/__init__.py b/src/uproot/__init__.py index 8777b4122..2d2de7786 100644 --- a/src/uproot/__init__.py +++ b/src/uproot/__init__.py @@ -72,7 +72,6 @@ isort:skip_file """ - from uproot.version import __version__ import uproot.const import uproot.extras @@ -92,6 +91,7 @@ from uproot.source.xrootd import MultithreadedXRootDSource from uproot.source.s3 import S3Source from uproot.source.object import ObjectSource +from uproot.source.fsspec import FSSpecSource from uproot.source.cursor import Cursor from uproot.source.futures import TrivialExecutor from uproot.source.futures import ThreadPoolExecutor diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index b1f033c80..38bb213c7 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -142,11 +142,6 @@ def dask( Options (type; default): * handler (:doc:`uproot.source.chunk.Source` class; None) - * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) diff --git a/src/uproot/_util.py b/src/uproot/_util.py index 25c34d0bb..28ac3665e 100644 --- a/src/uproot/_util.py +++ b/src/uproot/_util.py @@ -369,63 +369,30 @@ def file_path_to_source_class(file_path, options): Use a file path to get the :doc:`uproot.source.chunk.Source` class that would read it. Returns a tuple of (class, file_path) where the class is a subclass of :doc:`uproot.source.chunk.Source`. - - The "handler" option is the preferred way to specify a custom source class. - The "*_handler" options are for backwards compatibility and will override the "handler" option if set. """ + import uproot.source.chunk file_path = regularize_path(file_path) - out = options["handler"] - if out is not None: - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - f"'handler' is not a class object inheriting from Source: {out!r}" - ) - # check if "object_handler" is set - if ( - options["object_handler"] is not None - or options["file_handler"] is not None - or options["xrootd_handler"] is not None - or options["s3_handler"] is not None - or options["http_handler"] is not None + source_cls = options["handler"] + if source_cls is not None: + if not ( + isinstance(source_cls, type) + and issubclass(source_cls, uproot.source.chunk.Source) ): - # These options will override the "handler" option for backwards compatibility - warnings.warn( - """In version 5.2.0, the '*_handler' argument ('http_handler`, 's3_handler', etc.) will be removed from 'uproot.open'. Use 'handler' instead.""", - stacklevel=1, + raise TypeError( + f"'handler' is not a class object inheriting from Source: {source_cls!r}" ) - else: - return out, file_path + return source_cls, file_path if ( not isinstance(file_path, str) and hasattr(file_path, "read") and hasattr(file_path, "seek") ): - out = options["object_handler"] - if out is None: - out = uproot.source.object.ObjectSource - else: - warnings.warn( - f"""In version 5.2.0, the 'object_handler' argument will be removed from 'uproot.open'. Use -uproot.open(..., handler={out!r}) -instead. - -To raise these warnings as errors (and get stack traces to find out where they're called), run -import warnings -warnings.filterwarnings("error", module="uproot.*") -after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", - DeprecationWarning, - stacklevel=1, - ) - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - f"'object_handler' is not a class object inheriting from Source: {out!r}" - ) - - return out, file_path + source_cls = uproot.source.object.ObjectSource + return source_cls, file_path windows_absolute_path = None if win and _windows_absolute_path_pattern.match(file_path) is not None: @@ -457,107 +424,27 @@ def file_path_to_source_class(file_path, options): else: file_path = windows_absolute_path - out = options["file_handler"] - if out is None: - out = uproot.source.file.MemmapSource - else: - warnings.warn( - f"""In version 5.2.0, the 'file_handler' argument will be removed from 'uproot.open'. Use - uproot.open(..., handler={out!r} - instead. - - To raise these warnings as errors (and get stack traces to find out where they're called), run - import warnings - warnings.filterwarnings("error", module="uproot.*") - after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", - DeprecationWarning, - stacklevel=1, - ) + # uproot.source.file.MemmapSource + source_cls = uproot.source.fsspec.FSSpecSource - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - "'file_handler' is not a class object inheriting from Source: " - + repr(out) - ) - return out, os.path.expanduser(file_path) + return source_cls, os.path.expanduser(file_path) elif scheme == "root": - out = options["xrootd_handler"] - if out is None: - out = uproot.source.xrootd.XRootDSource - else: - warnings.warn( - f"""In version 5.2.0, the 'xrootd_handler' argument will be removed from 'uproot.open'. Use - uproot.open(..., handler={out!r} - instead. - - To raise these warnings as errors (and get stack traces to find out where they're called), run - import warnings - warnings.filterwarnings("error", module="uproot.*") - after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", - DeprecationWarning, - stacklevel=1, - ) - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - "'xrootd_handler' is not a class object inheriting from Source: " - + repr(out) - ) - return out, file_path + # uproot.source.xrootd.XRootDSource + source_cls = uproot.source.fsspec.FSSpecSource + return source_cls, file_path elif scheme == "s3": - out = options["s3_handler"] - if out is None: - out = uproot.source.s3.S3Source - else: - warnings.warn( - f"""In version 5.2.0, the 's3_handler' argument will be removed from 'uproot.open'. Use -uproot.open(..., handler={out!r} -instead. - -To raise these warnings as errors (and get stack traces to find out where they're called), run -import warnings -warnings.filterwarnings("error", module="uproot.*") -after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", - DeprecationWarning, - stacklevel=1, - ) - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - "'s3' is not a class object inheriting from Source: " + repr(out) - ) - return out, file_path + # https://github.com/scikit-hep/uproot5/pull/1012 + source_cls = uproot.source.s3.S3Source + return source_cls, file_path elif scheme in ("http", "https"): - out = options["http_handler"] - if out is None: - out = uproot.source.http.HTTPSource - else: - warnings.warn( - f"""In version 5.2.0, the 'http_handler' argument will be removed from 'uproot.open'. Use -uproot.open(..., handler={out!r} -instead. - -To raise these warnings as errors (and get stack traces to find out where they're called), run -import warnings -warnings.filterwarnings("error", module="uproot.*") -after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""", - DeprecationWarning, - stacklevel=1, - ) - if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)): - raise TypeError( - "'http_handler' is not a class object inheriting from Source: " - + repr(out) - ) - return out, file_path - - else: - # try to use fsspec before raising an error - if scheme in _schemes: - return uproot.source.fsspec.FSSpecSource, file_path + # uproot.source.http.HTTPSource + source_cls = uproot.source.fsspec.FSSpecSource + return source_cls, file_path - raise ValueError(f"URI scheme not recognized: {file_path}") + return uproot.source.fsspec.FSSpecSource, file_path if isinstance(__builtins__, dict): diff --git a/src/uproot/behaviors/TBranch.py b/src/uproot/behaviors/TBranch.py index 7fb3e3c45..a82603176 100644 --- a/src/uproot/behaviors/TBranch.py +++ b/src/uproot/behaviors/TBranch.py @@ -154,11 +154,6 @@ def iterate( Options (type; default): * handler (:doc:`uproot.source.chunk.Source` class; None) - * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) @@ -328,11 +323,6 @@ def concatenate( Options (type; default): * handler (:doc:`uproot.source.chunk.Source` class; None) - * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) diff --git a/src/uproot/reading.py b/src/uproot/reading.py index 63aa88e6f..935c6e646 100644 --- a/src/uproot/reading.py +++ b/src/uproot/reading.py @@ -12,11 +12,11 @@ import struct import sys import uuid -import warnings from collections.abc import Mapping, MutableMapping import uproot import uproot.behaviors.TBranch +import uproot.source.fsspec from uproot._util import no_filter @@ -43,7 +43,7 @@ def open( ``"rel/file.root:tdirectory/ttree"``, ``Path("rel:/file.root")``, ``Path("/abs/path:stuff.root")`` object_cache (None, MutableMapping, or int): Cache of objects drawn - from ROOT directories (e.g histograms, TTrees, other directories); + from ROOT directories (e.g. histograms, TTrees, other directories); if None, do not use a cache; if an int, create a new cache of this size. array_cache (None, MutableMapping, or memory size): Cache of arrays @@ -78,11 +78,6 @@ def open( Options (type; default): * handler (:doc:`uproot.source.chunk.Source` class; None) - * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) @@ -157,43 +152,22 @@ def open( return file.root_directory[object_path] +open.defaults = { + "handler": None, + "timeout": 30, + "max_num_elements": None, + "num_workers": 1, + "use_threads": sys.platform != "emscripten", + "num_fallback_workers": 10, + "begin_chunk_size": 403, # the smallest a ROOT file can be + "minimal_ttree_metadata": True, +} + + class _OpenDefaults(dict): - def __getitem__(self, where): - if where == "xrootd_handler" and where not in self: - # See https://github.com/scikit-hep/uproot5/issues/294 - if uproot.extras.older_xrootd("5.2.0"): - message = ( - f"XRootD {uproot.extras.xrootd_version()} is not fully supported; " - """either upgrade to 5.2.0+ or set - - open.defaults["xrootd_handler"] = uproot.MultithreadedXRootDSource -""" - ) - warnings.warn(message, FutureWarning, stacklevel=1) - - # The key should still be set, regardless of whether we see the warning. - self["xrootd_handler"] = uproot.source.xrootd.XRootDSource - - return dict.__getitem__(self, where) - - -open.defaults = _OpenDefaults( - { - "handler": None, # To be updated to fsspec source - "file_handler": None, # Deprecated - "s3_handler": None, # Deprecated - "http_handler": None, # Deprecated - "object_handler": None, # Deprecated - "xrootd_handler": None, # Deprecated - "timeout": 30, - "max_num_elements": None, - "num_workers": 1, - "use_threads": sys.platform != "emscripten", - "num_fallback_workers": 10, - "begin_chunk_size": 403, # the smallest a ROOT file can be - "minimal_ttree_metadata": True, - } -) + def __init__(self): + raise NotImplementedError # kept for backwards compatibility + must_be_attached = [ "TROOT", @@ -537,11 +511,6 @@ class ReadOnlyFile(CommonFileMethods): Options (type; default): * handler (:doc:`uproot.source.chunk.Source` class; None) - * file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) - * object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`) * timeout (float for HTTP, int for XRootD; 30) * max_num_elements (None or int; None) * num_workers (int; 1) @@ -572,7 +541,7 @@ def __init__( self.decompression_executor = decompression_executor self.interpretation_executor = interpretation_executor - self._options = _OpenDefaults(open.defaults) + self._options = open.defaults.copy() self._options.update(options) for option in ["begin_chunk_size"]: self._options[option] = uproot._util.memory_size(self._options[option]) @@ -582,10 +551,10 @@ def __init__( self.hook_before_create_source() - Source, file_path = uproot._util.file_path_to_source_class( + source_cls, file_path = uproot._util.file_path_to_source_class( file_path, self._options ) - self._source = Source(file_path, **self._options) + self._source = source_cls(file_path, **self._options) self.hook_before_get_chunks() diff --git a/tests/test_0017_multi_basket_multi_branch_fetch.py b/tests/test_0017_multi_basket_multi_branch_fetch.py index b42f6db4b..1ed316f21 100644 --- a/tests/test_0017_multi_basket_multi_branch_fetch.py +++ b/tests/test_0017_multi_basket_multi_branch_fetch.py @@ -310,6 +310,7 @@ def test_cache(): skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"), object_cache=100, array_cache="100 MB", + handler=uproot.source.file.MemmapSource, ) as f: assert f.cache_key == "db4be408-93ad-11ea-9027-d201a8c0beef:/" assert f["sample"].cache_key == "db4be408-93ad-11ea-9027-d201a8c0beef:/sample;1" diff --git a/tests/test_0066_fix_http_fallback_freeze.py b/tests/test_0066_fix_http_fallback_freeze.py index 632af99bf..4b6ff26e2 100644 --- a/tests/test_0066_fix_http_fallback_freeze.py +++ b/tests/test_0066_fix_http_fallback_freeze.py @@ -1,6 +1,5 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE -import numpy import pytest import uproot @@ -8,6 +7,7 @@ @pytest.mark.network def test(): + pytest.importorskip("aiohttp") with uproot.open( {"http://scikit-hep.org/uproot3/examples/HZZ.root": "events"} ) as t: diff --git a/tests/test_0088_read_with_http.py b/tests/test_0088_read_with_http.py index 7ba760429..446200595 100644 --- a/tests/test_0088_read_with_http.py +++ b/tests/test_0088_read_with_http.py @@ -9,6 +9,8 @@ @pytest.mark.network def test_issue176(): + pytest.importorskip("aiohttp") + with uproot.open( "https://starterkit.web.cern.ch/starterkit/data/advanced-python-2019/dalitzdata.root" ) as f: @@ -18,6 +20,8 @@ def test_issue176(): @pytest.mark.network def test_issue176_again(): + pytest.importorskip("aiohttp") + with uproot.open( "https://starterkit.web.cern.ch/starterkit/data/advanced-python-2019/dalitzdata.root" ) as f: diff --git a/tests/test_0099_read_from_file_object.py b/tests/test_0099_read_from_file_object.py index f7a9faf02..730b2560b 100644 --- a/tests/test_0099_read_from_file_object.py +++ b/tests/test_0099_read_from_file_object.py @@ -4,11 +4,19 @@ import skhep_testdata import uproot +import uproot.source.fsspec +import uproot.source.object -def test(): +@pytest.mark.parametrize( + "handler", + [None, uproot.source.object.ObjectSource], +) +def test_read_from_file_like_object(handler): with open(skhep_testdata.data_path("uproot-Zmumu.root"), "rb") as f: - assert uproot.open({f: "events"})["px1"].array(library="np")[:10].tolist() == [ + assert uproot.open({f: "events"}, handler=handler)["px1"].array(library="np")[ + :10 + ].tolist() == [ -41.1952876442, 35.1180497674, 35.1180497674, diff --git a/tests/test_0173_empty_and_multiprocessing_bugs.py b/tests/test_0173_empty_and_multiprocessing_bugs.py index 8280cadf4..dd5a0efaf 100644 --- a/tests/test_0173_empty_and_multiprocessing_bugs.py +++ b/tests/test_0173_empty_and_multiprocessing_bugs.py @@ -1,7 +1,6 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE import multiprocessing -import sys import pytest import skhep_testdata @@ -17,8 +16,8 @@ def test_empty(): assert t["z"].array(library="np").tolist() == [] -def readone(filename): - with uproot.open(filename) as f: +def read_one(filename): + with uproot.open(filename, handler=uproot.source.file.MemmapSource) as f: f.decompression_executor = uproot.ThreadPoolExecutor() t = f["events"] b = t["px1"] @@ -28,7 +27,7 @@ def readone(filename): def test_multiprocessing(): with multiprocessing.Pool(1) as pool: out = pool.map( - readone, + read_one, [ skhep_testdata.data_path("uproot-Zmumu.root"), skhep_testdata.data_path("uproot-Zmumu-zlib.root"), diff --git a/tests/test_0220_contiguous_byte_ranges_in_http.py b/tests/test_0220_contiguous_byte_ranges_in_http.py index 3db77022d..26205206f 100644 --- a/tests/test_0220_contiguous_byte_ranges_in_http.py +++ b/tests/test_0220_contiguous_byte_ranges_in_http.py @@ -1,6 +1,5 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE -import numpy import pytest import uproot @@ -8,6 +7,8 @@ @pytest.mark.network def test(): + pytest.importorskip("aiohttp") + with uproot.open( "https://starterkit.web.cern.ch/starterkit/data/advanced-python-2019/RD_distribution.root:tree" ) as f: diff --git a/tests/test_0302_pickle.py b/tests/test_0302_pickle.py index fbd436511..c33fb40f6 100644 --- a/tests/test_0302_pickle.py +++ b/tests/test_0302_pickle.py @@ -12,7 +12,7 @@ "handler", [ uproot.source.file.MemmapSource, - # uproot.source.fsspec.FSSpecSource, + uproot.source.fsspec.FSSpecSource, ], ) def test_pickle_roundtrip_local(handler): @@ -41,7 +41,7 @@ def test_pickle_roundtrip_local(handler): "handler", [ uproot.source.http.HTTPSource, - # uproot.source.fsspec.FSSpecSource, + uproot.source.fsspec.FSSpecSource, ], ) @pytest.mark.network @@ -73,7 +73,7 @@ def test_pickle_roundtrip_http(handler): "handler", [ uproot.source.xrootd.XRootDSource, - # uproot.source.fsspec.FSSpecSource, + uproot.source.fsspec.FSSpecSource, ], ) @pytest.mark.network diff --git a/tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py b/tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py index 0ac90a524..c50407149 100644 --- a/tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py +++ b/tests/test_0520_dynamic_classes_cant_be_abc_subclasses.py @@ -3,9 +3,8 @@ import pickle import sys -import numpy as np +import uproot import pytest -import skhep_testdata @pytest.mark.skipif(