Skip to content

Commit

Permalink
feat: set fsspec as default source (#1023)
Browse files Browse the repository at this point in the history
* feat: add fsspec as required dependency (#1021)

* fsspec requirements

* simplify fsspec import

* use loop property

* correctly create schemes list

* remove deprecated handlers from docs

* simplify source selection

* return object source

* pickle executor

* rename test

* test more handlers

* option to check writeable file-like object

* rename test

* explicitly set handler

* fix s3 source

* rename test

* Revert "fix s3 source"

This reverts commit e76fdbb.

* sesparate PR for s3 fix (#1024)

* strip file://

* rename test

* rename tests

* add aiohttp skip

* attempt to parse windows paths

* test ci

* Revert "test ci"

This reverts commit 4c1c8a5.

* rename test

* remove fsspec from test

* remove *_handler options

* update defaults

* do not override default s3

* do not use fsspec for multiprocessing

* rename test

* fix not selecting object source

* missing import

* normalize doc

* remove helper

* never return None as source

* remove unnecessary xrootd source default override since fsspec is default now

* rename test

* add empty class to pass old pickle test
  • Loading branch information
lobis committed Nov 16, 2023
1 parent ec61012 commit 91ef6cc
Show file tree
Hide file tree
Showing 18 changed files with 85 additions and 232 deletions.
2 changes: 1 addition & 1 deletion src/uproot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@
isort:skip_file
"""


from uproot.version import __version__
import uproot.const
import uproot.extras
Expand All @@ -92,6 +91,7 @@
from uproot.source.xrootd import MultithreadedXRootDSource
from uproot.source.s3 import S3Source
from uproot.source.object import ObjectSource
from uproot.source.fsspec import FSSpecSource
from uproot.source.cursor import Cursor
from uproot.source.futures import TrivialExecutor
from uproot.source.futures import ThreadPoolExecutor
Expand Down
5 changes: 0 additions & 5 deletions src/uproot/_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,6 @@ def dask(
Options (type; default):
* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down
161 changes: 24 additions & 137 deletions src/uproot/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,63 +369,30 @@ def file_path_to_source_class(file_path, options):
Use a file path to get the :doc:`uproot.source.chunk.Source` class that would read it.
Returns a tuple of (class, file_path) where the class is a subclass of :doc:`uproot.source.chunk.Source`.
The "handler" option is the preferred way to specify a custom source class.
The "*_handler" options are for backwards compatibility and will override the "handler" option if set.
"""

import uproot.source.chunk

file_path = regularize_path(file_path)

out = options["handler"]
if out is not None:
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
f"'handler' is not a class object inheriting from Source: {out!r}"
)
# check if "object_handler" is set
if (
options["object_handler"] is not None
or options["file_handler"] is not None
or options["xrootd_handler"] is not None
or options["s3_handler"] is not None
or options["http_handler"] is not None
source_cls = options["handler"]
if source_cls is not None:
if not (
isinstance(source_cls, type)
and issubclass(source_cls, uproot.source.chunk.Source)
):
# These options will override the "handler" option for backwards compatibility
warnings.warn(
"""In version 5.2.0, the '*_handler' argument ('http_handler`, 's3_handler', etc.) will be removed from 'uproot.open'. Use 'handler' instead.""",
stacklevel=1,
raise TypeError(
f"'handler' is not a class object inheriting from Source: {source_cls!r}"
)
else:
return out, file_path
return source_cls, file_path

if (
not isinstance(file_path, str)
and hasattr(file_path, "read")
and hasattr(file_path, "seek")
):
out = options["object_handler"]
if out is None:
out = uproot.source.object.ObjectSource
else:
warnings.warn(
f"""In version 5.2.0, the 'object_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r})
instead.
To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
f"'object_handler' is not a class object inheriting from Source: {out!r}"
)

return out, file_path
source_cls = uproot.source.object.ObjectSource
return source_cls, file_path

windows_absolute_path = None
if win and _windows_absolute_path_pattern.match(file_path) is not None:
Expand Down Expand Up @@ -457,107 +424,27 @@ def file_path_to_source_class(file_path, options):
else:
file_path = windows_absolute_path

out = options["file_handler"]
if out is None:
out = uproot.source.file.MemmapSource
else:
warnings.warn(
f"""In version 5.2.0, the 'file_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.
To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
# uproot.source.file.MemmapSource
source_cls = uproot.source.fsspec.FSSpecSource

if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'file_handler' is not a class object inheriting from Source: "
+ repr(out)
)
return out, os.path.expanduser(file_path)
return source_cls, os.path.expanduser(file_path)

elif scheme == "root":
out = options["xrootd_handler"]
if out is None:
out = uproot.source.xrootd.XRootDSource
else:
warnings.warn(
f"""In version 5.2.0, the 'xrootd_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.
To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'xrootd_handler' is not a class object inheriting from Source: "
+ repr(out)
)
return out, file_path
# uproot.source.xrootd.XRootDSource
source_cls = uproot.source.fsspec.FSSpecSource
return source_cls, file_path

elif scheme == "s3":
out = options["s3_handler"]
if out is None:
out = uproot.source.s3.S3Source
else:
warnings.warn(
f"""In version 5.2.0, the 's3_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.
To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'s3' is not a class object inheriting from Source: " + repr(out)
)
return out, file_path
# https://github.com/scikit-hep/uproot5/pull/1012
source_cls = uproot.source.s3.S3Source
return source_cls, file_path

elif scheme in ("http", "https"):
out = options["http_handler"]
if out is None:
out = uproot.source.http.HTTPSource
else:
warnings.warn(
f"""In version 5.2.0, the 'http_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.
To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'http_handler' is not a class object inheriting from Source: "
+ repr(out)
)
return out, file_path

else:
# try to use fsspec before raising an error
if scheme in _schemes:
return uproot.source.fsspec.FSSpecSource, file_path
# uproot.source.http.HTTPSource
source_cls = uproot.source.fsspec.FSSpecSource
return source_cls, file_path

raise ValueError(f"URI scheme not recognized: {file_path}")
return uproot.source.fsspec.FSSpecSource, file_path


if isinstance(__builtins__, dict):
Expand Down
10 changes: 0 additions & 10 deletions src/uproot/behaviors/TBranch.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,6 @@ def iterate(
Options (type; default):
* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down Expand Up @@ -327,11 +322,6 @@ def concatenate(
Options (type; default):
* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down
71 changes: 20 additions & 51 deletions src/uproot/reading.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
import struct
import sys
import uuid
import warnings
from collections.abc import Mapping, MutableMapping

import uproot
import uproot.behaviors.TBranch
import uproot.source.fsspec
from uproot._util import no_filter


Expand All @@ -42,7 +42,7 @@ def open(
``"rel/file.root:tdirectory/ttree"``, ``Path("rel:/file.root")``,
``Path("/abs/path:stuff.root")``
object_cache (None, MutableMapping, or int): Cache of objects drawn
from ROOT directories (e.g histograms, TTrees, other directories);
from ROOT directories (e.g. histograms, TTrees, other directories);
if None, do not use a cache; if an int, create a new cache of this
size.
array_cache (None, MutableMapping, or memory size): Cache of arrays
Expand Down Expand Up @@ -77,11 +77,6 @@ def open(
Options (type; default):
* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down Expand Up @@ -156,43 +151,22 @@ def open(
return file.root_directory[object_path]


open.defaults = {
"handler": None,
"timeout": 30,
"max_num_elements": None,
"num_workers": 1,
"use_threads": sys.platform != "emscripten",
"num_fallback_workers": 10,
"begin_chunk_size": 403, # the smallest a ROOT file can be
"minimal_ttree_metadata": True,
}


class _OpenDefaults(dict):
def __getitem__(self, where):
if where == "xrootd_handler" and where not in self:
# See https://github.com/scikit-hep/uproot5/issues/294
if uproot.extras.older_xrootd("5.2.0"):
message = (
f"XRootD {uproot.extras.xrootd_version()} is not fully supported; "
"""either upgrade to 5.2.0+ or set
open.defaults["xrootd_handler"] = uproot.MultithreadedXRootDSource
"""
)
warnings.warn(message, FutureWarning, stacklevel=1)

# The key should still be set, regardless of whether we see the warning.
self["xrootd_handler"] = uproot.source.xrootd.XRootDSource

return dict.__getitem__(self, where)


open.defaults = _OpenDefaults(
{
"handler": None, # To be updated to fsspec source
"file_handler": None, # Deprecated
"s3_handler": None, # Deprecated
"http_handler": None, # Deprecated
"object_handler": None, # Deprecated
"xrootd_handler": None, # Deprecated
"timeout": 30,
"max_num_elements": None,
"num_workers": 1,
"use_threads": sys.platform != "emscripten",
"num_fallback_workers": 10,
"begin_chunk_size": 403, # the smallest a ROOT file can be
"minimal_ttree_metadata": True,
}
)
def __init__(self):
raise NotImplementedError # kept for backwards compatibility


must_be_attached = [
"TROOT",
Expand Down Expand Up @@ -536,11 +510,6 @@ class ReadOnlyFile(CommonFileMethods):
Options (type; default):
* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down Expand Up @@ -571,7 +540,7 @@ def __init__(
self.decompression_executor = decompression_executor
self.interpretation_executor = interpretation_executor

self._options = _OpenDefaults(open.defaults)
self._options = open.defaults.copy()
self._options.update(options)
for option in ["begin_chunk_size"]:
self._options[option] = uproot._util.memory_size(self._options[option])
Expand All @@ -581,10 +550,10 @@ def __init__(

self.hook_before_create_source()

Source, file_path = uproot._util.file_path_to_source_class(
source_cls, file_path = uproot._util.file_path_to_source_class(
file_path, self._options
)
self._source = Source(file_path, **self._options)
self._source = source_cls(file_path, **self._options)

self.hook_before_get_chunks()

Expand Down
5 changes: 4 additions & 1 deletion src/uproot/source/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __init__(self, file_path: str, **options):
if k not in uproot.reading.open.defaults.keys()
}

self._executor = FSSpecLoopExecutor()
self._open()

self._fs, self._file_path = fsspec.core.url_to_fs(file_path, **storage_options)

Expand All @@ -48,6 +48,9 @@ def __init__(self, file_path: str, **options):
self._num_requested_bytes = 0
self.__enter__()

def _open(self):
self._executor = FSSpecLoopExecutor()

def __repr__(self):
path = repr(self._file_path)
if len(self._file_path) > 10:
Expand Down
File renamed without changes.
Loading

0 comments on commit 91ef6cc

Please sign in to comment.