Add utility for opening remote files with fsspec #9797

Merged · 8 commits · Nov 20, 2024
3 changes: 3 additions & 0 deletions doc/whats-new.rst
@@ -57,6 +57,9 @@ New Features
- Optimize :py:meth:`DataArray.polyfit` and :py:meth:`Dataset.polyfit` with dask, when used with
arrays of more than two dimensions.
(:issue:`5629`). By `Deepak Cherian <https://github.com/dcherian>`_.
- Support for directly opening remote files as string paths (for example, ``s3://bucket/data.nc``)
with ``fsspec`` when using the ``h5netcdf`` engine (:issue:`9723`, :pull:`9797`).
By `James Bourbeau <https://github.com/jrbourbeau>`_.
- Re-implement the :py:mod:`ufuncs` module, which now dynamically dispatches to the
underlying array's backend. Provides better support for certain wrapped array types
like ``jax.numpy.ndarray``. (:issue:`7848`, :pull:`9776`).
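With this change merged, the feature reads end-to-end as below; a minimal usage sketch, assuming ``fsspec`` and ``s3fs`` are installed (the bucket, key, and credentials are hypothetical):

```python
import xarray as xr

# Pass the remote URL straight to open_dataset; storage_options is
# forwarded to fsspec (here requesting anonymous S3 access).
ds = xr.open_dataset(
    "s3://my-bucket/data.nc",
    engine="h5netcdf",
    storage_options={"anon": True},
)
```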
9 changes: 9 additions & 0 deletions xarray/backends/common.py
@@ -178,6 +178,15 @@ def _normalize_path_list(
return _normalize_path_list(paths)


def _open_remote_file(file, mode, storage_options=None):
import fsspec

fs, _, paths = fsspec.get_fs_token_paths(
file, mode=mode, storage_options=storage_options
)
return fs.open(paths[0], mode=mode)


def _encode_variable_name(name):
if name is None:
name = NONE_VAR_NAME
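A rough sketch of what ``_open_remote_file`` resolves to at call time, again assuming ``fsspec`` and ``s3fs`` are installed and a hypothetical bucket:

```python
import fsspec

# get_fs_token_paths maps the URL to a filesystem instance plus the
# normalized path(s); fs.open then yields a file-like object that
# h5netcdf/h5py can read directly.
fs, _, paths = fsspec.get_fs_token_paths(
    "s3://my-bucket/data.nc", mode="rb", storage_options={"anon": True}
)
f = fs.open(paths[0], mode="rb")
```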
12 changes: 11 additions & 1 deletion xarray/backends/h5netcdf_.py
@@ -13,6 +13,7 @@
BackendEntrypoint,
WritableCFDataStore,
_normalize_path,
_open_remote_file,
datatree_from_dict_with_io_cleanup,
find_root_and_group,
)
@@ -149,9 +150,16 @@ def open(
decode_vlen_strings=True,
driver=None,
driver_kwds=None,
storage_options: dict[str, Any] | None = None,
):
import h5netcdf

if isinstance(filename, str) and is_remote_uri(filename) and driver is None:
mode_ = "rb" if mode == "r" else mode
filename = _open_remote_file(
filename, mode=mode_, storage_options=storage_options
)

if isinstance(filename, bytes):
raise ValueError(
"can't open netCDF4/HDF5 as bytes "
@@ -161,7 +169,7 @@
magic_number = read_magic_number_from_file(filename)
if not magic_number.startswith(b"\211HDF\r\n\032\n"):
raise ValueError(
f"{magic_number} is not the signature of a valid netCDF4 file"
f"{magic_number!r} is not the signature of a valid netCDF4 file"
)

if format not in [None, "NETCDF4"]:
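The ``!r`` added above renders the offending bytes with ``repr`` in the error message (sidestepping a ``BytesWarning`` when Python runs with ``-b``). The signature being checked is HDF5's fixed 8-byte magic number; a standalone illustration with a hypothetical local file:

```python
# Every valid HDF5 file (and hence every netCDF4 file) begins with the
# 8-byte signature b"\211HDF\r\n\032\n", i.e. b"\x89HDF\r\n\x1a\n".
with open("data.nc", "rb") as f:
    magic_number = f.read(8)
assert magic_number.startswith(b"\211HDF\r\n\032\n")
```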
@@ -425,6 +433,7 @@ def open_dataset(
decode_vlen_strings=True,
driver=None,
driver_kwds=None,
storage_options: dict[str, Any] | None = None,
) -> Dataset:
filename_or_obj = _normalize_path(filename_or_obj)
store = H5NetCDFStore.open(
@@ -437,6 +446,7 @@
decode_vlen_strings=decode_vlen_strings,
driver=driver,
driver_kwds=driver_kwds,
storage_options=storage_options,
)

store_entrypoint = StoreBackendEntrypoint()
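For comparison, the route available before this PR was to open the file object yourself and hand it to xarray; the new ``storage_options`` keyword folds that step into the backend (URL hypothetical):

```python
import fsspec
import xarray as xr

# Manual workaround: open the remote file with fsspec and pass the
# resulting file-like object to open_dataset. Storage options go to
# fsspec.open as plain keyword arguments here.
with fsspec.open("s3://my-bucket/data.nc", mode="rb", anon=True) as f:
    ds = xr.open_dataset(f, engine="h5netcdf")
```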
21 changes: 21 additions & 0 deletions xarray/tests/test_backends.py
@@ -6489,3 +6489,24 @@ def test_zarr_safe_chunk_region(tmp_path):
chunk = ds.isel(region)
chunk = chunk.chunk()
chunk.chunk().to_zarr(store, region=region)


@requires_h5netcdf
@requires_fsspec
def test_h5netcdf_storage_options() -> None:
with create_tmp_files(2, allow_cleanup_failure=ON_WINDOWS) as (f1, f2):
ds1 = create_test_data()
ds1.to_netcdf(f1, engine="h5netcdf")

ds2 = create_test_data()
ds2.to_netcdf(f2, engine="h5netcdf")

files = [f"file://{f}" for f in [f1, f2]]
ds = xr.open_mfdataset(
files,
engine="h5netcdf",
concat_dim="time",
combine="nested",
storage_options={"skip_instance_cache": False},
)
assert_identical(xr.concat([ds1, ds2], dim="time"), ds)
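The test stays network-free by prefixing local temp files with ``file://``, which fsspec resolves with its local filesystem; a quick sketch of that assumption (path hypothetical):

```python
import fsspec

# file:// URLs map to fsspec's LocalFileSystem, so the new remote-file
# code path is exercised without any network access.
fs, _, paths = fsspec.get_fs_token_paths("file:///tmp/data.nc", mode="rb")
print(type(fs).__name__)  # LocalFileSystem
```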