Skip to content

Commit

Permalink
Merge pull request #180 from NREL/pp/multi_time_from_list
Browse files Browse the repository at this point in the history
Wildcard support for list input to `MultiTimeResource`
  • Loading branch information
ppinchuk authored Aug 15, 2024
2 parents a96af89 + a569c25 commit 5ecb5fe
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 28 deletions.
45 changes: 26 additions & 19 deletions rex/multi_time_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
Classes to handle resource data stored over multiple files
"""
import os
from fnmatch import fnmatch
from glob import glob
from itertools import chain
from fnmatch import fnmatch

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -33,8 +34,9 @@ def __init__(self, h5_path, res_cls=Resource, hsds=False, hsds_kwargs=None,
h5_path : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same coordinates
but can have different datasets or time indexes. Can also be an
explicit list of multi time files.
but can have different datasets or time indexes. Can also be
an explicit list of multi time files, which themselves can
contain * wildcards.
res_cls : obj
Resource class to use to open and access resource data
hsds : bool
Expand Down Expand Up @@ -259,8 +261,9 @@ def _get_file_paths(cls, h5_path, hsds=False, hsds_kwargs=None):
h5_path : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same coordinates
but can have different datasets or time indexes. Can also be an
explicit list of multi time files.
but can have different datasets or time indexes. Can also be
an explicit list of multi time files, which themselves can
contain * wildcards.
hsds : bool
Boolean flag to use h5pyd to handle .h5 'files' hosted on AWS
behind HSDS
Expand All @@ -278,10 +281,9 @@ def _get_file_paths(cls, h5_path, hsds=False, hsds_kwargs=None):
file_paths = cls._get_hsds_file_paths(h5_path,
hsds_kwargs=hsds_kwargs)
elif isinstance(h5_path, (list, tuple)):
for fp in h5_path:
msg = 'Does not exist: {}'.format(fp)
assert os.path.exists(fp), msg
file_paths = h5_path
file_paths = list(chain.from_iterable(glob(fp) for fp in h5_path))
for fp in file_paths:
assert os.path.exists(fp), 'Does not exist: {}'.format(fp)
elif os.path.isdir(h5_path):
msg = ('h5_path must be a unix shell style pattern with '
'wildcard * in order to find files, but received '
Expand Down Expand Up @@ -493,8 +495,9 @@ def __init__(self, h5_path, unscale=True, str_decode=True,
h5_path : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same coordinates
but can have different datasets or time indexes. Can also be an
explicit list of multi time files.
but can have different datasets or time indexes. Can also be
an explicit list of multi time files, which themselves can
contain * wildcards.
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Expand Down Expand Up @@ -850,8 +853,9 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False,
h5_path : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same coordinates
but can have different datasets or time indexes. Can also be an
explicit list of multi time files.
but can have different datasets or time indexes. Can also be
an explicit list of multi time files, which themselves can
contain * wildcards.
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Expand Down Expand Up @@ -885,8 +889,9 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False,
h5_path : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same coordinates
but can have different datasets or time indexes. Can also be an
explicit list of multi time files.
but can have different datasets or time indexes. Can also be
an explicit list of multi time files, which themselves can
contain * wildcards.
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Expand Down Expand Up @@ -920,8 +925,9 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False,
h5_path : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same coordinates
but can have different datasets or time indexes. Can also be an
explicit list of multi time files.
but can have different datasets or time indexes. Can also be
an explicit list of multi time files, which themselves can
contain * wildcards.
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Expand Down Expand Up @@ -953,8 +959,9 @@ def __init__(self, h5_path, unscale=True, str_decode=True, hsds=False,
h5_path : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same coordinates
but can have different datasets or time indexes. Can also be an
explicit list of multi time files.
but can have different datasets or time indexes. Can also be
an explicit list of multi time files, which themselves can
contain * wildcards.
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Expand Down
24 changes: 16 additions & 8 deletions rex/multi_year_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,12 @@ def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
"""
Parameters
----------
h5_path : str
h5_path : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same coordinates
but can have different datasets or time indexes.
but can have different datasets or time indexes. Can also be
an explicit list of multi time files, which themselves can
contain * wildcards.
years : list, optional
List of years to access, by default None
unscale : bool
Expand Down Expand Up @@ -498,10 +500,12 @@ def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
"""
Parameters
----------
h5_path : str
h5_path : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same coordinates
but can have different datasets or time indexes.
but can have different datasets or time indexes. Can also be
an explicit list of multi time files, which themselves can
contain * wildcards.
years : list, optional
List of years to access, by default None
unscale : bool
Expand Down Expand Up @@ -532,10 +536,12 @@ def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
"""
Parameters
----------
h5_path : str
h5_path : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same coordinates
but can have different datasets or time indexes.
but can have different datasets or time indexes. Can also be
an explicit list of multi time files, which themselves can
contain * wildcards.
years : list, optional
List of years to access, by default None
unscale : bool
Expand Down Expand Up @@ -566,10 +572,12 @@ def __init__(self, h5_path, years=None, unscale=True, str_decode=True,
"""
Parameters
----------
h5_path : str
h5_path : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same coordinates
but can have different datasets or time indexes.
but can have different datasets or time indexes. Can also be
an explicit list of multi time files, which themselves can
contain * wildcards.
years : list, optional
List of years to access, by default None
unscale : bool
Expand Down
2 changes: 1 addition & 1 deletion rex/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""rex Version number"""

__version__ = "0.2.88"
__version__ = "0.2.89"
68 changes: 68 additions & 0 deletions tests/test_multi_time_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@ def MultiTimeNSRDB_list_res():
return MultiTimeNSRDB(files)


@pytest.fixture
def MultiTimeNSRDB_wildcard_list_res():
    """
    Init NSRDB resource handler from a one-element list whose only entry
    is a * wildcard pattern instead of an explicit file path
    """
    pattern = os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_20*.h5')
    return MultiTimeNSRDB([pattern])


@pytest.fixture
def MultiTimeWind_res():
"""
Expand Down Expand Up @@ -258,6 +268,49 @@ def test_ds(MultiTimeNSRDB_list_res, ds_name='dni'):
MultiTimeNSRDB_list_res.close()


class TestMultiTimeWildcardList:
    """
    Test multi time resource handler from list of files with wildcards

    Every test also asserts that the handler reports at least two files
    and closes the handler in a ``finally`` clause so that it is released
    even when one of the checks fails.
    """
    @staticmethod
    def test_res(MultiTimeNSRDB_wildcard_list_res):
        """
        test NSRDB class calls
        """
        res = MultiTimeNSRDB_wildcard_list_res
        try:
            check_res(res)
            # the single list entry is a pattern; expect several files
            assert len(res.h5.files) >= 2
        finally:
            res.close()

    @staticmethod
    def test_meta(MultiTimeNSRDB_wildcard_list_res):
        """
        test extraction of NSRDB meta data
        """
        res = MultiTimeNSRDB_wildcard_list_res
        try:
            check_meta(res)
            assert len(res.h5.files) >= 2
        finally:
            res.close()

    @staticmethod
    def test_time_index(MultiTimeNSRDB_wildcard_list_res):
        """
        test extraction of NSRDB time_index
        """
        res = MultiTimeNSRDB_wildcard_list_res
        try:
            check_time_index(res)
            assert len(res.h5.files) >= 2
        finally:
            res.close()

    @staticmethod
    def test_ds(MultiTimeNSRDB_wildcard_list_res, ds_name='dni'):
        """
        test extraction of a variable array, attributes, and properties
        """
        res = MultiTimeNSRDB_wildcard_list_res
        try:
            check_dset(res, ds_name)
            check_attrs(res, ds_name)
            check_properties(res, ds_name)
            assert len(res.h5.files) >= 2
        finally:
            res.close()


class TestMultiTimeWindResource:
"""
Multi Year WindResource Resource handler tests
Expand Down Expand Up @@ -323,6 +376,21 @@ def test_map_hsds_files():
assert not any(wrong), 'Wrong files: {}'.format(wrong)


def test_multi_time_resource_acts_like_resource_single_file():
    """Test that MultiTimeWindResource behaves like Resource for one file.

    Opens the same file with both handlers and compares dataset names,
    time index, shape, and the values of every dataset whose name does
    not contain 'meta' or 'time'.
    """
    skip_keywords = ['meta', 'time']
    path = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5')

    with Resource(path) as res, MultiTimeWindResource([path]) as mt_res:
        assert set(res.datasets) == set(mt_res.datasets)
        assert (res.time_index == mt_res.time_index).all()
        assert res.shape == mt_res.shape
        for ds in res.datasets:
            # only array datasets are compared numerically
            if not any(kw in ds for kw in skip_keywords):
                assert np.allclose(res[ds], mt_res[ds])


@pytest.mark.timeout(10)
def test_mt_iterator():
"""
Expand Down

0 comments on commit 5ecb5fe

Please sign in to comment.