From 9de74ef52b73379f90f3439ae1e373ab08b6a825 Mon Sep 17 00:00:00 2001 From: Alessandro Amici Date: Wed, 25 Jul 2018 21:48:32 +0200 Subject: [PATCH] Add a filter_by_keys kwargs to open_dataset to filter the GRIB file index before building the cube. See #2. --- cfgrib/dataset.py | 4 +++- cfgrib/messages.py | 4 ++-- cfgrib/xarray_store.py | 9 ++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/cfgrib/dataset.py b/cfgrib/dataset.py index 7d30e0c4..db225ce5 100644 --- a/cfgrib/dataset.py +++ b/cfgrib/dataset.py @@ -357,8 +357,9 @@ def dict_merge(master, update): def build_dataset_components( stream, encode_parameter=False, encode_time=False, encode_vertical=False, encode_geography=False, + filter_by_keys={}, ): - index = stream.index(ALL_KEYS) + index = stream.index(ALL_KEYS).subindex(filter_by_keys) param_ids = index['paramId'] dimensions = collections.OrderedDict() variables = collections.OrderedDict() @@ -387,6 +388,7 @@ class Dataset(object): encode_time = attr.attrib(default=True) encode_vertical = attr.attrib(default=True) encode_geography = attr.attrib(default=True) + filter_by_keys = attr.attrib(default={}, type=T.Dict[str, T.Any]) @classmethod def frompath(cls, path, mode='r', **kwargs): diff --git a/cfgrib/messages.py b/cfgrib/messages.py index 75ec07c6..4dae140e 100644 --- a/cfgrib/messages.py +++ b/cfgrib/messages.py @@ -227,8 +227,8 @@ def getone(self, item): raise ValueError("not one value for %r: %r" % (item, len(values))) return values[0] - def subindex(self, dict_query={}, **query): - query.update(dict_query) + def subindex(self, filter_by_keys={}, **query): + query.update(filter_by_keys) raw_query = [(self.index_keys.index(k), v) for k, v in query.items()] offsets = collections.OrderedDict() for header_values in self.offsets: diff --git a/cfgrib/xarray_store.py b/cfgrib/xarray_store.py index 1fe210fd..9edf4eb0 100644 --- a/cfgrib/xarray_store.py +++ b/cfgrib/xarray_store.py @@ -151,12 +151,15 @@ def get_encoding(self): return encoding -def open_dataset(path, flavour_name='ecmwf', **kwargs): - overrides = {} +def open_dataset(path, flavour_name='ecmwf', filter_by_keys={}, **kwargs): + overrides = { + 'flavour_name': flavour_name, + 'filter_by_keys': filter_by_keys, + } for k in list(kwargs): # copy to allow the .pop() if k.startswith('encode_'): overrides[k] = kwargs.pop(k) - store = GribDataStore.frompath(path, flavour_name=flavour_name, **overrides) + store = GribDataStore.frompath(path, **overrides) return _open_dataset(store, **kwargs)