From 69a640d46399244a2427ffdd64dce56fb2c9abf0 Mon Sep 17 00:00:00 2001
From: Jeffrey Milloy
Date: Fri, 3 Jul 2020 09:58:01 -0400
Subject: [PATCH] FIX: Some parts of hdf/netcdf datasets require the file-like
 object to still be open.

---
 podpac/core/data/dataset_source.py |  1 +
 podpac/core/data/file_source.py    | 29 ++++++++++++++++-------------
 podpac/core/data/h5py_source.py    |  1 +
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/podpac/core/data/dataset_source.py b/podpac/core/data/dataset_source.py
index e9020330c..988e612c5 100644
--- a/podpac/core/data/dataset_source.py
+++ b/podpac/core/data/dataset_source.py
@@ -51,6 +51,7 @@ def open_dataset(self, fp):
         return xr.open_dataset(fp)
 
     def close_dataset(self):
+        super(Dataset, self).close_dataset()
         self.dataset.close()
 
     @cached_property
diff --git a/podpac/core/data/file_source.py b/podpac/core/data/file_source.py
index 3516b48f3..43393598c 100644
--- a/podpac/core/data/file_source.py
+++ b/podpac/core/data/file_source.py
@@ -87,32 +87,32 @@ def _dataset_caching_node(self):
 
     @cached_property
     def dataset(self):
+        # get from the cache
         # use the _dataset_caching_node "stub" here because the only node attr we care about is the source
         if self.cache_dataset and self._dataset_caching_node.has_cache(key="dataset"):
             data = self._dataset_caching_node.get_cache(key="dataset")
-            with BytesIO(data) as f:
-                return self._open(BytesIO(data), cache=False)
-        elif self.source.startswith("s3://"):
+            self._file = BytesIO(data)
+            return self._open(self._file, cache=False)
+
+        # otherwise, open the file
+        if self.source.startswith("s3://"):
             _logger.info("Loading AWS resource: %s" % self.source)
-            with self.s3.open(self.source, "rb") as f:
-                return self._open(f)
+            self._file = self.s3.open(self.source, "rb")
         elif self.source.startswith("http://") or self.source.startswith("https://"):
             _logger.info("Downloading: %s" % self.source)
             response = requests.get(self.source)
-            with BytesIO(response.content) as f:
-                return self._open(f)
+            self._file = BytesIO(response.content)
         elif self.source.startswith("ftp://"):
             _logger.info("Downloading: %s" % self.source)
             addinfourl = urlopen(self.source)
-            with BytesIO(addinfourl.read()) as f:
-                return self._open(f)
+            self._file = BytesIO(addinfourl.read())
         elif self.source.startswith("file://"):
             addinfourl = urlopen(self.source)
-            with BytesIO(addinfourl.read()) as f:
-                return self._open(f)
+            self._file = BytesIO(addinfourl.read())
         else:
-            with open(self.source, "rb") as f:
-                return self._open(f)
+            self._file = open(self.source, "rb")
+
+        return self._open(self._file)
 
     def _open(self, f, cache=True):
         if self.cache_dataset and cache:
@@ -124,6 +124,9 @@ def open_dataset(self, f):
         """ TODO """
         raise NotImplementedError()
 
+    def close_dataset(self):
+        self._file.close()
+
 
 @common_doc(COMMON_DATA_DOC)
 class FileKeysMixin(tl.HasTraits):
diff --git a/podpac/core/data/h5py_source.py b/podpac/core/data/h5py_source.py
index 7cb89a1c6..fa7b52247 100644
--- a/podpac/core/data/h5py_source.py
+++ b/podpac/core/data/h5py_source.py
@@ -54,6 +54,7 @@ def dataset(self):
 
     def close_dataset(self):
         """Closes the file. """
+        super(Dataset, self).close_dataset()
         self.dataset.close()
 
     # -------------------------------------------------------------------------