From 0ef06c083f92cdf0410b07dd79a384de473d5403 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Mon, 16 Nov 2020 09:53:22 +0100 Subject: [PATCH 01/22] Allow path object to pass through to the file handler --- satpy/readers/file_handlers.py | 8 ++---- satpy/readers/olci_nc.py | 47 +++++++++++++++++++++------------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/satpy/readers/file_handlers.py b/satpy/readers/file_handlers.py index 086420d626..09ef077165 100644 --- a/satpy/readers/file_handlers.py +++ b/satpy/readers/file_handlers.py @@ -20,9 +20,8 @@ from abc import ABCMeta import numpy as np -from pathlib import PurePath - from pyresample.geometry import SwathDefinition + from satpy.dataset import combine_metadata @@ -31,10 +30,7 @@ class BaseFileHandler(metaclass=ABCMeta): def __init__(self, filename, filename_info, filetype_info): """Initialize file handler.""" - if isinstance(filename, PurePath): - self.filename = str(filename) - else: - self.filename = filename + self.filename = filename self.navigation_reader = None self.filename_info = filename_info self.filetype_info = filetype_info diff --git a/satpy/readers/olci_nc.py b/satpy/readers/olci_nc.py index 88cca91cb8..6e839c2823 100644 --- a/satpy/readers/olci_nc.py +++ b/satpy/readers/olci_nc.py @@ -40,16 +40,17 @@ import logging -from datetime import datetime +from contextlib import suppress +from functools import lru_cache +from functools import reduce import dask.array as da import numpy as np import xarray as xr +from satpy import CHUNK_SIZE from satpy.readers.file_handlers import BaseFileHandler from satpy.utils import angle2xyz, xyz2angle -from satpy import CHUNK_SIZE -from functools import reduce logger = logging.getLogger(__name__) @@ -100,30 +101,40 @@ def __init__(self, filename, filename_info, filetype_info, """Init the olci reader base.""" super(NCOLCIBase, self).__init__(filename, filename_info, filetype_info) - self.nc = xr.open_dataset(self.filename, + self._engine = engine + 
self._start_time = filename_info['start_time'] + self._end_time = filename_info['end_time'] + # TODO: get metadata from the manifest file (xfdumanifest.xml) + self.platform_name = PLATFORM_NAMES[filename_info['mission_id']] + self.sensor = 'olci' + self.open_file = None + + @property + @lru_cache(maxsize=2) + def nc(self): + """Get the nc xr dataset.""" + try: + f_obj = self.filename.open() + self.open_file = f_obj + except AttributeError: + f_obj = self.filename + dataset = xr.open_dataset(f_obj, decode_cf=True, mask_and_scale=True, - engine=engine, + engine=self._engine, chunks={'columns': CHUNK_SIZE, 'rows': CHUNK_SIZE}) - - self.nc = self.nc.rename({'columns': 'x', 'rows': 'y'}) - - # TODO: get metadata from the manifest file (xfdumanifest.xml) - self.platform_name = PLATFORM_NAMES[filename_info['mission_id']] - self.sensor = 'olci' + return dataset.rename({'columns': 'x', 'rows': 'y'}) @property def start_time(self): """Start time property.""" - return datetime.strptime(self.nc.attrs['start_time'], - '%Y-%m-%dT%H:%M:%S.%fZ') + return self._start_time @property def end_time(self): """End time property.""" - return datetime.strptime(self.nc.attrs['stop_time'], - '%Y-%m-%dT%H:%M:%S.%fZ') + return self._end_time def get_dataset(self, key, info): """Load a dataset.""" @@ -134,10 +145,10 @@ def get_dataset(self, key, info): def __del__(self): """Close the NetCDF file that may still be open.""" - try: + with suppress(IOError, OSError, AttributeError): self.nc.close() - except (IOError, OSError, AttributeError): - pass + with suppress(AttributeError): + self.open_file.close() class NCOLCICal(NCOLCIBase): From 47a4dcf925e119f2a1445bce0dd90e6581455608 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Wed, 18 Nov 2020 17:36:57 +0100 Subject: [PATCH 02/22] Add tests --- satpy/tests/reader_tests/test_olci_nc.py | 15 +++++++++++++++ satpy/tests/test_file_handlers.py | 13 ++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git 
a/satpy/tests/reader_tests/test_olci_nc.py b/satpy/tests/reader_tests/test_olci_nc.py index 0597d5c82a..c39df23cc2 100644 --- a/satpy/tests/reader_tests/test_olci_nc.py +++ b/satpy/tests/reader_tests/test_olci_nc.py @@ -76,6 +76,21 @@ def test_instantiate(self, mocked_dataset): mocked_dataset.assert_called() mocked_dataset.reset_mock() + @mock.patch('xarray.open_dataset') + def test_open_file_objects(self, mocked_open_dataset): + """Test initialization of file handlers.""" + from satpy.readers.olci_nc import NCOLCIBase + filename_info = {'mission_id': 'S3A', 'dataset_name': 'Oa01', 'start_time': 0, 'end_time': 0} + + open_file = mock.MagicMock() + + file_handler = NCOLCIBase(open_file, filename_info, 'c') + file_handler.nc + mocked_open_dataset.assert_called() + open_file.open.assert_called() + assert (open_file.open.return_value in mocked_open_dataset.call_args[0] or + open_file.open.return_value == mocked_open_dataset.call_args[1].get('filename_or_obj')) + @mock.patch('xarray.open_dataset') def test_get_dataset(self, mocked_dataset): """Test reading datasets.""" diff --git a/satpy/tests/test_file_handlers.py b/satpy/tests/test_file_handlers.py index e0447b57b3..92e20d9d19 100644 --- a/satpy/tests/test_file_handlers.py +++ b/satpy/tests/test_file_handlers.py @@ -28,7 +28,7 @@ class TestBaseFileHandler(unittest.TestCase): """Test the BaseFileHandler.""" def setUp(self): - """Setup the test.""" + """Set up the test.""" self._old_set = BaseFileHandler.__abstractmethods__ BaseFileHandler._abstractmethods__ = set() self.fh = BaseFileHandler( @@ -140,6 +140,17 @@ def test_combine_orbital_parameters(self): # Empty self.fh.combine_info([{}]) + def testt_file_is_kept_intact(self): + """Test that the file object passed (string, path, or other) is kept intact.""" + open_file = mock.MagicMock() + bfh = BaseFileHandler(open_file, {'filename_info': 'bla'}, 'filetype_info') + assert bfh.filename == open_file + + from pathlib import Path + filename = Path('/bla/bla.nc') + bfh = 
BaseFileHandler(filename, {'filename_info': 'bla'}, 'filetype_info') + assert isinstance(bfh.filename, Path) + def tearDown(self): """Tear down the test.""" BaseFileHandler.__abstractmethods__ = self._old_set From 973ca187d25ec2f621781950dc12fe35e125940a Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Fri, 20 Nov 2020 14:01:43 +0100 Subject: [PATCH 03/22] Add FSFile --- satpy/readers/__init__.py | 61 ++++++++++++++++++++++- satpy/tests/test_readers.py | 98 +++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+), 1 deletion(-) diff --git a/satpy/readers/__init__.py b/satpy/readers/__init__.py index ec89c0e1ba..8e9115f96e 100644 --- a/satpy/readers/__init__.py +++ b/satpy/readers/__init__.py @@ -21,6 +21,7 @@ import os import warnings from datetime import datetime, timedelta +from functools import total_ordering import yaml @@ -519,7 +520,7 @@ def load_readers(filenames=None, reader=None, reader_kwargs=None, def _get_reader_kwargs(reader, reader_kwargs): - """Helper for load_readers to form reader_kwargs. + """Help load_readers to form reader_kwargs. Helper for load_readers to get reader_kwargs and reader_kwargs_without_filter in the desirable form. @@ -538,3 +539,61 @@ def _get_reader_kwargs(reader, reader_kwargs): reader_kwargs_without_filter[k].pop('filter_parameters', None) return (reader_kwargs, reader_kwargs_without_filter) + + +@total_ordering +class FSFile(os.PathLike): + """Implementation of a PathLike file object, that can be opened. + + This is made to be use in conjuction with fsspec or s3fs. 
For example:: + + zipfile = S3B_OL_2_WFR____20201103T100807_20201103T101107_20201103T121330_0179_045_179_1980_MAR_O_NR_002.zip + filename = "sentinel-s3-ol2wfr-zips/2020/11/03/" + filename + the_files = fsspec.open_files("simplecache::zip://**/*.nc::s3://" + filename, s3={'anon': False}) + + fs_files = [FSFile(open_file) for open_file in the_files] + + scn = Scene(filenames=fs_files, reader='olci_l2') + scn.load(['chl_nn']) + scn.save_datasets() + + """ + + def __init__(self, file, fs=None): + """Initialise the FSFile instance. + + *file* can be string or an fsspec.OpenFile instance. In the latter case, the follow argument *fs* has no effect. + *fs* can be None or a fsspec filesystem instance. + """ + try: + self._file = file.path + self._fs = file.fs + except AttributeError: + self._file = file + self._fs = fs + + def __str__(self): + """Return the string version of the filename.""" + return self._file + + def __fspath__(self): + """Comply with PathLike.""" + return self._file + + def __repr__(self): + """Representation of the object.""" + return '' + + def open(self): + """Open the file. + + This is read-only. 
+ """ + try: + return self._fs.open(self._file) + except AttributeError: + return open(self._file) + + def __lt__(self, other): + """Implement ordering.""" + return os.fspath(self) < os.fspath(other) diff --git a/satpy/tests/test_readers.py b/satpy/tests/test_readers.py index 46b2c03c8f..ddad14c995 100644 --- a/satpy/tests/test_readers.py +++ b/satpy/tests/test_readers.py @@ -832,3 +832,101 @@ def test_multi_readers(self): reader=("abi_l1b", "viirs_sdr"), group_keys=("start_time"), time_threshold=10**9) + + +def _generate_random_string(): + import uuid + return str(uuid.uuid1()) + + +class TestFSFile(unittest.TestCase): + """Test the FSFile class.""" + + def setUp(self): + """Set up the instance.""" + import fsspec + from pathlib import Path + import tempfile + import zipfile + self.random_string = _generate_random_string() + self.local_filename = os.path.join(tempfile.gettempdir(), self.random_string) + Path(self.local_filename).touch() + self.local_file = fsspec.open(self.local_filename) + + self.random_string2 = _generate_random_string() + self.local_filename2 = os.path.join(tempfile.gettempdir(), self.random_string2) + Path(self.local_filename2).touch() + self.zip_name = os.path.join(tempfile.gettempdir(), self.random_string2 + ".zip") + zip_file = zipfile.ZipFile(self.zip_name, 'w', zipfile.ZIP_DEFLATED) + zip_file.write(self.local_filename2) + zip_file.close() + os.remove(self.local_filename2) + + def tearDown(self): + """Destroy the instance.""" + os.remove(self.local_filename) + os.remove(self.zip_name) + + def test_regular_filename_is_returned_with_str(self): + """Test that str give the filename.""" + from satpy.readers import FSFile + assert str(FSFile(self.random_string)) == self.random_string + + def test_fsfile_with_regular_filename_abides_pathlike(self): + """Test that FSFile abides PathLike for regular filenames.""" + from satpy.readers import FSFile + assert os.fspath(FSFile(self.random_string)) == self.random_string + + def 
test_fsfile_with_regular_filename_and_fs_spec_abides_pathlike(self): + """Test that FSFile abides PathLike for filename+fs instances.""" + from satpy.readers import FSFile + assert os.fspath(FSFile(self.random_string, fs=None)) == self.random_string + + def test_fsfile_with_fs_open_file_abides_pathlike(self): + """Test that FSFile abides PathLike for fsspec OpenFile instances.""" + from satpy.readers import FSFile + assert os.fspath(FSFile(self.local_file)).endswith(self.random_string) + + def test_repr_includes_filename(self): + """Test that repr includes the filename.""" + from satpy.readers import FSFile + assert self.random_string in repr(FSFile(self.local_file)) + + def test_open_regular_file(self): + """Test opening a regular file.""" + from satpy.readers import FSFile + assert hasattr(FSFile(self.local_filename).open(), 'tell') + + def test_open_local_fs_file(self): + """Test opening a localfs file.""" + from satpy.readers import FSFile + assert hasattr(FSFile(self.local_file).open(), 'tell') + + def test_open_zip_fs_regular_filename(self): + """Test opening a zipfs with a regular filename provided.""" + from satpy.readers import FSFile + from fsspec.implementations.zip import ZipFileSystem + zip_fs = ZipFileSystem(self.zip_name) + file = FSFile(self.local_filename2, zip_fs) + assert hasattr(file.open(), 'tell') + + def test_open_zip_fs_openfile(self): + """Test opening a zipfs openfile.""" + from satpy.readers import FSFile + import fsspec + open_file = fsspec.open("zip://" + self.local_filename2 + "::file://" + self.zip_name) + file = FSFile(open_file) + assert hasattr(file.open(), 'tell') + + def test_sorting_fsfiles(self): + """Test sorting FSFiles.""" + from satpy.readers import FSFile + from fsspec.implementations.zip import ZipFileSystem + zip_fs = ZipFileSystem(self.zip_name) + file1 = FSFile(self.local_filename2, zip_fs) + + file2 = FSFile(self.local_filename) + + sorted_filenames = [os.fspath(file) for file in sorted([file1, file2, '/tmp/bla'])] + 
expected_filenames = sorted(['/tmp/bla', os.fspath(file1), os.fspath(file2)]) + assert sorted_filenames == expected_filenames From f04c1379206d46eb9505bc8967a5bdf703a69ca7 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Fri, 20 Nov 2020 14:12:27 +0100 Subject: [PATCH 04/22] Make deepcode happier --- satpy/tests/test_readers.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/satpy/tests/test_readers.py b/satpy/tests/test_readers.py index ddad14c995..15eda415e4 100644 --- a/satpy/tests/test_readers.py +++ b/satpy/tests/test_readers.py @@ -839,6 +839,13 @@ def _generate_random_string(): return str(uuid.uuid1()) +def _assert_is_open_file_and_close(opened): + try: + assert hasattr(opened, 'tell') + finally: + opened.close() + + class TestFSFile(unittest.TestCase): """Test the FSFile class.""" @@ -895,12 +902,12 @@ def test_repr_includes_filename(self): def test_open_regular_file(self): """Test opening a regular file.""" from satpy.readers import FSFile - assert hasattr(FSFile(self.local_filename).open(), 'tell') + _assert_is_open_file_and_close(FSFile(self.local_filename).open()) def test_open_local_fs_file(self): """Test opening a localfs file.""" from satpy.readers import FSFile - assert hasattr(FSFile(self.local_file).open(), 'tell') + _assert_is_open_file_and_close(FSFile(self.local_file).open()) def test_open_zip_fs_regular_filename(self): """Test opening a zipfs with a regular filename provided.""" @@ -908,7 +915,7 @@ def test_open_zip_fs_regular_filename(self): from fsspec.implementations.zip import ZipFileSystem zip_fs = ZipFileSystem(self.zip_name) file = FSFile(self.local_filename2, zip_fs) - assert hasattr(file.open(), 'tell') + _assert_is_open_file_and_close(file.open()) def test_open_zip_fs_openfile(self): """Test opening a zipfs openfile.""" @@ -916,7 +923,7 @@ def test_open_zip_fs_openfile(self): import fsspec open_file = fsspec.open("zip://" + self.local_filename2 + "::file://" + self.zip_name) file = 
FSFile(open_file) - assert hasattr(file.open(), 'tell') + _assert_is_open_file_and_close(file.open()) def test_sorting_fsfiles(self): """Test sorting FSFiles.""" From dedeaea207bceb865bf09e79807e507f0540b31c Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Fri, 20 Nov 2020 14:30:54 +0100 Subject: [PATCH 05/22] Fix windows tests --- satpy/tests/test_readers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/satpy/tests/test_readers.py b/satpy/tests/test_readers.py index 15eda415e4..8ae0d6202b 100644 --- a/satpy/tests/test_readers.py +++ b/satpy/tests/test_readers.py @@ -19,11 +19,12 @@ import os import unittest +from contextlib import suppress from unittest import mock -from satpy.dataset.dataid import WavelengthRange, ModifierTuple, DataID -from satpy.dataset.data_dict import get_key import pytest +from satpy.dataset.data_dict import get_key +from satpy.dataset.dataid import WavelengthRange, ModifierTuple, DataID # clear the config dir environment variable so it doesn't interfere os.environ.pop("PPP_CONFIG_DIR", None) @@ -872,7 +873,8 @@ def setUp(self): def tearDown(self): """Destroy the instance.""" os.remove(self.local_filename) - os.remove(self.zip_name) + with suppress(PermissionError): + os.remove(self.zip_name) def test_regular_filename_is_returned_with_str(self): """Test that str give the filename.""" From 8afdb38cb13ca5b692bddaca515bae2132416ca8 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Fri, 20 Nov 2020 16:06:17 +0100 Subject: [PATCH 06/22] Fix windows tests --- satpy/tests/test_readers.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/satpy/tests/test_readers.py b/satpy/tests/test_readers.py index 8ae0d6202b..395dbd02f9 100644 --- a/satpy/tests/test_readers.py +++ b/satpy/tests/test_readers.py @@ -847,6 +847,11 @@ def _assert_is_open_file_and_close(opened): opened.close() +def _posixify_path(filename): + drive, driveless_name = os.path.splitdrive(filename) + return 
driveless_name.replace('\\', '/') + + class TestFSFile(unittest.TestCase): """Test the FSFile class.""" @@ -916,14 +921,14 @@ def test_open_zip_fs_regular_filename(self): from satpy.readers import FSFile from fsspec.implementations.zip import ZipFileSystem zip_fs = ZipFileSystem(self.zip_name) - file = FSFile(self.local_filename2, zip_fs) + file = FSFile(_posixify_path(self.local_filename2), zip_fs) _assert_is_open_file_and_close(file.open()) def test_open_zip_fs_openfile(self): """Test opening a zipfs openfile.""" from satpy.readers import FSFile import fsspec - open_file = fsspec.open("zip://" + self.local_filename2 + "::file://" + self.zip_name) + open_file = fsspec.open("zip:/" + _posixify_path(self.local_filename2) + "::file://" + self.zip_name) file = FSFile(open_file) _assert_is_open_file_and_close(file.open()) @@ -936,6 +941,7 @@ def test_sorting_fsfiles(self): file2 = FSFile(self.local_filename) - sorted_filenames = [os.fspath(file) for file in sorted([file1, file2, '/tmp/bla'])] - expected_filenames = sorted(['/tmp/bla', os.fspath(file1), os.fspath(file2)]) + extra_file = os.path.normpath('/tmp/bla') + sorted_filenames = [os.fspath(file) for file in sorted([file1, file2, extra_file])] + expected_filenames = sorted([extra_file, os.fspath(file1), os.fspath(file2)]) assert sorted_filenames == expected_filenames From 445fe1b2c15269b9a49cc3c4bc43695a3da01917 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Mon, 23 Nov 2020 15:11:01 +0100 Subject: [PATCH 07/22] Add support for FSFile in abi l1b reader --- satpy/readers/__init__.py | 9 +++++++ satpy/readers/abi_base.py | 18 +++++++++---- satpy/tests/reader_tests/test_abi_l1b.py | 33 ++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/satpy/readers/__init__.py b/satpy/readers/__init__.py index 8e9115f96e..e8fb8f0a71 100644 --- a/satpy/readers/__init__.py +++ b/satpy/readers/__init__.py @@ -597,3 +597,12 @@ def open(self): def __lt__(self, other): """Implement ordering.""" 
return os.fspath(self) < os.fspath(other) + + +def open_file_or_filename(unknown_file_thing): + """Try to open the *unknown_file_thing*, otherwise return the filename.""" + try: + f_obj = unknown_file_thing.open() + except AttributeError: + f_obj = unknown_file_thing + return f_obj diff --git a/satpy/readers/abi_base.py b/satpy/readers/abi_base.py index 323c3d4171..0866958f2e 100644 --- a/satpy/readers/abi_base.py +++ b/satpy/readers/abi_base.py @@ -22,10 +22,11 @@ import numpy as np import xarray as xr - from pyresample import geometry -from satpy.readers.file_handlers import BaseFileHandler + from satpy import CHUNK_SIZE +from satpy.readers import open_file_or_filename +from satpy.readers.file_handlers import BaseFileHandler logger = logging.getLogger(__name__) @@ -41,14 +42,14 @@ class NC_ABI_BASE(BaseFileHandler): def __init__(self, filename, filename_info, filetype_info): """Open the NetCDF file with xarray and prepare the Dataset for reading.""" super(NC_ABI_BASE, self).__init__(filename, filename_info, filetype_info) - # xarray's default netcdf4 engine + f_obj = open_file_or_filename(self.filename) try: - self.nc = xr.open_dataset(self.filename, + self.nc = xr.open_dataset(f_obj, decode_cf=True, mask_and_scale=False, chunks={'x': CHUNK_SIZE, 'y': CHUNK_SIZE}, ) except ValueError: - self.nc = xr.open_dataset(self.filename, + self.nc = xr.open_dataset(f_obj, decode_cf=True, mask_and_scale=False, chunks={'lon': CHUNK_SIZE, 'lat': CHUNK_SIZE}, ) @@ -100,6 +101,13 @@ def is_int(val): fill = fill.astype('u%s' % fill.dtype.itemsize) if fill is not None: + # Some backends (h5netcdf) may return attributes as shape (1,) + # arrays rather than shape () scalars, which according to the netcdf + # documentation at + # is correct. 
+ if np.ndim(fill) > 0: + fill = fill.item() if is_int(data) and is_int(factor) and is_int(offset): new_fill = fill else: diff --git a/satpy/tests/reader_tests/test_abi_l1b.py b/satpy/tests/reader_tests/test_abi_l1b.py index 8e8d606e0f..e35b3b0832 100644 --- a/satpy/tests/reader_tests/test_abi_l1b.py +++ b/satpy/tests/reader_tests/test_abi_l1b.py @@ -241,3 +241,36 @@ def test_vis_calibrate(self): 'toa_bidirectional_reflectance') self.assertEqual(res.attrs['long_name'], 'Bidirectional Reflectance') + + +class Test_NC_ABI_File(unittest.TestCase): + """Test file opening.""" + + @mock.patch('satpy.readers.abi_base.xr') + def test_open_dataset(self, _): + """Test openning a dataset.""" + from satpy.readers.abi_l1b import NC_ABI_L1B + + openable_thing = mock.MagicMock() + + NC_ABI_L1B(openable_thing, {'platform_shortname': 'g16'}, None) + openable_thing.open.assert_called() + + +class Test_NC_ABI_L1B_H5netcdf(Test_NC_ABI_L1B): + """Allow h5netcdf peculiarities.""" + + def setUp(self): + """Create fake data for the tests.""" + rad_data = np.int16(50) + rad = xr.DataArray( + rad_data, + attrs={ + 'scale_factor': 0.5, + 'add_offset': -1., + '_FillValue': np.array([1002]), + 'units': 'W m-2 um-1 sr-1', + 'valid_range': (0, 4095), + } + ) + super(Test_NC_ABI_L1B_H5netcdf, self).setUp(rad=rad) From 02ff1f8c5905f9172ad27aced7e4509f94676fe1 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Mon, 23 Nov 2020 15:15:12 +0100 Subject: [PATCH 08/22] Refactor olci file opening --- satpy/readers/olci_nc.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/satpy/readers/olci_nc.py b/satpy/readers/olci_nc.py index 6e839c2823..95747348ce 100644 --- a/satpy/readers/olci_nc.py +++ b/satpy/readers/olci_nc.py @@ -49,6 +49,7 @@ import xarray as xr from satpy import CHUNK_SIZE +from satpy.readers import open_file_or_filename from satpy.readers.file_handlers import BaseFileHandler from satpy.utils import angle2xyz, xyz2angle @@ -113,11 +114,7 @@ def __init__(self, 
filename, filename_info, filetype_info, @lru_cache(maxsize=2) def nc(self): """Get the nc xr dataset.""" - try: - f_obj = self.filename.open() - self.open_file = f_obj - except AttributeError: - f_obj = self.filename + f_obj = open_file_or_filename(self.filename) dataset = xr.open_dataset(f_obj, decode_cf=True, mask_and_scale=True, @@ -147,8 +144,6 @@ def __del__(self): """Close the NetCDF file that may still be open.""" with suppress(IOError, OSError, AttributeError): self.nc.close() - with suppress(AttributeError): - self.open_file.close() class NCOLCICal(NCOLCIBase): From 407fa63230f105e13701681a8c827207d25e3de2 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Mon, 23 Nov 2020 15:15:53 +0100 Subject: [PATCH 09/22] Change FSFile example in class docstring --- satpy/readers/__init__.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/satpy/readers/__init__.py b/satpy/readers/__init__.py index e8fb8f0a71..cbbc0c691a 100644 --- a/satpy/readers/__init__.py +++ b/satpy/readers/__init__.py @@ -545,17 +545,20 @@ def _get_reader_kwargs(reader, reader_kwargs): class FSFile(os.PathLike): """Implementation of a PathLike file object, that can be opened. - This is made to be use in conjuction with fsspec or s3fs. For example:: + This is made to be used in conjuction with fsspec or s3fs. 
For example:: - zipfile = S3B_OL_2_WFR____20201103T100807_20201103T101107_20201103T121330_0179_045_179_1980_MAR_O_NR_002.zip - filename = "sentinel-s3-ol2wfr-zips/2020/11/03/" + filename - the_files = fsspec.open_files("simplecache::zip://**/*.nc::s3://" + filename, s3={'anon': False}) + from satpy import Scene + import fsspec + filename = 'noaa-goes16/ABI-L1b-RadC/2019/001/17/*_G16_s20190011702186*' + + the_files = fsspec.open_files("simplecache::s3://" + filename, s3={'anon': True}) + + from satpy.readers import FSFile fs_files = [FSFile(open_file) for open_file in the_files] - scn = Scene(filenames=fs_files, reader='olci_l2') - scn.load(['chl_nn']) - scn.save_datasets() + scn = Scene(filenames=fs_files, reader='abi_l1b') + scn.load(['true_color_raw']) """ From 719386f38288cd2598dbdee90649fe764810394c Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Mon, 23 Nov 2020 15:36:36 +0100 Subject: [PATCH 10/22] Fix style --- satpy/tests/test_readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/tests/test_readers.py b/satpy/tests/test_readers.py index 395dbd02f9..cf441487ab 100644 --- a/satpy/tests/test_readers.py +++ b/satpy/tests/test_readers.py @@ -941,7 +941,7 @@ def test_sorting_fsfiles(self): file2 = FSFile(self.local_filename) - extra_file = os.path.normpath('/tmp/bla') + extra_file = os.path.normpath('/somedir/bla') sorted_filenames = [os.fspath(file) for file in sorted([file1, file2, extra_file])] expected_filenames = sorted([extra_file, os.fspath(file1), os.fspath(file2)]) assert sorted_filenames == expected_filenames From 9c5b55f797c2007ff60467edbcac08f2b4dbe3b8 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Mon, 23 Nov 2020 15:37:12 +0100 Subject: [PATCH 11/22] Refactor ABI base --- satpy/readers/abi_base.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/satpy/readers/abi_base.py b/satpy/readers/abi_base.py index 0866958f2e..3ac93ca1f9 100644 --- a/satpy/readers/abi_base.py +++ 
b/satpy/readers/abi_base.py @@ -81,17 +81,27 @@ def __getitem__(self, item): variables which causes inaccurate unscaled data values. This method forces the scale factor to a 64-bit float first. """ - def is_int(val): - return np.issubdtype(val.dtype, np.integer) if hasattr(val, 'dtype') else isinstance(val, int) - data = self.nc[item] attrs = data.attrs + data = self._adjust_data(data, item) + + data.attrs = attrs + + data = self._adjust_coords(data, item) + + return data + + def _adjust_data(self, data, item): + """Adjust data with typing, scaling and filling.""" factor = data.attrs.get('scale_factor', 1) offset = data.attrs.get('add_offset', 0) fill = data.attrs.get('_FillValue') unsigned = data.attrs.get('_Unsigned', None) + def is_int(val): + return np.issubdtype(val.dtype, np.integer) if hasattr(val, 'dtype') else isinstance(val, int) + # Ref. GOESR PUG-L1B-vol3, section 5.0.2 Unsigned Integer Processing if unsigned is not None and unsigned.lower() == 'true': # cast the data from int to uint @@ -99,7 +109,6 @@ def is_int(val): if fill is not None: fill = fill.astype('u%s' % fill.dtype.itemsize) - if fill is not None: # Some backends (h5netcdf) may return attributes as shape (1,) # arrays rather than shape () scalars, which according to the netcdf @@ -113,7 +122,6 @@ def is_int(val): else: new_fill = np.nan data = data.where(data != fill, new_fill) - if factor != 1 and item in ('x', 'y'): # be more precise with x/y coordinates # see get_area_def for more information @@ -125,10 +133,10 @@ def is_int(val): if not is_int(factor): factor = float(factor) data = data * factor + offset + return data - data.attrs = attrs - - # handle coordinates (and recursive fun) + def _adjust_coords(self, data, item): + """Handle coordinates (and recursive fun).""" new_coords = {} # 'time' dimension causes issues in other processing # 'x_image' and 'y_image' are confusing to some users and unnecessary @@ -143,7 +151,6 @@ def is_int(val): self.coords[coord_name] = self[coord_name] 
new_coords[coord_name] = self.coords[coord_name] data.coords.update(new_coords) - return data def get_dataset(self, key, info): From 034b5213bb68b88f2be4f8325ac80ce7f63e84dc Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Tue, 24 Nov 2020 10:03:49 +0100 Subject: [PATCH 12/22] Clean up style --- satpy/readers/olci_nc.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/satpy/readers/olci_nc.py b/satpy/readers/olci_nc.py index 95747348ce..29488ce7fa 100644 --- a/satpy/readers/olci_nc.py +++ b/satpy/readers/olci_nc.py @@ -41,13 +41,12 @@ import logging from contextlib import suppress -from functools import lru_cache +from functools import cached_property from functools import reduce import dask.array as da import numpy as np import xarray as xr - from satpy import CHUNK_SIZE from satpy.readers import open_file_or_filename from satpy.readers.file_handlers import BaseFileHandler @@ -110,8 +109,7 @@ def __init__(self, filename, filename_info, filetype_info, self.sensor = 'olci' self.open_file = None - @property - @lru_cache(maxsize=2) + @cached_property def nc(self): """Get the nc xr dataset.""" f_obj = open_file_or_filename(self.filename) From b2956f2e91009932803844643a9d6765b79b110e Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Tue, 1 Dec 2020 13:34:52 +0100 Subject: [PATCH 13/22] Replace cached_property with lru_cache It was introduced in python 3.8, so we have to wait a bit for it. 
--- satpy/readers/olci_nc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/satpy/readers/olci_nc.py b/satpy/readers/olci_nc.py index 29488ce7fa..117a033c90 100644 --- a/satpy/readers/olci_nc.py +++ b/satpy/readers/olci_nc.py @@ -41,7 +41,7 @@ import logging from contextlib import suppress -from functools import cached_property +from functools import lru_cache from functools import reduce import dask.array as da @@ -109,7 +109,8 @@ def __init__(self, filename, filename_info, filetype_info, self.sensor = 'olci' self.open_file = None - @cached_property + @property + @lru_cache def nc(self): """Get the nc xr dataset.""" f_obj = open_file_or_filename(self.filename) From 2b3f0260511f930d77fc58a2c200fbf97c2a7e46 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Tue, 1 Dec 2020 13:41:27 +0100 Subject: [PATCH 14/22] Mute quality note in tests --- satpy/tests/reader_tests/test_olci_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/tests/reader_tests/test_olci_nc.py b/satpy/tests/reader_tests/test_olci_nc.py index c39df23cc2..05dd498002 100644 --- a/satpy/tests/reader_tests/test_olci_nc.py +++ b/satpy/tests/reader_tests/test_olci_nc.py @@ -85,7 +85,7 @@ def test_open_file_objects(self, mocked_open_dataset): open_file = mock.MagicMock() file_handler = NCOLCIBase(open_file, filename_info, 'c') - file_handler.nc + file_handler.nc # noqa mocked_open_dataset.assert_called() open_file.open.assert_called() assert (open_file.open.return_value in mocked_open_dataset.call_args[0] or From 4bffac3db5775ed5228ccf6080f2a382e91ce1ce Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Tue, 1 Dec 2020 13:44:37 +0100 Subject: [PATCH 15/22] Mute quality note in tests --- satpy/tests/reader_tests/test_olci_nc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/satpy/tests/reader_tests/test_olci_nc.py b/satpy/tests/reader_tests/test_olci_nc.py index 05dd498002..eb82cff157 100644 --- a/satpy/tests/reader_tests/test_olci_nc.py +++ 
b/satpy/tests/reader_tests/test_olci_nc.py @@ -85,6 +85,7 @@ def test_open_file_objects(self, mocked_open_dataset): open_file = mock.MagicMock() file_handler = NCOLCIBase(open_file, filename_info, 'c') + # deepcode ignore W0104: This is a property that is actually a function call. file_handler.nc # noqa mocked_open_dataset.assert_called() open_file.open.assert_called() From eb7b9c24cd4af04c20a6875e3a277ea62d63fe3a Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Tue, 1 Dec 2020 13:49:12 +0100 Subject: [PATCH 16/22] Mute quality note in tests --- satpy/tests/reader_tests/test_olci_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/tests/reader_tests/test_olci_nc.py b/satpy/tests/reader_tests/test_olci_nc.py index eb82cff157..380bf000c4 100644 --- a/satpy/tests/reader_tests/test_olci_nc.py +++ b/satpy/tests/reader_tests/test_olci_nc.py @@ -86,7 +86,7 @@ def test_open_file_objects(self, mocked_open_dataset): file_handler = NCOLCIBase(open_file, filename_info, 'c') # deepcode ignore W0104: This is a property that is actually a function call. 
- file_handler.nc # noqa + file_handler.nc # pylint: disable=W0104 mocked_open_dataset.assert_called() open_file.open.assert_called() assert (open_file.open.return_value in mocked_open_dataset.call_args[0] or From 4dd8b20f8f869c6169a176cc932db75177341d78 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Tue, 1 Dec 2020 13:56:19 +0100 Subject: [PATCH 17/22] Fix typo --- satpy/tests/test_file_handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/tests/test_file_handlers.py b/satpy/tests/test_file_handlers.py index 92e20d9d19..6c41f55a35 100644 --- a/satpy/tests/test_file_handlers.py +++ b/satpy/tests/test_file_handlers.py @@ -140,7 +140,7 @@ def test_combine_orbital_parameters(self): # Empty self.fh.combine_info([{}]) - def testt_file_is_kept_intact(self): + def test_file_is_kept_intact(self): """Test that the file object passed (string, path, or other) is kept intact.""" open_file = mock.MagicMock() bfh = BaseFileHandler(open_file, {'filename_info': 'bla'}, 'filetype_info') From ecdcbce1f36e8c7e53060725b64e1e1701ac0d08 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Tue, 1 Dec 2020 14:14:05 +0100 Subject: [PATCH 18/22] Fix lru_cache call for python 3.7 --- satpy/readers/olci_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/readers/olci_nc.py b/satpy/readers/olci_nc.py index 117a033c90..8d5ddca15f 100644 --- a/satpy/readers/olci_nc.py +++ b/satpy/readers/olci_nc.py @@ -110,7 +110,7 @@ def __init__(self, filename, filename_info, filetype_info, self.open_file = None @property - @lru_cache + @lru_cache(max_size=2) def nc(self): """Get the nc xr dataset.""" f_obj = open_file_or_filename(self.filename) From be6c75706c9baadb8efb9133869abe5ab87bda39 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Tue, 1 Dec 2020 14:14:30 +0100 Subject: [PATCH 19/22] Fix typo --- satpy/readers/olci_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/readers/olci_nc.py b/satpy/readers/olci_nc.py 
index 8d5ddca15f..413183e61e 100644 --- a/satpy/readers/olci_nc.py +++ b/satpy/readers/olci_nc.py @@ -110,7 +110,7 @@ def __init__(self, filename, filename_info, filetype_info, self.open_file = None @property - @lru_cache(max_size=2) + @lru_cache(maxsize=2) def nc(self): """Get the nc xr dataset.""" f_obj = open_file_or_filename(self.filename) From 4e3b7b5bf44cc52d63911c1ca385f59082b8379c Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Wed, 2 Dec 2020 10:17:29 +0100 Subject: [PATCH 20/22] Fix cached_property in ABI readers --- satpy/readers/abi_base.py | 62 ++++++++++++++++++++++++--------------- satpy/readers/olci_nc.py | 16 ++++++++-- 2 files changed, 52 insertions(+), 26 deletions(-) diff --git a/satpy/readers/abi_base.py b/satpy/readers/abi_base.py index 3ac93ca1f9..42b23c7176 100644 --- a/satpy/readers/abi_base.py +++ b/satpy/readers/abi_base.py @@ -18,6 +18,7 @@ """Advance Baseline Imager reader base class for the Level 1b and l2+ reader.""" import logging +from contextlib import suppress from datetime import datetime import numpy as np @@ -28,6 +29,16 @@ from satpy.readers import open_file_or_filename from satpy.readers.file_handlers import BaseFileHandler +try: + from functools import cached_property +except ImportError: + # for python < 3.8 + from functools import lru_cache + + def cached_property(func): + """Port back functools.cached_property.""" + return property(lru_cache(maxsize=None)(func)) + logger = logging.getLogger(__name__) PLATFORM_NAMES = { @@ -42,33 +53,40 @@ class NC_ABI_BASE(BaseFileHandler): def __init__(self, filename, filename_info, filetype_info): """Open the NetCDF file with xarray and prepare the Dataset for reading.""" super(NC_ABI_BASE, self).__init__(filename, filename_info, filetype_info) - f_obj = open_file_or_filename(self.filename) - try: - self.nc = xr.open_dataset(f_obj, - decode_cf=True, - mask_and_scale=False, - chunks={'x': CHUNK_SIZE, 'y': CHUNK_SIZE}, ) - except ValueError: - self.nc = xr.open_dataset(f_obj, - 
decode_cf=True, - mask_and_scale=False, - chunks={'lon': CHUNK_SIZE, 'lat': CHUNK_SIZE}, ) - if 't' in self.nc.dims or 't' in self.nc.coords: - self.nc = self.nc.rename({'t': 'time'}) platform_shortname = filename_info['platform_shortname'] self.platform_name = PLATFORM_NAMES.get(platform_shortname) - if 'goes_imager_projection' in self.nc: - self.nlines = self.nc['y'].size - self.ncols = self.nc['x'].size - elif 'goes_lat_lon_projection' in self.nc: - self.nlines = self.nc['lat'].size - self.ncols = self.nc['lon'].size - self.nc = self.nc.rename({'lon': 'x', 'lat': 'y'}) + self.nlines = self.nc['y'].size + self.ncols = self.nc['x'].size self.coords = {} + @cached_property + def nc(self): + """Get the xarray dataset for this file.""" + f_obj = open_file_or_filename(self.filename) + try: + nc = xr.open_dataset(f_obj, + decode_cf=True, + mask_and_scale=False, + chunks={'x': CHUNK_SIZE, 'y': CHUNK_SIZE}, ) + except ValueError: + nc = xr.open_dataset(f_obj, + decode_cf=True, + mask_and_scale=False, + chunks={'lon': CHUNK_SIZE, 'lat': CHUNK_SIZE}, ) + nc = self._rename_dims(nc) + return nc + + @staticmethod + def _rename_dims(nc): + if 't' in nc.dims or 't' in nc.coords: + nc = nc.rename({'t': 'time'}) + if 'goes_lat_lon_projection' in nc: + nc = nc.rename({'lon': 'x', 'lat': 'y'}) + return nc + @property def sensor(self): """Get sensor name for current file handler.""" @@ -278,7 +296,5 @@ def spatial_resolution_to_number(self): def __del__(self): """Close the NetCDF file that may still be open.""" - try: + with suppress(IOError, OSError, AttributeError): self.nc.close() - except (IOError, OSError, AttributeError): - pass diff --git a/satpy/readers/olci_nc.py b/satpy/readers/olci_nc.py index 413183e61e..753d29dde2 100644 --- a/satpy/readers/olci_nc.py +++ b/satpy/readers/olci_nc.py @@ -41,17 +41,28 @@ import logging from contextlib import suppress -from functools import lru_cache from functools import reduce import dask.array as da import numpy as np import xarray as xr 
+ from satpy import CHUNK_SIZE from satpy.readers import open_file_or_filename from satpy.readers.file_handlers import BaseFileHandler from satpy.utils import angle2xyz, xyz2angle +try: + from functools import cached_property +except ImportError: + # for python < 3.8 + from functools import lru_cache + + def cached_property(func): + """Port back functools.cached_property.""" + return property(lru_cache(maxsize=None)(func)) + + logger = logging.getLogger(__name__) PLATFORM_NAMES = {'S3A': 'Sentinel-3A', @@ -109,8 +120,7 @@ def __init__(self, filename, filename_info, filetype_info, self.sensor = 'olci' self.open_file = None - @property - @lru_cache(maxsize=2) + @cached_property def nc(self): """Get the nc xr dataset.""" f_obj = open_file_or_filename(self.filename) From 81505c6f43e69cb3668e66e59ee57a4ff0270121 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Wed, 2 Dec 2020 13:40:10 +0100 Subject: [PATCH 21/22] Factorize cached_property backport --- satpy/_compat.py | 25 +++++++++++++++++++++++++ satpy/readers/abi_base.py | 6 +----- satpy/readers/olci_nc.py | 7 +------ 3 files changed, 27 insertions(+), 11 deletions(-) create mode 100644 satpy/_compat.py diff --git a/satpy/_compat.py b/satpy/_compat.py new file mode 100644 index 0000000000..109630361d --- /dev/null +++ b/satpy/_compat.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2020 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see <http://www.gnu.org/licenses/>. +"""Backports and compatibility fixes for satpy.""" + +from functools import lru_cache + + +def cached_property(func): + """Port back functools.cached_property.""" + return property(lru_cache(maxsize=None)(func)) diff --git a/satpy/readers/abi_base.py b/satpy/readers/abi_base.py index 42b23c7176..1a7de22e85 100644 --- a/satpy/readers/abi_base.py +++ b/satpy/readers/abi_base.py @@ -33,11 +33,7 @@ from functools import cached_property except ImportError: # for python < 3.8 - from functools import lru_cache - - def cached_property(func): - """Port back functools.cached_property.""" - return property(lru_cache(maxsize=None)(func)) + from satpy._compat import cached_property logger = logging.getLogger(__name__) diff --git a/satpy/readers/olci_nc.py b/satpy/readers/olci_nc.py index 753d29dde2..19491de821 100644 --- a/satpy/readers/olci_nc.py +++ b/satpy/readers/olci_nc.py @@ -56,12 +56,7 @@ from functools import cached_property except ImportError: # for python < 3.8 - from functools import lru_cache - - def cached_property(func): - """Port back functools.cached_property.""" - return property(lru_cache(maxsize=None)(func)) - + from satpy._compat import cached_property logger = logging.getLogger(__name__) From d7d227b4aaafb43b8e057b4d0a89c577f30c1137 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Wed, 2 Dec 2020 13:48:45 +0100 Subject: [PATCH 22/22] Put try/except for cached_property backport in _compat --- satpy/_compat.py | 13 ++++++++----- satpy/readers/abi_base.py | 7 +------ satpy/readers/olci_nc.py | 6 +----- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/satpy/_compat.py b/satpy/_compat.py index 109630361d..409261c697 100644 --- a/satpy/_compat.py +++ b/satpy/_compat.py @@ -17,9 +17,12 @@ # satpy. If not, see <http://www.gnu.org/licenses/>. 
"""Backports and compatibility fixes for satpy.""" -from functools import lru_cache +try: + from functools import cached_property +except ImportError: + # for python < 3.8 + from functools import lru_cache - -def cached_property(func): - """Port back functools.cached_property.""" - return property(lru_cache(maxsize=None)(func)) + def cached_property(func): + """Port back functools.cached_property.""" + return property(lru_cache(maxsize=None)(func)) diff --git a/satpy/readers/abi_base.py b/satpy/readers/abi_base.py index 1a7de22e85..09bb89e077 100644 --- a/satpy/readers/abi_base.py +++ b/satpy/readers/abi_base.py @@ -26,15 +26,10 @@ from pyresample import geometry from satpy import CHUNK_SIZE +from satpy._compat import cached_property from satpy.readers import open_file_or_filename from satpy.readers.file_handlers import BaseFileHandler -try: - from functools import cached_property -except ImportError: - # for python < 3.8 - from satpy._compat import cached_property - logger = logging.getLogger(__name__) PLATFORM_NAMES = { diff --git a/satpy/readers/olci_nc.py b/satpy/readers/olci_nc.py index 19491de821..7ddf83283b 100644 --- a/satpy/readers/olci_nc.py +++ b/satpy/readers/olci_nc.py @@ -52,11 +52,7 @@ from satpy.readers.file_handlers import BaseFileHandler from satpy.utils import angle2xyz, xyz2angle -try: - from functools import cached_property -except ImportError: - # for python < 3.8 - from satpy._compat import cached_property +from satpy._compat import cached_property logger = logging.getLogger(__name__)