From d1f745e47b6d896b0524b4e6c851b17861d0f130 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Thu, 23 May 2024 11:47:22 +0200 Subject: [PATCH 1/3] first version of ESACCI-AEROSOL CMORizer (Python version) --- doc/sphinx/source/input.rst | 2 +- .../data/cmor_config/ESACCI-AEROSOL.yml | 33 +++ esmvaltool/cmorizers/data/datasets.yml | 9 +- .../downloaders/datasets/esacci_aerosol.py | 8 +- .../formatters/datasets/esacci_aerosol.ncl | 169 -------------- .../formatters/datasets/esacci_aerosol.py | 217 ++++++++++++++++++ .../recipes/examples/recipe_check_obs.yml | 3 +- 7 files changed, 264 insertions(+), 177 deletions(-) create mode 100644 esmvaltool/cmorizers/data/cmor_config/ESACCI-AEROSOL.yml delete mode 100644 esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.ncl create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index 481cd066a7..9a44919bbc 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -290,7 +290,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ERA-Interim-Land | sm (Lmon) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| ESACCI-AEROSOL | abs550aer, od550aer, od550aerStderr, od550lt1aer, od870aer, od870aerStderr (aero) | 2 | NCL | +| ESACCI-AEROSOL | abs550aer, od550aer, od550aerStderr, od550lt1aer, od870aer, od870aerStderr (aero) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-CLOUD | clivi, clt, cltStderr, clwvi, lwp, rlut, rlutcs, rsut, rsutcs, rsdt, rlus, rsus, 
rsuscs (Amon) | 2 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ diff --git a/esmvaltool/cmorizers/data/cmor_config/ESACCI-AEROSOL.yml b/esmvaltool/cmorizers/data/cmor_config/ESACCI-AEROSOL.yml new file mode 100644 index 0000000000..4ac93c93e9 --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/ESACCI-AEROSOL.yml @@ -0,0 +1,33 @@ +--- +# Common global attributes for Cmorizer output +filename: '{year}??-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_MONTHLY-v4.3.nc' +attributes: + dataset_id: ESACCI-AEROSOL + version: 'SU-v4.3' + tier: 2 + modeling_realm: sat + project_id: OBS + source: 'ftp://anon-ftp.ceda.ac.uk/neodc/esacci/aerosol/data/' + reference: ["esacci-aerosol"] + comment: "Note that the variable tsStderr is an uncertainty not a standard error." + +# Variables to cmorize (here use only filename prefix) +variables: + od550aer: + mip: aero + raw: AOD550_mean + od870aer: + mip: aero + raw: AOD870_mean + od550lt1aer: + mip: aero + raw: FM_AOD550_mean + abs550aer: + mip: aero + raw: AAOD550_mean + od550aerStderr: + mip: aero + raw: AOD550_uncertainty + od870aerStderr: + mip: aero + raw: AOD870_uncertainty diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 47632da653..ec0a01c893 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -420,11 +420,14 @@ datasets: ESACCI-AEROSOL: tier: 2 source: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/aerosol/data/ - last_access: 2019-01-24 + # last_access: 2019-01-24 + last_access: 2024-05-22 info: | Download the data from: - ATSR2_SU/L3/v4.21/MONTHLY/ (1997-2002) - AATSR_SU/L3/v4.21/MONTHLY/ (2003-2011) + # ATSR2_SU/L3/v4.21/MONTHLY/ (1997-2002) + # AATSR_SU/L3/v4.21/MONTHLY/ (2003-2011) + ATSR2_SU/L3/v4.3/MONTHLY/ (1997-2002) + AATSR_SU/L3/v4.3/MONTHLY/ (2003-2011) Other years are not considered since they are not 
complete. Put all files in input_dir_path (no subdirectories with years). diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py index 09d2616c9c..53acd652f6 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py @@ -29,7 +29,7 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, if start_date is None: start_date = datetime(1997, 1, 1) if end_date is None: - end_date = datetime(2011, 1, 1) + end_date = datetime(2011, 12, 31) loop_date = start_date downloader = CCIDownloader( @@ -43,9 +43,11 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, while loop_date <= end_date: year = loop_date.year if year < 2003: - downloader.set_cwd('ATSR2_SU/L3/v4.21/MONTHLY') + # downloader.set_cwd('ATSR2_SU/L3/v4.21/MONTHLY') + downloader.set_cwd('ATSR2_SU/L3/v4.3/MONTHLY') else: - downloader.set_cwd('AATSR_SU/L3/v4.21/MONTHLY') + # downloader.set_cwd('AATSR_SU/L3/v4.21/MONTHLY') + downloader.set_cwd('AATSR_SU/L3/v4.3/MONTHLY') downloader.download_year(loop_date.year) loop_date += relativedelta.relativedelta(years=1) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.ncl b/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.ncl deleted file mode 100644 index e169d0eb4e..0000000000 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.ncl +++ /dev/null @@ -1,169 +0,0 @@ -; ############################################################################# -; ESMValTool CMORizer for ESACCI-AEROSOL data -; ############################################################################# -; -; Tier -; Tier 2: other freely-available dataset. 
-; -; Source -; ftp://anon-ftp.ceda.ac.uk/neodc/esacci/aerosol/data/ -; -; Last access -; 20190124 -; -; Download and processing instructions -; Download the data from: -; ATSR2_SU/L3/v4.21/MONTHLY/ (1997-2002) -; AATSR_SU/L3/v4.21/MONTHLY/ (2003-2011) -; Other years are not considered since they are not complete. -; Put all files in input_dir_path (no subdirectories with years). -; -; Modification history -; 20190124-righi_mattia: adapted to v2. -; 20160718-lauer_axel: added AOD550 + AOD870 uncertainties. -; 20160525-righi_mattia: updated to v4.21 and adding more variables. -; 20150126-righi_mattia: adding AOD at other wavelengths. -; 20151124-righi_mattia: switched to monthly raw data (now available). -; 20150424-righi_mattia: written. -; -; ############################################################################# -loadscript(getenv("esmvaltool_root") + \ - "/data/formatters/interface.ncl") - -begin - - ; Script name (for logger) - DIAG_SCRIPT = "esacci_aerosol.ncl" - - ; Source name - OBSNAME = "ESACCI-AEROSOL" - - ; Tier - TIER = 2 - - ; Period - print(start_year) - YEAR1 = get_year(start_year, 1997) - print(YEAR1) - YEAR2 = get_year(end_year, 2011) - - ; Selected variable (standard name) - VAR = (/"od550aer", "od870aer", "od550lt1aer", "abs550aer", \ - "od550aerStderr", "od870aerStderr"/) - - ; Name in the raw data - NAME = (/"AOD550_mean", "AOD870_mean", "FM_AOD550_mean", "AAOD550_mean", \ - "AOD550_uncertainty", "AOD870_uncertainty"/) - - ; MIP - MIP = (/"aero", "aero", "aero", "aero", \ - "aero", "aero"/) - - ; Frequency - FREQ = (/"mon", "mon", "mon", "mon", \ - "mon", "mon"/) - - ; CMOR table - CMOR_TABLE = getenv("cmor_tables") + \ - (/"/cmip5/Tables/CMIP5_aero", \ - "/cmip5/Tables/CMIP5_aero", \ - "/cmip5/Tables/CMIP5_aero", \ - "/cmip5/Tables/CMIP5_aero", \ - "/custom/CMOR_od550aerStderr.dat", \ - "/custom/CMOR_od870aerStderr.dat"/) - - ; Type - TYPE = "sat" - - ; Version - VERSION = "SU-v4.21" - - ; Global attributes - SOURCE = 
"ftp://anon-ftp.ceda.ac.uk/neodc/esacci/aerosol/data/" - REF = "Popp et al., Remote Sens., doi:10.3390/rs8050421, 2016." - COMMENT = "Combined dataset ERS2-ATSR2 (1997-2002) and ENVISAT-AATSR " + \ - "(2003-2011), based on the University of Swansea algorithm " + \ - "(monthly mean L3 data)" - -end - -begin - - do vv = 0, dimsizes(VAR) - 1 - - log_info("Processing " + VAR(vv) + " (" + MIP(vv) + ")") - - time = create_timec(YEAR1, YEAR2) - date = cd_calendar(time, 1) - - ; Create timeseries - do yy = YEAR1, YEAR2 - do mm = 1, 12 - - ldate = yy + sprinti("%0.2i", mm) - - ; Read file - fname = systemfunc("ls " + input_dir_path + ldate + "*.nc") - - ; No files found - if (all(ismissing(fname))) then - continue - end if - - ; Extract data - f = addfile(fname, "r") - xx = f->$NAME(vv)$ - - ; Assign to global array - if (.not.isdefined("output")) then - dims = array_append_record(dimsizes(time), dimsizes(xx), 0) - output = new(dims, float) - output!0 = "time" - output&time = time - output!1 = "lat" - output&lat = f->latitude - output!2 = "lon" - output&lon = f->longitude - end if - output(ind(toint(ldate).eq.date), :, :) = (/xx/) - delete(fname) - - end do - end do - - ; Set fill value - output = where(output.eq.-999, output@_FillValue, output) - - ; Format coordinates - output!0 = "time" - output!1 = "lat" - output!2 = "lon" - format_coords(output, YEAR1 + "0101", YEAR2 + "1231", FREQ(vv)) - - ; Set variable attributes - tmp = format_variable(output, VAR(vv), CMOR_TABLE(vv)) - delete(output) - output = tmp - delete(tmp) - - ; Calculate coordinate bounds - bounds = guess_coord_bounds(output, FREQ(vv)) - - ; Set global attributes - gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF, COMMENT) - - ; Output file - DATESTR = YEAR1 + "01-" + YEAR2 + "12" - fout = output_dir_path + \ - str_join((/"OBS", OBSNAME, TYPE, VERSION, \ - MIP(vv), VAR(vv), DATESTR/), "_") + ".nc" - - ; Write variable - write_nc(fout, VAR(vv), output, bounds, gAtt) - delete(gAtt) - delete(output) - 
delete(bounds) - - end do - -end diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py new file mode 100644 index 0000000000..51c756bbe0 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py @@ -0,0 +1,217 @@ +"""ESMValTool CMORizer for ESACCI-AEROSOL data. + +Tier + Tier 2: other freely-available dataset. + +Source + CCI CEDA ftp: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/aerosol/data/ + ATSR2_SU/L3/v4.3/MONTHLY/ (1997-2002) + AATSR_SU/L3/v4.3/MONTHLY/ (2003-2011) + Other years are not considered since they are not complete. + + +Last access + 20240522 + +Download and processing instructions + Download the following files: + ftp: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/aerosol/data/ + ATSR2_SU/L3/v4.3/MONTHLY/YYYY/*.nc + AATSR_SU/L3/v4.3/MONTHLY/YYYY/*.nc + and put all files in one directory. + +Modification history + 20240522-lauer_axel: written. +""" + +import glob +import logging +import os +from copy import deepcopy +from datetime import datetime +from dateutil import relativedelta + +import cf_units +import iris +import numpy as np +from dask import array as da +from esmvalcore.cmor.table import CMOR_TABLES + +from ...utilities import save_variable + +logger = logging.getLogger(__name__) + + +def _fix_coordinates(cube, definition): + """Fix coordinates.""" + axis2def = {'T': 'time', 'X': 'longitude', 'Y': 'latitude'} + axes = ['T', 'X', 'Y'] + + for axis in axes: + coord_def = definition.coordinates.get(axis2def[axis]) + if coord_def: + coord = cube.coord(axis=axis) + if axis == 'T': + coord.convert_units('days since 1850-1-1 00:00:00.0') + coord.standard_name = coord_def.standard_name + coord.var_name = coord_def.out_name + coord.long_name = coord_def.long_name + coord.points = coord.core_points().astype('float64') + if len(coord.points) > 1: + if coord.bounds is not None: + coord.bounds = None + coord.guess_bounds() + + return cube + + +def 
_extract_variable(in_files, var, cfg, out_dir): + logger.info("CMORizing variable '%s' from input files '%s'", + var['short_name'], ', '.join(in_files)) + attributes = deepcopy(cfg['attributes']) + attributes['mip'] = var['mip'] + attributes['raw'] = var['raw'] + cmor_table = CMOR_TABLES[attributes['project_id']] + definition = cmor_table.get_variable(var['mip'], var['short_name']) + + # load all input files (1 year) into 1 cube + # --> drop attributes that differ among input files + cube_list = iris.load(in_files, var['raw']) + # (global) attributes to remove + drop_attrs = ['tracking_id', 'id', 'time_coverage_start', + 'time_coverage_end', 'date_created', + 'inputfilelist'] + + time_unit = 'days since 1850-01-01 00:00:00' + time_calendar = 'standard' + + new_list = iris.cube.CubeList() + + for cube in cube_list: + # get time from attributes (no time coordinate) + time0 = cube.attributes['time_coverage_start'] + year0 = int(time0[0:4]) + month0 = int(time0[4:6]) + timestamp = datetime(year0, month0, 15) + time_coord = iris.coords.DimCoord( + cf_units.date2num(timestamp, time_unit, time_calendar), + standard_name='time', + var_name='time', + units=cf_units.Unit(time_unit, calendar=time_calendar) + ) + cube = iris.util.new_axis(cube) + cube.add_dim_coord(time_coord, 0) + + for attr in drop_attrs: + if attr in cube.attributes.keys(): + cube.attributes.pop(attr) + + new_list.append(cube) + + # make sure there is one cube for every day of the year + # (print debug info about missing days and add cube with + # nan to fill gaps + +# full_list = iris.cube.CubeList() +# loop_date = datetime(year, 1, 1) +# time_list = [] + + for cube in new_list: + loncoord = cube.coord('longitude') + latcoord = cube.coord('latitude') + loncoord.points = np.round(loncoord.core_points(), 3) + latcoord.points = np.round(latcoord.core_points(), 3) + +# # create list of available days ('time_list') +# +# for cube in new_list: +# timecoord = cube.coord('time') +# cubetime = 
timecoord.units.num2date(timecoord.points) +# time_list.append(cubetime) +# +# # create cube list for every day of the year by adding +# # cubes containing only nan to fill possible gaps +# +# while loop_date <= datetime(year, 12, 31): +# date_available = False +# for idx, cubetime in enumerate(time_list): +# if loop_date == cubetime: +# date_available = True +# break +# if date_available: +# full_list.append(new_list[idx]) +# else: +# logger.debug(f"No data available for {loop_date}") +# nan_cube = _create_nan_cube(new_list[0], loop_date.year, +# loop_date.month, loop_date.day) +# full_list.append(nan_cube) +# loop_date += relativedelta.relativedelta(days=1) + + iris.util.unify_time_units(new_list) + cube = new_list.concatenate_cube() + cube.coord('time').points = cube.coord('time').core_points().astype( + 'float64') + + # Set correct names + cube.var_name = definition.short_name + cube.standard_name = definition.standard_name + cube.long_name = definition.long_name + + # Fix units + cube.units = definition.units + +# # Fix data type +# cube.data = cube.core_data().astype('float32') + + # Roll longitude + cube.coord('longitude').points = cube.coord('longitude').points + 180. 
+ nlon = len(cube.coord('longitude').points) + cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=-1) + cube.attributes.update({"geospatial_lon_min": "0", + "geospatial_lon_max": "360"}) + + # Fix coordinates + cube = _fix_coordinates(cube, definition) + cube.coord('latitude').attributes = None + cube.coord('longitude').attributes = None + + # Save results + logger.debug("Saving cube\n%s", cube) + logger.debug("Setting time dimension to UNLIMITED while saving!") + save_variable(cube, cube.var_name, + out_dir, attributes, + unlimited_dimensions=['time']) + logger.info("Finished CMORizing %s", ', '.join(in_files)) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorize ESACCI-AEROSOL dataset.""" + glob_attrs = cfg['attributes'] + + logger.info("Starting cmorization for tier%s OBS files: %s", + glob_attrs['tier'], glob_attrs['dataset_id']) + logger.info("Input data from: %s", in_dir) + logger.info("Output will be written to: %s", out_dir) + logger.info('CMORizing ESACCI-SNOW version %s', glob_attrs['version']) + + if start_date is None: + start_date = datetime(1997, 1, 1) + if end_date is None: + end_date = datetime(2011, 12, 31) + + for short_name, var in cfg['variables'].items(): + if 'short_name' not in var: + var['short_name'] = short_name + loop_date = start_date + while loop_date <= end_date: + filepattern = os.path.join( + in_dir, cfg['filename'].format(year=loop_date.year) + ) + in_files = glob.glob(filepattern) + if not in_files: + logger.info('%d: no data not found for ' + 'variable %s', loop_date.year, short_name) + else: + _extract_variable(in_files, var, cfg, out_dir) + + loop_date += relativedelta.relativedelta(years=1) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index f92478bd9a..2abfa960fb 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -167,7 +167,8 @@ diagnostics: 
od870aerStderr: additional_datasets: - {dataset: ESACCI-AEROSOL, project: OBS, mip: aero, tier: 2, - type: sat, version: SU-v4.21, start_year: 1997, end_year: 2011} +# type: sat, version: SU-v4.21, start_year: 1997, end_year: 2011} + type: sat, version: SU-v4.3, start_year: 1997, end_year: 2011} scripts: null From ed1470062ed4be533c8b621e8383404471320559 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Mon, 27 May 2024 16:34:40 +0200 Subject: [PATCH 2/3] added support for daily data for ESACCI-AEROSOL CMORizer --- .../data/cmor_config/ESACCI-AEROSOL.yml | 59 +++++++- .../downloaders/datasets/esacci_aerosol.py | 46 +++++- .../formatters/datasets/esacci_aerosol.py | 141 +++++++++++++----- .../recipes/examples/recipe_check_obs.yml | 37 ++++- 4 files changed, 225 insertions(+), 58 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/ESACCI-AEROSOL.yml b/esmvaltool/cmorizers/data/cmor_config/ESACCI-AEROSOL.yml index 4ac93c93e9..4f4c97f705 100644 --- a/esmvaltool/cmorizers/data/cmor_config/ESACCI-AEROSOL.yml +++ b/esmvaltool/cmorizers/data/cmor_config/ESACCI-AEROSOL.yml @@ -1,6 +1,5 @@ --- # Common global attributes for Cmorizer output -filename: '{year}??-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_MONTHLY-v4.3.nc' attributes: dataset_id: ESACCI-AEROSOL version: 'SU-v4.3' @@ -11,23 +10,67 @@ attributes: reference: ["esacci-aerosol"] comment: "Note that the variable tsStderr is an uncertainty not a standard error." 
-# Variables to cmorize (here use only filename prefix) +# Variables to cmorize variables: - od550aer: +# monthly means + od550aer_month: mip: aero + short_name: od550aer raw: AOD550_mean - od870aer: + file: '{year}??-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_MONTHLY-v4.3.nc' + od870aer_month: mip: aero + short_name: od870aer raw: AOD870_mean - od550lt1aer: + file: '{year}??-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_MONTHLY-v4.3.nc' + od550lt1aer_month: mip: aero + short_name: od550lt1aer raw: FM_AOD550_mean - abs550aer: + file: '{year}??-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_MONTHLY-v4.3.nc' + abs550aer_month: mip: aero + short_name: abs550aer raw: AAOD550_mean - od550aerStderr: + file: '{year}??-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_MONTHLY-v4.3.nc' + od550aerStderr_month: mip: aero + short_name: od550aerStderr raw: AOD550_uncertainty - od870aerStderr: + file: '{year}??-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_MONTHLY-v4.3.nc' + od870aerStderr_month: mip: aero + short_name: od870aerStderr raw: AOD870_uncertainty + file: '{year}??-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_MONTHLY-v4.3.nc' +# daily data + od550aer_day: + mip: aero + short_name: od550aer + raw: AOD550_mean + file: '{year}????-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_DAILY-v4.3.nc' + od870aer_day: + mip: aero + short_name: od870aer + raw: AOD870_mean + file: '{year}????-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_DAILY-v4.3.nc' + od550lt1aer_day: + mip: aero + short_name: od550lt1aer + raw: FM_AOD550_mean + file: '{year}????-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_DAILY-v4.3.nc' + abs550aer_day: + mip: aero + short_name: abs550aer + raw: AAOD550_mean + file: '{year}????-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_DAILY-v4.3.nc' + od550aerStderr_day: + mip: aero + short_name: od550aerStderr + raw: AOD550_uncertainty + file: '{year}????-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_DAILY-v4.3.nc' + od870aerStderr_day: + mip: aero + short_name: od870aerStderr + raw: AOD870_uncertainty + file: 
'{year}????-ESACCI-L3C_AEROSOL-AER_PRODUCTS-*-SU_DAILY-v4.3.nc' diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py index 53acd652f6..55a486e0ab 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py @@ -40,14 +40,44 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, ) downloader.connect() + version = 'v4.3' + algorithm = 'SU' + + # download monthly data + + loop_date = start_date while loop_date <= end_date: - year = loop_date.year - if year < 2003: - # downloader.set_cwd('ATSR2_SU/L3/v4.21/MONTHLY') - downloader.set_cwd('ATSR2_SU/L3/v4.3/MONTHLY') + if loop_date.year < 2003: + instrument = 'ATSR2' else: - # downloader.set_cwd('AATSR_SU/L3/v4.21/MONTHLY') - downloader.set_cwd('AATSR_SU/L3/v4.3/MONTHLY') - - downloader.download_year(loop_date.year) + instrument = 'AATSR' + rel_base_dir = f'{instrument}_{algorithm}/L3/{version}/MONTHLY' + downloader.set_cwd(rel_base_dir) + if downloader.exists(f'{loop_date.year}'): + downloader.download_folder(f'{loop_date.year}', + f'{algorithm}-{version}-monthly') + else: + logger.info(f'{loop_date.year}: no data found') loop_date += relativedelta.relativedelta(years=1) + + # download daily data + + loop_date = start_date + while loop_date <= end_date: + if loop_date.year < 2003: + instrument = 'ATSR2' + else: + instrument = 'AATSR' + rel_base_dir = f'{instrument}_{algorithm}/L3/{version}/DAILY' + downloader.set_cwd(rel_base_dir) + if downloader.exists(f'{loop_date.year}'): + downloader.set_cwd(f'{rel_base_dir}/{loop_date.year}') + if downloader.exists(f"{loop_date.month:02}"): + downloader.download_folder(f'{loop_date.month:02}', + f'{algorithm}-{version}-daily') + else: + logger.info(f'{loop_date.year}/{loop_date.month}: ' + f'no data found') + else: + logger.info(f'{loop_date.year}: no data found') + loop_date += 
relativedelta.relativedelta(months=1) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py index 51c756bbe0..67e480bc4b 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py @@ -7,6 +7,8 @@ CCI CEDA ftp: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/aerosol/data/ ATSR2_SU/L3/v4.3/MONTHLY/ (1997-2002) AATSR_SU/L3/v4.3/MONTHLY/ (2003-2011) + ATSR2_SU/L3/v4.3/DAILY/ (1997-2002) + AATSR_SU/L3/v4.3/DAILY/ (2003-2011) Other years are not considered since they are not complete. @@ -18,7 +20,12 @@ ftp: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/aerosol/data/ ATSR2_SU/L3/v4.3/MONTHLY/YYYY/*.nc AATSR_SU/L3/v4.3/MONTHLY/YYYY/*.nc - and put all files in one directory. + ATSR2_SU/L3/v4.3/DAILY/YYYY/MM/*.nc + AATSR_SU/L3/v4.3/DAILY/YYYY/MM/*.nc + and put all monthly files into one directory named {version}-monthly + and all daily files into one directory named {version}-daily. + {version} is defined in cmorizers/data/cmor_config/ESACCI-AEROSOL.yml + (e.g. version: 'SU-v4.3') Modification history 20240522-lauer_axel: written.
@@ -42,6 +49,23 @@ logger = logging.getLogger(__name__) +def _create_nan_cube(cube, year, month, day): + """Create cube containing only nan from existing cube.""" + nan_cube = cube.copy() + nan_cube.data = da.ma.masked_greater(cube.core_data(), -1e20) + + # Read dataset time unit and calendar from file + dataset_time_unit = str(nan_cube.coord('time').units) + dataset_time_calender = nan_cube.coord('time').units.calendar + # Convert datetime + newtime = datetime(year=year, month=month, day=day) + newtime = cf_units.date2num(newtime, dataset_time_unit, + dataset_time_calender) + nan_cube.coord('time').points = float(newtime) + + return nan_cube + + def _fix_coordinates(cube, definition): """Fix coordinates.""" axis2def = {'T': 'time', 'X': 'longitude', 'Y': 'latitude'} @@ -65,7 +89,7 @@ def _fix_coordinates(cube, definition): return cube -def _extract_variable(in_files, var, cfg, out_dir): +def _extract_variable(in_files, var, cfg, out_dir, is_daily): logger.info("CMORizing variable '%s' from input files '%s'", var['short_name'], ', '.join(in_files)) attributes = deepcopy(cfg['attributes']) @@ -92,7 +116,11 @@ def _extract_variable(in_files, var, cfg, out_dir): time0 = cube.attributes['time_coverage_start'] year0 = int(time0[0:4]) month0 = int(time0[4:6]) - timestamp = datetime(year0, month0, 15) + day0 = int(time0[6:8]) + if is_daily: + timestamp = datetime(year0, month0, day0) + else: + timestamp = datetime(year0, month0, 15) time_coord = iris.coords.DimCoord( cf_units.date2num(timestamp, time_unit, time_calendar), standard_name='time', @@ -108,13 +136,13 @@ def _extract_variable(in_files, var, cfg, out_dir): new_list.append(cube) - # make sure there is one cube for every day of the year - # (print debug info about missing days and add cube with + # make sure there is one cube for every day (daily data) or + # every month (monthly data) of the year + # (print debug info about missing days/months and add cube with # nan to fill gaps -# full_list = 
iris.cube.CubeList() -# loop_date = datetime(year, 1, 1) -# time_list = [] + full_list = iris.cube.CubeList() + time_list = [] for cube in new_list: loncoord = cube.coord('longitude') @@ -122,33 +150,52 @@ def _extract_variable(in_files, var, cfg, out_dir): loncoord.points = np.round(loncoord.core_points(), 3) latcoord.points = np.round(latcoord.core_points(), 3) -# # create list of available days ('time_list') -# -# for cube in new_list: -# timecoord = cube.coord('time') -# cubetime = timecoord.units.num2date(timecoord.points) -# time_list.append(cubetime) -# -# # create cube list for every day of the year by adding -# # cubes containing only nan to fill possible gaps -# -# while loop_date <= datetime(year, 12, 31): -# date_available = False -# for idx, cubetime in enumerate(time_list): -# if loop_date == cubetime: -# date_available = True -# break -# if date_available: -# full_list.append(new_list[idx]) -# else: -# logger.debug(f"No data available for {loop_date}") -# nan_cube = _create_nan_cube(new_list[0], loop_date.year, -# loop_date.month, loop_date.day) -# full_list.append(nan_cube) -# loop_date += relativedelta.relativedelta(days=1) - - iris.util.unify_time_units(new_list) - cube = new_list.concatenate_cube() + # create list of available days/months ('time_list') + + for cube in new_list: + timecoord = cube.coord('time') + cubetime = timecoord.units.num2date(timecoord.points) + time_list.append(cubetime) + + # create cube list for every day/month of the year by adding + # cubes containing only nan to fill possible gaps + + if is_daily: + loop_date = datetime(year0, 1, 1) + while loop_date <= datetime(year0, 12, 31): + date_available = False + for idx, cubetime in enumerate(time_list): + if loop_date == cubetime: + date_available = True + break + if date_available: + full_list.append(new_list[idx]) + else: + logger.debug(f"No data available for {loop_date}") + nan_cube = _create_nan_cube(new_list[0], loop_date.year, + loop_date.month, loop_date.day) + 
full_list.append(nan_cube) + loop_date += relativedelta.relativedelta(days=1) + else: + loop_date = datetime(year0, 1, 15) + print(loop_date) + while loop_date <= datetime(year0, 12, 31): + date_available = False + for idx, cubetime in enumerate(time_list): + if loop_date == cubetime: + date_available = True + break + if date_available: + full_list.append(new_list[idx]) + else: + logger.debug(f"No data available for {loop_date}") + nan_cube = _create_nan_cube(new_list[0], loop_date.year, + loop_date.month, loop_date.day) + full_list.append(nan_cube) + loop_date += relativedelta.relativedelta(months=1) + + iris.util.unify_time_units(full_list) + cube = full_list.concatenate_cube() cube.coord('time').points = cube.coord('time').core_points().astype( 'float64') @@ -178,6 +225,11 @@ def _extract_variable(in_files, var, cfg, out_dir): # Save results logger.debug("Saving cube\n%s", cube) logger.debug("Setting time dimension to UNLIMITED while saving!") + version = attributes['version'] + if is_daily: + attributes['version'] = f'{version}-DAILY' + else: + attributes['version'] = f'{version}-MONTHLY' save_variable(cube, cube.var_name, out_dir, attributes, unlimited_dimensions=['time']) @@ -192,26 +244,39 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): glob_attrs['tier'], glob_attrs['dataset_id']) logger.info("Input data from: %s", in_dir) logger.info("Output will be written to: %s", out_dir) - logger.info('CMORizing ESACCI-SNOW version %s', glob_attrs['version']) + logger.info('CMORizing ESACCI-AEROSOL version %s', glob_attrs['version']) if start_date is None: start_date = datetime(1997, 1, 1) if end_date is None: end_date = datetime(2011, 12, 31) + version = cfg['attributes']['version'] + for short_name, var in cfg['variables'].items(): if 'short_name' not in var: var['short_name'] = short_name loop_date = start_date + if 'day' in short_name: + logger.info("Input data for %s is daily data", short_name) + daily = True + else: + logger.info("Input 
data for %s is monthly data", short_name) + daily = False while loop_date <= end_date: + if daily: + freqstr = 'daily' + else: + freqstr = 'monthly' filepattern = os.path.join( - in_dir, cfg['filename'].format(year=loop_date.year) + in_dir, f'{version}-{freqstr}', + var['file'].format(year=loop_date.year) ) in_files = glob.glob(filepattern) if not in_files: logger.info('%d: no data not found for ' 'variable %s', loop_date.year, short_name) else: - _extract_variable(in_files, var, cfg, out_dir) + _extract_variable(in_files, var, cfg, out_dir, daily) loop_date += relativedelta.relativedelta(years=1) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index 2abfa960fb..8e546711cc 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -156,8 +156,23 @@ diagnostics: scripts: null - ESACCI-AEROSOL: - description: ESACCI-AEROSOL check +# ESACCI-AEROSOL: +# description: ESACCI-AEROSOL check +# variables: +# abs550aer: +# od550aer: +# od550aerStderr: +# od550lt1aer: +# od870aer: +# od870aerStderr: +# additional_datasets: +# - {dataset: ESACCI-AEROSOL, project: OBS, mip: aero, tier: 2, +# type: sat, version: SU-v4.3, start_year: 1997, end_year: 2011} +# scripts: null + + + ESACCI-AEROSOL-MONTHLY: + description: ESACCI-AEROSOL monthly data check variables: abs550aer: od550aer: @@ -167,8 +182,22 @@ diagnostics: od870aerStderr: additional_datasets: - {dataset: ESACCI-AEROSOL, project: OBS, mip: aero, tier: 2, -# type: sat, version: SU-v4.21, start_year: 1997, end_year: 2011} - type: sat, version: SU-v4.3, start_year: 1997, end_year: 2011} + type: sat, version: SU-v4.3-MONTHLY, start_year: 1997, end_year: 2011} + scripts: null + + + ESACCI-AEROSOL-DAILY: + description: ESACCI-AEROSOL daily data check + variables: + abs550aer: + od550aer: + od550aerStderr: + od550lt1aer: + od870aer: + od870aerStderr: + additional_datasets: + - {dataset: ESACCI-AEROSOL,
project: OBS, mip: aero, tier: 2, frequency: day, + type: sat, version: SU-v4.3-DAILY, start_year: 1997, end_year: 2011} scripts: null From 7faa4d0e52f624f07e026de987b006c31407eebb Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Mon, 27 May 2024 16:50:47 +0200 Subject: [PATCH 3/3] fixed some flake8/pylint issues --- .../data/downloaders/datasets/esacci_aerosol.py | 11 +++++++---- .../data/formatters/datasets/esacci_aerosol.py | 6 +++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py index 55a486e0ab..468abeee38 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_aerosol.py @@ -1,11 +1,14 @@ """Script to download ESACCI-AEROSOL from CCI CEDA ftp.""" +import logging from datetime import datetime from dateutil import relativedelta from esmvaltool.cmorizers.data.downloaders.ftp import CCIDownloader +logger = logging.getLogger(__name__) + def download_dataset(config, dataset, dataset_info, start_date, end_date, overwrite): @@ -57,7 +60,7 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, downloader.download_folder(f'{loop_date.year}', f'{algorithm}-{version}-monthly') else: - logger.info(f'{loop_date.year}: no data found') + logger.info('%d: no data found', loop_date.year) loop_date += relativedelta.relativedelta(years=1) # download daily data @@ -76,8 +79,8 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, downloader.download_folder(f'{loop_date.month:02}', f'{algorithm}-{version}-daily') else: - logger.info(f'{loop_date.year}/{loop_date.month}: ' - f'no data found') + logger.info('%d/%d: no data found', loop_date.year, + loop_date.month) else: - logger.info(f'{loop_date.year}: no data found') + logger.info('%d: no data found', loop_date.year) loop_date += 
relativedelta.relativedelta(months=1) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py index 67e480bc4b..2c56644eb3 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_aerosol.py @@ -171,7 +171,7 @@ def _extract_variable(in_files, var, cfg, out_dir, is_daily): if date_available: full_list.append(new_list[idx]) else: - logger.debug(f"No data available for {loop_date}") + logger.debug("No data available for %s", loop_date) nan_cube = _create_nan_cube(new_list[0], loop_date.year, loop_date.month, loop_date.day) full_list.append(nan_cube) @@ -188,7 +188,7 @@ def _extract_variable(in_files, var, cfg, out_dir, is_daily): if date_available: full_list.append(new_list[idx]) else: - logger.debug(f"No data available for {loop_date}") + logger.debug("No data available for %s", loop_date) nan_cube = _create_nan_cube(new_list[0], loop_date.year, loop_date.month, loop_date.day) full_list.append(nan_cube) @@ -265,7 +265,7 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): daily = False while loop_date <= end_date: if daily: - freqstr = 'daily' + freqstr = 'daily' else: freqstr = 'monthly' filepattern = os.path.join(