r"""ESMValTool CMORizer for cds-satellite-albedo data.
Tier
Tier 3
Source
https://cds.climate.copernicus.eu/
cdsapp#!/dataset/satellite-albedo?tab=form
Last access
20190401
Download and processing instructions
- Download the data from source to the right directory
using the download script
- Decompress the files within the directory:
"find . -name '*.tar.gz' -execdir tar -xzvf '{}' \;"
Notes
-----
- This script regrids and cmorizes the above dataset.
Modification history
20191208-crezee_bas: written based on cds-satellite-lai-fapar
"""
import glob
import logging
import os
from copy import deepcopy
from datetime import datetime
from warnings import catch_warnings, filterwarnings
import cf_units
import iris
from esmvalcore.preprocessor import regrid
from iris import NameConstraint
from esmvaltool.cmorizers.data import utilities as utils
logger = logging.getLogger(__name__)
def _attrs_are_the_same(cubelist):
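    """Check that attribute values are identical across all cubes in the list.

    The comparison is based on the attribute keys of the first cube.
    """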
    # assume they are the same
    attrs_the_same = True
    allattrs = cubelist[0].attributes
    for key in allattrs:
        try:
            unique_attr_vals = {cube.attributes[key] for cube in cubelist}
        # This exception is needed for valid_range, which is an
        # array and therefore not hashable
        except TypeError:
            unique_attr_vals = {
                tuple(cube.attributes[key])
                for cube in cubelist
            }
        if len(unique_attr_vals) > 1:
            attrs_the_same = False
            logger.info("Different values found for %s-attribute: %s",
                        key, unique_attr_vals)
    return attrs_the_same


def _cmorize_dataset(in_file, var, cfg, out_dir):
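    """Load a regridded file, apply the CMOR metadata, and save the variable."""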
logger.info("CMORizing variable '%s' from input file '%s'",
var['short_name'], in_file)
attributes = deepcopy(cfg['attributes'])
attributes['mip'] = var['mip']
cmor_table = cfg['cmor_table']
definition = cmor_table.get_variable(var['mip'], var['short_name'])
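    # ``definition`` holds the CMOR variable metadata (short_name,
    # standard_name, long_name, units) that is applied to the cube below.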
    cube = iris.load_cube(str(in_file),
                          constraint=NameConstraint(var_name=var['raw']))

    # Set correct names
    cube.var_name = definition.short_name
    if definition.standard_name:
        cube.standard_name = definition.standard_name
    cube.long_name = definition.long_name

    # Convert units if required
    cube.convert_units(definition.units)

    # Set global attributes
    utils.set_global_atts(cube, attributes)

    logger.info("Saving CMORized cube for variable %s", cube.var_name)
    utils.save_variable(cube, cube.var_name, out_dir, attributes)

    return in_file


def _regrid_dataset(in_dir, var, cfg):
"""Regridding of original files.
This function regrids each file and write to disk appending 'regrid'
in front of filename.
"""
filelist = glob.glob(os.path.join(in_dir, var['file']))
for infile in filelist:
_, infile_tail = os.path.split(infile)
outfile_tail = infile_tail.replace('c3s', 'c3s_regridded')
outfile = os.path.join(cfg['work_dir'], outfile_tail)
with catch_warnings():
filterwarnings(
action='ignore',
# Full message:
# UserWarning: Skipping global attribute 'long_name':
# 'long_name' is not a permitted attribute
message="Skipping global attribute 'long_name'",
category=UserWarning,
module='iris',
)
cube = iris.load_cube(infile,
constraint=NameConstraint(
var_name=var['raw']))
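            # Nearest-neighbour regridding to the target resolution given in
            # the cmorizer configuration (cfg['custom']['regrid_resolution']).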
            cube = regrid(cube, cfg['custom']['regrid_resolution'], 'nearest')
            logger.info("Saving: %s", outfile)
            iris.save(cube, outfile)


def _set_time_bnds(in_dir, var):
"""Set time_bnds by using attribute and returns a cubelist."""
# This is a complicated expression, but necessary to keep local
# variables below the limit, otherwise prospector complains.
cubelist = iris.load(
glob.glob(
os.path.join(in_dir, var['file'].replace('c3s', 'c3s_regridded'))))
# The purpose of the following loop is to remove any attributes
# that differ between cubes (otherwise concatenation over time fails).
# In addition, care is taken of the time coordinate, by adding the
# time_coverage attributes as time_bnds to the time coordinate.
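    # The time_coverage_start/end attributes are ISO 8601 strings, e.g.
    # "1999-01-10T00:00:00Z" (illustrative value), matching the strptime
    # pattern used below.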
    for n_cube, _ in enumerate(cubelist):
        time_coverage_start = cubelist[n_cube].attributes.pop(
            'time_coverage_start')
        time_coverage_end = cubelist[n_cube].attributes.pop(
            'time_coverage_end')

        # Now put time_coverage_start/end as time_bnds
        # Convert time_coverage_xxxx to datetime
        bnd_a = datetime.strptime(time_coverage_start, "%Y-%m-%dT%H:%M:%SZ")
        bnd_b = datetime.strptime(time_coverage_end, "%Y-%m-%dT%H:%M:%SZ")

        # Put in shape for time_bnds
        time_bnds_datetime = [bnd_a, bnd_b]

        # Read dataset time unit and calendar from file
        dataset_time_unit = str(cubelist[n_cube].coord('time').units)
        dataset_time_calendar = cubelist[n_cube].coord('time').units.calendar

        # Convert datetime
        time_bnds = cf_units.date2num(time_bnds_datetime, dataset_time_unit,
                                      dataset_time_calendar)

        # Put them on the file
        cubelist[n_cube].coord('time').bounds = time_bnds

    return cubelist


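# Entry point called by the ESMValTool CMORizer framework.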
def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
"""Cmorization func call."""
# run the cmorization
# Pass on the workdir to the cfg dictionary
cfg['work_dir'] = cfg_user.work_dir
# If it doesn't exist, create it
if not os.path.isdir(cfg['work_dir']):
logger.info("Creating working directory for regridding: %s",
cfg['work_dir'])
os.mkdir(cfg['work_dir'])
for short_name, var in cfg['variables'].items():
var['short_name'] = short_name
logger.info("Processing var %s", short_name)
# Regridding
logger.info("Start regridding to: %s",
cfg['custom']['regrid_resolution'])
_regrid_dataset(in_dir, var, cfg)
logger.info("Finished regridding")
# File concatenation
logger.info("Start setting time_bnds")
cubelist = _set_time_bnds(cfg['work_dir'], var)
attrs_to_remove = ['identifier', 'date_created']
for cube in cubelist:
for attr in attrs_to_remove:
cube.attributes.pop(attr)
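        # Each platform (SPOT-4, SPOT-5) is concatenated over time and
        # CMORized into its own file.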
        # Loop over two different platform names
        for platformname in ['SPOT-4', 'SPOT-5']:
            # Now split the cubelist on the different platform
            logger.info("Start processing part of dataset: %s", platformname)
            cubelist_platform = cubelist.extract(
                iris.AttributeConstraint(platform=platformname))
            if cubelist_platform:
                assert _attrs_are_the_same(cubelist_platform)
                cube = cubelist_platform.concatenate_cube()
            else:
                logger.warning(
                    "No files found for platform %s (check input data)",
                    platformname)
                continue
            savename = os.path.join(cfg['work_dir'],
                                    var['short_name'] + platformname + '.nc')
            logger.info("Saving as: %s", savename)
            iris.save(cube, savename)
            logger.info("Finished file concatenation over time")

            logger.info("Start CMORization of file %s", savename)
            _cmorize_dataset(savename, var, cfg, out_dir)
            logger.info("Finished regridding and CMORizing %s", savename)