-
Notifications
You must be signed in to change notification settings - Fork 128
/
gpcc.py
177 lines (146 loc) · 5.81 KB
/
gpcc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""ESMValTool CMORizer for GPCC data.
Tier
Tier 2: other freely-available dataset.
Source
https://opendata.dwd.de/climate_environment/GPCC/html/fulldata-monthly_v2018_doi_download.html
https://opendata.dwd.de/climate_environment/GPCC/
full_data_2018/full_data_monthly_v2018_[025 05 10 25].nc.gz
Last access
20200225
Download and processing instructions
Download the following files:
full_data_monthly_{version}.nc.gz
Two files are generated per version, one with version_grid (i.e. v2018_25),
one with version_grid-numgauge1 (i.e. v2018_25-numgauge1), which is constrained
on holding gridpoint values relying on data from at least one station (i.e.
removing gridpoints solely relying on climatological infilling).
"""
import copy
import logging
import os
from warnings import catch_warnings, filterwarnings
import cftime
import iris
import numpy as np
from cf_units import Unit
from iris import NameConstraint
from esmvaltool.cmorizers.data import utilities as utils
logger = logging.getLogger(__name__)
def _get_centered_timecoord(cube):
"""Fix time coordinate.
Time points start at the beginning of month at 00:00:00.
"""
time = cube.coord('time')
times = time.units.num2date(time.points)
# get bounds
starts = [cftime.DatetimeNoLeap(c.year, c.month, 1) for c in times]
ends = [
cftime.DatetimeNoLeap(c.year, c.month + 1, 1)
if c.month < 12 else cftime.DatetimeNoLeap(c.year + 1, 1, 1)
for c in times
]
time.bounds = time.units.date2num(np.stack([starts, ends], -1))
# get points
time.points = [np.mean((t1, t2)) for t1, t2 in time.bounds]
def _extract_variable(short_name, var, version, cfg, filepath, out_dir):
"""Extract variable."""
raw_var = var.get('raw', short_name)
with catch_warnings():
filterwarnings(
action='ignore',
message='Ignoring netCDF variable .* invalid units .*',
category=UserWarning,
module='iris',
)
cube = iris.load_cube(filepath, NameConstraint(var_name=raw_var))
# Fix units (mm/month) -> 'kg m-2 month-1' -> 'kg m-2 s-1'
cmor_info = cfg['cmor_table'].get_variable(var['mip'], short_name)
cube.units = Unit(var.get('raw_units', short_name))
cube.convert_units(cmor_info.units)
# fix calendar type
cube.coord('time').units = Unit(cube.coord('time').units.origin,
calendar=var.get('calendar', short_name))
cube.coord('time').convert_units(
Unit('days since 1950-1-1 00:00:00', calendar='gregorian'))
# Fix coordinates
# fix time
_get_centered_timecoord(cube)
# fix flipped latitude
utils.flip_dim_coord(cube, 'latitude')
utils.fix_dim_coordnames(cube)
cube_coord = cube.coord('latitude')
utils.fix_bounds(cube, cube_coord)
# fix longitude
cube_coord = cube.coord('longitude')
if cube_coord.points[0] < 0. and \
cube_coord.points[-1] < 181.:
cube_coord.points = \
cube_coord.points + 180.
utils.fix_bounds(cube, cube_coord)
cube.attributes['geospatial_lon_min'] = 0.
cube.attributes['geospatial_lon_max'] = 360.
nlon = len(cube_coord.points)
utils.roll_cube_data(cube, nlon // 2, -1)
# Fix metadata
attrs = cfg['attributes']
attrs['mip'] = var['mip']
attrs['version'] = version
utils.fix_var_metadata(cube, cmor_info)
utils.set_global_atts(cube, attrs)
# Save variable
utils.save_variable(cube,
short_name,
out_dir,
attrs,
unlimited_dimensions=['time'])
# build contrainted cube on numgauge < 1
constraint_var = var.get('constraint', short_name)
with catch_warnings():
filterwarnings(
action='ignore',
message='Ignoring netCDF variable .* invalid units .*',
category=UserWarning,
module='iris',
)
constr_cube = iris.load_cube(filepath,
NameConstraint(var_name=constraint_var))
# fix flipped latitude
utils.flip_dim_coord(constr_cube, 'latitude')
utils.fix_dim_coordnames(constr_cube)
cube_coord = constr_cube.coord('latitude')
utils.fix_bounds(constr_cube, cube_coord)
# fix longitude
cube_coord = constr_cube.coord('longitude')
if cube_coord.points[0] < 0. and \
cube_coord.points[-1] < 181.:
cube_coord.points = \
cube_coord.points + 180.
utils.fix_bounds(constr_cube, cube_coord)
constr_cube.attributes['geospatial_lon_min'] = 0.
constr_cube.attributes['geospatial_lon_max'] = 360.
nlon = len(cube_coord.points)
utils.roll_cube_data(constr_cube, nlon // 2, -1)
cube.data = np.ma.masked_where(constr_cube.data < 1., cube.data)
# Save variable
attrs = copy.deepcopy(cfg['attributes'])
attrs.update({
'comment': 'constrained on gridpoint values being based on'
'at least 1 station',
'version': attrs['version'] + '-numgauge1'
})
attrs['mip'] = var['mip']
utils.save_variable(cube,
short_name,
out_dir,
attrs,
unlimited_dimensions=['time'])
def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
"""Cmorization func call."""
raw_filepath = os.path.join(in_dir, cfg['filename'])
# Run the cmorization
for version in cfg['attributes']['version'].values():
for (short_name, var) in cfg['variables'].items():
raw_var = var.get('raw', short_name)
filepath = raw_filepath.format(version=version, raw_name=raw_var)
logger.info("CMORizing variable '%s'", short_name)
_extract_variable(short_name, var, version, cfg, filepath, out_dir)