eppley_vgpm_modis.py
"""ESMValTool CMORizer for Eppley-VGPM-MODIS data from Oregon State University.
Tier
Source
http://orca.science.oregonstate.edu/data/1x2/monthly/eppley.r2018.m.chl.m.sst/hdf
Last access
20190515
Download and processing instructions
Download and unpack all the *.tar files under a single directory
(no subdirectories with years) in ${RAWOBS}/Tier2/Eppley-VGPM-MODIS
Modification history
20190515-lovato_tomas: written.
"""
import glob
import logging
import os

import iris
import numpy as np
import pandas as pd
import xarray as xr

from esmvaltool.cmorizers.data.utilities import (
    constant_metadata,
    fix_coords,
    fix_var_metadata,
    save_variable,
    set_global_atts,
)
logger = logging.getLogger(__name__)


def _fix_data(cube, var):
    """Specific data fixes for different variables."""
    logger.info("Fixing data ...")
    with constant_metadata(cube):
        if var == 'intpp':
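            # Convert from the raw units (mg C m-2 day-1) to the CMOR units
            # for intpp (mol m-2 s-1): mg -> g (1000), g C -> mol C
            # (12.01 g mol-1) and day -> s (86400 s day-1).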
            cube /= 1000. * 12.01 * 86400.
    return cube


def extract_variable(var_info, raw_info, out_dir, attrs):
    """Extract and CMORize the requested variable."""
    var = var_info.short_name
    cubes = iris.load(raw_info['file'])
    rawvar = raw_info['name']
    for cube in cubes:
        if cube.var_name == rawvar:
            fix_var_metadata(cube, var_info)
            cube = fix_coords(cube)
            _fix_data(cube, var)
            set_global_atts(cube, attrs)
            save_variable(
                cube,
                var,
                out_dir,
                attrs,
                local_keys=['coordinates'],
                unlimited_dimensions=['time'],
            )


def merge_data(in_dir, out_dir, raw_info):
    """Merge all data into a single file."""
    data_array = []
    var = raw_info['name']
    filelist = sorted(glob.glob(in_dir + '/' + raw_info['file'] + '*.hdf'))
    for filename in filelist:
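        # Each *.hdf file is read as a single-band raster; rename the pixel
        # dimensions to lat/lon and drop the band dimension.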
        dataset = xr.open_rasterio(filename).rename({
            'y': 'lat',
            'x': 'lon'
        }).squeeze().drop('band')
        # create coordinates
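        # The time stamp comes from the file name, which ends in YYYYDDD
        # (year plus day of year), hence the '%Y%j' parsing format.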
        dataset = dataset.assign_coords(
            time=pd.to_datetime(filename[-11:-4], format='%Y%j'))
        dataset = dataset.expand_dims(dim='time', axis=0)
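        # Build cell-centre latitude/longitude coordinates from the grid size.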
        spacing = 90. / dataset.lat.size
        dataset = dataset.assign_coords(
            lat=np.linspace(-90. + spacing, 90. - spacing, dataset.lat.size))
        dataset.lat.attrs = {'long_name': 'Latitude', 'units': 'degrees_north'}
        dataset = dataset.assign_coords(
            lon=np.linspace(-180. + spacing, 180. - spacing, dataset.lon.size))
        dataset.lon.attrs = {'long_name': 'Longitude', 'units': 'degrees_east'}
        # get current file data
        data_array.append(dataset)
    damerge = xr.concat(data_array, dim='time')

    # flip the data along the latitude axis so it matches the ascending
    # (south-to-north) latitude coordinate built above
    damerge.data = np.fliplr(damerge.data)
    # save to file
    dataset = damerge.to_dataset(name=var)
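    # netCDF encoding: no _FillValue on the coordinates, -9999.0 as the fill
    # value of the data variable, and a Gregorian calendar for time.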
    thekeys = {
        'lat': {'_FillValue': False},
        'lon': {'_FillValue': False},
        'time': {'calendar': 'gregorian'},
        var: {'_FillValue': -9999.0},
    }
    filename = os.path.join(out_dir, raw_info['file'] + '_merged.nc')
    dataset.to_netcdf(filename, encoding=thekeys, unlimited_dims='time')

    logger.info("Merged data written to: %s", filename)

    return filename
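# A minimal sketch of the per-variable entries the function below expects in
# ``cfg['variables']`` (assumed layout with illustrative values; the actual
# entries live in the accompanying CMORizer configuration file):
#
#   variables:
#     intpp:
#       mip: Omon         # CMOR table used to look up the variable definition
#       raw: <raw name>   # variable name to pick from the merged file
#       file: <hdf stem>  # file-name stem used to glob the *.hdf inputs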
def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
    """Run the CMORization for all configured variables."""
    cmor_table = cfg['cmor_table']
    glob_attrs = cfg['attributes']

    # run the cmorization
    for var, vals in cfg['variables'].items():
        var_info = cmor_table.get_variable(vals['mip'], var)
        glob_attrs['mip'] = vals['mip']
        raw_info = {'name': vals['raw'], 'file': vals['file']}

        # merge the raw files into a single input file
        inpfile = merge_data(in_dir, out_dir, raw_info)

        logger.info("CMORizing var %s from file %s", var, inpfile)
        raw_info['file'] = inpfile
        extract_variable(var_info, raw_info, out_dir, glob_attrs)

        # remove the temporary merged input file
        os.remove(inpfile)