-
Notifications
You must be signed in to change notification settings - Fork 13
/
sst_script.py
executable file
·84 lines (75 loc) · 2.99 KB
/
sst_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 13 13:48:45 2021
@author: shlomi
"""
from PW_paths import work_yuval
from aux_gps import move_or_copy_files_from_doy_dir_structure_to_single_path
from aux_gps import path_glob
import xarray as xr
import numpy as np
sst_path = work_yuval / 'SST/allData/ghrsst/data/GDS2/L4/GLOB/NCEI/AVHRR_OI/v2'
movepath = work_yuval/'SST/allData'
savepath = work_yuval/'SST'
def save_yearly(movepath, savepath, years, name=None):
from dask.diagnostics import ProgressBar
ps = path_glob(movepath, '*.nc')
for year in years:
print('saving year {}...'.format(year))
# for sea level :
ps_year = [x for x in ps if str(year) in x.as_posix().split('/')[-1].split('_')[-2][0:4]]
# for ssts:
# ps_year = [x for x in ps if str(year) in x.as_posix().split('/')[-1][0:4]]
# ds = xr.open_mfdataset(ps_year)
print(len(ps_year))
ds_list = [xr.open_dataset(x) for x in ps_year]
ds = xr.concat(ds_list, 'time')
ds = ds.sortby('time')
# years, datasets = zip(*ds.groupby("time.year"))
if name is None:
filename = '{}-'.format(year) + '-'.join(ps[0].as_posix().split('/')[-1].split('-')[1:])
else:
filename = '{}-'.format(year) + '-' + name + '.nc'
filepath = savepath / filename
delayed = ds.to_netcdf(filepath, compute=False)
with ProgressBar():
results = delayed.compute()
print('Done!')
return
# # now builds the filenames:
# filepaths = []
# for year in years:
# filename = '{}-'.format(year) + '-'.join(ps[0].as_posix().split('/')[-1].split('-')[1:])
# filepath = savepath / filename
# filepaths.append(filepath)
# xr.save_mfdataset(datasets, filepaths)
def save_subset(savepath, subset='med1'):
from dask.diagnostics import ProgressBar
ps = path_glob(savepath, '*.nc')
print(len(ps))
ds_list = [xr.open_dataset(x, chunks={'time': 10})[['analysed_sst', 'analysis_error']] for x in ps]
ds = xr.concat(ds_list, 'time')
ds = ds.sortby('time')
if subset == 'med1':
print('subsetting to med1')
# ssts:
lat_slice = [30, 50]
lon_slice = [-20, 45]
# sla:
lat_slice = [31, 32]
lon_slice = [34, 35]
ds = ds.sel(lat=slice(*lat_slice), lon=slice(*lon_slice))
yrmin = ds['time'].dt.year.min().item()
yrmax = ds['time'].dt.year.max().item()
filename = '{}-{}_{}-'.format(subset, yrmin, yrmax) + \
'-'.join(ps[0].as_posix().split('/')[-1].split('-')[1:])
delayed = ds.to_netcdf(savepath / filename, compute=False)
with ProgressBar():
results = delayed.compute()
return ds
# years = move_or_copy_files_from_doy_dir_structure_to_single_path(yearly_path=sst_path, movepath=movepath, opr='copy')
# print('opening copied files and saving to {}'.format(movepath))
# years = np.arange(2000, 2021)
# save_yearly(movepath, savepath, years)
# save_subset(savepath, subset='med1')