Skip to content

Commit

Permalink
968 increase minimal xugrid version to speed up xuopen dataset (#984)
Browse files Browse the repository at this point in the history
* replace ds[varn] with ds.variables[varn] where possible; looking the variable up in the frozen `ds.variables` mapping avoids constructing a DataArray, which is slow in case of many variables

* updated minimal xugrid version

* updated whatsnew
  • Loading branch information
veenstrajelmer authored Sep 3, 2024
1 parent 87ff0e1 commit fa8c25b
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 17 deletions.
7 changes: 4 additions & 3 deletions dfm_tools/interpolate_grid2bnd.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame) ->
# TODO: revert after fixing https://github.com/Deltares/xugrid/issues/274
vars_without_facedim = []
for varn in uds.variables:
if facedim not in uds[varn].dims:
if facedim not in uds.variables[varn].dims:
vars_without_facedim.append(varn)
uds_face = uds.drop(vars_without_facedim)

Expand All @@ -488,8 +488,9 @@ def interp_uds_to_plipoints(uds:xu.UgridDataset, gdf:geopandas.GeoDataFrame) ->
# re-add removed variables again, sometimes important for e.g. depth
# TODO: remove after fixing https://github.com/Deltares/xugrid/issues/274
for varn in vars_without_facedim:
if edgedim not in uds[varn].dims and nodedim not in uds[varn].dims:
ds[varn] = uds[varn]
vardims = uds.variables[varn].dims
if edgedim not in vardims and nodedim not in vardims:
ds[varn] = uds.variables[varn]

# rename station dimname and varname (is index, are both mesh2d_nFaces to start with)
ds = ds.rename({facedim:dimn_point}) # rename mesh2d_nFaces to plipoints
Expand Down
6 changes: 3 additions & 3 deletions dfm_tools/xarray_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ def preprocess_ERA5(ds):
# Prevent writing to (incorrectly scaled) int, since it might mess up mfdataset (https://github.com/Deltares/dfm_tools/issues/239)
# By dropping scaling/offset encoding and converting to float32 (will result in a larger dataset)
# ERA5 datasets retrieved with the new CDS-beta are zipped float32 instead of scaled int, so this is only needed for backwards compatibility with old files.
for var in ds.data_vars:
if not set(['dtype','scale_factor','add_offset']).issubset(ds[var].encoding.keys()):
for var in ds.data_vars.keys():
if not set(['dtype','scale_factor','add_offset']).issubset(ds.variables[var].encoding.keys()):
continue
# the _FillValue will still be -32767 (int default), but this is no issue for float32
ds[var].encoding.pop('scale_factor')
Expand Down Expand Up @@ -315,7 +315,7 @@ def Dataset_varswithdim(ds,dimname): #TODO: dit zit ook in xugrid, wordt nu gebr

varlist_keep = []
for varname in ds.variables.keys():
if dimname in ds[varname].dims:
if dimname in ds.variables[varname].dims:
varlist_keep.append(varname)
ds = ds[varlist_keep]

Expand Down
18 changes: 9 additions & 9 deletions dfm_tools/xugrid_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def remove_ghostcells(uds, fname): #TODO: remove ghostcells from output or align

#drop ghostcells
part_domainno_fromfname = int(part_domainno_fromfname)
da_domainno = uds[varn_domain]
da_domainno = uds.variables[varn_domain]
idx = np.flatnonzero(da_domainno == part_domainno_fromfname)
uds = uds.isel({uds.grid.face_dimension:idx})
return uds
Expand Down Expand Up @@ -118,11 +118,11 @@ def decode_default_fillvals(ds):
# TODO: this function can be removed when xarray does it automatically: https://github.com/Deltares/dfm_tools/issues/490

nfillattrs_added = 0
for varn in ds.variables:
for varn in ds.variables.keys():
# TODO: possible to get always_mask boolean with `netCDF4.Dataset(file_nc).variables[varn].always_mask`, but this seems to be always True for FM mapfiles
if '_FillValue' in ds[varn].encoding:
if '_FillValue' in ds.variables[varn].encoding:
continue
dtype_str = ds[varn].dtype.str[1:]
dtype_str = ds.variables[varn].dtype.str[1:]
if dtype_str not in default_fillvals.keys():
continue
varn_fillval = default_fillvals[dtype_str]
Expand All @@ -144,9 +144,9 @@ def remove_nan_fillvalue_attrs(ds : (xr.Dataset, xu.UgridDataset)):
ds = ds.obj

count = 0
for varn in ds.variables:
if '_FillValue' in ds[varn].encoding:
if np.isnan(ds[varn].encoding['_FillValue']):
for varn in ds.variables.keys():
if '_FillValue' in ds.variables[varn].encoding:
if np.isnan(ds.variables[varn].encoding['_FillValue']):
ds[varn].encoding.pop('_FillValue')
count += 1
if count > 0:
Expand Down Expand Up @@ -299,9 +299,9 @@ def open_dataset_curvilinear(file_nc,

print('>> getting vertices from ds: ',end='')
dtstart = dt.datetime.now()
vertices_longitude = ds[varn_vert_lon].to_numpy()
vertices_longitude = ds.variables[varn_vert_lon].to_numpy()
vertices_longitude = vertices_longitude.reshape(-1,vertices_longitude.shape[-1])
vertices_latitude = ds[varn_vert_lat].to_numpy()
vertices_latitude = ds.variables[varn_vert_lat].to_numpy()
vertices_latitude = vertices_latitude.reshape(-1,vertices_latitude.shape[-1])
print(f'{(dt.datetime.now()-dtstart).total_seconds():.2f} sec')

Expand Down
1 change: 1 addition & 0 deletions docs/whats-new.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- update to cdsapi 0.7.2 and properly catching error for dummy dataset in [#972](https://github.com/Deltares/dfm_tools/pull/972)
- deprecated `dfmt.open_dataset_extra()` (partly replaced by `dfmt.open_prepare_dataset()`) in [#974](https://github.com/Deltares/dfm_tools/pull/974)
- improved nan-conversion in `dfmt.forcinglike_to_Dataset()` in [#982](https://github.com/Deltares/dfm_tools/pull/982)
- improved performance of `dfmt.open_partitioned_dataset()` for datasets with many variables in [#984](https://github.com/Deltares/dfm_tools/pull/984)


## 0.25.0 (2024-08-16)
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ dependencies = [
"netcdf4>=1.5.4",
#bottleneck<1.3.3 pip install fails in py39
"bottleneck>=1.3.3",
#xugrid<0.11.2 sometimes fails on merged chunks that are inconsistent
"xugrid>=0.11.2",
#xugrid<0.12.0 has sub-optimal performance because of accessing dataarrays of variables
"xugrid>=0.12.0",
#cdsapi<0.7.2 has different error upon dummy dataset
"cdsapi>=0.7.2",
#pydap<3.4.0 is from May 2017 and does not support newer python versions
Expand Down

0 comments on commit fa8c25b

Please sign in to comment.