From d5b4550420d34083fd199afe959efa85bec688bf Mon Sep 17 00:00:00 2001 From: Howard Soh Date: Fri, 19 Jan 2024 19:35:17 +0000 Subject: [PATCH] #2772 Use JSON for attrubutes and numpy serialization dor 2D data instead of NetCDF --- scripts/python/met/dataplane.py | 153 ++++++++++++-------------------- 1 file changed, 59 insertions(+), 94 deletions(-) diff --git a/scripts/python/met/dataplane.py b/scripts/python/met/dataplane.py index 57c9ac367b..1a8b834c73 100644 --- a/scripts/python/met/dataplane.py +++ b/scripts/python/met/dataplane.py @@ -1,7 +1,8 @@ import os import sys +import json +import re import numpy as np -import netCDF4 as nc import xarray as xr from importlib import util as import_util @@ -19,17 +20,16 @@ class dataplane(logger): @staticmethod def call_python(argv): + # argv[0] is the python wrapper script (caller) logger.log_msg(f"Module:\t{repr(argv[0])}") if 1 == len(argv): - logger.quit(f"User command is missing") + logger.quit_msg(f"User python command is missing") + sys.exit(1) + + logger.log_msg(f"User python command:\t{repr(' '.join(argv[1:]))}") - logger.log_msg("User Command:\t" + repr(' '.join(argv[1:]))) - # argv[0] is the python wrapper script (caller) # argv[1] contains the user defined python script pyembed_module_name = argv[1] - sys.argv = argv[1:] - logger.log_msg(f" sys.argv:\t{sys.argv}") - # append user script dir to system path pyembed_dir, pyembed_name = os.path.split(pyembed_module_name) if pyembed_dir: @@ -40,11 +40,18 @@ def call_python(argv): user_base = pyembed_name.replace('.py','') + argv_org = sys.argv # save sys.argv + sys.argv = argv[1:] spec = import_util.spec_from_file_location(user_base, pyembed_module_name) met_in = import_util.module_from_spec(spec) spec.loader.exec_module(met_in) + sys.argv = argv_org # restore sys.argv return met_in + @staticmethod + def get_tmp_numpy_filename(tmp_filename): + return re.sub(".json$", ".npy", tmp_filename) if tmp_filename.endswith(".json") else f'{tmp_filename}.npy' + @staticmethod def is_integer(a_data): return isinstance(a_data, int) @@ -100,76 +107,33 @@ def read_2d_text_input(input_file): return met_data @staticmethod - def read_dataplane(netcdf_filename): - # read NetCDF file - ds = nc.Dataset(netcdf_filename, 'r') - - dp = ds['met_data'] - met_data = dp[:] - attr_name = dataplane.ATTR_USER_FILL_VALUE - user_fill_value = dp.getncattr(attr_name) if hasattr(dp, attr_name) else None - - met_attrs = {} - - # grid is defined as a dictionary or string - grid = {} - for attr, attr_val in ds.__dict__.items(): - if 'grid.' in attr: - grid_attr = attr.split('.')[1] - grid[grid_attr] = attr_val - else: - met_attrs[attr] = attr_val - - if grid: - met_attrs['grid'] = grid - - met_attrs['name'] = met_attrs['name_str'] - del met_attrs['name_str'] - + def read_dataplane(tmp_filename): met_info = {} - met_info['met_data'] = met_data - if user_fill_value is not None: - met_attrs['fill_value'] = user_fill_value - met_info['attrs'] = met_attrs - + with open(tmp_filename) as json_h: + met_info['attrs'] = json.load(json_h) + # read 2D numeric data + numpy_dump_name = dataplane.get_tmp_numpy_filename(tmp_filename) + met_dp_data = np.load(numpy_dump_name) + met_info['met_data'] = met_dp_data return met_info @staticmethod - def write_dataplane(met_in, netcdf_filename): - met_info = {'met_data': met_in.met_data} + def write_dataplane(met_in, tmp_filename): if hasattr(met_in.met_data, 'attrs') and met_in.met_data.attrs: attrs = met_in.met_data.attrs else: attrs = met_in.attrs - met_info['attrs'] = attrs + with open(tmp_filename,'w') as json_h: + json.dump(attrs, json_h) - # write NetCDF file - ds = nc.Dataset(netcdf_filename, 'w') - - # create dimensions and variable - nx, ny = met_in.met_data.shape - ds.createDimension('x', nx) - ds.createDimension('y', ny) - dp = ds.createVariable('met_data', met_in.met_data.dtype, ('x', 'y'), - fill_value=dataplane.MET_FILL_VALUE) - dp[:] = met_in.met_data - - # append attributes - for attr, attr_val in met_info['attrs'].items(): - if attr_val is None: - continue - - if attr == 'name': - setattr(ds, 'name_str', attr_val) - elif attr == 'fill_value': - setattr(dp, dataplane.ATTR_USER_FILL_VALUE, attr_val) - elif type(attr_val) == dict: - for key in attr_val: - setattr(ds, attr + '.' + key, attr_val[key]) - else: - setattr(ds, attr, attr_val) - - ds.close() + if isinstance(met_in.met_data, (np.ma.MaskedArray, np.ma.core.MaskedArray)): + met_dp_data = np.ma.getdata(met_in.met_data, subok=False) + elif isinstance(met_in.met_data, np.ndarray): + met_dp_data = met_in.met_data + else: + met_dp_data = np.array(met_in.met_data) + numpy_dump_name = dataplane.get_tmp_numpy_filename(tmp_filename) + np.save(numpy_dump_name, met_dp_data) @staticmethod def validate_met_data(met_data, fill_value=None): @@ -180,33 +144,34 @@ def validate_met_data(met_data, fill_value=None): from_ndarray = False if met_data is None: logger.quit(f"{method_name} The met_data is None") + sys.exit(1) + + nx, ny = met_data.shape + + met_fill_value = dataplane.MET_FILL_VALUE + if dataplane.is_xarray_dataarray(met_data): + from_xarray = True + attrs = met_data.attrs + met_data = met_data.data + modified_met_data = True + if isinstance(met_data, np.ndarray): + from_ndarray = True + met_data = np.ma.array(met_data) + + if isinstance(met_data, np.ma.MaskedArray): + is_int_data = dataplane.is_integer(met_data[0,0]) or dataplane.is_integer(met_data[int(nx/2),int(ny/2)]) + met_data = np.ma.masked_equal(met_data, float('nan')) + met_data = np.ma.masked_equal(met_data, float('inf')) + if fill_value is not None: + met_data = np.ma.masked_equal(met_data, fill_value) + met_data = met_data.filled(int(met_fill_value) if is_int_data else met_fill_value) + else: + logger.log_msg(f"{method_name} unknown datatype {type(met_data)}") + + if dataplane.KEEP_XARRAY: + return xr.DataArray(met_data,attrs=attrs) if from_xarray else met_data else: - nx, ny = met_data.shape - - met_fill_value = dataplane.MET_FILL_VALUE - if dataplane.is_xarray_dataarray(met_data): - from_xarray = True - attrs = met_data.attrs - met_data = met_data.data - modified_met_data = True - if isinstance(met_data, np.ndarray): - from_ndarray = True - met_data = np.ma.array(met_data) - - if isinstance(met_data, np.ma.MaskedArray): - is_int_data = dataplane.is_integer(met_data[0,0]) or dataplane.is_integer(met_data[int(nx/2),int(ny/2)]) - met_data = np.ma.masked_equal(met_data, float('nan')) - met_data = np.ma.masked_equal(met_data, float('inf')) - if fill_value is not None: - met_data = np.ma.masked_equal(met_data, fill_value) - met_data = met_data.filled(int(met_fill_value) if is_int_data else met_fill_value) - else: - logger.log_msg(f"{method_name} unknown datatype {type(met_data)}") - - if dataplane.KEEP_XARRAY: - return xr.DataArray(met_data,attrs=attrs) if from_xarray else met_data - else: - return met_data + return met_data def main(argv):