Skip to content

Commit

Permalink
#2772 Changed default temp output format to JSON ande numpoy serializ…
Browse files Browse the repository at this point in the history
…ation
  • Loading branch information
Howard Soh committed Jan 29, 2024
1 parent 858a484 commit 6a54b1b
Show file tree
Hide file tree
Showing 3 changed files with 541 additions and 482 deletions.
168 changes: 132 additions & 36 deletions scripts/python/met/dataplane.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,30 @@
import os
import sys
import json
import re
import numpy as np
import xarray as xr

from importlib import util as import_util
from met.logger import logger
from met.logger import met_base, met_base_tools

###########################################

class dataplane(logger):
class dataplane(met_base):

KEEP_XARRAY = True
class_name = "dataplane"

MET_FILL_VALUE = -9999.
ATTR_USER_FILL_VALUE = 'user_fill_value'

@staticmethod
def call_python(argv):
# argv[0] is the python wrapper script (caller)
logger.log_msg(f"Module:\t{repr(argv[0])}")
met_base.log_message(f"Module:\t{repr(argv[0])}")
if 1 == len(argv):
logger.quit_msg(f"User python command is missing")
met_base.quit_msg(f"User python command is missing")
sys.exit(1)

logger.log_msg(f"User python command:\t{repr(' '.join(argv[1:]))}")
met_base.log_message(f"User python command:\t{repr(' '.join(argv[1:]))}")

# argv[1] contains the user defined python script
pyembed_module_name = argv[1]
Expand All @@ -48,9 +46,10 @@ def call_python(argv):
sys.argv = argv_org # restore sys.argv
return met_in

@staticmethod
def get_tmp_numpy_filename(tmp_filename):
return re.sub(".json$", ".npy", tmp_filename) if tmp_filename.endswith(".json") else f'{tmp_filename}.npy'
#@staticmethod
#def get_numpy_filename(tmp_filename):
# return met_base_tools.replace_extension(tmp_filename, "json", "npy") if tmp_filename.endswith(".json") else \
# met_base_tools.replace_extension(tmp_filename, "nc", "npy") if tmp_filename.endswith(".nc") else f'{tmp_filename}.npy'

@staticmethod
def is_integer(a_data):
Expand Down Expand Up @@ -108,47 +107,78 @@ def read_2d_text_input(input_file):

@staticmethod
def read_dataplane(tmp_filename):
# Default is JSON for attributes and NUMPY serialization for 2D array
return dataplane.read_dataplane_nc(tmp_filename) if met_base_tools.use_netcdf_format() \
else dataplane.read_dataplane_json_numpy(tmp_filename)

@staticmethod
def read_dataplane_json_numpy(tmp_filename):
if met_base_tools.is_debug_enabled("dataplane"):
met_base.log_message(f"Read from a temporary JSON file and a temporary numpy output (dataplane)")

met_info = {}
with open(tmp_filename) as json_h:
met_info['attrs'] = json.load(json_h)
with open(tmp_filename) as json_fh:
met_info['attrs'] = json.load(json_fh)
# read 2D numeric data
numpy_dump_name = dataplane.get_tmp_numpy_filename(tmp_filename)
numpy_dump_name = met_base_tools.get_numpy_filename(tmp_filename)
met_dp_data = np.load(numpy_dump_name)
met_info['met_data'] = met_dp_data
return met_info

@staticmethod
def write_dataplane(met_in, tmp_filename):
if hasattr(met_in.met_data, 'attrs') and met_in.met_data.attrs:
attrs = met_in.met_data.attrs
else:
attrs = met_in.attrs
with open(tmp_filename,'w') as json_h:
json.dump(attrs, json_h)
def read_dataplane_nc(netcdf_filename):
import netCDF4 as nc

if isinstance(met_in.met_data, (np.ma.MaskedArray, np.ma.core.MaskedArray)):
met_dp_data = np.ma.getdata(met_in.met_data, subok=False)
elif isinstance(met_in.met_data, np.ndarray):
met_dp_data = met_in.met_data
else:
met_dp_data = np.array(met_in.met_data)
numpy_dump_name = dataplane.get_tmp_numpy_filename(tmp_filename)
np.save(numpy_dump_name, met_dp_data)
if met_base_tools.is_debug_enabled("dataplane"):
met_base.log_message(f"Read from a temporary NetCDF file (dataplane)")

# read NetCDF file
ds = nc.Dataset(netcdf_filename, 'r')

dp = ds['met_data']
met_data = dp[:]
attr_name = dataplane.ATTR_USER_FILL_VALUE
user_fill_value = dp.getncattr(attr_name) if hasattr(dp, attr_name) else None

met_attrs = {}

# grid is defined as a dictionary or string
grid = {}
for attr, attr_val in ds.__dict__.items():
if 'grid.' in attr:
grid_attr = attr.split('.')[1]
grid[grid_attr] = attr_val
else:
met_attrs[attr] = attr_val

if grid:
met_attrs['grid'] = grid

met_attrs['name'] = met_attrs['name_str']
del met_attrs['name_str']

met_info = {}
met_info['met_data'] = met_data
if user_fill_value is not None:
met_attrs['fill_value'] = user_fill_value
met_info['attrs'] = met_attrs

return met_info

@staticmethod
def validate_met_data(met_data, fill_value=None):
method_name = f"{dataplane.class_name}.validate()"
#logger.log_msg(f"{method_name} type(met_data)= {type(met_data)}")
#met_base.log_msg(f"{method_name} type(met_data)= {type(met_data)}")
attrs = None
from_xarray = False
from_ndarray = False
if met_data is None:
logger.quit(f"{method_name} The met_data is None")
met_base.quit(f"{method_name} The met_data is None")
sys.exit(1)

nx, ny = met_data.shape

met_fill_value = dataplane.MET_FILL_VALUE
met_fill_value = met_base.MET_FILL_VALUE
if dataplane.is_xarray_dataarray(met_data):
from_xarray = True
attrs = met_data.attrs
Expand All @@ -166,13 +196,79 @@ def validate_met_data(met_data, fill_value=None):
met_data = np.ma.masked_equal(met_data, fill_value)
met_data = met_data.filled(int(met_fill_value) if is_int_data else met_fill_value)
else:
logger.log_msg(f"{method_name} unknown datatype {type(met_data)}")
met_base.log_message(f"{method_name} unknown datatype {type(met_data)}")

if dataplane.KEEP_XARRAY:
return xr.DataArray(met_data,attrs=attrs) if from_xarray else met_data
else:
return met_data

@staticmethod
def write_dataplane(met_in, tmp_filename):
# Default is JSON for attributes and NUMPY serialization for 2D array
if met_base_tools.use_netcdf_format():
dataplane.write_dataplane_nc(met_in, tmp_filename)
else:
dataplane.write_dataplane_json_numpy(met_in, tmp_filename)

@staticmethod
def write_dataplane_json_numpy(met_in, tmp_filename):
if met_base_tools.is_debug_enabled("dataplane"):
met_base.log_message(f"Save to a temporary JSON file and a temporary numpy output (dataplane)")
if hasattr(met_in.met_data, 'attrs') and met_in.met_data.attrs:
attrs = met_in.met_data.attrs
else:
attrs = met_in.attrs
with open(tmp_filename,'w') as json_fh:
json.dump(attrs, json_fh)

met_dp_data = met_base_tools.convert_to_ndarray(met_in.met_data)
numpy_dump_name = met_base_tools.get_numpy_filename(tmp_filename)
np.save(numpy_dump_name, met_dp_data)

@staticmethod
def write_dataplane_nc(met_in, netcdf_filename):
import netCDF4 as nc

if met_base_tools.is_debug_enabled("dataplane"):
met_base.log_message(f"Save to a temporary NetCDF file (dataplane)")

met_info = {'met_data': met_in.met_data}
if hasattr(met_in.met_data, 'attrs') and met_in.met_data.attrs:
attrs = met_in.met_data.attrs
else:
attrs = met_in.attrs
met_info['attrs'] = attrs

# write NetCDF file
ds = nc.Dataset(netcdf_filename, 'w')

# create dimensions and variable
nx, ny = met_in.met_data.shape
ds.createDimension('x', nx)
ds.createDimension('y', ny)
dp = ds.createVariable('met_data', met_in.met_data.dtype, ('x', 'y'),
fill_value=dataplane.MET_FILL_VALUE)
dp[:] = met_in.met_data

# append attributes
for attr, attr_val in met_info['attrs'].items():
if attr_val is None:
continue

if attr == 'name':
setattr(ds, 'name_str', attr_val)
elif attr == 'fill_value':
setattr(dp, dataplane.ATTR_USER_FILL_VALUE, attr_val)
elif type(attr_val) == dict:
for key in attr_val:
setattr(ds, attr + '.' + key, attr_val[key])
else:
setattr(ds, attr, attr_val)

ds.close()



def main(argv):
global attrs, met_data, met_info
Expand All @@ -196,14 +292,14 @@ def main(argv):
fill_value = met_in.user_fill_value

fill_value = attrs.get('fill_value', None)
dataplane.log_msg('validating the dataplane array...')
met_base.log_message('validating the dataplane array...')
met_data = dataplane.validate_met_data(init_met_data, fill_value)
met_info['met_data'] = met_data

if os.environ.get('MET_PYTHON_DEBUG', None) is not None:
dataplane.log_msg('--- met_data after validating ---')
dataplane.log_msg(met_data)
met_base.log_message('--- met_data after validating ---')
met_base.log_message(met_data)

if __name__ == '__main__' or __name__ == sys.argv[0]:
main(sys.argv)
dataplane.log_msg(f'{__name__} complete')
met_base.log_message(f'{__name__} complete')
Loading

0 comments on commit 6a54b1b

Please sign in to comment.