From 8ca65a8667764e5d419a552d9b1f5ff52173b8b9 Mon Sep 17 00:00:00 2001 From: BaptisteVandecrux Date: Mon, 24 Jun 2024 06:54:12 +0200 Subject: [PATCH 1/4] make CLI scripts usable within python --- setup.py | 8 ++--- src/pypromice/process/get_l2.py | 32 +++++++++++--------- src/pypromice/process/get_l2tol3.py | 30 +++++++++---------- src/pypromice/process/join_l2.py | 13 ++++++--- src/pypromice/process/join_l3.py | 45 ++++++++++++++++------------- 5 files changed, 71 insertions(+), 57 deletions(-) diff --git a/setup.py b/setup.py index 835a6b84..52a9b216 100644 --- a/setup.py +++ b/setup.py @@ -41,10 +41,10 @@ 'console_scripts': [ 'get_promice_data = pypromice.get.get_promice_data:get_promice_data', 'get_l0tx = pypromice.tx.get_l0tx:get_l0tx', - 'join_l2 = pypromice.process.join_l2:join_l2', - 'join_l3 = pypromice.process.join_l3:join_l3', - 'get_l2 = pypromice.process.get_l2:get_l2', - 'get_l2tol3 = pypromice.process.get_l2tol3:get_l2tol3', + 'join_l2 = pypromice.process.join_l2:main', + 'join_l3 = pypromice.process.join_l3:main', + 'get_l2 = pypromice.process.get_l2:main', + 'get_l2tol3 = pypromice.process.get_l2tol3:main', 'get_watsontx = pypromice.tx.get_watsontx:get_watsontx', 'get_bufr = pypromice.postprocess.get_bufr:main', 'get_msg = pypromice.tx.get_msg:get_msg' diff --git a/src/pypromice/process/get_l2.py b/src/pypromice/process/get_l2.py index 01747501..87f9efcf 100644 --- a/src/pypromice/process/get_l2.py +++ b/src/pypromice/process/get_l2.py @@ -22,9 +22,8 @@ def parse_arguments_l2(): args = parser.parse_args() return args -def get_l2(): - args = parse_arguments_l2() +def get_l2(config_file, inpath, outpath, variables, metadata): logging.basicConfig( format="%(asctime)s; %(levelname)s; %(name)s; %(message)s", level=logging.INFO, @@ -32,27 +31,32 @@ def get_l2(): ) # Define input path - station_name = args.config_file.split('/')[-1].split('.')[0] - station_path = os.path.join(args.inpath, station_name) + station_name = config_file.split('/')[-1].split('.')[0] + station_path = os.path.join(inpath, station_name) if os.path.exists(station_path): - aws = AWS(args.config_file, station_path, args.variables, args.metadata) + aws = AWS(config_file, station_path, variables, metadata) else: - aws = AWS(args.config_file, args.inpath, args.variables, args.metadata) + aws = AWS(config_file, inpath, variables, metadata) # Perform level 1 and 2 processing aws.getL1() aws.getL2() - v = getVars(args.variables) - m = getMeta(args.metadata) + v = getVars(variables) + m = getMeta(metadata) # Write out level 2 - if args.outpath is not None: - if not os.path.isdir(args.outpath): - os.mkdir(args.outpath) + if outpath is not None: + if not os.path.isdir(outpath): + os.mkdir(outpath) if aws.L2.attrs['format'] == 'raw': - prepare_and_write(aws.L2, args.outpath, v, m, '10min') - prepare_and_write(aws.L2, args.outpath, v, m, '60min') + prepare_and_write(aws.L2, outpath, v, m, '10min') + prepare_and_write(aws.L2, outpath, v, m, '60min') + return aws + +def main(): + args = parse_arguments_l2() + _ = get_l2(args.config_file, args.inpath, args.outpath, args.variables, args.metadata) if __name__ == "__main__": - get_l2() + main() \ No newline at end of file diff --git a/src/pypromice/process/get_l2tol3.py b/src/pypromice/process/get_l2tol3.py index e9ac24ce..876109ce 100644 --- a/src/pypromice/process/get_l2tol3.py +++ b/src/pypromice/process/get_l2tol3.py @@ -10,8 +10,7 @@ def parse_arguments_l2tol3(debug_args=None): parser = ArgumentParser(description="AWS L3 script for the processing L3 "+ - "data from L2 and merging the L3 data with its "+ - "historical site. An hourly, daily and monthly L3 "+ + "data from L2. An hourly, daily and monthly L3 "+ "data product is outputted to the defined output path") parser.add_argument('-i', '--inpath', type=str, required=True, help='Path to Level 2 .nc data file') @@ -21,15 +20,11 @@ def parse_arguments_l2tol3(debug_args=None): required=False, help='File path to variables look-up table') parser.add_argument('-m', '--metadata', default=None, type=str, required=False, help='File path to metadata') - parser.add_argument('-g', '--gcnet_historical', default=None, type=str, - required=False, help='File path to historical GC-Net data file') - # here will come additional arguments for the merging with historical stations args = parser.parse_args(args=debug_args) return args -def get_l2tol3(): - args = parse_arguments_l2tol3() +def get_l2tol3(inpath, outpath, variables, metadata): logging.basicConfig( format="%(asctime)s; %(levelname)s; %(name)s; %(message)s", level=logging.INFO, @@ -37,7 +32,7 @@ def get_l2tol3(): ) # Define Level 2 dataset from file - with xr.open_dataset(args.inpath) as l2: + with xr.open_dataset(inpath) as l2: l2.load() # Remove encoding attributes from NetCDF @@ -54,12 +49,17 @@ def get_l2tol3(): l3 = toL3(l2) # Write Level 3 dataset to file if output directory given - v = getVars(args.variables) - m = getMeta(args.metadata) - if args.outpath is not None: - prepare_and_write(l3, args.outpath, v, m, '60min') - prepare_and_write(l3, args.outpath, v, m, '1D') - prepare_and_write(l3, args.outpath, v, m, 'M') + v = getVars(variables) + m = getMeta(metadata) + if outpath is not None: + prepare_and_write(l3, outpath, v, m, '60min') + prepare_and_write(l3, outpath, v, m, '1D') + prepare_and_write(l3, outpath, v, m, 'M') + return l3 +def main(): + args = parse_arguments_l2tol3() + _ = get_l2tol3(args.inpath, args.outpath, args.variables, args.metadata) + if __name__ == "__main__": - get_l2tol3() + main() diff --git a/src/pypromice/process/join_l2.py b/src/pypromice/process/join_l2.py index 0c31db48..5b8979f5 100644 --- a/src/pypromice/process/join_l2.py +++ b/src/pypromice/process/join_l2.py @@ -4,6 +4,7 @@ import xarray as xr from argparse import ArgumentParser from pypromice.process.L1toL2 import correctPrecip +from pypromice.process.write import prepare_and_write logger = logging.getLogger(__name__) def parse_arguments_join(): @@ -47,7 +48,7 @@ def loadArr(infile): return ds, name -def join_l2(): +def join_l2(file1,file2,outpath,variables,metadata): args = parse_arguments_join() logging.basicConfig( format="%(asctime)s; %(levelname)s; %(name)s; %(message)s", @@ -99,9 +100,13 @@ def join_l2(): all_ds.attrs['format'] = 'merged RAW and TX' # Resample to hourly, daily and monthly datasets and write to file - prepare_and_write(all_ds, args.outpath, args.variables, args.metadata, resample = False) + prepare_and_write(all_ds, outpath, variables, metadata, resample = False) - logger.info(f'Files saved to {os.path.join(args.outpath, name)}...') + logger.info(f'Files saved to {os.path.join(outpath, name)}...') +def main(): + args = parse_arguments_join() + join_l2(args.file1, args.file2, args.outpath, args.variables, args.metadata) + if __name__ == "__main__": - join_l2() + main() diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py index 905ee81f..c2226138 100644 --- a/src/pypromice/process/join_l3.py +++ b/src/pypromice/process/join_l3.py @@ -27,8 +27,7 @@ def parse_arguments_joinl3(debug_args=None): help='Path to variables look-up table .csv file for variable name retained'''), parser.add_argument('-m', '--metadata', default=None, type=str, required=False, help='Path to metadata table .csv file for metadata information'''), - parser.add_argument('-d', '--datatype', default='raw', type=str, required=False, - help='Data type to output, raw or tx') + args = parser.parse_args(args=debug_args) return args @@ -106,7 +105,6 @@ def readNead(infile): ds=ds.rename({'timestamp':'time'}) return ds - def loadArr(infile, isNead): if infile.split('.')[-1].lower() in 'csv': if isNead: @@ -132,7 +130,7 @@ def loadArr(infile, isNead): print(f'{name} array loaded from {infile}') return ds, name -# will be used in the future +# %% will be used in the future # def aligning_surface_heights(l3_merged, l3): # df_aux['z_surf_combined'] = \ # df_aux['z_surf_combined'] \ @@ -157,7 +155,7 @@ def loadArr(infile, isNead): # df_in.loc[[df_in.z_surf_combined.first_valid_index()],:].index.astype('int64')[0] # ) + df_in.loc[df_in.z_surf_combined.first_valid_index(), 'z_surf_combined'] # return l3_merged - + # %% def build_station_list(config_folder: str, target_station_site: str) -> list: """ Get a list of unique station information dictionaries for a given station site. @@ -192,24 +190,23 @@ def build_station_list(config_folder: str, target_station_site: str) -> list: return station_info_list -def join_l3(): - args = parse_arguments_joinl3() - +def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, metadata): + # Get the list of station information dictionaries associated with the given site - list_station_info = build_station_list(args.config_folder, args.site) + list_station_info = build_station_list(config_folder, site) # Read the datasets and store them into a list along with their latest timestamp and station info list_station_data = [] for station_info in list_station_info: stid = station_info["stid"] - filepath = os.path.join(args.folder_l3, stid, stid+'_hour.nc') + filepath = os.path.join(folder_l3, stid, stid+'_hour.nc') isNead = False if station_info["project"].lower() in ["historical gc-net", "glaciobasis"]: - filepath = os.path.join(args.folder_gcnet, stid+'.csv') + filepath = os.path.join(folder_gcnet, stid+'.csv') isNead = True if not os.path.isfile(filepath): - logger.info(stid+' is from an project '+args.folder_l3+' or '+args.folder_gcnet) + logger.info(stid+' is from an project '+folder_l3+' or '+folder_gcnet) continue l3, _ = loadArr(filepath, isNead) @@ -283,17 +280,25 @@ def join_l3(): # Assign site id - l3_merged.attrs['site_id'] = args.site - l3_merged.attrs['stations'] = ' '.join(station_list) + l3_merged.attrs['site_id'] = site + l3_merged.attrs['stations'] = ' '.join(sorted_stids) l3_merged.attrs['level'] = 'L3' - v = getVars(args.variables) - m = getMeta(args.metadata) - if args.outpath is not None: - prepare_and_write(l3_merged, args.outpath, v, m, '60min') - prepare_and_write(l3_merged, args.outpath, v, m, '1D') - prepare_and_write(l3_merged, args.outpath, v, m, 'M') + v = getVars(variables) + m = getMeta(metadata) + if outpath is not None: + prepare_and_write(l3_merged, outpath, v, m, '60min') + prepare_and_write(l3_merged, outpath, v, m, '1D') + prepare_and_write(l3_merged, outpath, v, m, 'M') + +def main(): + args = parse_arguments_joinl3() + join_l3(args.config_folder, args.site, args.folder_l3, + args.folder_gcnet, args.outpath, args.variables, + args.metadata) + + if __name__ == "__main__": join_l3() From d25d8cb13fd768558bd35af6a3c8d3ec78b595c0 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Mon, 24 Jun 2024 09:11:49 +0200 Subject: [PATCH 2/4] return result in join_l2 and join_l3 --- src/pypromice/process/join_l2.py | 3 ++- src/pypromice/process/join_l3.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pypromice/process/join_l2.py b/src/pypromice/process/join_l2.py index 5b8979f5..d19246e7 100644 --- a/src/pypromice/process/join_l2.py +++ b/src/pypromice/process/join_l2.py @@ -103,10 +103,11 @@ def join_l2(file1,file2,outpath,variables,metadata): prepare_and_write(all_ds, outpath, variables, metadata, resample = False) logger.info(f'Files saved to {os.path.join(outpath, name)}...') + return all_ds def main(): args = parse_arguments_join() - join_l2(args.file1, args.file2, args.outpath, args.variables, args.metadata) + _ = join_l2(args.file1, args.file2, args.outpath, args.variables, args.metadata) if __name__ == "__main__": main() diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py index c2226138..77195842 100644 --- a/src/pypromice/process/join_l3.py +++ b/src/pypromice/process/join_l3.py @@ -290,15 +290,15 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me prepare_and_write(l3_merged, outpath, v, m, '60min') prepare_and_write(l3_merged, outpath, v, m, '1D') prepare_and_write(l3_merged, outpath, v, m, 'M') - + return l3_merged, sorted_list_station_data def main(): args = parse_arguments_joinl3() - join_l3(args.config_folder, args.site, args.folder_l3, + _, _ = join_l3(args.config_folder, args.site, args.folder_l3, args.folder_gcnet, args.outpath, args.variables, args.metadata) if __name__ == "__main__": - join_l3() + main() From b65d0cc07fe7f43d860fa991f9e8aef167ec2440 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:54:22 +0200 Subject: [PATCH 3/4] removing args from join_l2 function --- src/pypromice/process/join_l2.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/pypromice/process/join_l2.py b/src/pypromice/process/join_l2.py index d19246e7..8d850d83 100644 --- a/src/pypromice/process/join_l2.py +++ b/src/pypromice/process/join_l2.py @@ -49,24 +49,23 @@ def loadArr(infile): def join_l2(file1,file2,outpath,variables,metadata): - args = parse_arguments_join() logging.basicConfig( format="%(asctime)s; %(levelname)s; %(name)s; %(message)s", level=logging.INFO, stream=sys.stdout, ) # Check files - if os.path.isfile(args.file1) and os.path.isfile(args.file2): + if os.path.isfile(file1) and os.path.isfile(file2): # Load data arrays - ds1, n1 = loadArr(args.file1) - ds2, n2 = loadArr(args.file2) + ds1, n1 = loadArr(file1) + ds2, n2 = loadArr(file2) # Check stations match if n1.lower() == n2.lower(): # Merge arrays - logger.info(f'Combining {args.file1} with {args.file2}...') + logger.info(f'Combining {file1} with {file2}...') name = n1 all_ds = ds1.combine_first(ds2) @@ -83,18 +82,18 @@ def join_l2(file1,file2,outpath,variables,metadata): logger.info(f'Mismatched station names {n1}, {n2}') exit() - elif os.path.isfile(args.file1): - ds1, name = loadArr(args.file1) - logger.info(f'Only one file found {args.file1}...') + elif os.path.isfile(file1): + ds1, name = loadArr(file1) + logger.info(f'Only one file found {file1}...') all_ds = ds1 - elif os.path.isfile(args.file2): - ds2, name = loadArr(args.file2) - logger.info(f'Only one file found {args.file2}...') + elif os.path.isfile(file2): + ds2, name = loadArr(file2) + logger.info(f'Only one file found {file2}...') all_ds = ds2 else: - logger.info(f'Invalid files {args.file1}, {args.file2}') + logger.info(f'Invalid files {file1}, {file2}') exit() all_ds.attrs['format'] = 'merged RAW and TX' From 0dcac0cac301b50ad1528071356f0a524f343247 Mon Sep 17 00:00:00 2001 From: BaptisteVandecrux Date: Thu, 27 Jun 2024 10:21:07 +0200 Subject: [PATCH 4/4] proper removal of encoding info when reading netcdf --- src/pypromice/process/get_l2tol3.py | 4 ++-- src/pypromice/process/join_l2.py | 4 ++-- src/pypromice/process/join_l3.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pypromice/process/get_l2tol3.py b/src/pypromice/process/get_l2tol3.py index 876109ce..6f845287 100644 --- a/src/pypromice/process/get_l2tol3.py +++ b/src/pypromice/process/get_l2tol3.py @@ -37,8 +37,8 @@ def get_l2tol3(inpath, outpath, variables, metadata): # Remove encoding attributes from NetCDF for varname in l2.variables: - if 'encoding' in l2[varname].attrs: - del l2[varname].attrs['encoding'] + if l2[varname].encoding!={}: + l2[varname].encoding = {} if 'bedrock' in l2.attrs.keys(): l2.attrs['bedrock'] = l2.attrs['bedrock'] == 'True' diff --git a/src/pypromice/process/join_l2.py b/src/pypromice/process/join_l2.py index 8d850d83..06f3d8c5 100644 --- a/src/pypromice/process/join_l2.py +++ b/src/pypromice/process/join_l2.py @@ -31,8 +31,8 @@ def loadArr(infile): ds.load() # Remove encoding attributes from NetCDF for varname in ds.variables: - if 'encoding' in ds[varname].attrs: - del ds[varname].attrs['encoding'] + if ds[varname].encoding!={}: + ds[varname].encoding = {} try: name = ds.attrs['station_id'] diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py index 77195842..e062911c 100644 --- a/src/pypromice/process/join_l3.py +++ b/src/pypromice/process/join_l3.py @@ -119,8 +119,8 @@ def loadArr(infile, isNead): ds = xr.open_dataset(infile) # Remove encoding attributes from NetCDF for varname in ds.variables: - if 'encoding' in ds[varname].attrs: - del ds[varname].attrs['encoding'] + if ds[varname].encoding!={}: + ds[varname].encoding = {} try: name = ds.attrs['station_name']