diff --git a/.github/workflows/process_test.yml b/.github/workflows/process_test.yml index 5ba3d51a..e7180f6f 100644 --- a/.github/workflows/process_test.yml +++ b/.github/workflows/process_test.yml @@ -38,7 +38,7 @@ jobs: run: | mkdir $GITHUB_WORKSPACE/out/ for i in $(echo ${{ env.TEST_STATION }} | tr ' ' '\n'); do - python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l3.py -v $GITHUB_WORKSPACE/main/src/pypromice/process/variables.csv -m $GITHUB_WORKSPACE/main/src/pypromice/process/metadata.csv -c $GITHUB_WORKSPACE/aws-l0/raw/config/$i.toml -i $GITHUB_WORKSPACE/aws-l0/raw -o $GITHUB_WORKSPACE/out/ + python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2.py -v $GITHUB_WORKSPACE/main/src/pypromice/process/variables.csv -m $GITHUB_WORKSPACE/main/src/pypromice/process/metadata.csv -c $GITHUB_WORKSPACE/aws-l0/raw/config/$i.toml -i $GITHUB_WORKSPACE/aws-l0/raw -o $GITHUB_WORKSPACE/out/ done - name: Upload test output uses: actions/upload-artifact@v3 diff --git a/.gitignore b/.gitignore index 065beda1..cebe88d2 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ src/pypromice/postprocess/positions.csv # sqlite db files *.db +*.bak diff --git a/setup.py b/setup.py index 2a4638d3..1755a7a3 100644 --- a/setup.py +++ b/setup.py @@ -42,8 +42,9 @@ 'console_scripts': [ 'get_promice_data = pypromice.get.get_promice_data:get_promice_data', 'get_l0tx = pypromice.tx.get_l0tx:get_l0tx', + 'get_l2 = pypromice.process.get_l2:get_l2', + 'join_l2 = pypromice.process.join_l2:join_l2', 'get_l3 = pypromice.process.get_l3:get_l3', - 'join_l3 = pypromice.process.join_l3:join_l3', 'get_watsontx = pypromice.tx.get_watsontx:get_watsontx', 'get_bufr = pypromice.postprocess.get_bufr:main', 'get_msg = pypromice.tx.get_msg:get_msg' diff --git a/src/pypromice/process/L0toL1.py b/src/pypromice/process/L0toL1.py index a056a08b..0d55c730 100644 --- a/src/pypromice/process/L0toL1.py +++ b/src/pypromice/process/L0toL1.py @@ -5,9 +5,9 @@ import numpy as np import pandas as pd import xarray as xr -import re - +import re, logging from pypromice.process.value_clipping import clip_values +logger = logging.getLogger(__name__) def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100): @@ -28,9 +28,10 @@ def toL1(L0, vars_df, T_0=273.15, tilt_threshold=-100): ------- ds : xarray.Dataset Level 1 dataset - ''' + ''' assert(type(L0) == xr.Dataset) ds = L0 + ds.attrs['level'] = 'L1' for l in list(ds.keys()): if l not in ['time', 'msg_i', 'gps_lat', 'gps_lon', 'gps_alt', 'gps_time']: @@ -247,7 +248,7 @@ def getPressDepth(z_pt, p, pt_antifreeze, pt_z_factor, pt_z_coef, pt_z_p_coef): rho_af = 1145 else: rho_af = np.nan - print('ERROR: Incorrect metadata: "pt_antifreeze" = ' + + logger.info('ERROR: Incorrect metadata: "pt_antifreeze" = ' + f'{pt_antifreeze}. Antifreeze mix only supported at 50% or 100%') # assert(False) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 73ca1a7d..1a0087cb 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -30,7 +30,18 @@ def toL2( eps_clear=9.36508e-6, emissivity=0.97, ) -> xr.Dataset: - '''Process one Level 1 (L1) product to Level 2 + '''Process one Level 1 (L1) product to Level 2. 
+ In this step we do: + - manual flagging and adjustments + - automated QC: persistence, percentile + - custom filter: gps_alt filter, NaN t_rad removed from dlr & ulr + - smoothing of tilt and rot + - calculation of rh with regards to ice in subfreezin conditions + - calculation of cloud coverage + - correction of dsr and usr for tilt + - filtering of dsr based on a theoritical TOA irradiance and grazing light + - calculation of albedo + - calculation of directional wind speed Parameters ---------- @@ -59,6 +70,7 @@ def toL2( Level 2 dataset ''' ds = L1.copy(deep=True) # Reassign dataset + ds.attrs['level'] = 'L2' try: ds = adjustTime(ds) # Adjust time after a user-defined csv files ds = flagNAN(ds) # Flag NaNs after a user-defined csv files @@ -85,10 +97,20 @@ def toL2( ds['dlr'] = ds.dlr.where(ds.t_rad.notnull()) ds['ulr'] = ds.ulr.where(ds.t_rad.notnull()) + # calculating realtive humidity with regard to ice T_100 = _getTempK(T_0) ds['rh_u_cor'] = correctHumidity(ds['rh_u'], ds['t_u'], T_0, T_100, ews, ei0) + if ds.attrs['number_of_booms']==2: + ds['rh_l_cor'] = correctHumidity(ds['rh_l'], ds['t_l'], + T_0, T_100, ews, ei0) + + if hasattr(ds,'t_i'): + if ~ds['t_i'].isnull().all(): + ds['rh_i_cor'] = correctHumidity(ds['rh_i'], ds['t_i'], + T_0, T_100, ews, ei0) + # Determiune cloud cover for on-ice stations cc = calcCloudCoverage(ds['t_u'], T_0, eps_overcast, eps_clear, # Calculate cloud coverage ds['dlr'], ds.attrs['station_id']) @@ -176,22 +198,52 @@ def toL2( ds['precip_u_cor'], ds['precip_u_rate'] = correctPrecip(ds['precip_u'], ds['wspd_u']) if ds.attrs['number_of_booms']==2: - ds['rh_l_cor'] = correctHumidity(ds['rh_l'], ds['t_l'], # Correct relative humidity - T_0, T_100, ews, ei0) - if ~ds['precip_l'].isnull().all() and precip_flag: # Correct precipitation ds['precip_l_cor'], ds['precip_l_rate']= correctPrecip(ds['precip_l'], ds['wspd_l']) - if hasattr(ds,'t_i'): - if ~ds['t_i'].isnull().all(): # Instantaneous msg processing - ds['rh_i_cor'] = correctHumidity(ds['rh_i'], ds['t_i'], # Correct relative humidity - T_0, T_100, ews, ei0) + # Get directional wind speed + ds['wdir_u'] = ds['wdir_u'].where(ds['wspd_u'] != 0) + ds['wspd_x_u'], ds['wspd_y_u'] = calcDirWindSpeeds(ds['wspd_u'], ds['wdir_u']) + + if ds.attrs['number_of_booms']==2: + ds['wdir_l'] = ds['wdir_l'].where(ds['wspd_l'] != 0) + ds['wspd_x_l'], ds['wspd_y_l'] = calcDirWindSpeeds(ds['wspd_l'], ds['wdir_l']) + + if hasattr(ds, 'wdir_i'): + if ~ds['wdir_i'].isnull().all() and ~ds['wspd_i'].isnull().all(): + ds['wdir_i'] = ds['wdir_i'].where(ds['wspd_i'] != 0) + ds['wspd_x_i'], ds['wspd_y_i'] = calcDirWindSpeeds(ds['wspd_i'], ds['wdir_i']) + ds = clip_values(ds, vars_df) return ds +def calcDirWindSpeeds(wspd, wdir, deg2rad=np.pi/180): + '''Calculate directional wind speed from wind speed and direction + + Parameters + ---------- + wspd : xr.Dataarray + Wind speed data array + wdir : xr.Dataarray + Wind direction data array + deg2rad : float + Degree to radians coefficient. 
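A minimal round-trip sketch (not part of this patch) of the wind decomposition introduced here: calcDirWindSpeeds projects speed onto sin/cos components, and the inverse arctan2 reconstruction (as used elsewhere in this changeset when direction is rebuilt from averaged components) recovers the direction. Values are illustrative.

import numpy as np

wspd = np.array([4.0, 7.5, 2.1])          # wind speed (m/s), made-up values
wdir = np.array([10.0, 200.0, 355.0])     # wind direction (degrees)

deg2rad = np.pi / 180
wspd_x = wspd * np.sin(wdir * deg2rad)    # x component
wspd_y = wspd * np.cos(wdir * deg2rad)    # y component

# inverse: direction recovered from the (possibly time-averaged) components
wdir_back = (np.arctan2(wspd_x, wspd_y) / deg2rad + 360) % 360
print(np.allclose(wdir, wdir_back))       # True for this example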
The default is np.pi/180 + + Returns + ------- + wspd_x : xr.Dataarray + Wind speed in X direction + wspd_y : xr.Datarray + Wind speed in Y direction + ''' + wspd_x = wspd * np.sin(wdir * deg2rad) + wspd_y = wspd * np.cos(wdir * deg2rad) + return wspd_x, wspd_y + + def calcCloudCoverage(T, T_0, eps_overcast, eps_clear, dlr, station_id): '''Calculate cloud cover from T and T_0 diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index a71a4028..caf3354f 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -4,10 +4,16 @@ """ import numpy as np import xarray as xr +from statsmodels.nonparametric.smoothers_lowess import lowess +import logging +logger = logging.getLogger(__name__) -def toL3(L2, T_0=273.15, z_0=0.001, R_d=287.05, eps=0.622, es_0=6.1071, - es_100=1013.246): - '''Process one Level 2 (L2) product to Level 3 (L3) +def toL3(L2, T_0=273.15): + '''Process one Level 2 (L2) product to Level 3 (L3) meaning calculating all + derived variables: + - Turbulent fluxes + - smoothed and inter/extrapolated GPS coordinates + Parameters ---------- @@ -15,26 +21,13 @@ def toL3(L2, T_0=273.15, z_0=0.001, R_d=287.05, eps=0.622, es_0=6.1071, L2 AWS data T_0 : int Steam point temperature. Default is 273.15. - z_0 : int - Aerodynamic surface roughness length for momention, assumed constant - for all ice/snow surfaces. Default is 0.001. - R_d : int - Gas constant of dry air. Default is 287.05. - eps : int - Default is 0.622. - es_0 : int - Saturation vapour pressure at the melting point (hPa). Default is 6.1071. - es_100 : int - Saturation vapour pressure at steam point temperature (hPa). Default is - 1013.246. ''' ds = L2 + ds.attrs['level'] = 'L3' T_100 = _getTempK(T_0) # Get steam point temperature as K - ds['wdir_u'] = ds['wdir_u'].where(ds['wspd_u'] != 0) # Get directional wind speed - ds['wspd_x_u'], ds['wspd_y_u'] = calcDirWindSpeeds(ds['wspd_u'], ds['wdir_u']) - + # Turbulent heat flux calculation # Upper boom bulk calculation T_h_u = ds['t_u'].copy() # Copy for processing p_h_u = ds['p_u'].copy() @@ -44,86 +37,97 @@ def toL3(L2, T_0=273.15, z_0=0.001, R_d=287.05, eps=0.622, es_0=6.1071, z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of Anemometer z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer - rho_atm_u = 100 * p_h_u / R_d / (T_h_u + T_0) # Calculate atmospheric density - nu_u = calcVisc(T_h_u, T_0, rho_atm_u) # Calculate kinematic viscosity - q_h_u = calcHumid(T_0, T_100, T_h_u, es_0, es_100, eps, # Calculate specific humidity - p_h_u, RH_cor_h_u) + q_h_u = calcSpHumid(T_0, T_100, T_h_u, p_h_u, RH_cor_h_u) # Calculate specific humidity + if not ds.attrs['bedrock']: - SHF_h_u, LHF_h_u= calcHeatFlux(T_0, T_h_u, Tsurf_h, rho_atm_u, WS_h_u, # Calculate latent and sensible heat fluxes - z_WS_u, z_T_u, nu_u, q_h_u, p_h_u) - SHF_h_u, LHF_h_u = cleanHeatFlux(SHF_h_u, LHF_h_u, T_h_u, Tsurf_h, p_h_u, # Clean heat flux values - WS_h_u, RH_cor_h_u, ds['z_boom_u']) + SHF_h_u, LHF_h_u= calcHeatFlux(T_0, T_h_u, Tsurf_h, WS_h_u, # Calculate latent and sensible heat fluxes + z_WS_u, z_T_u, q_h_u, p_h_u) + ds['dshf_u'] = (('time'), SHF_h_u.data) ds['dlhf_u'] = (('time'), LHF_h_u.data) + q_h_u = 1000 * q_h_u # Convert sp.humid from kg/kg to g/kg - q_h_u = cleanSpHumid(q_h_u, T_h_u, Tsurf_h, p_h_u, RH_cor_h_u) # Clean sp.humid values ds['qh_u'] = (('time'), q_h_u.data) # Lower boom bulk calculation - if ds.attrs['number_of_booms']==2: - # ds['wdir_l'] = _calcWindDir(ds['wspd_x_l'], ds['wspd_y_l']) # Calculatate wind direction - + if 
ds.attrs['number_of_booms']==2: T_h_l = ds['t_l'].copy() # Copy for processing p_h_l = ds['p_l'].copy() WS_h_l = ds['wspd_l'].copy() RH_cor_h_l = ds['rh_l_cor'].copy() z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of W z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer - - rho_atm_l = 100 * p_h_l / R_d / (T_h_l + T_0) # Calculate atmospheric density - nu_l = calcVisc(T_h_l, T_0, rho_atm_l) # Calculate kinematic viscosity - q_h_l = calcHumid(T_0, T_100, T_h_l, es_0, es_100, eps, # Calculate sp.humidity - p_h_l, RH_cor_h_l) + + q_h_l = calcSpHumid(T_0, T_100, T_h_l, p_h_l, RH_cor_h_l) # Calculate sp.humidity + if not ds.attrs['bedrock']: - SHF_h_l, LHF_h_l= calcHeatFlux(T_0, T_h_l, Tsurf_h, rho_atm_l, WS_h_l, # Calculate latent and sensible heat fluxes - z_WS_l, z_T_l, nu_l, q_h_l, p_h_l) - SHF_h_l, LHF_h_l = cleanHeatFlux(SHF_h_l, LHF_h_l, T_h_l, Tsurf_h, p_h_l, # Clean heat flux values - WS_h_l, RH_cor_h_l, ds['z_boom_l']) + SHF_h_l, LHF_h_l= calcHeatFlux(T_0, T_h_l, Tsurf_h, WS_h_l, # Calculate latent and sensible heat fluxes + z_WS_l, z_T_l, q_h_l, p_h_l) + ds['dshf_l'] = (('time'), SHF_h_l.data) ds['dlhf_l'] = (('time'), LHF_h_l.data) q_h_l = 1000 * q_h_l # Convert sp.humid from kg/kg to g/kg - q_h_l = cleanSpHumid(q_h_l, T_h_l, Tsurf_h, p_h_l, RH_cor_h_l) # Clean sp.humid values - ds['qh_l'] = (('time'), q_h_l.data) - ds['wdir_l'] = ds['wdir_l'].where(ds['wspd_l'] != 0) # Get directional wind speed - ds['wspd_x_l'], ds['wspd_y_l'] = calcDirWindSpeeds(ds['wspd_l'], ds['wdir_l']) - - if hasattr(ds, 'wdir_i'): - if ~ds['wdir_i'].isnull().all() and ~ds['wspd_i'].isnull().all(): # Instantaneous msg processing - ds['wdir_i'] = ds['wdir_i'].where(ds['wspd_i'] != 0) # Get directional wind speed - ds['wspd_x_i'], ds['wspd_y_i'] = calcDirWindSpeeds(ds['wspd_i'], ds['wdir_i']) + ds['qh_l'] = (('time'), q_h_l.data) - return ds + # Smoothing and inter/extrapolation of GPS coordinates + for var in ['gps_lat', 'gps_lon', 'gps_alt']: + logger.info('Postprocessing '+var) -def calcDirWindSpeeds(wspd, wdir, deg2rad=np.pi/180): - '''Calculate directional wind speed from wind speed and direction - - Parameters - ---------- - wspd : xr.Dataarray - Wind speed data array - wdir : xr.Dataarray - Wind direction data array - deg2rad : float - Degree to radians coefficient. 
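A small sketch, with made-up station values, of the static-coordinate fallback used in the GPS post-processing loop above: when a gps_* variable is missing or entirely NaN, the static attribute is repeated along the time axis and also stored as a single *_avg value.

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {'t_u': ('time', np.random.rand(4))},
    coords={'time': np.arange(4)},
    attrs={'latitude': 66.48},            # illustrative static value from metadata
)

# no usable gps_lat in this toy dataset, so fall back to the static attribute
static_value = float(ds.attrs['latitude'])
ds['lat'] = ('time', np.ones_like(ds['t_u'].data) * static_value)
ds['lat_avg'] = static_value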
The default is np.pi/180 - - Returns - ------- - wspd_x : xr.Dataarray - Wind speed in X direction - wspd_y : xr.Datarray - Wind speed in Y direction - ''' - wspd_x = wspd * np.sin(wdir * deg2rad) - wspd_y = wspd * np.cos(wdir * deg2rad) - return wspd_x, wspd_y + # saving the static value and droping 'lat','lon' or 'alt' as they are + # being reassigned as timeseries + var_out = var.replace('gps_','') + + if var_out == 'alt': + if 'altitude' in list(ds.attrs.keys()): + static_value = float(ds.attrs['altitude']) + else: + print('no standard altitude for', ds.station_id) + static_value = np.nan + elif var_out == 'lat': + static_value = float(ds.attrs['latitude']) + elif var_out == 'lon': + static_value = float(ds.attrs['longitude']) + ds=ds.drop_vars(var_out) + + # if there is no gps observations, then we use the static value repeated + # for each time stamp + if var not in ds.data_vars: + print('no',var,'at', ds.station_id) + ds[var_out] = ('time', np.ones_like(ds['t_u'].data)*static_value) + ds[var_out+'_avg'] = static_value + continue + + if ds[var].isnull().all(): + print('no',var,'at',ds.station_id) + ds[var_out] = ('time', np.ones_like(ds['t_u'].data)*static_value) + ds[var_out+'_avg'] = static_value + continue + + # here we detect potential relocation of the station in the form of a + # break in the general trend of the latitude, longitude and altitude + # in the future, this could/should be listed in an external file to + # avoid missed relocations or sensor issues interpreted as a relocation + if var == 'gps_alt': + _, breaks = find_breaks(ds[var].to_series(), alpha=8) + else: + _, breaks = find_breaks(ds[var].to_series(), alpha=6) + + # smoothing and inter/extrapolation of the coordinate + ds[var_out] = \ + ('time', piecewise_smoothing_and_interpolation(ds[var].to_series(), breaks)) + + ds['lat_avg'] = ds['lat'].mean() + ds['lon_avg'] = ds['lon'].mean() + ds['alt_avg'] = ds['alt'].mean() + return ds -def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h, +def calcHeatFlux(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, kappa=0.4, WS_lim=1., z_0=0.001, g=9.82, es_0=6.1071, eps=0.622, gamma=16., L_sub=2.83e6, L_dif_max=0.01, c_pd=1005., aa=0.7, - bb=0.75, cc=5., dd=0.35): + bb=0.75, cc=5., dd=0.35, R_d=287.05): '''Calculate latent and sensible heat flux using the bulk calculation method @@ -159,9 +163,14 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h, g : int Gravitational acceleration (m/s2). Default is 9.82. es_0 : int - Saturation vapour pressure at the melting point (hPa). Default is 6.1071. + Saturation vapour pressure at the melting point (hPa). Default is 6.1071. + es_100 : int + Saturation vapour pressure at steam point temperature (hPa). Default is + 1013.246. eps : int Ratio of molar masses of vapor and dry air (0.622). + R_d : int + Gas constant of dry air. Default is 287.05. gamma : int Flux profile correction (Paulson & Dyer). Default is 16.. L_sub : int @@ -182,6 +191,9 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h, dd : int Flux profile correction constants (Holtslag & De Bruin '88). Default is 0.35. + z_0 : int + Aerodynamic surface roughness length for momention, assumed constant + for all ice/snow surfaces. Default is 0.001. 
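A short numeric sketch (illustrative values only) of the atmospheric density that calcHeatFlux now computes internally from pressure and temperature instead of receiving it as an argument; the 100 factor converts hPa to Pa.

import numpy as np

R_d = 287.05                       # gas constant of dry air (J kg-1 K-1)
T_0 = 273.15                       # 0 degC in K
p_h = np.array([800.0, 850.0])     # air pressure (hPa), made-up values
T_h = np.array([-15.0, -5.0])      # air temperature (degC)

rho_atm = 100 * p_h / R_d / (T_h + T_0)   # ideal-gas density
print(rho_atm)                             # roughly 1.08 and 1.10 kg m-3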
Returns ------- @@ -190,6 +202,9 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h, LHF_h : xarray.DataArray Latent heat flux ''' + rho_atm = 100 * p_h / R_d / (T_h + T_0) # Calculate atmospheric density + nu = calcVisc(T_h, T_0, rho_atm) # Calculate kinematic viscosity + SHF_h = xr.zeros_like(T_h) # Create empty xarrays LHF_h = xr.zeros_like(T_h) L = xr.full_like(T_h, 1E5) @@ -275,7 +290,11 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h, # If n_elements(where(L_dif > L_dif_max)) eq 1 then break if np.all(L_dif <= L_dif_max): break - + + HF_nan = np.isnan(p_h) | np.isnan(T_h) | np.isnan(Tsurf_h) \ + | np.isnan(q_h) | np.isnan(WS_h) | np.isnan(z_T) + SHF_h[HF_nan] = np.nan + LHF_h[HF_nan] = np.nan return SHF_h, LHF_h def calcVisc(T_h, T_0, rho_atm): @@ -301,9 +320,8 @@ def calcVisc(T_h, T_0, rho_atm): # Kinematic viscosity of air in m^2/s return mu / rho_atm -def calcHumid(T_0, T_100, T_h, es_0, es_100, eps, p_h, RH_cor_h): +def calcSpHumid(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_100=1013.246, eps=0.622): '''Calculate specific humidity - Parameters ---------- T_0 : float @@ -346,72 +364,80 @@ def calcHumid(T_0, T_100, T_h, es_0, es_100, eps, p_h, RH_cor_h): freezing = T_h < 0 q_sat[freezing] = eps * es_ice[freezing] / (p_h[freezing] - (1 - eps) * es_ice[freezing]) + q_nan = np.isnan(T_h) | np.isnan(p_h) + q_sat[q_nan] = np.nan + # Convert to kg/kg return RH_cor_h * q_sat / 100 -def cleanHeatFlux(SHF, LHF, T, Tsurf, p, WS, RH_cor, z_boom): - '''Find invalid heat flux data values and replace with NaNs, based on - air temperature, surface temperature, air pressure, wind speed, - corrected relative humidity, and boom height + +def find_breaks(df,alpha): + '''Detects potential relocation of the station from the GPS measurements. + The code first makes a forward linear interpolation of the coordinates and + then looks for important jumps in latitude, longitude and altitude. The jumps + that are higher than a given threshold (expressed as a multiple of the + standard deviation) are mostly caused by the station being moved during + maintenance. To avoid misclassification, only the jumps detected in May-Sept. + are kept. 
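A hedged usage sketch of the break list returned by find_breaks and consumed by piecewise_smoothing_and_interpolation: the list is padded with None on both ends so that slice(breaks[i], breaks[i+1]) is open-ended for the first and last pieces. The break day appears in both neighbouring pieces, which is why duplicated indices are dropped after concatenation. Dates are illustrative.

import pandas as pd

idx = pd.date_range('2019-01-01', '2019-12-31', freq='D')
series = pd.Series(range(len(idx)), index=idx)
breaks = [None, pd.Timestamp('2019-06-15'), None]   # one detected relocation

pieces = [series.loc[slice(breaks[i], breaks[i + 1])]
          for i in range(len(breaks) - 1)]
# first piece runs up to (and including) the break, second starts at the break
print(len(pieces[0]), len(pieces[1]))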
Parameters ---------- - SHF : xarray.DataArray - Sensible heat flux - LHF : xarray.DataArray - Latent heat flux - T : xarray.DataArray - Air temperature - Tsurf : xarray.DataArray - Surface temperature - p : xarray.DataArray - Air pressure - WS : xarray.DataArray - Wind speed - RH_cor : xarray.DataArray - Relative humidity corrected - z_boom : xarray.DataArray - Boom height - - Returns - ------- - SHF : xarray.DataArray - Sensible heat flux corrected - LHF : xarray.DataArray - Latent heat flux corrected + df : pandas.Series + series of observed latitude, longitude or elevation + alpha: float + coefficient to be applied to the the standard deviation of the daily + coordinate fluctuation ''' - HF_nan = np.isnan(p) | np.isnan(T) | np.isnan(Tsurf) \ - | np.isnan(RH_cor) | np.isnan(WS) | np.isnan(z_boom) - SHF[HF_nan] = np.nan - LHF[HF_nan] = np.nan - return SHF, LHF - -def cleanSpHumid(q_h, T, Tsurf, p, RH_cor): - '''Find invalid specific humidity data values and replace with NaNs, - based on air temperature, surface temperature, air pressure, - and corrected relative humidity + diff = df.resample('D').median().interpolate( + method='linear', limit_area='inside', limit_direction='forward').diff() + thresh = diff.std() * alpha + list_diff = diff.loc[diff.abs()>thresh].reset_index() + list_diff = list_diff.loc[list_diff.time.dt.month.isin([5,6,7,8,9])] + list_diff['year']=list_diff.time.dt.year + list_diff=list_diff.groupby('year').max() + return diff, [None]+list_diff.time.to_list()+[None] + + +def piecewise_smoothing_and_interpolation(df_in, breaks): + '''Smoothes, inter- or extrapolate the gps observations. The processing is + done piecewise so that each period between station relocation are done + separately (no smoothing of the jump due to relocation). Locally Weighted + Scatterplot Smoothing (lowess) is then used to smooth the available + observations. Then this smoothed curve is interpolated linearly over internal + gaps. Eventually, this interpolated curve is extrapolated linearly for + timestamps before the first valid measurement and after the last valid + measurement. Parameters ---------- - q_h : xarray.DataArray - Specific humidity - T : xarray.DataArray - Air temperature - Tsurf : xarray.DataArray - Surface temperature - p : xarray.DataArray - Air pressure - RH_cor : xarray.DataArray - Relative humidity corrected - - Returns - ------- - q_h : xarray.DataArray - Specific humidity corrected''' - q_nan = np.isnan(T) | np.isnan(RH_cor) | np.isnan(p) | np.isnan(Tsurf) - q_h[q_nan] = np.nan - return q_h - + df_in : pandas.Series + series of observed latitude, longitude or elevation + breaks: list + List of timestamps of station relocation. 
First and last item should be + None so that they can be used in slice(breaks[i], breaks[i+1]) + ''' + df_all = pd.Series() # dataframe gathering all the smoothed pieces + for i in range(len(breaks)-1): + df = df_in.loc[slice(breaks[i], breaks[i+1])].copy() + + y_sm = lowess(df, + pd.to_numeric(df.index), + is_sorted=True, frac=1/3, it=0, + ) + df.loc[df.notnull()] = y_sm[:,1] + df = df.interpolate(method='linear', limit_area='inside') + + last_valid_6_months = slice(df.last_valid_index()-pd.to_timedelta('180D'),None) + df.loc[last_valid_6_months] = (df.loc[last_valid_6_months].interpolate( axis=0, + method='spline',order=1, limit_direction='forward', fill_value="extrapolate")).values + + first_valid_6_months = slice(None, df.first_valid_index()+pd.to_timedelta('180D')) + df.loc[first_valid_6_months] = (df.loc[first_valid_6_months].interpolate( axis=0, + method='spline',order=1, limit_direction='backward', fill_value="extrapolate")).values + df_all=pd.concat((df_all, df)) + + df_all = df_all[~df_all.index.duplicated(keep='first')] + return df_all.values def _calcAtmosDens(p_h, R_d, T_h, T_0): # TODO: check this shouldn't be in this step somewhere '''Calculate atmospheric density''' diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index 58440595..5588f8e5 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -2,32 +2,26 @@ """ AWS data processing module """ -import logging -from functools import reduce -from importlib import metadata -import os, unittest, toml, datetime, uuid, pkg_resources -from typing import Sequence, Optional - -import numpy as np import warnings warnings.simplefilter(action='ignore', category=FutureWarning) + +import logging, os import pandas as pd import xarray as xr -from datetime import timedelta +from functools import reduce from pypromice.process.L0toL1 import toL1 from pypromice.process.L1toL2 import toL2 from pypromice.process.L2toL3 import toL3 +from pypromice.process import write, load, utilities +from pypromice.process.resample import resample_dataset pd.set_option('display.precision', 2) xr.set_options(keep_attrs=True) logger = logging.getLogger(__name__) -#------------------------------------------------------------------------------ - - class AWS(object): '''AWS object to load and process PROMICE AWS data''' @@ -53,15 +47,15 @@ def __init__(self, config_file, inpath, var_file=None, meta_file=None): # Load config, variables CSF standards, and L0 files self.config = self.loadConfig(config_file, inpath) - self.vars = getVars(var_file) - self.meta = getMeta(meta_file) + self.vars = load.getVars(var_file) + self.meta = load.getMeta(meta_file) # Load config file L0 = self.loadL0() self.L0=[] for l in L0: - n = getColNames(self.vars, l.attrs['number_of_booms'], l.attrs['format']) - self.L0.append(popCols(l, n)) + n = write.getColNames(self.vars, l) + self.L0.append(utilities.popCols(l, n)) self.L1 = None self.L1A = None @@ -78,18 +72,26 @@ def process(self): self.getL2() self.getL3() - def write(self, outpath): - '''Write L3 data to .csv and .nc file''' + def writeL2(self, outpath): + '''Write L2 data to .csv and .nc file''' if os.path.isdir(outpath): - self.writeArr(outpath) + self.writeArr(self.L2, outpath) else: logger.info(f'Outpath f{outpath} does not exist. Unable to save to file') pass + def writeL3(self, outpath): + '''Write L3 data to .csv and .nc file''' + if os.path.isdir(outpath): + self.writeArr(self.L3, outpath) + else: + logger.info(f'Outpath f{outpath} does not exist. 
Unable to save to file') + pass + def getL1(self): '''Perform L0 to L1 data processing''' logger.info('Level 1 processing...') - self.L0 = [addBasicMeta(item, self.vars) for item in self.L0] + self.L0 = [utilities.addBasicMeta(item, self.vars) for item in self.L0] self.L1 = [toL1(item, self.vars) for item in self.L0] self.L1A = reduce(xr.Dataset.combine_first, self.L1) @@ -104,82 +106,29 @@ def getL3(self): logger.info('Level 3 processing...') self.L3 = toL3(self.L2) - # Resample L3 product - f = [l.attrs['format'] for l in self.L0] - if 'raw' in f or 'STM' in f: - logger.info('Resampling to 10 minute') - self.L3 = resampleL3(self.L3, '10min') - else: - self.L3 = resampleL3(self.L3, '60min') - logger.info('Resampling to hour') - - # Re-format time - t = self.L3['time'].values - self.L3['time'] = list(t) - - # Switch gps_lon to negative (degrees_east) - # Do this here, and NOT in addMeta, otherwise we switch back to positive - # when calling getMeta in joinL3! PJW - if self.L3.attrs['station_id'] not in ['UWN', 'Roof_GEUS', 'Roof_PROMICE']: - self.L3['gps_lon'] = self.L3['gps_lon'] * -1 - - # Add variable attributes and metadata - self.L3 = self.addAttributes(self.L3) - - # Round all values to specified decimals places - self.L3 = roundValues(self.L3, self.vars) - - def addAttributes(self, L3): - '''Add variable and attribute metadata - - Parameters - ---------- - L3 : xr.Dataset - Level-3 data object - - Returns - ------- - L3 : xr.Dataset - Level-3 data object with attributes - ''' - L3 = addVars(L3, self.vars) - L3 = addMeta(L3, self.meta) - return L3 - - def writeArr(self, outpath): + def writeArr(self, dataset, outpath, t=None): '''Write L3 data to .nc and .csv hourly and daily files Parameters ---------- + dataset : xarray.Dataset + Dataset to write to file outpath : str Output directory - L3 : AWS.L3 - Level-3 data object + t : str + Resampling string. This is automatically defined based + on the data type if not given. The default is None. 
''' - outdir = os.path.join(outpath, self.L3.attrs['station_id']) - if not os.path.isdir(outdir): - os.mkdir(outdir) - - col_names = getColNames( - self.vars, - self.L3.attrs['number_of_booms'], - self.L3.attrs['format'], - self.L3.attrs['bedrock'], - ) - - t = int(pd.Timedelta((self.L3['time'][1] - self.L3['time'][0]).values).total_seconds()) - logger.info('Writing to files...') - if t == 600: - out_csv = os.path.join(outdir, self.L3.attrs['station_id']+'_10min.csv') - out_nc = os.path.join(outdir, self.L3.attrs['station_id']+'_10min.nc') + if t is not None: + write.prepare_and_write(dataset, outpath, self.vars, self.meta, t) else: - out_csv = os.path.join(outdir, self.L3.attrs['station_id']+'_hour.csv') - out_nc = os.path.join(outdir, self.L3.attrs['station_id']+'_hour.nc') - writeCSV(out_csv, self.L3, col_names) - col_names = col_names + ['lat', 'lon', 'alt'] - writeNC(out_nc, self.L3, col_names) - logger.info(f'Written to {out_csv}') - logger.info(f'Written to {out_nc}') + f = [l.attrs['format'] for l in self.L0] + if 'raw' in f or 'STM' in f: + write.prepare_and_write(dataset, outpath, self.vars, + self.meta, '10min') + else: + write.prepare_and_write(dataset, outpath, self.vars, + self.meta, '60min') def loadConfig(self, config_file, inpath): '''Load configuration from .toml file @@ -196,7 +145,7 @@ def loadConfig(self, config_file, inpath): conf : dict Configuration parameters ''' - conf = getConfig(config_file, inpath) + conf = load.getConfig(config_file, inpath) return conf def loadL0(self): @@ -246,687 +195,7 @@ def readL0file(self, conf): L0 data ''' file_version = conf.get('file_version', -1) - ds = getL0(conf['file'], conf['nodata'], conf['columns'], + ds = load.getL0(conf['file'], conf['nodata'], conf['columns'], conf["skiprows"], file_version, time_offset=conf.get('time_offset')) - ds = populateMeta(ds, conf, ["columns", "skiprows", "modem"]) + ds = utilities.populateMeta(ds, conf, ["columns", "skiprows", "modem"]) return ds - -#------------------------------------------------------------------------------ - -def getConfig(config_file, inpath, default_columns: Sequence[str] = ('msg_lat', 'msg_lon')): - '''Load configuration from .toml file. 
PROMICE .toml files support defining - features at the top level which apply to all nested properties, but do not - overwrite nested properties if they are defined - - Parameters - ---------- - config_file : str - TOML file path - inpath : str - Input folder directory where L0 files can be found - - Returns - ------- - conf : dict - Configuration dictionary - ''' - conf = toml.load(config_file) # Move all top level keys to nested properties, - top = [_ for _ in conf.keys() if not type(conf[_]) is dict] # if they are not already defined in the nested properties - subs = [_ for _ in conf.keys() if type(conf[_]) is dict] # Insert the section name (config_file) as a file property and config file - for s in subs: - for t in top: - if t not in conf[s].keys(): - conf[s][t] = conf[t] - - conf[s]['conf'] = config_file - conf[s]['file'] = os.path.join(inpath, s) - conf[s]["columns"].extend(default_columns) - - for t in top: conf.pop(t) # Delete all top level keys beause each file - # should carry all properties with it - for k in conf.keys(): # Check required fields are present - for field in ["columns", "station_id", "format", "skiprows"]: - assert(field in conf[k].keys()), field+" not in config keys" - return conf - -def getL0(infile, nodata, cols, skiprows, file_version, - delimiter=',', comment='#', time_offset: Optional[float] = None) -> xr.Dataset: - ''' Read L0 data file into pandas DataFrame object - - Parameters - ---------- - infile : str - L0 file path - nodata : list - List containing value for nan values and reassigned value - cols : list - List of columns in file - skiprows : int - Skip rows value - file_version : int - Version of L0 file - delimiter : str - String delimiter for L0 file - comment : str - Notifier of commented sections in L0 file - time_offset : Optional[float] - Time offset in hours for correcting for non utc time data. - Returns - ------- - ds : xarray.Dataset - L0 Dataset - ''' - if file_version == 1: - df = pd.read_csv(infile, comment=comment, index_col=0, - na_values=nodata, names=cols, - sep=delimiter, - skiprows=skiprows, skip_blank_lines=True, - usecols=range(len(cols)), - low_memory=False) - df['time'] = pd.to_datetime( - df.year.astype(str) \ - + df.doy.astype(str).str.zfill(3) \ - + df.hhmm.astype(str).str.zfill(4), - format='%Y%j%H%M' - ) - df = df.set_index('time') - - else: - df = pd.read_csv(infile, comment=comment, index_col=0, - na_values=nodata, names=cols, parse_dates=True, - sep=delimiter, skiprows=skiprows, - skip_blank_lines=True, - usecols=range(len(cols)), - low_memory=False) - try: - df.index = pd.to_datetime(df.index) - except ValueError as e: - logger.info("\n", infile) - logger.info("\nValueError:") - logger.info(e) - logger.info('\t\t> Trying pd.to_datetime with format=mixed') - try: - df.index = pd.to_datetime(df.index, format='mixed') - except Exception as e: - logger.info("\nDateParseError:") - logger.info(e) - logger.info('\t\t> Trying again removing apostrophes in timestamp (old files format)') - df.index = pd.to_datetime(df.index.str.replace("\"","")) - - if time_offset is not None: - df.index = df.index + timedelta(hours=time_offset) - - # Drop SKIP columns - for c in df.columns: - if c[0:4] == 'SKIP': - df.drop(columns=c, inplace=True) - - # Carry relevant metadata with ds - ds = xr.Dataset.from_dataframe(df) - return ds - -def addBasicMeta(ds, vars_df): - ''' Use a variable lookup table DataFrame to add the basic metadata - to the xarray dataset. 
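A small, self-contained illustration (hypothetical config content, not a real station file) of the top-level key propagation that getConfig performs: keys defined at the top of the TOML apply to every file section unless that section overrides them.

import toml

cfg_text = """
station_id = "XXX"
format = "raw"
skiprows = 4

["file_one.txt"]
skiprows = 1
"""
conf = toml.loads(cfg_text)
top = [k for k in conf if not isinstance(conf[k], dict)]
subs = [k for k in conf if isinstance(conf[k], dict)]
for s in subs:
    for t in top:
        if t not in conf[s].keys():
            conf[s][t] = conf[t]

print(conf['file_one.txt']['skiprows'])    # 1, section value kept
print(conf['file_one.txt']['station_id'])  # 'XXX', inherited from top level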
This is later amended to finalise L3 - - Parameters - ---------- - ds : xr.Dataset - Dataset to add metadata to - vars_df : pd.DataFrame - Metadata dataframe - - Returns - ------- - ds : xr.Dataset - Dataset with added metadata - ''' - for v in vars_df.index: - if v == 'time': continue # coordinate variable, not normal var - if v not in list(ds.variables): continue - for c in ['standard_name', 'long_name', 'units']: - if isinstance(vars_df[c][v], float) and np.isnan(vars_df[c][v]): continue - ds[v].attrs[c] = vars_df[c][v] - return ds - -def populateMeta(ds, conf, skip): - '''Populate L0 Dataset with metadata dictionary - - Parameters - ---------- - ds : xarray.Dataset - L0 dataset - conf : dict - Metadata dictionary - skip : list - List of column names to skip parsing to metadata - - Returns - ------- - ds : xarray.Dataset - L0 dataset with metadata populated as Dataset attributes - ''' - meta = {} - # skip = ["columns", "skiprows"] - for k in conf.keys(): - if k not in skip: meta[k] = conf[k] - ds.attrs = meta - return ds - -def writeCSV(outfile, Lx, csv_order): - '''Write data product to CSV file - - Parameters - ---------- - outfile : str - Output file path - Lx : xr.Dataset - Dataset to write to file - csv_order : list - List order of variables - ''' - Lcsv = Lx.to_dataframe().dropna(how='all') - if csv_order is not None: - names = [c for c in csv_order if c in list(Lcsv.columns)] - Lcsv = Lcsv[names] - Lcsv.to_csv(outfile) - -def writeNC(outfile, Lx, col_names=None): - '''Write data product to NetCDF file - - Parameters - ---------- - outfile : str - Output file path - Lx : xr.Dataset - Dataset to write to file - ''' - if os.path.isfile(outfile): - os.remove(outfile) - if col_names is not None: - names = [c for c in col_names if c in list(Lx.keys())] - else: - names = list(Lx.keys()) - Lx[names].to_netcdf(outfile, mode='w', format='NETCDF4', compute=True) - -def writeAll(outpath, station_id, l3_h, l3_d, l3_m, csv_order=None): - '''Write L3 hourly, daily and monthly datasets to .nc and .csv - files - - outpath : str - Output file path - station_id : str - Station name - l3_h : xr.Dataset - L3 hourly data - l3_d : xr.Dataset - L3 daily data - l3_m : xr.Dataset - L3 monthly data - csv_order : list, optional - List order of variables - ''' - if not os.path.isdir(outpath): - os.mkdir(outpath) - outfile_h = os.path.join(outpath, station_id + '_hour') - outfile_d = os.path.join(outpath, station_id + '_day') - outfile_m = os.path.join(outpath, station_id + '_month') - for o,l in zip([outfile_h, outfile_d, outfile_m], [l3_h ,l3_d, l3_m]): - writeCSV(o+'.csv',l, csv_order) - writeNC(o+'.nc',l) - - -def popCols(ds, names): - '''Populate dataset with all given variable names - - Parammeters - ----------- - ds : xr.Dataset - Dataset - names : list - List of variable names to populate - ''' - for v in names: - if v not in list(ds.variables): - ds[v] = (('time'), np.arange(ds['time'].size)*np.nan) - return ds - -def getColNames(vars_df, booms=None, data_type=None, bedrock=False): - '''Get all variable names for a given data type, based on a variables - look-up table - - Parameters - ---------- - vars_df : pd.DataFrame - Variables look-up table - booms : int, optional - Number of booms. If this parameter is empty then all variables - regardless of boom type will be passed. The default is None. - data_type : str, optional - Data type, "tx", "STM" or "raw". If this parameter is empty then all - variables regardless of data type will be passed. The default is None. 
- - Returns - ------- - list - Variable names - ''' - if booms==1: - vars_df = vars_df.loc[vars_df['station_type'].isin(['one-boom','all'])] - elif booms==2: - vars_df = vars_df.loc[vars_df['station_type'].isin(['two-boom','all'])] - - if data_type=='TX': - vars_df = vars_df.loc[vars_df['data_type'].isin(['TX','all'])] - elif data_type=='STM' or data_type=='raw': - vars_df = vars_df.loc[vars_df['data_type'].isin(['raw','all'])] - - col_names = list(vars_df.index) - if isinstance(bedrock, str): - bedrock = (bedrock.lower() == 'true') - if bedrock == True: - col_names.remove('cc') - for v in ['dlhf_u', 'dlhf_l', 'dshf_u', 'dshf_l']: - try: - col_names.remove(v) - except: - pass - return col_names - -def roundValues(ds, df, col='max_decimals'): - '''Round all variable values in data array based on pre-defined rounding - value in variables look-up table DataFrame - - Parameters - ---------- - ds : xr.Dataset - Dataset to round values in - df : pd.Dataframe - Variable look-up table with rounding values - col : str - Column in variable look-up table that contains rounding values. The - default is "max_decimals" - ''' - df = df[col] - df = df.dropna(how='all') - for var in df.index: - if var not in list(ds.variables): - continue - if df[var] is not np.nan: - ds[var] = ds[var].round(decimals=int(df[var])) - return ds - -def addVars(ds, variables): - '''Add variable attributes from file to dataset - - Parameters - ---------- - ds : xarray.Dataset - Dataset to add variable attributes to - variables : pandas.DataFrame - Variables lookup table file - - Returns - ------- - ds : xarray.Dataset - Dataset with metadata - ''' - for k in ds.keys(): - if k not in variables.index: continue - ds[k].attrs['standard_name'] = variables.loc[k]['standard_name'] - ds[k].attrs['long_name'] = variables.loc[k]['long_name'] - ds[k].attrs['units'] = variables.loc[k]['units'] - ds[k].attrs['coverage_content_type'] = variables.loc[k]['coverage_content_type'] - ds[k].attrs['coordinates'] = variables.loc[k]['coordinates'] - return ds - -def addMeta(ds, meta): - '''Add metadata attributes from file to dataset - - Parameters - ---------- - ds : xarray.Dataset - Dataset to add metadata attributes to - meta : dict - Metadata file - - Returns - ------- - ds : xarray.Dataset - Dataset with metadata - ''' - ds['lon'] = ds['gps_lon'].mean() - ds['lon'].attrs = ds['gps_lon'].attrs - - ds['lat'] = ds['gps_lat'].mean() - ds['lat'].attrs = ds['gps_lat'].attrs - - ds['alt'] = ds['gps_alt'].mean() - ds['alt'].attrs = ds['gps_alt'].attrs - - # for k in ds.keys(): # for each var - # if 'units' in ds[k].attrs: - # if ds[k].attrs['units'] == 'C': - # ds[k].attrs['units'] = 'degrees_C' - - # https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3#geospatial_bounds - ds.attrs['id'] = 'dk.geus.promice:' + str(uuid.uuid3(uuid.NAMESPACE_DNS, ds.attrs['station_id'])) - ds.attrs['history'] = 'Generated on ' + datetime.datetime.utcnow().isoformat() - ds.attrs['date_created'] = str(datetime.datetime.now().isoformat()) - ds.attrs['date_modified'] = ds.attrs['date_created'] - ds.attrs['date_issued'] = ds.attrs['date_created'] - ds.attrs['date_metadata_modified'] = ds.attrs['date_created'] - - ds.attrs['geospatial_bounds'] = "POLYGON((" + \ - f"{ds['lat'].min().values} {ds['lon'].min().values}, " + \ - f"{ds['lat'].min().values} {ds['lon'].max().values}, " + \ - f"{ds['lat'].max().values} {ds['lon'].max().values}, " + \ - f"{ds['lat'].max().values} {ds['lon'].min().values}, " + \ - f"{ds['lat'].min().values} {ds['lon'].min().values}))" - - 
ds.attrs['geospatial_lat_min'] = str(ds['lat'].min().values) - ds.attrs['geospatial_lat_max'] = str(ds['lat'].max().values) - ds.attrs['geospatial_lon_min'] = str(ds['lon'].min().values) - ds.attrs['geospatial_lon_max'] = str(ds['lon'].max().values) - ds.attrs['geospatial_vertical_min'] = str(ds['alt'].min().values) - ds.attrs['geospatial_vertical_max'] = str(ds['alt'].max().values) - ds.attrs['geospatial_vertical_positive'] = 'up' - ds.attrs['time_coverage_start'] = str(ds['time'][0].values) - ds.attrs['time_coverage_end'] = str(ds['time'][-1].values) - - try: - ds.attrs['source']= 'pypromice v' + str(metadata.version('pypromice')) - except: - ds.attrs['source'] = 'pypromice' - - # https://www.digi.com/resources/documentation/digidocs/90001437-13/reference/r_iso_8601_duration_format.htm - try: - ds.attrs['time_coverage_duration'] = str(pd.Timedelta((ds['time'][-1] - ds['time'][0]).values).isoformat()) - ds.attrs['time_coverage_resolution'] = str(pd.Timedelta((ds['time'][1] - ds['time'][0]).values).isoformat()) - except: - ds.attrs['time_coverage_duration'] = str(pd.Timedelta(0).isoformat()) - ds.attrs['time_coverage_resolution'] = str(pd.Timedelta(0).isoformat()) - - # Note: int64 dtype (long int) is incompatible with OPeNDAP access via THREDDS for NetCDF files - # See https://stackoverflow.com/questions/48895227/output-int32-time-dimension-in-netcdf-using-xarray - ds.time.encoding["dtype"] = "i4" # 32-bit signed integer - #ds.time.encoding["calendar"] = 'proleptic_gregorian' # this is default - - # Load metadata attributes and add to Dataset - [_addAttr(ds, key, value) for key,value in meta.items()] - - # Check attribute formating - for k,v in ds.attrs.items(): - if not isinstance(v, str) or not isinstance(v, int): - ds.attrs[k]=str(v) - return ds - - -def getVars(v_file=None): - '''Load variables.csv file - - Parameters - ---------- - v_file : str - Variable lookup table file path - - Returns - ------- - pandas.DataFrame - Variables dataframe - ''' - if v_file is None: - with pkg_resources.resource_stream('pypromice', 'process/variables.csv') as stream: - return pd.read_csv(stream, index_col=0, comment="#", encoding='utf-8') - else: - return pd.read_csv(v_file, index_col=0, comment="#") - - -def getMeta(m_file=None, delimiter=','): #TODO change to DataFrame output to match variables.csv - '''Load metadata table - - Parameters - ---------- - m_file : str - Metadata file path - delimiter : str - Metadata character delimiter. The default is "," - - Returns - ------- - meta : dict - Metadata dictionary - ''' - meta={} - if m_file is None: - with pkg_resources.resource_stream('pypromice', 'process/metadata.csv') as stream: - lines = stream.read().decode("utf-8") - lines = lines.split("\n") - else: - with open(m_file, 'r') as f: - lines = f.readlines() - for l in lines[1:]: - try: - meta[l.split(',')[0]] = l.split(delimiter)[1].split('\n')[0].replace(';',',') - except IndexError: - pass - return meta - -def resampleL3(ds_h, t): - '''Resample L3 AWS data, e.g. hourly to daily average. This uses pandas - DataFrame resampling at the moment as a work-around to the xarray Dataset - resampling. 
As stated, xarray resampling is a lengthy process that takes - ~2-3 minutes per operation: ds_d = ds_h.resample({'time':"1D"}).mean() - This has now been fixed, so needs implementing: - https://github.com/pydata/xarray/issues/4498#event-6610799698 - - Parameters - ---------- - ds_h : xarray.Dataset - L3 AWS dataset either at 10 min (for raw data) or hourly (for tx data) - t : str - Resample factor, same variable definition as in - pandas.DataFrame.resample() - - Returns - ------- - ds_d : xarray.Dataset - L3 AWS dataset resampled to the frequency defined by t - ''' - df_d = ds_h.to_dataframe().resample(t).mean() - - # recalculating wind direction from averaged directional wind speeds - for var in ['wdir_u','wdir_l','wdir_i']: - if var in df_d.columns: - if ('wspd_x_'+var.split('_')[1] in df_d.columns) & ('wspd_x_'+var.split('_')[1] in df_d.columns): - df_d[var] = _calcWindDir(df_d['wspd_x_'+var.split('_')[1]], - df_d['wspd_y_'+var.split('_')[1]]) - else: - logger.info(var,'in dataframe but not','wspd_x_'+var.split('_')[1],'wspd_x_'+var.split('_')[1]) - - # recalculating relative humidity from average vapour pressure and average - # saturation vapor pressure - for var in ['rh_u','rh_l']: - lvl = var.split('_')[1] - if var in df_d.columns: - if ('t_'+lvl in ds_h.keys()): - es_wtr, es_cor = calculateSaturationVaporPressure(ds_h['t_'+lvl]) - p_vap = ds_h[var] / 100 * es_wtr - - df_d[var] = (p_vap.to_series().resample(t).mean() \ - / es_wtr.to_series().resample(t).mean())*100 - df_d[var+'_cor'] = (p_vap.to_series().resample(t).mean() \ - / es_cor.to_series().resample(t).mean())*100 - - vals = [xr.DataArray(data=df_d[c], dims=['time'], - coords={'time':df_d.index}, attrs=ds_h[c].attrs) for c in df_d.columns] - ds_d = xr.Dataset(dict(zip(df_d.columns,vals)), attrs=ds_h.attrs) - return ds_d - - -def calculateSaturationVaporPressure(t, T_0=273.15, T_100=373.15, es_0=6.1071, - es_100=1013.246, eps=0.622): - '''Calculate specific humidity - - Parameters - ---------- - T_0 : float - Steam point temperature. Default is 273.15. 
- T_100 : float - Steam point temperature in Kelvin - t : xarray.DataArray - Air temperature - es_0 : float - Saturation vapour pressure at the melting point (hPa) - es_100 : float - Saturation vapour pressure at steam point temperature (hPa) - - Returns - ------- - xarray.DataArray - Saturation vapour pressure with regard to water above 0 C (hPa) - xarray.DataArray - Saturation vapour pressure where subfreezing timestamps are with regards to ice (hPa) - ''' - # Saturation vapour pressure above 0 C (hPa) - es_wtr = 10**(-7.90298 * (T_100 / (t + T_0) - 1) + 5.02808 * np.log10(T_100 / (t + T_0)) - - 1.3816E-7 * (10**(11.344 * (1 - (t + T_0) / T_100)) - 1) - + 8.1328E-3 * (10**(-3.49149 * (T_100 / (t + T_0) -1)) - 1) + np.log10(es_100)) - - # Saturation vapour pressure below 0 C (hPa) - es_ice = 10**(-9.09718 * (T_0 / (t + T_0) - 1) - 3.56654 - * np.log10(T_0 / (t + T_0)) + 0.876793 - * (1 - (t + T_0) / T_0) - + np.log10(es_0)) - - # Saturation vapour pressure (hPa) - es_cor = xr.where(t < 0, es_ice, es_wtr) - - return es_wtr, es_cor - - -def _calcWindDir(wspd_x, wspd_y): - '''Calculate wind direction in degrees - - Parameters - ---------- - wspd_x : xarray.DataArray - Wind speed in X direction - wspd_y : xarray.DataArray - Wind speed in Y direction - - Returns - ------- - wdir : xarray.DataArray - Wind direction''' - deg2rad = np.pi / 180 - rad2deg = 1 / deg2rad - wdir = np.arctan2(wspd_x, wspd_y) * rad2deg - wdir = (wdir + 360) % 360 - return wdir - - -def _addAttr(ds, key, value): - '''Add attribute to xarray dataset - - ds : xr.Dataset - Dataset to add attribute to - key : str - Attribute name, with "." denoting variable attributes - value : str/int - Value for attribute''' - if len(key.split('.')) == 2: - try: - ds[key.split('.')[0]].attrs[key.split('.')[1]] = str(value) - except: - pass - # logger.info(f'Unable to add metadata to {key.split(".")[0]}') - else: - ds.attrs[key] = value - - -#------------------------------------------------------------------------------ - -class TestProcess(unittest.TestCase): - - def testgetVars(self): - '''Test variable table lookup retrieval''' - v = getVars() - self.assertIsInstance(v, pd.DataFrame) - self.assertTrue(v.columns[0] in 'standard_name') - self.assertTrue(v.columns[2] in 'units') - - def testgetMeta(self): - '''Test AWS names retrieval''' - m = getMeta() - self.assertIsInstance(m, dict) - self.assertTrue('references' in m) - - def testAddAll(self): - '''Test variable and metadata attributes added to Dataset''' - d = xr.Dataset() - v = getVars() - att = list(v.index) - att1 = ['gps_lon', 'gps_lat', 'gps_alt', 'albedo', 'p'] - for a in att: - d[a]=[0,1] - for a in att1: - d[a]=[0,1] - d['time'] = [datetime.datetime.now(), - datetime.datetime.now()-timedelta(days=365)] - d.attrs['station_id']='TEST' - meta = getMeta() - d = addVars(d, v) - d = addMeta(d, meta) - self.assertTrue(d.attrs['station_id']=='TEST') - self.assertIsInstance(d.attrs['references'], str) - - def testL0toL3(self): - '''Test L0 to L3 processing''' - try: - import pypromice - pAWS = AWS(os.path.join(os.path.dirname(pypromice.__file__),'test/test_config1.toml'), - os.path.join(os.path.dirname(pypromice.__file__),'test')) - except: - pAWS = AWS('../test/test_config1.toml', '../test/') - pAWS.process() - self.assertIsInstance(pAWS.L3, xr.Dataset) - self.assertTrue(pAWS.L3.attrs['station_id']=='TEST1') - - def testCLIgetl3(self): - '''Test get_l3 CLI''' - exit_status = os.system('get_l3 -h') - self.assertEqual(exit_status, 0) - - def testCLIjoinl3(self): - '''Test join_l3 CLI''' 
- exit_status = os.system('join_l3 -h') - self.assertEqual(exit_status, 0) - -#------------------------------------------------------------------------------ - -if __name__ == "__main__": - - # # Test an individual station - # test_station = 'xxx' - # # config_file = '../../../../aws-l0/raw/config/{}.toml'.format(test_station) - # config_file = '../../../../aws-l0/tx/config/{}.toml'.format(test_station) - # # inpath= '../../../../aws-l0/raw/{}/'.format(test_station) - # inpath= '../../../../aws-l0/tx/' - # vari = 'variables.csv' - # pAWS_gc = AWS(config_file, inpath, var_file=vari) - # pAWS_gc.process() - # pAWS_gc.getL1() - # pAWS_gc.getL2() - # pAWS_gc.getL3() - - # # Use test configs - # config_files = ['test/test_config1.toml', 'test/test_config2.toml'] - # inpath= 'test/' - # outpath = 'test/' - # vari = 'variables.csv' - # for cf in config_files: - # pAWS_gc = AWS(cf, inpath, var_file=vari) - # pAWS_gc.process() - - unittest.main() diff --git a/src/pypromice/process/get_l3.py b/src/pypromice/process/get_l2.py old mode 100755 new mode 100644 similarity index 65% rename from src/pypromice/process/get_l3.py rename to src/pypromice/process/get_l2.py index 4cc18436..d0202f85 --- a/src/pypromice/process/get_l3.py +++ b/src/pypromice/process/get_l2.py @@ -2,13 +2,14 @@ import logging, os, sys, unittest from argparse import ArgumentParser from pypromice.process.aws import AWS +from pypromice.process.write import prepare_and_write -def parse_arguments_l3(): - parser = ArgumentParser(description="AWS L3 processor") +def parse_arguments_l2(): + parser = ArgumentParser(description="AWS L2 processor") parser.add_argument('-c', '--config_file', type=str, required=True, help='Path to config (TOML) file') - parser.add_argument('-i', '--inpath', default='data', type=str, required=True, + parser.add_argument('-i', '--inpath', type=str, required=True, help='Path to input data') parser.add_argument('-o', '--outpath', default=None, type=str, required=False, help='Path where to write output') @@ -19,15 +20,16 @@ def parse_arguments_l3(): args = parser.parse_args() return args -def get_l3(): - args = parse_arguments_l3() +def get_l2(): + args = parse_arguments_l2() logging.basicConfig( format="%(asctime)s; %(levelname)s; %(name)s; %(message)s", level=logging.INFO, stream=sys.stdout, ) - + + # Define input path station_name = args.config_file.split('/')[-1].split('.')[0] station_path = os.path.join(args.inpath, station_name) @@ -36,11 +38,19 @@ def get_l3(): else: aws = AWS(args.config_file, args.inpath, args.variables, args.metadata) - aws.process() - + # Perform level 1 and 2 processing + aws.getL1() + aws.getL2() + + # Write out level 2 if args.outpath is not None: - aws.write(args.outpath) - + if not os.path.isdir(args.outpath): + os.mkdir(args.outpath) + if aws.L2.attrs['format'] == 'raw': + prepare_and_write(aws.L2, args.outpath, args.variables, args.metadata, '10min') + prepare_and_write(aws.L2, args.outpath, args.variables, args.metadata, '60min') + + if __name__ == "__main__": - get_l3() - + get_l2() + \ No newline at end of file diff --git a/src/pypromice/process/get_l2tol3.py b/src/pypromice/process/get_l2tol3.py new file mode 100644 index 00000000..8204bfc6 --- /dev/null +++ b/src/pypromice/process/get_l2tol3.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +import os, logging, sys +import xarray as xr +from argparse import ArgumentParser +import pypromice +from pypromice.process.L2toL3 import toL3 +from pypromice.process.write import prepare_and_write +logger = logging.getLogger(__name__) + +def 
parse_arguments_l2tol3(debug_args=None): + parser = ArgumentParser(description="AWS L3 script for the processing L3 "+ + "data from L2 and merging the L3 data with its "+ + "historical site. An hourly, daily and monthly L3 "+ + "data product is outputted to the defined output path") + parser.add_argument('-i', '--inpath', type=str, required=True, + help='Path to Level 2 .nc data file') + parser.add_argument('-o', '--outpath', default=None, type=str, required=False, + help='Path where to write output') + parser.add_argument('-v', '--variables', default=None, type=str, + required=False, help='File path to variables look-up table') + parser.add_argument('-m', '--metadata', default=None, type=str, + required=False, help='File path to metadata') + parser.add_argument('-g', '--gcnet_historical', default=None, type=str, + required=False, help='File path to historical GC-Net data file') + + # here will come additional arguments for the merging with historical stations + args = parser.parse_args(args=debug_args) + return args + +def get_l2tol3(): + args = parse_arguments_l2tol3() + logging.basicConfig( + format="%(asctime)s; %(levelname)s; %(name)s; %(message)s", + level=logging.INFO, + stream=sys.stdout, + ) + + # Define Level 2 dataset from file + with xr.open_dataset(args.inpath) as l2: + l2.load() + for varname in l2.variables: + if 'encoding' in l2[varname].attrs: + del l2[varname].attrs['encoding'] + if 'bedrock' in l2.attrs.keys(): + l2.attrs['bedrock'] = l2.attrs['bedrock'] == 'True' + if 'number_of_booms' in l2.attrs.keys(): + l2.attrs['number_of_booms'] = int(l2.attrs['number_of_booms']) + + # Perform Level 3 processing + l3 = toL3(l2) + + # Write Level 3 dataset to file if output directory given + if args.outpath is not None: + prepare_and_write(l3, args.outpath, args.variables, args.metadata, '60min') + prepare_and_write(l3, args.outpath, args.variables, args.metadata, '1D') + prepare_and_write(l3, args.outpath, args.variables, args.metadata, 'M') + +if __name__ == "__main__": + get_l2tol3() diff --git a/src/pypromice/process/join_l2.py b/src/pypromice/process/join_l2.py new file mode 100644 index 00000000..37abff8d --- /dev/null +++ b/src/pypromice/process/join_l2.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +import logging, sys, os, unittest +import pandas as pd +import xarray as xr +from argparse import ArgumentParser +from pypromice.process.utilities import addMeta, roundValues +from pypromice.process.write import prepare_and_write +from pypromice.process.L1toL2 import correctPrecip +logger = logging.getLogger(__name__) + +def parse_arguments_join(): + parser = ArgumentParser(description="AWS L2 joiner for merging together two L2 products, for example an L2 RAW and L2 TX data product. 
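A hedged round-trip sketch (throwaway file name, toy data) of the attribute normalisation done when an L2 NetCDF is read back: global attributes come back as strings, so 'bedrock' and 'number_of_booms' are re-cast after loading, mirroring the logic above.

import numpy as np
import xarray as xr

ds = xr.Dataset({'t_u': ('time', np.zeros(3))},
                attrs={'bedrock': 'False', 'number_of_booms': '1'})
ds.to_netcdf('tmp_l2.nc')

with xr.open_dataset('tmp_l2.nc') as l2:
    l2.load()
if 'bedrock' in l2.attrs.keys():
    l2.attrs['bedrock'] = l2.attrs['bedrock'] == 'True'
if 'number_of_booms' in l2.attrs.keys():
    l2.attrs['number_of_booms'] = int(l2.attrs['number_of_booms'])
print(l2.attrs)   # {'bedrock': False, 'number_of_booms': 1}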
An hourly, daily and monthly L2 data product is outputted to the defined output path") + parser.add_argument('-s', '--file1', type=str, required=True, + help='Path to source L2 file, which will be preferenced in merge process') + parser.add_argument('-t', '--file2', type=str, required=True, + help='Path to target L2 file, which will be used to fill gaps in merge process') + parser.add_argument('-o', '--outpath', default=os.getcwd(), type=str, required=True, + help='Path where to write output') + parser.add_argument('-v', '--variables', default=None, type=str, required=False, + help='Path to variables look-up table .csv file for variable name retained'''), + parser.add_argument('-m', '--metadata', default=None, type=str, required=False, + help='Path to metadata table .csv file for metadata information'''), + args = parser.parse_args() + return args + +def loadArr(infile): + if infile.split('.')[-1].lower() == 'csv': + df = pd.read_csv(infile, index_col=0, parse_dates=True) + ds = xr.Dataset.from_dataframe(df) + elif infile.split('.')[-1].lower() == 'nc': + with xr.open_dataset(infile) as ds: + ds.load() + for varname in ds.variables: + if 'encoding' in ds[varname].attrs: + del ds[varname].attrs['encoding'] + + try: + name = ds.attrs['station_id'] + except: + name = infile.split('/')[-1].split('.')[0].split('_hour')[0].split('_10min')[0] + ds.attrs['station_id'] = name + if 'bedrock' in ds.attrs.keys(): + ds.attrs['bedrock'] = ds.attrs['bedrock'] == 'True' + if 'number_of_booms' in ds.attrs.keys(): + ds.attrs['number_of_booms'] = int(ds.attrs['number_of_booms']) + + logger.info(f'{name} array loaded from {infile}') + return ds, name + + +def join_l2(): + args = parse_arguments_join() + logging.basicConfig( + format="%(asctime)s; %(levelname)s; %(name)s; %(message)s", + level=logging.INFO, + stream=sys.stdout, + ) + # Check files + if os.path.isfile(args.file1) and os.path.isfile(args.file2): + + # Load data arrays + ds1, n1 = loadArr(args.file1) + ds2, n2 = loadArr(args.file2) + + # Check stations match + if n1.lower() == n2.lower(): + + # Merge arrays + logger.info(f'Combining {args.file1} with {args.file2}...') + name = n1 + all_ds = ds1.combine_first(ds2) + + # Re-calculate corrected precipitation + if hasattr(all_ds, 'precip_u_cor'): + if ~all_ds['precip_u_cor'].isnull().all(): + all_ds['precip_u_cor'], _ = correctPrecip(all_ds['precip_u'], + all_ds['wspd_u']) + if hasattr(all_ds, 'precip_l_cor'): + if ~all_ds['precip_l_cor'].isnull().all(): + all_ds['precip_l_cor'], _ = correctPrecip(all_ds['precip_l'], + all_ds['wspd_l']) + else: + logger.info(f'Mismatched station names {n1}, {n2}') + exit() + + elif os.path.isfile(args.file1): + ds1, name = loadArr(args.file1) + logger.info(f'Only one file found {args.file1}...') + all_ds = ds1 + + elif os.path.isfile(args.file2): + ds2, name = loadArr(args.file2) + logger.info(f'Only one file found {args.file2}...') + all_ds = ds2 + + else: + logger.info(f'Invalid files {args.file1}, {args.file2}') + exit() + + all_ds.attrs['format'] = 'merged RAW and TX' + + # Resample to hourly, daily and monthly datasets and write to file + prepare_and_write(all_ds, args.outpath, args.variables, args.metadata, resample = False) + + logger.info(f'Files saved to {os.path.join(args.outpath, name)}...') + +if __name__ == "__main__": + join_l2() diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py index 2e0d3fe3..669126c5 100644 --- a/src/pypromice/process/join_l3.py +++ b/src/pypromice/process/join_l3.py @@ -1,36 +1,116 @@ #!/usr/bin/env 
python -import os, unittest +import logging, os, sys, unittest, toml, pkg_resources +from argparse import ArgumentParser +from pypromice.process.write import prepare_and_write +import numpy as np import pandas as pd import xarray as xr -from argparse import ArgumentParser -from pypromice.process import getVars, getMeta, addMeta, getColNames, \ - roundValues, resampleL3, writeAll -from pypromice.process.L1toL2 import correctPrecip - -def parse_arguments_join(): - parser = ArgumentParser(description="AWS L3 joiner for merging together two L3 products, for example an L3 RAW and L3 TX data product. An hourly, daily and monthly L3 data product is outputted to the defined output path") - parser.add_argument('-s', '--file1', type=str, required=True, - help='Path to source L3 file, which will be preferenced in merge process') - parser.add_argument('-t', '--file2', type=str, required=True, - help='Path to target L3 file, which will be used to fill gaps in merge process') - parser.add_argument('-o', '--outpath', default=os.getcwd(), type=str, required=True, +logger = logging.getLogger(__name__) + +def parse_arguments_joinl3(debug_args=None): + parser = ArgumentParser(description="AWS L3 script for the processing L3 data from L2 and merging the L3 data with its historical site. An hourly, daily and monthly L3 data product is outputted to the defined output path") + parser.add_argument('-c', '--config_folder', type=str, required=True, + help='Path to folder with sites configuration (TOML) files') + parser.add_argument('-s', '--site', default=None, type=str, required=False, + help='Name of site to process (default: all sites are processed)') + + parser.add_argument('-l3', '--folder_l3', type=str, required=True, + help='Path to level 3 folder') + parser.add_argument('-gc', '--folder_gcnet', type=str, required=False, + help='Path to GC-Net historical L1 folder') + + parser.add_argument('-o', '--outpath', default=os.getcwd(), type=str, required=True, help='Path where to write output') + parser.add_argument('-v', '--variables', default=None, type=str, required=False, help='Path to variables look-up table .csv file for variable name retained'''), parser.add_argument('-m', '--metadata', default=None, type=str, required=False, help='Path to metadata table .csv file for metadata information'''), parser.add_argument('-d', '--datatype', default='raw', type=str, required=False, help='Data type to output, raw or tx') - args = parser.parse_args() + args = parser.parse_args(args=debug_args) return args +def readNead(infile): + with open(infile) as f: + fmt = f.readline() + assert(fmt[0] == "#") + assert(fmt.split("#")[1].split()[0] == "NEAD") + assert(fmt.split("#")[1].split()[1] == "1.0") + assert(fmt.split("#")[1].split()[2] == "UTF-8") + + line = f.readline() + assert(line[0] == "#") + assert(line.split("#")[1].strip() == '[METADATA]') + + meta = {} + fields = {} + section = 'meta' + while True: + line = f.readline() + if line.strip(' ') == '#': continue + if line == "# [DATA]\n": break # done reading header + if line == "# [FIELDS]\n": + section = 'fields' + continue # done reading header + + if line[0] == "\n": continue # blank line + assert(line[0] == "#") # if not blank, must start with "#" + + key_eq_val = line.split("#")[1].strip() + if key_eq_val == '' or key_eq_val == None: continue # Line is just "#" or "# " or "# #"... 
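+            # At this point the line holds a "key = value" entry from the [METADATA]
+            # or [FIELDS] section. An illustrative (assumed) NEAD header:
+            #   # NEAD 1.0 UTF-8
+            #   # [METADATA]
+            #   # field_delimiter = ,
+            #   # [FIELDS]
+            #   # fields = timestamp,TA1,TA2
+            #   # [DATA]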
+ assert("=" in key_eq_val), print(line, key_eq_val) + key = key_eq_val.split("=")[0].strip() + val = key_eq_val.split("=")[1].strip() + + # Convert from string to number if it is a number + if val.strip('-').strip('+').replace('.','').isdigit(): + val = float(val) + if val == int(val): + val = int(val) + + if section == 'meta': meta[key] = val + if section == 'fields': fields[key] = val + # done reading header + + # Find delimiter and fields for reading NEAD as simple CSV + assert("field_delimiter" in meta.keys()) + assert("fields" in fields.keys()) + FD = meta["field_delimiter"] + names = [_.strip() for _ in fields.pop('fields').split(FD)] + + df = pd.read_csv(infile, + comment = "#", + names = names, + sep = FD, + usecols=np.arange(len(names)), + skip_blank_lines = True) + df['timestamp'] = pd.to_datetime(df.timestamp).dt.tz_localize(None) + df = df.set_index('timestamp') + ds = df.to_xarray() + ds.attrs = meta + return ds + + def loadArr(infile): if infile.split('.')[-1].lower() in 'csv': - df = pd.read_csv(infile, index_col=0, parse_dates=True) - ds = xr.Dataset.from_dataframe(df) - + with open(infile) as f: + text_splitted = f.read().splitlines() + first_char = text_splitted[0][0] + + if first_char != '#': + df = pd.read_csv(infile) + df['time'] = pd.to_datetime(df['time']).dt.tz_localize(None) + df = df.set_index('time') + ds = xr.Dataset.from_dataframe(df) + else: + ds = readNead(infile) + f.close() elif infile.split('.')[-1].lower() in 'nc': ds = xr.open_dataset(infile) + for varname in ds.variables: + if 'encoding' in ds[varname].attrs: + del ds[varname].attrs['encoding'] try: name = ds.attrs['station_name'] @@ -39,92 +119,164 @@ def loadArr(infile): print(f'{name} array loaded from {infile}') return ds, name + + +def gcnet_postprocessing(l3): + file_path = pkg_resources.resource_stream('pypromice', 'ressources/variable_aliases_GC-Net.csv') + + var_name = pd.read_csv(file_path) + var_name = var_name.set_index('old_name').GEUS_name + msk = [v for v in var_name.index if v in l3.data_vars] + var_name = var_name.loc[msk].to_dict() + l3['TA1'] = l3['TA1'].combine_first(l3['TA3']) + l3['TA2'] = l3['TA2'].combine_first(l3['TA4']) + + l3=l3.rename(var_name) + l3=l3.rename({'timestamp':'time'}) + return l3 + +# will be used in the future +# def aligning_surface_heights(l3m, l3): + # df_aux['z_surf_combined'] = \ + # df_aux['z_surf_combined'] \ + # - df_aux.loc[df_aux.z_surf_combined.last_valid_index(), 'z_surf_combined'] \ + # + df_v6.loc[df_v6.z_surf_combined.first_valid_index(), 'z_surf_combined'] + + # if s == 'Swiss Camp 10m': + # df.loc[:df.HS_combined.first_valid_index(), 'HS_combined'] = \ + # df2.loc[:df.HS_combined.first_valid_index(), 'HS_combined'] \ + # - df2.loc[df2.HS_combined.last_valid_index(), 'HS_combined'] \ + # + df.loc[df.HS_combined.first_valid_index(), 'HS_combined'] + + # df.loc[df.HS_combined.diff()==0,'HS_combined'] = np.nan + + # fit = np.polyfit(df.loc[df.HS_combined.notnull(),:].index.astype('int64'), + # df.loc[df.HS_combined.notnull(),'HS_combined'], 1) + # fit_fn = np.poly1d(fit) + + # df['HS_combined'] = df['HS_combined'].values \ + # - fit_fn( + # df_in.loc[[df_in.z_surf_combined.first_valid_index()],:].index.astype('int64')[0] + # ) + df_in.loc[df_in.z_surf_combined.first_valid_index(), 'z_surf_combined'] + # return l3m + +def build_station_dict(config_folder): + station_dict = {} + + # Iterate through all files in the given folder + for filename in os.listdir(config_folder): + if filename.endswith(".toml"): + # Construct full file path + file_path = 
os.path.join(config_folder, filename) + + # Load TOML file + with open(file_path, 'r') as file: + data = toml.load(file) + station_key = next(iter(data)) + station_data = data[station_key] + + if station_data["station_site"] in station_dict: + station_dict[station_data["station_site"]].append(station_key) + else: + station_dict[station_data["station_site"]] = [station_key] + + return station_dict + def join_l3(): - args = parse_arguments_join() + args = parse_arguments_joinl3() + logging.basicConfig( + format="%(asctime)s; %(levelname)s; %(name)s; %(message)s", + level=logging.INFO, + stream=sys.stdout, + ) - # Check files - if os.path.isfile(args.file1) and os.path.isfile(args.file2): + station_dict = build_station_dict(args.config_folder) - # Load data arrays - ds1, n1 = loadArr(args.file1) - ds2, n2 = loadArr(args.file2) + l3m = xr.Dataset() + for stid in station_dict[args.site]: + logger.info(stid) - # Check stations match - if n1.lower() == n2.lower(): - - # Merge arrays - print(f'Combining {args.file1} with {args.file2}...') - name = n1 - all_ds = ds1.combine_first(ds2) - - # Re-calculate corrected precipitation - if hasattr(all_ds, 'precip_u_cor'): - if ~all_ds['precip_u_cor'].isnull().all(): - all_ds['precip_u_cor'], _ = correctPrecip(all_ds['precip_u'], - all_ds['wspd_u']) - if hasattr(all_ds, 'precip_l_cor'): - if ~all_ds['precip_l_cor'].isnull().all(): - all_ds['precip_l_cor'], _ = correctPrecip(all_ds['precip_l'], - all_ds['wspd_l']) + is_promice = False + is_gcnet = False + filepath = os.path.join(args.folder_l3, stid, stid+'_hour.nc') + if os.path.isfile(filepath): + is_promice = True else: - print(f'Mismatched station names {n1}, {n2}') - exit() - - elif os.path.isfile(args.file1): - ds1, name = loadArr(args.file1) - print(f'Only one file found {args.file1}...') - all_ds = ds1 - - elif os.path.isfile(args.file2): - ds2, name = loadArr(args.file2) - print(f'Only one file found {args.file2}...') - all_ds = ds2 - - else: - print(f'Invalid files {args.file1}, {args.file2}') - exit() - - # Get hourly, daily and monthly datasets - print('Resampling L3 data to hourly, daily and monthly resolutions...') - l3_h = resampleL3(all_ds, '60min') - l3_d = resampleL3(all_ds, '1D') - l3_m = resampleL3(all_ds, 'M') - - print(f'Adding variable information from {args.variables}...') + filepath = os.path.join(args.folder_gcnet, stid+'.csv') + if os.path.isfile(filepath): + is_gcnet = True + if not is_promice and not is_gcnet: + logger.info(stid+' not found either in '+args.folder_l3+' or '+args.folder_gcnet) + continue + + l3, _ = loadArr(filepath) - # Load variables look-up table - var = getVars(args.variables) - - # Round all values to specified decimals places - l3_h = roundValues(l3_h, var) - l3_d = roundValues(l3_d, var) - l3_m = roundValues(l3_m, var) + if is_gcnet: + l3 = gcnet_postprocessing(l3) - # Get columns to keep - if hasattr(all_ds, 'p_l'): - col_names = getColNames(var, 2, args.datatype.lower()) - else: - col_names = getColNames(var, 1, args.datatype.lower()) - - # Assign station id - for l in [l3_h, l3_d, l3_m]: - l.attrs['station_id'] = name - - # Assign metadata - print(f'Adding metadata from {args.metadata}...') - m = getMeta(args.metadata) - l3_h = addMeta(l3_h, m) - l3_d = addMeta(l3_d, m) - l3_m = addMeta(l3_m, m) - - # Set up output path - out = os.path.join(args.outpath, name) - - # Write to files - writeAll(out, name, l3_h, l3_d, l3_m, col_names) - print(f'Files saved to {os.path.join(out, name)}...') + # lat, lon and alt should be just variables, not coordinates + if 
'lat' in l3.keys(): + l3 = l3.reset_coords(['lat', 'lon', 'alt']) + + if len(l3m)==0: + # saving attributes of station under an attribute called $stid + l3m.attrs[stid] = l3.attrs.copy() + # then stripping attributes + attrs_list = list(l3.attrs.keys()) + for k in attrs_list: + del l3.attrs[k] + + l3m = l3.copy() + else: + # if l3 (older data) is missing variables compared to l3m (newer data) + # , then we fill them with nan + for v in l3m.data_vars: + if v not in l3.data_vars: + l3[v] = l3.t_u*np.nan + + # if l3 (older data) has variables that does not have l3m (newer data) + # then they are removed from l3 + list_dropped = [] + for v in l3.data_vars: + if v not in l3m.data_vars: + if v != 'z_stake': + list_dropped.append(v) + l3 = l3.drop(v) + else: + l3m[v] = ('time', l3m.t_u.data*np.nan) + logger.info('Unused variables in older dataset: '+' '.join(list_dropped)) + + # saving attributes of station under an attribute called $stid + st_attrs = l3m.attrs.get('stations_attributes', {}) + st_attrs[stid] = l3.attrs.copy() + l3m.attrs["stations_attributes"] = st_attrs + + # then stripping attributes + attrs_list = list(l3.attrs.keys()) + for k in attrs_list: + del l3.attrs[k] + + l3m.attrs['stations_attributes'][stid]['first_timestamp'] = l3.time.isel(time=0).dt.strftime( date_format='%Y-%m-%d %H:%M:%S').item() + l3m.attrs['stations_attributes'][stid]['last_timestamp'] = l3m.time.isel(time=0).dt.strftime( date_format='%Y-%m-%d %H:%M:%S').item() + + # merging by time block + l3m = xr.concat((l3.sel( + time=slice(l3.time.isel(time=0), + l3m.time.isel(time=0)) + ), l3m), dim='time') + + # Assign site id + l3m.attrs['site_id'] = args.site + l3m.attrs['stations'] = station_dict[args.site] + l3m.attrs['level'] = 'L3' + if args.outpath is not None: + prepare_and_write(l3m, args.outpath, args.variables, args.metadata, '60min') + prepare_and_write(l3m, args.outpath, args.variables, args.metadata, '1D') + prepare_and_write(l3m, args.outpath, args.variables, args.metadata, 'M') + if __name__ == "__main__": join_l3() + diff --git a/src/pypromice/process/load.py b/src/pypromice/process/load.py new file mode 100644 index 00000000..a2334096 --- /dev/null +++ b/src/pypromice/process/load.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Load module +""" +import pkg_resources, toml, os, logging +import pandas as pd +import xarray as xr +from typing import Sequence, Optional +from datetime import timedelta +logger = logging.getLogger(__name__) + +def getConfig(config_file, inpath, default_columns: Sequence[str] = ('msg_lat', 'msg_lon')): + '''Load configuration from .toml file. 
PROMICE .toml files support defining + features at the top level which apply to all nested properties, but do not + overwrite nested properties if they are defined + + Parameters + ---------- + config_file : str + TOML file path + inpath : str + Input folder directory where L0 files can be found + + Returns + ------- + conf : dict + Configuration dictionary + ''' + conf = toml.load(config_file) # Move all top level keys to nested properties, + top = [_ for _ in conf.keys() if not type(conf[_]) is dict] # if they are not already defined in the nested properties + subs = [_ for _ in conf.keys() if type(conf[_]) is dict] # Insert the section name (config_file) as a file property and config file + for s in subs: + for t in top: + if t not in conf[s].keys(): + conf[s][t] = conf[t] + + conf[s]['conf'] = config_file + conf[s]['file'] = os.path.join(inpath, s) + conf[s]["columns"].extend(default_columns) + + for t in top: conf.pop(t) # Delete all top level keys beause each file + # should carry all properties with it + for k in conf.keys(): # Check required fields are present + for field in ["columns", "station_id", "format", "skiprows"]: + assert(field in conf[k].keys()), field+" not in config keys" + return conf + +def getL0(infile, nodata, cols, skiprows, file_version, + delimiter=',', comment='#', time_offset: Optional[float] = None) -> xr.Dataset: + ''' Read L0 data file into pandas DataFrame object + + Parameters + ---------- + infile : str + L0 file path + nodata : list + List containing value for nan values and reassigned value + cols : list + List of columns in file + skiprows : int + Skip rows value + file_version : int + Version of L0 file + delimiter : str + String delimiter for L0 file + comment : str + Notifier of commented sections in L0 file + time_offset : Optional[float] + Time offset in hours for correcting for non utc time data. 
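+        For example, time_offset=-2 shifts timestamps logged in UTC+2 back to
+        UTC; the offset is simply added to the time index as a timedelta
+        (illustrative value, no shift is applied when None).
+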
+ Returns + ------- + ds : xarray.Dataset + L0 Dataset + ''' + if file_version == 1: + df = pd.read_csv(infile, comment=comment, index_col=0, + na_values=nodata, names=cols, + sep=delimiter, + skiprows=skiprows, skip_blank_lines=True, + usecols=range(len(cols)), + low_memory=False) + df['time'] = pd.to_datetime( + df.year.astype(str) \ + + df.doy.astype(str).str.zfill(3) \ + + df.hhmm.astype(str).str.zfill(4), + format='%Y%j%H%M' + ) + df = df.set_index('time') + + else: + df = pd.read_csv(infile, comment=comment, index_col=0, + na_values=nodata, names=cols, parse_dates=True, + sep=delimiter, skiprows=skiprows, + skip_blank_lines=True, + usecols=range(len(cols)), + low_memory=False) + try: + df.index = pd.to_datetime(df.index) + except ValueError as e: + logger.info("\n"+ infile) + logger.info("\nValueError:") + logger.info(e) + logger.info('\t\t> Trying pd.to_datetime with format=mixed') + try: + df.index = pd.to_datetime(df.index, format='mixed') + except Exception as e: + logger.info("\nDateParseError:") + logger.info(e) + logger.info('\t\t> Trying again removing apostrophes in timestamp (old files format)') + df.index = pd.to_datetime(df.index.str.replace("\"","")) + + if time_offset is not None: + df.index = df.index + timedelta(hours=time_offset) + + # Drop SKIP columns + for c in df.columns: + if c[0:4] == 'SKIP': + df.drop(columns=c, inplace=True) + + # Carry relevant metadata with ds + ds = xr.Dataset.from_dataframe(df) + ds.attrs['level'] = 'L0' + return ds + +def getVars(v_file=None): + '''Load variables.csv file + + Parameters + ---------- + v_file : str + Variable lookup table file path + + Returns + ------- + pandas.DataFrame + Variables dataframe + ''' + if v_file is None: + with pkg_resources.resource_stream('pypromice', 'ressources/variables.csv') as stream: + return pd.read_csv(stream, index_col=0, comment="#", encoding='utf-8') + else: + return pd.read_csv(v_file, index_col=0, comment="#") + + +def getMeta(m_file=None, delimiter=','): #TODO change to DataFrame output to match variables.csv + '''Load metadata table + + Parameters + ---------- + m_file : str + Metadata file path + delimiter : str + Metadata character delimiter. The default is "," + + Returns + ------- + meta : dict + Metadata dictionary + ''' + meta={} + if m_file is None: + with pkg_resources.resource_stream('pypromice', 'ressources/file_attributes.csv') as stream: + lines = stream.read().decode("utf-8") + lines = lines.split("\n") + else: + with open(m_file, 'r') as f: + lines = f.readlines() + for l in lines[1:]: + try: + meta[l.split(',')[0]] = l.split(delimiter)[1].split('\n')[0].replace(';',',') + except IndexError: + pass + return meta \ No newline at end of file diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py new file mode 100644 index 00000000..daef8624 --- /dev/null +++ b/src/pypromice/process/resample.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Jun 10 10:58:39 2024 + +@author: pho +""" +import logging +import numpy as np +import xarray as xr +logger = logging.getLogger(__name__) + +def resample_dataset(ds_h, t): + '''Resample L2 AWS data, e.g. hourly to daily average. This uses pandas + DataFrame resampling at the moment as a work-around to the xarray Dataset + resampling. 
As stated, xarray resampling is a lengthy process that takes + ~2-3 minutes per operation: ds_d = ds_h.resample({'time':"1D"}).mean() + This has now been fixed, so needs implementing: + https://github.com/pydata/xarray/issues/4498#event-6610799698 + + Parameters + ---------- + ds_h : xarray.Dataset + L3 AWS dataset either at 10 min (for raw data) or hourly (for tx data) + t : str + Resample factor, same variable definition as in + pandas.DataFrame.resample() + + Returns + ------- + ds_d : xarray.Dataset + L3 AWS dataset resampled to the frequency defined by t + ''' + df_d = ds_h.to_dataframe().resample(t).mean() + + # recalculating wind direction from averaged directional wind speeds + for var in ['wdir_u','wdir_l']: + if var in df_d.columns: + if ('wspd_x_'+var.split('_')[1] in df_d.columns) & ('wspd_x_'+var.split('_')[1] in df_d.columns): + df_d[var] = _calcWindDir(df_d['wspd_x_'+var.split('_')[1]], + df_d['wspd_y_'+var.split('_')[1]]) + else: + logger.info(var+' in dataframe but not wspd_x_'+var.split('_')[1]+' nor wspd_x_'+var.split('_')[1]) + + # recalculating relative humidity from average vapour pressure and average + # saturation vapor pressure + for var in ['rh_u','rh_l']: + lvl = var.split('_')[1] + if var in df_d.columns: + if ('t_'+lvl in ds_h.keys()): + es_wtr, es_cor = calculateSaturationVaporPressure(ds_h['t_'+lvl]) + p_vap = ds_h[var] / 100 * es_wtr + + df_d[var] = (p_vap.to_series().resample(t).mean() \ + / es_wtr.to_series().resample(t).mean())*100 + if var+'_cor' in df_d.keys(): + df_d[var+'_cor'] = (p_vap.to_series().resample(t).mean() \ + / es_cor.to_series().resample(t).mean())*100 + + vals = [xr.DataArray(data=df_d[c], dims=['time'], + coords={'time':df_d.index}, attrs=ds_h[c].attrs) for c in df_d.columns] + ds_d = xr.Dataset(dict(zip(df_d.columns,vals)), attrs=ds_h.attrs) + return ds_d + + +def calculateSaturationVaporPressure(t, T_0=273.15, T_100=373.15, es_0=6.1071, + es_100=1013.246, eps=0.622): + '''Calculate specific humidity + + Parameters + ---------- + T_0 : float + Steam point temperature. Default is 273.15. 
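+        (273.15 K is the ice point; the steam point, 373.15 K, is T_100 below.
+        As a quick check, at t = 0 degrees C both returned saturation vapour
+        pressures are ~6.11 hPa.)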
+ T_100 : float + Steam point temperature in Kelvin + t : xarray.DataArray + Air temperature + es_0 : float + Saturation vapour pressure at the melting point (hPa) + es_100 : float + Saturation vapour pressure at steam point temperature (hPa) + + Returns + ------- + xarray.DataArray + Saturation vapour pressure with regard to water above 0 C (hPa) + xarray.DataArray + Saturation vapour pressure where subfreezing timestamps are with regards to ice (hPa) + ''' + # Saturation vapour pressure above 0 C (hPa) + es_wtr = 10**(-7.90298 * (T_100 / (t + T_0) - 1) + 5.02808 * np.log10(T_100 / (t + T_0)) + - 1.3816E-7 * (10**(11.344 * (1 - (t + T_0) / T_100)) - 1) + + 8.1328E-3 * (10**(-3.49149 * (T_100 / (t + T_0) -1)) - 1) + np.log10(es_100)) + + # Saturation vapour pressure below 0 C (hPa) + es_ice = 10**(-9.09718 * (T_0 / (t + T_0) - 1) - 3.56654 + * np.log10(T_0 / (t + T_0)) + 0.876793 + * (1 - (t + T_0) / T_0) + + np.log10(es_0)) + + # Saturation vapour pressure (hPa) + es_cor = xr.where(t < 0, es_ice, es_wtr) + + return es_wtr, es_cor + +def _calcWindDir(wspd_x, wspd_y): + '''Calculate wind direction in degrees + + Parameters + ---------- + wspd_x : xarray.DataArray + Wind speed in X direction + wspd_y : xarray.DataArray + Wind speed in Y direction + + Returns + ------- + wdir : xarray.DataArray + Wind direction''' + deg2rad = np.pi / 180 + rad2deg = 1 / deg2rad + wdir = np.arctan2(wspd_x, wspd_y) * rad2deg + wdir = (wdir + 360) % 360 + return wdir diff --git a/src/pypromice/process/test.py b/src/pypromice/process/test.py new file mode 100644 index 00000000..b9e15807 --- /dev/null +++ b/src/pypromice/process/test.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Processing test module +""" +from pypromice.process.aws import AWS +from pypromice.process.load import getVars, getMeta +from pypromice.process.utilities import addVars, addMeta +import xarray as xr +import pandas as pd +import unittest, datetime, os +from datetime import timedelta + +class TestProcess(unittest.TestCase): + + def testgetVars(self): + '''Test variable table lookup retrieval''' + v = getVars() + self.assertIsInstance(v, pd.DataFrame) + self.assertTrue(v.columns[0] in 'standard_name') + self.assertTrue(v.columns[2] in 'units') + + def testgetMeta(self): + '''Test AWS names retrieval''' + m = getMeta() + self.assertIsInstance(m, dict) + self.assertTrue('references' in m) + + def testAddAll(self): + '''Test variable and metadata attributes added to Dataset''' + d = xr.Dataset() + v = getVars() + att = list(v.index) + att1 = ['gps_lon', 'gps_lat', 'gps_alt', 'albedo', 'p'] + for a in att: + d[a]=[0,1] + for a in att1: + d[a]=[0,1] + d['time'] = [datetime.datetime.now(), + datetime.datetime.now()-timedelta(days=365)] + d.attrs['station_id']='TEST' + meta = getMeta() + d = addVars(d, v) + d = addMeta(d, meta) + self.assertTrue(d.attrs['station_id']=='TEST') + self.assertIsInstance(d.attrs['references'], str) + + def testL0toL3(self): + '''Test L0 to L3 processing''' + try: + import pypromice + pAWS = AWS(os.path.join(os.path.dirname(pypromice.__file__),'test/test_config1.toml'), + os.path.join(os.path.dirname(pypromice.__file__),'test')) + except: + pAWS = AWS('../test/test_config1.toml', '../test/') + pAWS.process() + self.assertIsInstance(pAWS.L2, xr.Dataset) + self.assertTrue(pAWS.L2.attrs['station_id']=='TEST1') + + def testCLIgetl2(self): + '''Test get_l2 CLI''' + exit_status = os.system('get_l2 -h') + self.assertEqual(exit_status, 0) + + def testCLIjoinl2(self): + '''Test join_l2 CLI''' + 
exit_status = os.system('join_l2 -h') + self.assertEqual(exit_status, 0) + + def testCLIgetl2tol3(self): + '''Test get_l2tol3 CLI''' + exit_status = os.system('get_l2tol3 -h') + self.assertEqual(exit_status, 0) + + def testCLIjoinl3(self): + '''Test join_l3 CLI''' + exit_status = os.system('join_l3 -h') + self.assertEqual(exit_status, 0) + +if __name__ == "__main__": + + unittest.main() diff --git a/src/pypromice/process/utilities.py b/src/pypromice/process/utilities.py new file mode 100644 index 00000000..4bcca216 --- /dev/null +++ b/src/pypromice/process/utilities.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Utilities module for data formatting, populating and metadata handling +""" +import datetime, uuid +from importlib import metadata +import pandas as pd +import numpy as np + +def roundValues(ds, df, col='max_decimals'): + '''Round all variable values in data array based on pre-defined rounding + value in variables look-up table DataFrame + + Parameters + ---------- + ds : xr.Dataset + Dataset to round values in + df : pd.Dataframe + Variable look-up table with rounding values + col : str + Column in variable look-up table that contains rounding values. The + default is "max_decimals" + ''' + df = df[col] + df = df.dropna(how='all') + for var in df.index: + if var not in list(ds.variables): + continue + if df[var] is not np.nan: + ds[var] = ds[var].round(decimals=int(df[var])) + return ds + +def reformat_time(dataset): + '''Re-format time''' + t = dataset['time'].values + dataset['time'] = list(t) + return dataset + +def reformat_lon(dataset, exempt=['UWN', 'Roof_GEUS', 'Roof_PROMICE']): + '''Switch gps_lon to negative values (degrees_east). We do this here, and + NOT in addMeta, otherwise we switch back to positive when calling getMeta + in joinL2''' + if 'station_id' in dataset.attrs.keys(): + id = dataset.attrs['station_id'] + else: + id = dataset.attrs['site_id'] + + if id not in exempt: + if 'gps_lon' not in dataset.keys(): + print("?????????", id, "missing gps_lon") + return dataset + dataset['gps_lon'] = dataset['gps_lon'] * -1 + return dataset + +def popCols(ds, names): + '''Populate dataset with all given variable names + + Parammeters + ----------- + ds : xr.Dataset + Dataset + names : list + List of variable names to populate + ''' + for v in names: + if v not in list(ds.variables): + ds[v] = (('time'), np.arange(ds['time'].size)*np.nan) + return ds + +def addBasicMeta(ds, vars_df): + ''' Use a variable lookup table DataFrame to add the basic metadata + to the xarray dataset. 
This is later amended to finalise L3 + + Parameters + ---------- + ds : xr.Dataset + Dataset to add metadata to + vars_df : pd.DataFrame + Metadata dataframe + + Returns + ------- + ds : xr.Dataset + Dataset with added metadata + ''' + for v in vars_df.index: + if v == 'time': continue # coordinate variable, not normal var + if v not in list(ds.variables): continue + for c in ['standard_name', 'long_name', 'units']: + if isinstance(vars_df[c][v], float) and np.isnan(vars_df[c][v]): continue + ds[v].attrs[c] = vars_df[c][v] + return ds + +def populateMeta(ds, conf, skip): + '''Populate L0 Dataset with metadata dictionary + + Parameters + ---------- + ds : xarray.Dataset + L0 dataset + conf : dict + Metadata dictionary + skip : list + List of column names to skip parsing to metadata + + Returns + ------- + ds : xarray.Dataset + L0 dataset with metadata populated as Dataset attributes + ''' + # skip = ["columns", "skiprows"] + for k in conf.keys(): + if k not in skip: ds.attrs[k] = conf[k] + return ds + + +def addVars(ds, variables): + '''Add variable attributes from file to dataset + + Parameters + ---------- + ds : xarray.Dataset + Dataset to add variable attributes to + variables : pandas.DataFrame + Variables lookup table file + + Returns + ------- + ds : xarray.Dataset + Dataset with metadata + ''' + for k in ds.keys(): + if k not in variables.index: continue + ds[k].attrs['standard_name'] = variables.loc[k]['standard_name'] + ds[k].attrs['long_name'] = variables.loc[k]['long_name'] + ds[k].attrs['units'] = variables.loc[k]['units'] + ds[k].attrs['coverage_content_type'] = variables.loc[k]['coverage_content_type'] + ds[k].attrs['coordinates'] = variables.loc[k]['coordinates'] + return ds + +def addMeta(ds, meta): + '''Add metadata attributes from file to dataset + + Parameters + ---------- + ds : xarray.Dataset + Dataset to add metadata attributes to + meta : dict + Metadata file + + Returns + ------- + ds : xarray.Dataset + Dataset with metadata + ''' + if 'gps_lon' in ds.keys(): + ds['lon'] = ds['gps_lon'].mean() + ds['lon'].attrs = ds['gps_lon'].attrs + + ds['lat'] = ds['gps_lat'].mean() + ds['lat'].attrs = ds['gps_lat'].attrs + + ds['alt'] = ds['gps_alt'].mean() + ds['alt'].attrs = ds['gps_alt'].attrs + + # for k in ds.keys(): # for each var + # if 'units' in ds[k].attrs: + # if ds[k].attrs['units'] == 'C': + # ds[k].attrs['units'] = 'degrees_C' + + # https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3#geospatial_bounds + if 'station_id' in ds.attrs.keys(): + ds.attrs['id'] = 'dk.geus.promice:' + str(uuid.uuid3(uuid.NAMESPACE_DNS, ds.attrs['station_id'])) + else: + ds.attrs['id'] = 'dk.geus.promice:' + str(uuid.uuid3(uuid.NAMESPACE_DNS, ds.attrs['site_id'])) + ds.attrs['history'] = 'Generated on ' + datetime.datetime.utcnow().isoformat() + ds.attrs['date_created'] = str(datetime.datetime.now().isoformat()) + ds.attrs['date_modified'] = ds.attrs['date_created'] + ds.attrs['date_issued'] = ds.attrs['date_created'] + ds.attrs['date_metadata_modified'] = ds.attrs['date_created'] + + ds.attrs['geospatial_bounds'] = "POLYGON((" + \ + f"{ds['lat'].min().values} {ds['lon'].min().values}, " + \ + f"{ds['lat'].min().values} {ds['lon'].max().values}, " + \ + f"{ds['lat'].max().values} {ds['lon'].max().values}, " + \ + f"{ds['lat'].max().values} {ds['lon'].min().values}, " + \ + f"{ds['lat'].min().values} {ds['lon'].min().values}))" + + ds.attrs['geospatial_lat_min'] = str(ds['lat'].min().values) + ds.attrs['geospatial_lat_max'] = str(ds['lat'].max().values) + 
ds.attrs['geospatial_lon_min'] = str(ds['lon'].min().values) + ds.attrs['geospatial_lon_max'] = str(ds['lon'].max().values) + ds.attrs['geospatial_vertical_min'] = str(ds['alt'].min().values) + ds.attrs['geospatial_vertical_max'] = str(ds['alt'].max().values) + ds.attrs['geospatial_vertical_positive'] = 'up' + ds.attrs['time_coverage_start'] = str(ds['time'][0].values) + ds.attrs['time_coverage_end'] = str(ds['time'][-1].values) + + try: + ds.attrs['source']= 'pypromice v' + str(metadata.version('pypromice')) + except: + ds.attrs['source'] = 'pypromice' + + # https://www.digi.com/resources/documentation/digidocs/90001437-13/reference/r_iso_8601_duration_format.htm + try: + ds.attrs['time_coverage_duration'] = str(pd.Timedelta((ds['time'][-1] - ds['time'][0]).values).isoformat()) + ds.attrs['time_coverage_resolution'] = str(pd.Timedelta((ds['time'][1] - ds['time'][0]).values).isoformat()) + except: + ds.attrs['time_coverage_duration'] = str(pd.Timedelta(0).isoformat()) + ds.attrs['time_coverage_resolution'] = str(pd.Timedelta(0).isoformat()) + + # Note: int64 dtype (long int) is incompatible with OPeNDAP access via THREDDS for NetCDF files + # See https://stackoverflow.com/questions/48895227/output-int32-time-dimension-in-netcdf-using-xarray + ds.time.encoding["dtype"] = "i4" # 32-bit signed integer + #ds.time.encoding["calendar"] = 'proleptic_gregorian' # this is default + + # Load metadata attributes and add to Dataset + [_addAttr(ds, key, value) for key,value in meta.items()] + + # Check attribute formating + for k,v in ds.attrs.items(): + if not isinstance(v, str) or not isinstance(v, int): + ds.attrs[k]=str(v) + return ds + +def _addAttr(ds, key, value): + '''Add attribute to xarray dataset + + ds : xr.Dataset + Dataset to add attribute to + key : str + Attribute name, with "." 
denoting variable attributes + value : str/int + Value for attribute''' + if len(key.split('.')) == 2: + try: + ds[key.split('.')[0]].attrs[key.split('.')[1]] = str(value) + except: + pass + # logger.info(f'Unable to add metadata to {key.split(".")[0]}') + else: + ds.attrs[key] = value diff --git a/src/pypromice/process/variables.csv b/src/pypromice/process/variables.csv deleted file mode 100644 index 2960259b..00000000 --- a/src/pypromice/process/variables.csv +++ /dev/null @@ -1,92 +0,0 @@ -field,standard_name,long_name,units,lo,hi,OOL,station_type,data_type,max_decimals,coverage_content_type,coordinates,instantaneous_hourly,comment -time,time,Time,yyyy-mm-dd HH:MM:SS,,,,all,all,,physicalMeasurement,time lat lon alt,, -rec,record,Record,-,,,,all,,0,referenceInformation,time lat lon alt,,L0 only -p_u,air_pressure,Air pressure (upper boom),hPa,650,1100,z_pt z_pt_cor dshf_u dlhf_u qh_u,all,all,4,physicalMeasurement,time lat lon alt,False, -p_l,air_pressure,Air pressure (lower boom),hPa,650,1100,dshf_l dlhf_l qh_l,two-boom,all,4,physicalMeasurement,time lat lon alt,False, -t_u,air_temperature,Air temperature (upper boom),degrees_C,-80,40,rh_u_cor cc dsr_cor usr_cor z_boom z_stake dshf_u dlhf_u qh_u,all,all,4,physicalMeasurement,time lat lon alt,False, -t_l,air_temperature,Air temperature (lower boom),degrees_C,-80,40,rh_l_cor z_boom_l dshf_l dlhf_l qh_l ,two-boom,all,4,physicalMeasurement,time lat lon alt,False,PT100 temperature at boom -rh_u,relative_humidity,Relative humidity (upper boom),%,0,100,rh_u_cor,all,all,4,physicalMeasurement,time lat lon alt,False, -rh_u_cor,relative_humidity_corrected,Relative humidity (upper boom) - corrected,%,0,150,dshf_u dlhf_u qh_u,all,all,4,modelResult,time lat lon alt,False, -qh_u,specific_humidity,Specific humidity (upper boom),kg/kg,0,100,,all,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) -rh_l,relative_humidity,Relative humidity (lower boom),%,0,100,rh_l_cor,two-boom,all,4,physicalMeasurement,time lat lon alt,False, -rh_l_cor,relative_humidity_corrected,Relative humidity (lower boom) - corrected,%,0,150,dshf_l dlhf_l qh_l,two-boom,all,4,modelResult,time lat lon alt,False, -qh_l,specific_humidity,Specific humidity (lower boom),kg/kg,0,100,,two-boom,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) -wspd_u,wind_speed,Wind speed (upper boom),m s-1,0,100,"wdir_u wspd_x_u wspd_y_u dshf_u dlhf_u qh_u, precip_u",all,all,4,physicalMeasurement,time lat lon alt,False, -wspd_l,wind_speed,Wind speed (lower boom),m s-1,0,100,"wdir_l wspd_x_l wspd_y_l dshf_l dlhf_l qh_l , precip_l",two-boom,all,4,physicalMeasurement,time lat lon alt,False, -wdir_u,wind_from_direction,Wind from direction (upper boom),degrees,1,360,wspd_x_u wspd_y_u,all,all,4,physicalMeasurement,time lat lon alt,False, -wdir_std_u,wind_from_direction_standard_deviation,Wind from direction (standard deviation),degrees,,,,one-boom,,4,qualityInformation,time lat lon alt,False,L0 only -wdir_l,wind_from_direction,Wind from direction (lower boom),degrees,1,360,wspd_x_l wspd_y_l,two-boom,all,4,physicalMeasurement,time lat lon alt,False, -wspd_x_u,wind_speed_from_x_direction,Wind speed from x direction (upper boom),m s-1,-100,100,wdir_u wspd_u,all,all,4,modelResult,time lat lon alt,False,L0 only -wspd_y_u,wind_speed_from_y_direction,Wind speed from y direction (upper boom),m s-1,-100,100,wdir_u wspd_u,all,all,4,modelResult,time lat lon alt,False,L0 only -wspd_x_l,wind_speed_from_x_direction,Wind speed from x direction (lower boom),m s-1,-100,100,wdir_l 
wspd_l,two-boom,all,4,modelResult,time lat lon alt,False,L0 only -wspd_y_l,wind_speed_from_y_direction,Wind speed from y direction (lower boom),m s-1,-100,100,wdir_l wspd_l,two-boom,all,4,modelResult,time lat lon alt,False,L0 only -dsr,surface_downwelling_shortwave_flux,Downwelling shortwave radiation,W m-2,-10,1500,albedo dsr_cor usr_cor,all,all,4,physicalMeasurement,time lat lon alt,False,"Actually radiation_at_sensor, not flux. Units 1E-5 V. Engineering units." -dsr_cor,surface_downwelling_shortwave_flux_corrected,Downwelling shortwave radiation - corrected,W m-2,,,,all,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) -usr,surface_upwelling_shortwave_flux,Upwelling shortwave radiation,W m-2,-10,1000,albedo dsr_cor usr_cor,all,all,4,physicalMeasurement,time lat lon alt,False, -usr_cor,surface_upwelling_shortwave_flux_corrected,Upwelling shortwave radiation - corrected,W m-2,0,1000,,all,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) -albedo,surface_albedo,Albedo,-,,,,all,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) -dlr,surface_downwelling_longwave_flux,Downwelling longwave radiation,W m-2,50,500,albedo dsr_cor usr_cor cc t_surf,all,all,4,physicalMeasurement,time lat lon alt,False, -ulr,surface_upwelling_longwave_flux,Upwelling longwave radiation,W m-2,50,500,t_surf,all,all,4,physicalMeasurement,time lat lon alt,False, -cc,cloud_area_fraction,Cloud cover,%,,,,all,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) -t_surf,surface_temperature,Surface temperature,C,-80,40,dshf_u dlhf_u qh_u,all,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) -dlhf_u,surface_downward_latent_heat_flux,Latent heat flux (upper boom),W m-2,,,,all,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) -dlhf_l,surface_downward_latent_heat_flux,Latent heat flux (lower boom),W m-2,,,,two-boom,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) -dshf_u,surface_downward_sensible_heat_flux,Sensible heat flux (upper boom),W m-2,,,,all,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) -dshf_l,surface_downward_sensible_heat_flux,Sensible heat flux (lower boom),W m-2,,,,two-boom,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) -z_boom_u,distance_to_surface_from_boom,Upper boom height,m,0.3,10,dshf_u dlhf_u qh_u,all,all,4,physicalMeasurement,time lat lon alt,True, -z_boom_q_u,distance_to_surface_from_boom_quality,Upper boom height (quality),-,,,,all,,4,qualityInformation,time lat lon alt,True,L0 only -z_boom_l,distance_to_surface_from_boom,Lower boom height,m,0.3,5,dshf_l dlhf_l qh_l,two-boom,all,4,physicalMeasurement,time lat lon alt,True, -z_boom_q_l,distance_to_surface_from_boom_quality,Lower boom height (quality),-,,,,two-boom,,4,qualityInformation,time lat lon alt,True,L0 only -z_stake,distance_to_surface_from_stake_assembly,Stake height,m,0.3,8,,one-boom,all,4,physicalMeasurement,time lat lon alt,True,HeightStakes(m) -z_stake_q,distance_to_surface_from_stake_assembly_quality,Stake height (quality),-,,,,one-boom,,4,qualityInformation,time lat lon alt,True,L0 only -z_pt,depth_of_pressure_transducer_in_ice,Depth of pressure transducer in ice,m,0,30,z_pt_cor,one-boom,all,4,physicalMeasurement,time lat lon alt,False,DepthPressureTransducer(m) -z_pt_cor,depth_of_pressure_transducer_in_ice_corrected,Depth of pressure transducer in ice - corrected,m,0,30,,one-boom,all,4,modelResult,time lat lon alt,False,Derived value (L2 or later) 
-precip_u,precipitation,Precipitation (upper boom) (cumulative solid & liquid),mm,0,,precip_u_cor precip_u_rate,all,all,4,physicalMeasurement,time lat lon alt,True,Without wind/undercatch correction -precip_u_cor,precipitation_corrected,Precipitation (upper boom) (cumulative solid & liquid) – corrected,mm,0,,,all,all,4,modelResult,time lat lon alt,True,With wind/undercatch correction -precip_u_rate,precipitation_rate,Precipitation rate (upper boom) (cumulative solid & liquid) – corrected,mm,0,,,all,all,4,modelResult,time lat lon alt,True,L0 only -precip_l,precipitation,Precipitation (lower boom) (cumulative solid & liquid),mm,0,,precip_l_cor precip_l_rate,two-boom,all,4,physicalMeasurement,time lat lon alt,True,Without wind/undercatch correction -precip_l_cor,precipitation_corrected,Precipitation (lower boom) (cumulative solid & liquid) – corrected,mm,0,,,two-boom,all,4,modelResult,time lat lon alt,True,With wind/undercatch correction -precip_l_rate,precipitation_rate,Precipitation rate (lower boom) (cumulative solid & liquid) – corrected,mm,0,,,two-boom,all,4,modelResult,time lat lon alt,True,L0 only -t_i_1,ice_temperature_at_t1,Ice temperature at sensor 1,degrees_C,-80,1,,all,all,4,physicalMeasurement,time lat lon alt,False,t1 is installed @ 1 m depth -t_i_2,ice_temperature_at_t2,Ice temperature at sensor 2,degrees_C,-80,1,,all,all,4,physicalMeasurement,time lat lon alt,False, -t_i_3,ice_temperature_at_t3,Ice temperature at sensor 3,degrees_C,-80,1,,all,all,4,physicalMeasurement,time lat lon alt,False, -t_i_4,ice_temperature_at_t4,Ice temperature at sensor 4,degrees_C,-80,1,,all,all,4,physicalMeasurement,time lat lon alt,False, -t_i_5,ice_temperature_at_t5,Ice temperature at sensor 5,degrees_C,-80,1,,all,all,4,physicalMeasurement,time lat lon alt,False, -t_i_6,ice_temperature_at_t6,Ice temperature at sensor 6,degrees_C,-80,1,,all,all,4,physicalMeasurement,time lat lon alt,False, -t_i_7,ice_temperature_at_t7,Ice temperature at sensor 7,degrees_C,-80,1,,all,all,4,physicalMeasurement,time lat lon alt,False, -t_i_8,ice_temperature_at_t8,Ice temperature at sensor 8,degrees_C,-80,1,,all,all,4,physicalMeasurement,time lat lon alt,False,t8 is installed @ 10 m depth -t_i_9,ice_temperature_at_t9,Ice temperature at sensor 9,degrees_C,-80,1,,two-boom,all,4,physicalMeasurement,time lat lon alt,False, -t_i_10,ice_temperature_at_t10,Ice temperature at sensor 10,degrees_C,-80,1,,two-boom,all,4,physicalMeasurement,time lat lon alt,False, -t_i_11,ice_temperature_at_t11,Ice temperature at sensor 11,degrees_C,-80,1,,two-boom,all,4,physicalMeasurement,time lat lon alt,False, -tilt_x,platform_view_angle_x,Tilt to east,degrees,-30,30,dsr_cor usr_cor albedo,all,all,4,physicalMeasurement,time lat lon alt,False, -tilt_y,platform_view_angle_y,Tilt to north,degrees,-30,30,dsr_cor usr_cor albedo,all,all,4,physicalMeasurement,time lat lon alt,False, -rot,platform_azimuth_angle,Station rotation from true North,degrees,0,360,,all,all,2,physicalMeasurement,time lat lon alt,False,v4 addition -gps_lat,gps_latitude,Latitude,degrees_north,50,83,,all,all,6,coordinate,time lat lon alt,True, -gps_lon,gps_longitude,Longitude,degrees_east,5,70,,all,all,6,coordinate,time lat lon alt,True, -gps_alt,gps_altitude,Altitude,m,0,3000,,all,all,2,coordinate,time lat lon alt,True, -gps_time,gps_time,GPS time,s,0,240000,,all,all,,coordinate,time lat lon alt,True, -gps_geoid,gps_geoid_separation,Height of EGM96 geoid over WGS84 ellipsoid,m,,,,one-boom,all,,physicalMeasurement,time lat lon alt,True, 
-gps_geounit,gps_geounit,GeoUnit,-,,,,all,,,qualityInformation,time lat lon alt,True,L0 only -gps_hdop,gps_hdop,GPS horizontal dillution of precision (HDOP),m,,,,all,all,2,qualityInformation,time lat lon alt,True,NMEA: Horizontal dilution of precision -gps_numsat,gps_numsat,GPS number of satellites,-,,,,,all,0,qualityInformation,time lat lon alt,True,L0 only -gps_q,gps_q,Quality,-,,,,,all,,qualityInformation,time lat lon alt,True,L0 only -lat,gps_mean_latitude,GPS mean latitude (from all time-series),degrees,,,,all,,6,modelResult,time lat lon alt,True, -lon,gps_mean_longitude,GPS mean longitude (from all time-series),degrees,,,,all,,6,modelResult,time lat lon alt,True, -alt,gps_mean_altitude,GPS mean altitude (from all time-series),degrees,,,,all,,6,modelResult,time lat lon alt,True, -batt_v,battery_voltage,Battery voltage,V,0,30,,all,all,2,physicalMeasurement,time lat lon alt,True, -batt_v_ini,,,-,0,30,,,all,2,physicalMeasurement,time lat lon alt,True,L0 only -batt_v_ss,battery_voltage_at_sample_start,Battery voltage (sample start),V,0,30,,,all,2,physicalMeasurement,time lat lon alt,True,L0 only -fan_dc_u,fan_current,Fan current (upper boom),mA,0,200,,all,all,2,physicalMeasurement,time lat lon alt,True, -fan_dc_l,fan_current,Fan current (lower boom),mA,0,200,,two-boom,all,2,physicalMeasurement,time lat lon alt,True, -freq_vw,frequency_of_precipitation_wire_vibration,Frequency of vibrating wire in precipitation gauge,Hz,0,10000,precip_u,,all,,physicalMeasurement,time lat lon alt,True,L0 only -t_log,temperature_of_logger,Logger temperature,degrees_C,-80,40,,one-boom,all,4,physicalMeasurement,time lat lon alt,True,LoggerTemperature(C) -t_rad,temperature_of_radiation_sensor,Radiation sensor temperature,degrees_C,-80,40,t_surf dlr ulr,all,all,4,physicalMeasurement,time lat lon alt,False, -p_i,air_pressure,Air pressure (instantaneous) minus 1000,hPa,-350,100,,all,all,4,physicalMeasurement,time lat lon alt,True,For meteorological observations -t_i,air_temperature,Air temperature (instantaneous),degrees_C,-80,40,,all,all,4,physicalMeasurement,time lat lon alt,True,"PT100 temperature at boom, for meteorological observations" -rh_i,relative_humidity,Relative humidity (instantaneous),%,0,150,rh_i_cor,all,all,4,physicalMeasurement,time lat lon alt,True,For meteorological observations -rh_i_cor,relative_humidity_corrected,Relative humidity (instantaneous) – corrected,%,0,100,,all,all,4,modelResult,time lat lon alt,True,For meteorological observations -wspd_i,wind_speed,Wind speed (instantaneous),m s-1,0,100,wdir_i wspd_x_i wspd_y_i,all,all,4,physicalMeasurement,time lat lon alt,True,For meteorological observations -wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,1,360,wspd_x_i wspd_y_i,all,all,4,physicalMeasurement,time lat lon alt,True,For meteorological observations -wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,-100,100,wdir_i wspd_i,all,all,4,modelResult,time lat lon alt,True,For meteorological observations -wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,-100,100,wdir_i wspd_i,all,all,4,modelResult,time lat lon alt,True,For meteorological observations -msg_i,message,Message string (instantaneous),-,,,,all,,,qualityInformation,time lat lon alt,True,L0 only diff --git a/src/pypromice/process/write.py b/src/pypromice/process/write.py new file mode 100644 index 00000000..732ec12e --- /dev/null +++ b/src/pypromice/process/write.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- 
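+# prepare_and_write() below is the common writer used by the get_l2tol3, join_l2
+# and join_l3 entry points: it optionally resamples the dataset, adds variable
+# and file metadata, and writes matching .csv and .nc files to a per-station/site folder.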
+""" +Write dataset module +""" +import os, logging +import pandas as pd +logger = logging.getLogger(__name__) + +from pypromice.process.resample import resample_dataset +from pypromice.process import utilities, load + +def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60min', resample=True): + '''Prepare data with resampling, formating and metadata population; then + write data to .nc and .csv hourly and daily files + + Parameters + ---------- + dataset : xarray.Dataset + Dataset to write to file + outpath : str + Output directory + vars_df : pandas.DataFrame + Variables look-up table dataframe + meta_dict : dictionary + Metadata dictionary to write to dataset + time : str + Resampling interval for output dataset + ''' + # Resample dataset + if resample: + d2 = resample_dataset(dataset, time) + logger.info('Resampling to '+str(time)) + else: + d2 = dataset.copy() + + # Reformat time + d2 = utilities.reformat_time(d2) + + # finding station/site name + if 'station_id' in d2.attrs.keys(): + name = d2.attrs['station_id'] + else: + name = d2.attrs['site_id'] + + # Reformat longitude (to negative values) + if 'gps_lon' in d2.keys(): + d2 = utilities.reformat_lon(d2) + else: + logger.info('%s does not have gpd_lon'%name) + + # Add variable attributes and metadata + if vars_df is None: + vars_df = load.getVars() + if meta_dict is None: + meta_dict = load.getMeta() + + d2 = utilities.addVars(d2, vars_df) + d2 = utilities.addMeta(d2, meta_dict) + + # Round all values to specified decimals places + d2 = utilities.roundValues(d2, vars_df) + + # Get variable names to write out + col_names = getColNames(vars_df, d2, remove_nan_fields=True) + + # Define filename based on resample rate + t = int(pd.Timedelta((d2['time'][1] - d2['time'][0]).values).total_seconds()) + + # Create out directory + outdir = os.path.join(outpath, name) + if not os.path.isdir(outdir): + os.mkdir(outdir) + + if t == 600: + out_csv = os.path.join(outdir, name+'_10min.csv') + out_nc = os.path.join(outdir, name+'_10min.nc') + elif t == 3600: + out_csv = os.path.join(outdir, name+'_hour.csv') + out_nc = os.path.join(outdir, name+'_hour.nc') + elif t == 86400: + # removing instantaneous values from daily and monthly files + for v in col_names: + if ('_i' in v) and ('_i_' not in v): + col_names.remove(v) + out_csv = os.path.join(outdir, name+'_day.csv') + out_nc = os.path.join(outdir, name+'_day.nc') + else: + # removing instantaneous values from daily and monthly files + for v in col_names: + if ('_i' in v) and ('_i_' not in v): + col_names.remove(v) + out_csv = os.path.join(outdir, name+'_month.csv') + out_nc = os.path.join(outdir, name+'_month.nc') + if not os.path.isdir(outdir): + os.mkdir(outdir) + # Write to csv file + logger.info('Writing to files...') + writeCSV(out_csv, d2, col_names) + + # Write to netcdf file + col_names = col_names + ['lat', 'lon', 'alt'] + writeNC(out_nc, d2, col_names) + logger.info(f'Written to {out_csv}') + logger.info(f'Written to {out_nc}') + + + +def writeAll(outpath, station_id, l3_h, l3_d, l3_m, csv_order=None): + '''Write L3 hourly, daily and monthly datasets to .nc and .csv + files + + Parameters + ---------- + outpath : str + Output file path + station_id : str + Station name + l3_h : xr.Dataset + L3 hourly data + l3_d : xr.Dataset + L3 daily data + l3_m : xr.Dataset + L3 monthly data + csv_order : list, optional + List order of variables + ''' + if not os.path.isdir(outpath): + os.mkdir(outpath) + outfile_h = os.path.join(outpath, station_id + '_hour') + outfile_d = 
os.path.join(outpath, station_id + '_day') + outfile_m = os.path.join(outpath, station_id + '_month') + for o,l in zip([outfile_h, outfile_d, outfile_m], [l3_h ,l3_d, l3_m]): + writeCSV(o+'.csv',l, csv_order) + writeNC(o+'.nc',l) + +def writeCSV(outfile, Lx, csv_order): + '''Write data product to CSV file + + Parameters + ---------- + outfile : str + Output file path + Lx : xr.Dataset + Dataset to write to file + csv_order : list + List order of variables + ''' + Lcsv = Lx.to_dataframe().dropna(how='all') + if csv_order is not None: + names = [c for c in csv_order if c in list(Lcsv.columns)] + Lcsv = Lcsv[names] + Lcsv.to_csv(outfile) + +def writeNC(outfile, Lx, col_names=None): + '''Write data product to NetCDF file + + Parameters + ---------- + outfile : str + Output file path + Lx : xr.Dataset + Dataset to write to file + ''' + if os.path.isfile(outfile): + os.remove(outfile) + if col_names is not None: + names = [c for c in col_names if c in list(Lx.keys())] + else: + names = list(Lx.keys()) + for var in names: + Lx[var].encoding = {} + Lx[names].to_netcdf(outfile, mode='w', format='NETCDF4', compute=True) + +def getColNames(vars_df, ds, remove_nan_fields=False): + '''Get all variable names for a given data type, based on a variables + look-up table. This is mainly for exporting purposes + + Parameters + ---------- + vars_df : pd.DataFrame + Variables look-up table + ds: xr.dataset + Dataset to write + Returns + ------- + list + Variable names + ''' + # selecting variable list based on level + vars_df = vars_df.loc[vars_df[ds.attrs['level']] == 1] + + # selecting variable list based on geometry + if ds.attrs['level'] in ['L0', 'L1', 'L2']: + if ds.attrs['number_of_booms']==1: + vars_df = vars_df.loc[vars_df['station_type'].isin(['one-boom','all'])] + elif ds.attrs['number_of_booms']==2: + vars_df = vars_df.loc[vars_df['station_type'].isin(['two-boom','all'])] + + var_list = list(vars_df.index) + if remove_nan_fields: + for v in var_list: + if v not in ds.keys(): + var_list.remove(v) + continue + if ds[v].isnull().all(): + var_list.remove(v) + return var_list + diff --git a/src/pypromice/ressources/__init__.py b/src/pypromice/ressources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/pypromice/process/metadata.csv b/src/pypromice/ressources/file_attributes.csv similarity index 100% rename from src/pypromice/process/metadata.csv rename to src/pypromice/ressources/file_attributes.csv diff --git a/src/pypromice/ressources/variable_aliases_GC-Net.csv b/src/pypromice/ressources/variable_aliases_GC-Net.csv new file mode 100644 index 00000000..16bff6db --- /dev/null +++ b/src/pypromice/ressources/variable_aliases_GC-Net.csv @@ -0,0 +1,78 @@ +GEUS_name,old_name +time,timestamp +p_u,P +p_l, +t_u,TA2 +t_l,TA1 +rh_u,RH2 +rh_u_cor,RH2_cor +qh_u,Q2 +rh_l,RH1 +rh_l_cor,RH1_cor +qh_l,Q1 +wspd_u,VW2 +wspd_l,VW1 +wdir_u,DW2 +wdir_l,DW1 +dsr, +dsr_cor,ISWR +usr, +usr_cor,OSWR +albedo,Alb +dlr, +ulr, +cc, +t_surf, +dlhf_u,LHF +dlhf_l, +dshf_u,SHF +dshf_l, +z_boom_u,HW2 +z_boom_l,HW1 +precip_u, +precip_u_cor, +precip_l, +precip_l_cor, +t_i_1,TS1 +t_i_2,TS2 +t_i_3,TS3 +t_i_4,TS4 +t_i_5,TS5 +t_i_6,TS6 +t_i_7,TS7 +t_i_8,TS8 +t_i_9,TS9 +t_i_10,TS10 +t_i_11, +tilt_x, +tilt_y, +rot, +gps_lat,latitude +gps_lon,longitude +gps_alt,elevation +gps_time, +gps_geounit, +gps_hdop, +batt_v,V +fan_dc_u, +fan_dc_l, +t_rad, +msg_lat, +msg_lon, +z_surf_1,HS1 +z_surf_2,HS2 +z_surf_1_adj_flag,HS1_adj_flag +z_surf_2_adj_flag,HS2_adj_flag +z_surf_combined,HS_combined +depth_t_i_1,DTS1 +depth_t_i_2,DTS2 
+depth_t_i_3,DTS3 +depth_t_i_4,DTS4 +depth_t_i_5,DTS5 +depth_t_i_6,DTS6 +depth_t_i_7,DTS7 +depth_t_i_8,DTS8 +depth_t_i_9,DTS9 +depth_t_i_10,DTS10 +depth_t_i_11, +t_i_10m,TS_10m diff --git a/src/pypromice/ressources/variables.csv b/src/pypromice/ressources/variables.csv new file mode 100644 index 00000000..94c4f743 --- /dev/null +++ b/src/pypromice/ressources/variables.csv @@ -0,0 +1,94 @@ +field,standard_name,long_name,units,coverage_content_type,coordinates,instantaneous_hourly,where_to_find,lo,hi,OOL,station_type,L0,L2,L3,max_decimals +time,time,Time,yyyy-mm-dd HH:MM:SS,physicalMeasurement,time,,,,,,all,1,1,1, +rec,record,Record,-,referenceInformation,time,,L0 or L2,,,,all,1,1,0,0 +p_u,air_pressure,Air pressure (upper boom),hPa,physicalMeasurement,time,FALSE,,650,1100,z_pt z_pt_cor dshf_u dlhf_u qh_u,all,1,1,1,4 +p_l,air_pressure,Air pressure (lower boom),hPa,physicalMeasurement,time,FALSE,,650,1100,dshf_l dlhf_l qh_l,two-boom,1,1,1,4 +t_u,air_temperature,Air temperature (upper boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,rh_u_cor cc dsr_cor usr_cor z_boom z_stake dshf_u dlhf_u qh_u,all,1,1,1,4 +t_l,air_temperature,Air temperature (lower boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,rh_l_cor z_boom_l dshf_l dlhf_l qh_l,two-boom,1,1,1,4 +rh_u,relative_humidity,Relative humidity (upper boom),%,physicalMeasurement,time,FALSE,,0,100,rh_u_cor,all,1,1,1,4 +rh_u_cor,relative_humidity_corrected,Relative humidity (upper boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,dshf_u dlhf_u qh_u,all,0,1,1,4 +qh_u,specific_humidity,Specific humidity (upper boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,,all,0,1,1,4 +rh_l,relative_humidity,Relative humidity (lower boom),%,physicalMeasurement,time,FALSE,,0,100,rh_l_cor,two-boom,1,1,1,4 +rh_l_cor,relative_humidity_corrected,Relative humidity (lower boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,dshf_l dlhf_l qh_l,two-boom,0,1,1,4 +qh_l,specific_humidity,Specific humidity (lower boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,,two-boom,0,1,1,4 +wspd_u,wind_speed,Wind speed (upper boom),m s-1,physicalMeasurement,time,FALSE,,0,100,"wdir_u wspd_x_u wspd_y_u dshf_u dlhf_u qh_u, precip_u",all,1,1,1,4 +wspd_l,wind_speed,Wind speed (lower boom),m s-1,physicalMeasurement,time,FALSE,,0,100,"wdir_l wspd_x_l wspd_y_l dshf_l dlhf_l qh_l , precip_l",two-boom,1,1,1,4 +wdir_u,wind_from_direction,Wind from direction (upper boom),degrees,physicalMeasurement,time,FALSE,,1,360,wspd_x_u wspd_y_u,all,1,1,1,4 +wdir_std_u,wind_from_direction_standard_deviation,Wind from direction (standard deviation),degrees,qualityInformation,time,FALSE,L0 or L2,,,,one-boom,1,1,0,4 +wdir_l,wind_from_direction,Wind from direction (lower boom),degrees,physicalMeasurement,time,FALSE,,1,360,wspd_x_l wspd_y_l,two-boom,1,1,1,4 +wspd_x_u,wind_speed_from_x_direction,Wind speed from x direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_u wspd_u,all,0,1,1,4 +wspd_y_u,wind_speed_from_y_direction,Wind speed from y direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_u wspd_u,all,0,1,1,4 +wspd_x_l,wind_speed_from_x_direction,Wind speed from x direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_l wspd_l,two-boom,0,1,1,4 +wspd_y_l,wind_speed_from_y_direction,Wind speed from y direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_l wspd_l,two-boom,0,1,1,4 +dsr,surface_downwelling_shortwave_flux,Downwelling shortwave radiation,W 
m-2,physicalMeasurement,time,FALSE,,-10,1500,albedo dsr_cor usr_cor,all,1,1,1,4 +dsr_cor,surface_downwelling_shortwave_flux_corrected,Downwelling shortwave radiation - corrected,W m-2,modelResult,time,FALSE,L2 or later,,,,all,0,1,1,4 +usr,surface_upwelling_shortwave_flux,Upwelling shortwave radiation,W m-2,physicalMeasurement,time,FALSE,,-10,1000,albedo dsr_cor usr_cor,all,1,1,1,4 +usr_cor,surface_upwelling_shortwave_flux_corrected,Upwelling shortwave radiation - corrected,W m-2,modelResult,time,FALSE,L2 or later,0,1000,,all,0,1,1,4 +albedo,surface_albedo,Albedo,-,modelResult,time,FALSE,L2 or later,,,,all,0,1,1,4 +dlr,surface_downwelling_longwave_flux,Downwelling longwave radiation,W m-2,physicalMeasurement,time,FALSE,,50,500,albedo dsr_cor usr_cor cc t_surf,all,1,1,1,4 +ulr,surface_upwelling_longwave_flux,Upwelling longwave radiation,W m-2,physicalMeasurement,time,FALSE,,50,500,t_surf,all,1,1,1,4 +cc,cloud_area_fraction,Cloud cover,%,modelResult,time,FALSE,L2 or later,,,,all,0,1,1,4 +t_surf,surface_temperature,Surface temperature,C,modelResult,time,FALSE,L2 or later,-80,40,dshf_u dlhf_u qh_u,all,0,1,1,4 +dlhf_u,surface_downward_latent_heat_flux,Latent heat flux (upper boom),W m-2,modelResult,time,FALSE,L3 or later,,,,all,0,0,1,4 +dlhf_l,surface_downward_latent_heat_flux,Latent heat flux (lower boom),W m-2,modelResult,time,FALSE,L3 or later,,,,two-boom,0,0,1,4 +dshf_u,surface_downward_sensible_heat_flux,Sensible heat flux (upper boom),W m-2,modelResult,time,FALSE,L3 or later,,,,all,0,0,1,4 +dshf_l,surface_downward_sensible_heat_flux,Sensible heat flux (lower boom),W m-2,modelResult,time,FALSE,L3 or later,,,,two-boom,0,0,1,4 +z_boom_u,distance_to_surface_from_boom,Upper boom height,m,physicalMeasurement,time,TRUE,,0.3,10,dshf_u dlhf_u qh_u,all,1,1,1,4 +z_boom_q_u,distance_to_surface_from_boom_quality,Upper boom height (quality),-,qualityInformation,time,TRUE,L0 or L2,,,,all,1,1,0,4 +z_boom_l,distance_to_surface_from_boom,Lower boom height,m,physicalMeasurement,time,TRUE,,0.3,5,dshf_l dlhf_l qh_l,two-boom,1,1,1,4 +z_boom_q_l,distance_to_surface_from_boom_quality,Lower boom height (quality),-,qualityInformation,time,TRUE,L0 or L2,,,,two-boom,1,1,0,4 +z_stake,distance_to_surface_from_stake_assembly,Stake height,m,physicalMeasurement,time,TRUE,,0.3,8,,one-boom,1,1,1,4 +z_stake_q,distance_to_surface_from_stake_assembly_quality,Stake height (quality),-,qualityInformation,time,TRUE,L0 or L2,,,,one-boom,1,1,0,4 +z_pt,depth_of_pressure_transducer_in_ice,Depth of pressure transducer in ice,m,physicalMeasurement,time,FALSE,,0,30,z_pt_cor,one-boom,1,1,1,4 +z_pt_cor,depth_of_pressure_transducer_in_ice_corrected,Depth of pressure transducer in ice - corrected,m,modelResult,time,FALSE,L2 or later,0,30,,one-boom,0,1,1,4 +precip_u,precipitation,Precipitation (upper boom) (cumulative solid & liquid),mm,physicalMeasurement,time,TRUE,,0,,precip_u_cor precip_u_rate,all,1,1,1,4 +precip_u_cor,precipitation_corrected,Precipitation (upper boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,all,0,1,1,4 +precip_u_rate,precipitation_rate,Precipitation rate (upper boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,all,0,1,1,4 +precip_l,precipitation,Precipitation (lower boom) (cumulative solid & liquid),mm,physicalMeasurement,time,TRUE,,0,,precip_l_cor precip_l_rate,two-boom,1,1,1,4 +precip_l_cor,precipitation_corrected,Precipitation (lower boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,two-boom,0,1,1,4 
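+# The L0/L2/L3 columns flag whether a field is written out at that processing level (1 = included);
+# getColNames() in write.py keeps the rows whose column named by the dataset's "level" attribute is 1. Comment lines such as these are skipped by getVars() (comment="#").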
+precip_l_rate,precipitation_rate,Precipitation rate (lower boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,two-boom,0,1,1,4 +t_i_1,ice_temperature_at_t1,Ice temperature at sensor 1,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_2,ice_temperature_at_t2,Ice temperature at sensor 2,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_3,ice_temperature_at_t3,Ice temperature at sensor 3,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_4,ice_temperature_at_t4,Ice temperature at sensor 4,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_5,ice_temperature_at_t5,Ice temperature at sensor 5,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_6,ice_temperature_at_t6,Ice temperature at sensor 6,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_7,ice_temperature_at_t7,Ice temperature at sensor 7,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_8,ice_temperature_at_t8,Ice temperature at sensor 8,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_9,ice_temperature_at_t9,Ice temperature at sensor 9,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,two-boom,1,1,1,4 +t_i_10,ice_temperature_at_t10,Ice temperature at sensor 10,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,two-boom,1,1,1,4 +t_i_11,ice_temperature_at_t11,Ice temperature at sensor 11,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,two-boom,1,1,1,4 +tilt_x,platform_view_angle_x,Tilt to east,degrees,physicalMeasurement,time,FALSE,,-30,30,dsr_cor usr_cor albedo,all,1,1,1,4 +tilt_y,platform_view_angle_y,Tilt to north,degrees,physicalMeasurement,time,FALSE,,-30,30,dsr_cor usr_cor albedo,all,1,1,1,4 +rot,platform_azimuth_angle,Station rotation from true North,degrees,physicalMeasurement,time,FALSE,,0,360,,all,1,1,1,2 +gps_lat,gps_latitude,Latitude,degrees_north,coordinate,time,TRUE,,50,83,,all,1,1,1,6 +gps_lon,gps_longitude,Longitude,degrees_east,coordinate,time,TRUE,,5,70,,all,1,1,1,6 +gps_alt,gps_altitude,Altitude,m,coordinate,time,TRUE,,0,3000,,all,1,1,1,2 +gps_time,gps_time,GPS time,s,physicalMeasurement,time,TRUE,L0 or L2,0,240000,,all,1,1,0, +gps_geoid,gps_geoid_separation,Height of EGM96 geoid over WGS84 ellipsoid,m,physicalMeasurement,time,TRUE,L0 or L2,,,,one-boom,1,1,0, +gps_geounit,gps_geounit,GeoUnit,-,qualityInformation,time,TRUE,L0 or L2,,,,all,1,1,0, +gps_hdop,gps_hdop,GPS horizontal dillution of precision (HDOP),m,qualityInformation,time,TRUE,L0 or L2,,,,all,1,1,0,2 +gps_numsat,gps_numsat,GPS number of satellites,-,qualityInformation,time,TRUE,L0 or L2,,,,,1,1,0,0 +gps_q,gps_q,Quality,-,qualityInformation,time,TRUE,L0 or L2,,,,,1,1,0, +lat,latitude_postprocessed,smoothed and interpolated latitude of station (best estimate),degrees_N,modelResult,time,TRUE,L3,,,,all,0,0,1,6 +lon,longitude_postprocessed,smoothed and interpolated longitude of station (best estimate),degrees_E,modelResult,time,TRUE,L3,,,,all,0,0,1,6 +elev,elevation_postprocessed,smoothed and interpolated elevation of station (best estimate),m a.s.l. 
(WGS84),modelResult,time,TRUE,L3,,,,all,0,0,1,2 +lat_avg,mean_latitude,mean latitude (from all time-series),degrees,modelResult,time,TRUE,,,,,all,1,1,1,6 +lon_avg,mean_longitude,mean longitude (from all time-series),degrees,modelResult,time,TRUE,,,,,all,1,1,1,6 +alt_avg,mean_altitude,mean altitude (from all time-series),degrees,modelResult,time,TRUE,,,,,all,1,1,1,2 +batt_v,battery_voltage,Battery voltage,V,physicalMeasurement,time,TRUE,,0,30,,all,1,1,1,2 +batt_v_ini,,,-,physicalMeasurement,time,TRUE,L0 or L2,0,30,,,1,1,0,2 +batt_v_ss,battery_voltage_at_sample_start,Battery voltage (sample start),V,physicalMeasurement,time,TRUE,L0 or L2,0,30,,,1,1,0,2 +fan_dc_u,fan_current,Fan current (upper boom),mA,physicalMeasurement,time,TRUE,L0 or L2,0,200,,all,1,1,0,2 +fan_dc_l,fan_current,Fan current (lower boom),mA,physicalMeasurement,time,TRUE,,0,200,,two-boom,1,1,0,2 +freq_vw,frequency_of_precipitation_wire_vibration,Frequency of vibrating wire in precipitation gauge,Hz,physicalMeasurement,time,TRUE,L0 or L2,0,10000,precip_u,,1,1,0, +t_log,temperature_of_logger,Logger temperature,degrees_C,physicalMeasurement,time,TRUE,,-80,40,,one-boom,1,1,0,4 +t_rad,temperature_of_radiation_sensor,Radiation sensor temperature,degrees_C,physicalMeasurement,time,FALSE,,-80,40,t_surf dlr ulr,all,1,1,1,4 +p_i,air_pressure,Air pressure (instantaneous) minus 1000,hPa,physicalMeasurement,time,TRUE,,-350,100,,all,1,1,1,4 +t_i,air_temperature,Air temperature (instantaneous),degrees_C,physicalMeasurement,time,TRUE,,-80,40,,all,1,1,1,4 +rh_i,relative_humidity,Relative humidity (instantaneous),%,physicalMeasurement,time,TRUE,,0,150,rh_i_cor,all,1,1,1,4 +rh_i_cor,relative_humidity_corrected,Relative humidity (instantaneous) – corrected,%,modelResult,time,TRUE,L2 or later,0,100,,all,0,1,1,4 +wspd_i,wind_speed,Wind speed (instantaneous),m s-1,physicalMeasurement,time,TRUE,,0,100,wdir_i wspd_x_i wspd_y_i,all,1,1,1,4 +wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,physicalMeasurement,time,TRUE,,1,360,wspd_x_i wspd_y_i,all,1,1,1,4 +wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4 +wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4 \ No newline at end of file
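Note on how this table is typically consumed: each row of variables.csv attaches metadata to one output variable, the lo/hi columns give valid physical ranges, OOL lists dependent variables, and the L0/L2/L3 flags mark which processing level a variable appears in. Below is a minimal sketch (not the package's own routine) of using pandas and xarray to select L3 variables and clip each variable to its [lo, hi] range; clip_to_limits is a hypothetical helper introduced here only for illustration.

import numpy as np
import pandas as pd
import xarray as xr

# Load the variable attribute table added in this patch
vars_df = pd.read_csv('src/pypromice/ressources/variables.csv')

# Variables that should appear in an L3 product (flag column L3 == 1)
l3_fields = vars_df.loc[vars_df['L3'] == 1, 'field'].tolist()

def clip_to_limits(ds: xr.Dataset, vars_df: pd.DataFrame) -> xr.Dataset:
    """Set values outside [lo, hi] to NaN for every variable listed in vars_df.
    Hypothetical helper sketch; blank lo/hi cells mean 'no bound on that side'."""
    for field, row in vars_df.set_index('field').iterrows():
        if field not in ds:
            continue
        lo = -np.inf if pd.isna(row['lo']) else float(row['lo'])
        hi = np.inf if pd.isna(row['hi']) else float(row['hi'])
        if np.isinf(lo) and np.isinf(hi):
            continue  # no limits defined for this variable
        ds[field] = ds[field].where((ds[field] >= lo) & (ds[field] <= hi))
    return ds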