-
Notifications
You must be signed in to change notification settings - Fork 0
/
09_interp_o2_1m_resolution.py
213 lines (184 loc) · 8.85 KB
/
09_interp_o2_1m_resolution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import pandas as pd
import numpy as np
import gsw
import os
from scipy.interpolate import interp1d
import convert
from tqdm import trange
from helpers import get_profile_st_en_idx
def interp_1m_resolution(in_df_name: str, out_df_name: str,
oxy_unit: str, station: str):
"""
Linearly interpolate bottle oxygen to 1m vertical resolution
if not already at that resolution
:param in_df_name:
:param station:
:param out_df_name:
:param oxy_unit: oxygen unit, "mL/L" or "umol/kg"
:return:
"""
in_df = pd.read_csv(in_df_name)
# Check if oxy unit is in umol/kg
if oxy_unit == 'mL/L':
pressure = gsw.p_from_z(
-in_df.loc[:, 'Depth [m]'].to_numpy(float),
in_df.loc[:, 'Latitude [deg N]'].to_numpy(float))
in_df['Oxygen [umol/kg]'] = convert.ml_l_to_umol_kg(
in_df.loc[:, 'Oxygen [mL/L]'].to_numpy(float),
in_df.loc[:, 'Longitude [deg E]'].to_numpy(float),
in_df.loc[:, 'Latitude [deg N]'].to_numpy(float),
in_df.loc[:, 'Temperature [C]'].to_numpy(float),
in_df.loc[:, 'Salinity [PSS-78]'].to_numpy(float),
pressure, in_df_name)[0]
# Get profile start and end indices
profile_start_idx, profile_end_idx = get_profile_st_en_idx(
in_df.loc[:, 'Profile number'])
# Initialize dataframe to hold interpolated data
# Need time, density, oxygen value
out_df_columns = ['Profile number', 'Latitude [deg N]',
'Longitude [deg E]', 'Time',
'Profile is interpolated',
'Depth [m]', 'Oxygen [umol/kg]',
'Potential density anomaly [kg/m]', 'File']
out_df = pd.DataFrame(columns=out_df_columns)
# Iterate through all the profiles
counter_profiles_all_nan = 0
for i in trange(len(profile_start_idx)):
st = profile_start_idx[i]
en = profile_end_idx[i]
# Evaluate conditions for no interpolation
# Profile only contains one measurement
profile_has_len_1 = st == en
# Profile is high resolution with measurements spaced less than 1.5 m apart in the water column
profile_is_high_res = np.all(
np.diff(in_df.loc[st:en, 'Depth [m]']) < 1.5
) if not profile_has_len_1 else False
# Profile is too low resolution with measurements spaced more than 0.2 sigma-theta units apart
profile_spaced_too_far = np.all(np.diff(
in_df.loc[st:en, 'Potential density anomaly [kg/m]']) > 0.2
) if not profile_has_len_1 else False
# Oxygen measurements are all nans
if all(np.isnan(in_df.loc[st:en, 'Oxygen [umol/kg]'].to_numpy())):
# Skip the profile
counter_profiles_all_nan += 1
continue
elif profile_has_len_1 or profile_is_high_res or profile_spaced_too_far:
# print('Not interpolating')
# Do not interpolate and just pass the observed level data
# to the output df
observed_profile_len = len(in_df.loc[st:en, 'Profile number'])
dict_add = {
'Profile number': in_df.loc[st:en, 'Profile number'],
'Latitude [deg N]': in_df.loc[st:en, 'Latitude [deg N]'],
'Longitude [deg E]': in_df.loc[st:en, 'Longitude [deg E]'],
'Time': in_df.loc[st:en, 'Time'],
'Depth [m]': in_df.loc[st:en, 'Depth [m]'],
'Profile is interpolated': np.zeros(observed_profile_len),
'Oxygen [umol/kg]': in_df.loc[st:en, 'Oxygen [umol/kg]'],
'Potential density anomaly [kg/m]':
in_df.loc[st:en, 'Potential density anomaly [kg/m]'],
'File': in_df.loc[st:en, 'File']
}
# for key, value in dict_add.items():
# print(key, len(value))
# print()
out_df = pd.concat((out_df, pd.DataFrame(dict_add)))
else:
# print('Doing linear interpolation')
# Do linear interpolation to 1m vertical resolution
# interpolation is not done to standard depths
interp_oxy_fn = interp1d(
in_df.loc[st:en, 'Depth [m]'].to_numpy(float),
in_df.loc[st:en, 'Oxygen [umol/kg]'].to_numpy(float),
kind='linear',
bounds_error=False,
fill_value=np.nan
)
interp_density_fn = interp1d(
in_df.loc[st:en, 'Depth [m]'].to_numpy(float),
in_df.loc[st:en, 'Potential density anomaly [kg/m]'].to_numpy(float),
kind='linear',
bounds_error=False,
fill_value=np.nan
)
# +1 since numpy range not inclusive of end, but pandas range is inclusive
depth_1m_freq = np.arange(
np.nanmin(in_df.loc[st:en, 'Depth [m]'].to_numpy(float)),
np.nanmax(in_df.loc[st:en, 'Depth [m]'].to_numpy(float)) + 1)
interpolated_profile_len = len(depth_1m_freq)
oxy_interpolated = interp_oxy_fn(depth_1m_freq)
density_interpolated = interp_density_fn(depth_1m_freq)
# Dictionary of values to add to the output df
dict_add = {
'Profile number': np.repeat(in_df.loc[st, 'Profile number'],
interpolated_profile_len),
'Latitude [deg N]': np.repeat(in_df.loc[st, 'Latitude [deg N]'],
interpolated_profile_len),
'Longitude [deg E]': np.repeat(in_df.loc[st, 'Longitude [deg E]'],
interpolated_profile_len),
'Time': np.repeat(in_df.loc[st, 'Time'], interpolated_profile_len),
'Depth [m]': depth_1m_freq,
'Profile is interpolated': np.ones(interpolated_profile_len),
'Oxygen [umol/kg]': oxy_interpolated,
'Potential density anomaly [kg/m]': density_interpolated,
'File': np.repeat(in_df.loc[st, 'File'],
interpolated_profile_len)
}
out_df = pd.concat((out_df, pd.DataFrame(dict_add)))
# Save the summary statistics
out_df.reset_index(drop=True, inplace=True)
out_profile_start_idx = get_profile_st_en_idx(
out_df.loc[:, 'Profile number'])[0]
num_interpolated_profiles = int(sum(
out_df.loc[out_profile_start_idx, 'Profile is interpolated']))
num_not_interpolated_profiles = int(sum(
out_df.loc[out_profile_start_idx, 'Profile is interpolated'] == 0))
summary_stats_file = os.path.join(
os.path.dirname(out_df_name),
'{}_interpolation_summary_statistics.txt'.format(station))
with open(summary_stats_file, 'w') as txtfile:
txtfile.write('Input file: ' + in_df_name + '\n')
txtfile.write('Output file: ' + out_df_name + '\n')
txtfile.write(
'Number of profiles in: {}\n'.format(len(profile_start_idx)))
txtfile.write(
'Number of observations in: {}\n'.format(len(in_df)))
txtfile.write(
'Number of profiles out: {}\n'.format(
len(np.unique(out_df.loc[:, 'Profile number'],
return_index=True)[1])))
txtfile.write(
'Number of observations out: {}\n'.format(len(out_df)))
txtfile.write(
'Number of interpolated profiles: {}\n'.format(
num_interpolated_profiles))
txtfile.write(
'Number of profiles not interpolated: {}\n'.format(
num_not_interpolated_profiles))
txtfile.write('Number of profiles containing all nans: {}'.format(
counter_profiles_all_nan))
# Save the df
out_df.to_csv(out_df_name, index=False)
return
# ----------Interpolate O2 to 1m resolution-----------
# for each station: P4 and P26, LB08
stn = 'P26'
station_name = stn
# station_name = 'OSP'
# parent_dir = 'C:\\Users\\HourstonH\\Documents\\charles\\' \
# 'line_P_data_products\\csv\\has_osd_ctd_flags\\'
# parent_dir = 'C:\\Users\\HourstonH\\Documents\\charles\\' \
# 'bottom_oxygen\\'
parent_dir = 'D:\\lineP\\processing\\'
# parent_dir = ('C:\\Users\\hourstonh\\Documents\\charles\\line_P_data_products\\'
# 'update_jan2024_sopo\\csv_data\\')
density_dir = '08_potential_density_anomalies\\'
density_file = os.path.join(parent_dir, density_dir,
'{}_data.csv'.format(stn))
# density_file = os.path.join(parent_dir, density_dir,
# '{}_ctd_data_qc.csv'.format(stn))
o2_interp_dir = '09_interpolate_o2_to_1m_res'
o2_interp_file = os.path.join(parent_dir, o2_interp_dir,
os.path.basename(density_file))
oxygen_unit = 'umol/kg' # mL/L umol/kg
interp_1m_resolution(density_file, o2_interp_file, oxygen_unit, stn)