Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0.12 #13

Merged
merged 7 commits into from
Jan 19, 2024
Merged

0.12 #13

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion basininflow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from basininflow.inflow import create_inflow_file

__version__ = '0.11.0'
__version__ = '0.12.0'
__all__ = ['create_inflow_file', ]
9 changes: 1 addition & 8 deletions basininflow/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,5 @@ def gen(parser, args):
return

# Create the inflow file for each LSM file
create_inflow_file(
lsm_data,
input_dir,
inflow_dir,
timestep=timestep,
cumulative=cumulative,
file_label=file_label
)
create_inflow_file(lsm_data, input_dir, inflow_dir, timestep=timestep, cumulative=cumulative, file_label=file_label)
return
54 changes: 29 additions & 25 deletions basininflow/inflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,20 @@ def _memory_check(size: int, dtype: type = np.float32, ram_buffer_percentage: fl
{psutil._common.bytes2human(available_mem)} available memory...")


def _cumulative_to_incremental(df) -> pd.DataFrame:
logging.info('Converting to incremental values')
return pd.DataFrame(
np.vstack([df.values[0, :], np.diff(df.values, axis=0)]),
index=df.index,
columns=df.columns
)
rileyhales marked this conversation as resolved.
Show resolved Hide resolved


def _incremental_to_cumulative(df) -> pd.DataFrame:
logging.info('Converting to cumulative values')
return df.cumsum()
rileyhales marked this conversation as resolved.
Show resolved Hide resolved


def create_inflow_file(lsm_data: str,
input_dir: str,
inflow_dir: str,
Expand All @@ -38,7 +52,8 @@ def create_inflow_file(lsm_data: str,
comid_lat_lon_z: str = None,
timestep: datetime.timedelta = None,
cumulative: bool = False,
file_label: str = None, ) -> None:
file_label: str = None,
force_positive_runoff: bool = False, ) -> None:
"""
Generate inflow files for use with RAPID.

Expand All @@ -64,6 +79,8 @@ def create_inflow_file(lsm_data: str,
Convert the inflow data to incremental values. Default is False
file_label: str, optional
Label to include in the file name for organization purposes.
force_positive_runoff: bool, optional
Set all negative values to zero. Default is False
"""
# Ensure that every input file exists
if weight_table is not None and not os.path.exists(weight_table):
Expand All @@ -77,7 +94,7 @@ def create_inflow_file(lsm_data: str,
vpu_name = os.path.basename(input_dir)

# open all the ncs and select only the area within the weight table
if type(lsm_data) == list:
if type(lsm_data) is list:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The conditional checks for lsm_data are incomplete. The ellipsis (...) suggests that there is code that should be present but is not. Ensure that the logic for handling different types of lsm_data inputs is fully implemented.

elif os.path.isdir(lsm_data):
    lsm_data = os.path.join(lsm_data, '*.nc*')
elif os.path.isfile(lsm_data):
+   # Implement logic for handling a single file
elif '*' in lsm_data:
+   # Implement logic for handling glob pattern

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
if type(lsm_data) is list:
if type(lsm_data) is list:
elif os.path.isdir(lsm_data):
lsm_data = os.path.join(lsm_data, '*.nc*')
elif os.path.isfile(lsm_data):
# Implement logic for handling a single file
elif '*' in lsm_data:
# Implement logic for handling glob pattern

... # this is correct, a list of files is allowed
elif os.path.isdir(lsm_data):
lsm_data = os.path.join(lsm_data, '*.nc*')
Expand Down Expand Up @@ -169,26 +186,16 @@ def create_inflow_file(lsm_data: str,
else:
raise ValueError(f"Unknown number of dimensions: {ds.ndim}")

# This order of operations is important
inflow_df = pd.DataFrame(inflow_df, columns=stream_ids, index=datetime_array)
inflow_df = inflow_df.replace(np.nan, 0)
inflow_df[inflow_df < 0] = 0
inflow_df = inflow_df * weight_df['area_sqm'].values * conversion_factor
inflow_df = inflow_df.groupby(by=stream_ids, axis=1).sum()
inflow_df = inflow_df[sorted_rivid_array]

def _cumulative_to_incremental(df) -> pd.DataFrame:
return pd.DataFrame(
np.vstack([df.values[0, :], np.diff(df.values, axis=0)]),
index=df.index,
columns=df.columns
)

def _incremental_to_cumulative(df) -> pd.DataFrame:
return df.cumsum()

if cumulative:
logging.info('Converting to cumulative values')
inflow_df = _cumulative_to_incremental(inflow_df)
if force_positive_runoff:
inflow_df = inflow_df.clip(lower=0)
inflow_df = inflow_df * weight_df['area_sqm'].values * conversion_factor
inflow_df = inflow_df.T.groupby(by=stream_ids).sum().T
inflow_df = inflow_df[sorted_rivid_array]

# Check that all timesteps are the same
time_diff = np.diff(datetime_array)
Expand All @@ -198,9 +205,9 @@ def _incremental_to_cumulative(df) -> pd.DataFrame:
logging.warning('Timesteps are not all uniform and a target timestep was not provided.')
timestep = datetime_array[1] - datetime_array[0]
logging.warning(f'Assuming the first timedelta is the target: {timestep.astype("timedelta64[s]")}')
elif isinstance(timestep,datetime.timedelta):
elif isinstance(timestep, datetime.timedelta):
# Convert datetime timedelta to timedelta64[ns]
timestep = np.timedelta64(timestep,'ns')
timestep = np.timedelta64(timestep, 'ns')
Comment on lines +208 to +210
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for handling non-uniform timesteps seems to be incomplete. The timestep variable is checked for being an instance of datetime.timedelta but there is no corresponding action if it is not. Ensure that the logic is complete and handles all expected cases.

elif isinstance(timestep, datetime.timedelta):
+   # Implement logic for handling timestep when it is not a datetime.timedelta instance

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
elif isinstance(timestep, datetime.timedelta):
# Convert datetime timedelta to timedelta64[ns]
timestep = np.timedelta64(timestep,'ns')
timestep = np.timedelta64(timestep, 'ns')
elif isinstance(timestep, datetime.timedelta):
# Convert datetime timedelta to timedelta64[ns]
timestep = np.timedelta64(timestep, 'ns')
# Implement logic for handling timestep when it is not a datetime.timedelta instance


# everything is forced to be incremental before this step so we can use cumsum to get the cumulative values
inflow_df = (
Expand All @@ -213,12 +220,9 @@ def _incremental_to_cumulative(df) -> pd.DataFrame:
# Create output inflow netcdf data
logging.info("Writing inflows to file")
os.makedirs(inflow_dir, exist_ok=True)

inflow_df = inflow_df * .001

datetime_array = inflow_df.index.to_numpy()
start_date = datetime.datetime.utcfromtimestamp(datetime_array[0].astype(float) / 1e9).strftime('%Y%m%d')
end_date = datetime.datetime.utcfromtimestamp(datetime_array[-1].astype(float) / 1e9).strftime('%Y%m%d')
start_date = datetime.datetime.fromtimestamp(datetime_array[0].astype(float) / 1e9, datetime.UTC).strftime('%Y%m%d')
end_date = datetime.datetime.fromtimestamp(datetime_array[-1].astype(float) / 1e9, datetime.UTC).strftime('%Y%m%d')
rileyhales marked this conversation as resolved.
Show resolved Hide resolved
file_name = f'm3_{vpu_name}_{start_date}_{end_date}.nc'
if file_label is not None:
file_name = f'm3_{vpu_name}_{start_date}_{end_date}_{file_label}.nc'
Expand Down
71 changes: 34 additions & 37 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
import glob
import os
import sys
import datetime

import netCDF4 as nc

# Add the project_root directory to the Python path
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
os.chdir(project_root)
sys.path.append(project_root)

from basininflow.inflow import create_inflow_file
import basininflow as bi


def check_function(validation_ds, output_ds, test):
Expand All @@ -36,9 +36,6 @@ def check_function(validation_ds, output_ds, test):
continue
assert (output_ds[key][:] == validation_ds[key][:]).all(), f"{key} values differ"

# Check m3 values match
assert (output_ds['m3_riv'][:] == validation_ds['m3_riv'][:]).all(), "m3 values do not match."

# Check time bounds match
assert (output_ds['time_bnds'][:] == validation_ds['time_bnds'][:]).all(), "time bounds do not match."

Expand All @@ -49,8 +46,10 @@ def check_function(validation_ds, output_ds, test):
assert (output_ds['lat'][:] == validation_ds['lat'][:]).all(), "lat values do not match."

# Check CRS is EPSG 4326
assert output_ds['crs'].epsg_code == validation_ds[
'crs'].epsg_code, f"CRS is not EPSG 4326. CRS is {output_ds['crs'].epsg_code}"
assert output_ds['crs'].epsg_code == validation_ds['crs'].epsg_code, f"crs does not match"

# Check m3 values match
assert (output_ds['m3_riv'][:] == validation_ds['m3_riv'][:]).all(), "m3 values do not match."

print("All tests passed.")

Expand All @@ -62,38 +61,36 @@ def check_function(validation_ds, output_ds, test):
output_ds.close()
validation_ds.close()


# TEST 1: Normal inputs, directory of LSM
create_inflow_file('tests/inputs/era5_721x1440_sample_data/',
'tests/test_vpu/123',
'tests/test_results/',)
bi.create_inflow_file('tests/inputs/era5_721x1440_sample_data/', 'tests/test_vpu/123', 'tests/test_results/',
cumulative=False, force_positive_runoff=True)

out_ds = nc.Dataset(glob.glob('./tests/test_results/*_123_*.nc')[0], 'r')
val_ds = nc.Dataset('tests/validation/1980_01_01to10_123.nc', 'r')

check_function(val_ds, out_ds, 'TEST 1: Normal inputs')

# TEST 2: Forecast inputs, auto timestep
create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
'tests/test_vpu/345',
'tests/test_results/',
cumulative=True)

out_ds = nc.Dataset(glob.glob('./tests/test_results/*_345_*.nc')[0], 'r')
val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')

check_function(val_ds, out_ds, 'TEST 2: Forecast inputs, auto timestep')

# TEST 3: Forecast inputs, 1 hour timestep
create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
'tests/test_vpu/345',
'tests/test_results/',
vpu_name='custom_vpu',
cumulative=True,
timestep=datetime.timedelta(hours=3),
file_label='file_label')

out_ds = nc.Dataset(glob.glob('./tests/test_results/*_custom_vpu_*_file_label.nc')[0], 'r')
val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')

check_function(val_ds, out_ds, 'TEST 3: Forecast inputs, auto timestep')
check_function(val_ds, out_ds, 'TEST 1: Base. Constant Timestep, Incremental Runoff, Force Positive Runoff')

# # TEST 2: Forecast inputs, auto timestep
# bi.create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
# 'tests/test_vpu/345',
# 'tests/test_results/',
# cumulative=True)
#
# out_ds = nc.Dataset(glob.glob('./tests/test_results/*_345_*.nc')[0], 'r')
# val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
#
# check_function(val_ds, out_ds, 'TEST 2: Forecast inputs, auto timestep')
#
# # TEST 3: Forecast inputs, 1 hour timestep
# bi.create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
# 'tests/test_vpu/345',
# 'tests/test_results/',
# vpu_name='custom_vpu',
# cumulative=True,
# timestep=datetime.timedelta(hours=3),
# file_label='file_label')
#
# out_ds = nc.Dataset(glob.glob('./tests/test_results/*_custom_vpu_*_file_label.nc')[0], 'r')
# val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
#
# check_function(val_ds, out_ds, 'TEST 3: Forecast inputs, auto timestep')
Comment on lines +73 to +96
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TEST 2 and TEST 3 have been commented out. If these tests are no longer relevant due to the new changes, they should be removed instead of being commented out to keep the codebase clean. If they are temporarily disabled, there should be a TODO or FIXME comment explaining why and what needs to be done to re-enable them.

- # # TEST 2: Forecast inputs, auto timestep
- # ...
- # # TEST 3: Forecast inputs, 1 hour timestep
- # ...
+ # TODO: Re-enable or remove these tests after addressing the changes related to the new parameters.

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
# # TEST 2: Forecast inputs, auto timestep
# bi.create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
# 'tests/test_vpu/345',
# 'tests/test_results/',
# cumulative=True)
#
# out_ds = nc.Dataset(glob.glob('./tests/test_results/*_345_*.nc')[0], 'r')
# val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
#
# check_function(val_ds, out_ds, 'TEST 2: Forecast inputs, auto timestep')
#
# # TEST 3: Forecast inputs, 1 hour timestep
# bi.create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
# 'tests/test_vpu/345',
# 'tests/test_results/',
# vpu_name='custom_vpu',
# cumulative=True,
# timestep=datetime.timedelta(hours=3),
# file_label='file_label')
#
# out_ds = nc.Dataset(glob.glob('./tests/test_results/*_custom_vpu_*_file_label.nc')[0], 'r')
# val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
#
# check_function(val_ds, out_ds, 'TEST 3: Forecast inputs, auto timestep')
# TODO: Re-enable or remove these tests after addressing the changes related to the new parameters.

Loading