0.12 #13

Merged: 7 commits, merged Jan 19, 2024

basininflow/__init__.py (2 changes: 1 addition & 1 deletion)

```diff
@@ -1,4 +1,4 @@
 from basininflow.inflow import create_inflow_file

-__version__ = '0.11.0'
+__version__ = '0.12.0'
 __all__ = ['create_inflow_file', ]
```
basininflow/inflow.py (41 changes: 23 additions & 18 deletions)

```diff
@@ -30,6 +30,20 @@ def _memory_check(size: int, dtype: type = np.float32, ram_buffer_percentage: fl
                       {psutil._common.bytes2human(available_mem)} available memory...")

+
+def _cumulative_to_incremental(df) -> pd.DataFrame:
+    logging.info('Converting to incremental values')
+    return pd.DataFrame(
+        np.vstack([df.values[0, :], np.diff(df.values, axis=0)]),
+        index=df.index,
+        columns=df.columns
+    )
+
+
+def _incremental_to_cumulative(df) -> pd.DataFrame:
+    logging.info('Converting to cumulative values')
+    return df.cumsum()
```
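The two helpers moved to module level here are exact inverses for well-formed input, which is what makes the resampling step later in the file possible. A minimal round-trip check on toy data (the frame and values are made up for illustration; only `numpy` and `pandas` are assumed):

```python
import numpy as np
import pandas as pd

# Toy incremental runoff: 4 timesteps for 2 hypothetical stream ids
inc = pd.DataFrame(
    [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]],
    index=pd.date_range('2024-01-01', periods=4, freq='3h'),
    columns=['110', '120'],
)

# Same math as _incremental_to_cumulative / _cumulative_to_incremental
cum = inc.cumsum()
back = pd.DataFrame(
    np.vstack([cum.values[0, :], np.diff(cum.values, axis=0)]),
    index=cum.index,
    columns=cum.columns,
)

assert np.allclose(inc.values, back.values)  # round trip recovers the increments
```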


```diff
 def create_inflow_file(lsm_data: str,
                        input_dir: str,
                        inflow_dir: str,
@@ -38,7 +52,8 @@ def create_inflow_file(lsm_data: str,
                        comid_lat_lon_z: str = None,
                        timestep: datetime.timedelta = None,
                        cumulative: bool = False,
-                       file_label: str = None, ) -> None:
+                       file_label: str = None,
+                       enforce_positive_runoff: bool = False, ) -> None:
```
Review comment: The `enforce_positive_runoff` parameter has been added to the `create_inflow_file` function. This parameter should be documented in the function's docstring to explain its purpose and usage.
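A possible docstring entry for the new parameter, sketched here as a suggestion (the wording is inferred from the implementation, not text from the PR):

```python
    enforce_positive_runoff: bool
        If True, clip negative inflow values to zero after the
        cumulative/incremental conversion, rather than zeroing them
        unconditionally as version 0.11 did. Defaults to False.
        # suggested wording, not the author's
```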

"""
Generate inflow files for use with RAPID.

Expand Down Expand Up @@ -171,25 +186,16 @@ def create_inflow_file(lsm_data: str,

inflow_df = pd.DataFrame(inflow_df, columns=stream_ids, index=datetime_array)
inflow_df = inflow_df.replace(np.nan, 0)
inflow_df[inflow_df < 0] = 0
inflow_df = inflow_df * weight_df['area_sqm'].values * conversion_factor
inflow_df = inflow_df.groupby(by=stream_ids, axis=1).sum()
inflow_df = inflow_df.T.groupby(by=stream_ids).sum().T
inflow_df = inflow_df[sorted_rivid_array]

def _cumulative_to_incremental(df) -> pd.DataFrame:
return pd.DataFrame(
np.vstack([df.values[0, :], np.diff(df.values, axis=0)]),
index=df.index,
columns=df.columns
)

def _incremental_to_cumulative(df) -> pd.DataFrame:
return df.cumsum()

if cumulative:
logging.info('Converting to cumulative values')
inflow_df = _cumulative_to_incremental(inflow_df)

if enforce_positive_runoff:
inflow_df = inflow_df.clip(lower=0)
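On the `groupby` change above: `DataFrame.groupby(..., axis=1)` is deprecated in recent pandas, and transposing, grouping rows, then transposing back is the standard replacement. A quick equivalence sketch on toy data (my own illustration, not project code):

```python
import pandas as pd

# Several weight-table rows can feed the same stream id, hence duplicate column labels
stream_ids = [110, 110, 120]
df = pd.DataFrame([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], columns=stream_ids)

# Equivalent to the deprecated df.groupby(by=stream_ids, axis=1).sum()
summed = df.T.groupby(by=stream_ids).sum().T
print(summed)  # column 110 holds the sum of its two source columns
```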
Review comment: The implementation of the `enforce_positive_runoff` feature uses the `clip` method to ensure non-negative runoff values. This is a clean and efficient way to enforce the constraint. However, it would be beneficial to log a warning if any values are being clipped, to inform the user of potential data issues.

```diff
     if enforce_positive_runoff:
+        if (inflow_df < 0).any().any():
+            logging.warning('Negative runoff values detected and will be set to zero.')
         inflow_df = inflow_df.clip(lower=0)
```
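If more detail than a yes/no warning is wanted, the same idea can report how much data is affected. A self-contained sketch of a hypothetical helper (my own illustration, not code from the PR):

```python
import logging

import pandas as pd


def _clip_negative_runoff(inflow_df: pd.DataFrame) -> pd.DataFrame:
    """Hypothetical helper: clip negative runoff to zero and report how many values changed."""
    n_negative = int((inflow_df.to_numpy() < 0).sum())
    if n_negative:
        logging.warning(f'Clipping {n_negative} negative runoff values '
                        f'(min={inflow_df.to_numpy().min():.6f}) to zero.')
    return inflow_df.clip(lower=0)
```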




```diff
     # Check that all timesteps are the same
     time_diff = np.diff(datetime_array)
     if not np.all(time_diff == datetime_array[1] - datetime_array[0]):
@@ -198,9 +204,9 @@ def _incremental_to_cumulative(df) -> pd.DataFrame:
         logging.warning('Timesteps are not all uniform and a target timestep was not provided.')
         timestep = datetime_array[1] - datetime_array[0]
         logging.warning(f'Assuming the first timedelta is the target: {timestep.astype("timedelta64[s]")}')
-    elif isinstance(timestep,datetime.timedelta):
+    elif isinstance(timestep, datetime.timedelta):
         # Convert datetime timedelta to timedelta64[ns]
-        timestep = np.timedelta64(timestep,'ns')
+        timestep = np.timedelta64(timestep, 'ns')
```
Review comment (on lines +208 to +210): The logic for handling non-uniform timesteps seems to be incomplete. The `timestep` variable is checked for being an instance of `datetime.timedelta`, but there is no corresponding action if it is not. Ensure that the logic is complete and handles all expected cases.

```diff
     elif isinstance(timestep, datetime.timedelta):
+        # Implement logic for handling timestep when it is not a datetime.timedelta instance
```
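One way to close the gap the reviewer points out, sketched under the assumption that `timestep` should only ever be `None` or a `datetime.timedelta` (this hypothetical helper is my illustration, not code from the PR):

```python
import datetime

import numpy as np


def _coerce_timestep(timestep):
    """Hypothetical helper illustrating an exhaustive type check."""
    if timestep is None:
        return None  # caller infers the timestep from the data instead
    if isinstance(timestep, datetime.timedelta):
        # Convert datetime timedelta to timedelta64[ns]
        return np.timedelta64(timestep, 'ns')
    # Fail loudly on unexpected types instead of silently continuing
    raise TypeError(f'timestep must be a datetime.timedelta or None, got {type(timestep)}')
```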



```diff
     # everything is forced to be incremental before this step so we can use cumsum to get the cumulative values
     inflow_df = (
@@ -210,12 +216,11 @@ def _incremental_to_cumulative(df) -> pd.DataFrame:
     )
     inflow_df = _cumulative_to_incremental(inflow_df)

-    logging.info(np.max(inflow_df.values))
     # Create output inflow netcdf data
     logging.info("Writing inflows to file")
     os.makedirs(inflow_dir, exist_ok=True)

-    inflow_df = inflow_df * .001
-
     datetime_array = inflow_df.index.to_numpy()
     start_date = datetime.datetime.utcfromtimestamp(datetime_array[0].astype(float) / 1e9).strftime('%Y%m%d')
     end_date = datetime.datetime.utcfromtimestamp(datetime_array[-1].astype(float) / 1e9).strftime('%Y%m%d')
```
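The comment at the top of that hunk describes the resampling trick: incremental values are cumulated so the series can be resampled to a coarser timestep, then differenced back into increments. The exact resampling lines are collapsed in the diff, but the pattern itself can be demonstrated standalone (toy data, my own illustration; the project may implement this step differently):

```python
import numpy as np
import pandas as pd

# Hourly incremental runoff for one catchment (toy values)
inc = pd.DataFrame({'110': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]},
                   index=pd.date_range('2024-01-01', periods=6, freq='h'))

# Cumulate, sample the end of each 3-hour window, then difference back
cum = inc.cumsum()
cum_3h = cum.resample('3h').last()
inc_3h = pd.DataFrame(
    np.vstack([cum_3h.values[0, :], np.diff(cum_3h.values, axis=0)]),
    index=cum_3h.index,
    columns=cum_3h.columns,
)

print(inc_3h['110'].tolist())  # [6.0, 15.0]: each 3-hour total is preserved
```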
tests/tests.py (68 changes: 33 additions & 35 deletions)
```diff
@@ -14,15 +14,14 @@
 import glob
 import os
 import sys
-import datetime

 import netCDF4 as nc

 # Add the project_root directory to the Python path
 project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 sys.path.append(project_root)

-from basininflow.inflow import create_inflow_file
+import basininflow as bi


 def check_function(validation_ds, output_ds, test):
@@ -36,9 +35,6 @@ def check_function(validation_ds, output_ds, test):
             continue
         assert (output_ds[key][:] == validation_ds[key][:]).all(), f"{key} values differ"

-    # Check m3 values match
-    assert (output_ds['m3_riv'][:] == validation_ds['m3_riv'][:]).all(), "m3 values do not match."
-
     # Check time bounds match
     assert (output_ds['time_bnds'][:] == validation_ds['time_bnds'][:]).all(), "time bounds do not match."

@@ -49,8 +45,10 @@ def check_function(validation_ds, output_ds, test):
     assert (output_ds['lat'][:] == validation_ds['lat'][:]).all(), "lat values do not match."

     # Check CRS is EPSG 4326
-    assert output_ds['crs'].epsg_code == validation_ds[
-        'crs'].epsg_code, f"CRS is not EPSG 4326. CRS is {output_ds['crs'].epsg_code}"
+    assert output_ds['crs'].epsg_code == validation_ds['crs'].epsg_code, f"crs does not match"
+
+    # Check m3 values match
+    assert (output_ds['m3_riv'][:] == validation_ds['m3_riv'][:]).all(), "m3 values do not match."

     print("All tests passed.")

@@ -62,38 +60,38 @@ def check_function(validation_ds, output_ds, test):
     output_ds.close()
     validation_ds.close()

+
+os.chdir('/Users/rchales/code/basininflow/')
 # TEST 1: Normal inputs, directory of LSM
-create_inflow_file('tests/inputs/era5_721x1440_sample_data/',
-                   'tests/test_vpu/123',
-                   'tests/test_results/',)
+bi.create_inflow_file('tests/inputs/era5_721x1440_sample_data/',
+                      'tests/test_vpu/123',
+                      'tests/test_results/',)

 out_ds = nc.Dataset(glob.glob('./tests/test_results/*_123_*.nc')[0], 'r')
 val_ds = nc.Dataset('tests/validation/1980_01_01to10_123.nc', 'r')

 check_function(val_ds, out_ds, 'TEST 1: Normal inputs')

-# TEST 2: Forecast inputs, auto timestep
-create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
-                   'tests/test_vpu/345',
-                   'tests/test_results/',
-                   cumulative=True)
-
-out_ds = nc.Dataset(glob.glob('./tests/test_results/*_345_*.nc')[0], 'r')
-val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
-
-check_function(val_ds, out_ds, 'TEST 2: Forecast inputs, auto timestep')
-
-# TEST 3: Forecast inputs, 1 hour timestep
-create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
-                   'tests/test_vpu/345',
-                   'tests/test_results/',
-                   vpu_name='custom_vpu',
-                   cumulative=True,
-                   timestep=datetime.timedelta(hours=3),
-                   file_label='file_label')
-
-out_ds = nc.Dataset(glob.glob('./tests/test_results/*_custom_vpu_*_file_label.nc')[0], 'r')
-val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
-
-check_function(val_ds, out_ds, 'TEST 3: Forecast inputs, auto timestep')
+# # TEST 2: Forecast inputs, auto timestep
+# create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
+#                    'tests/test_vpu/345',
+#                    'tests/test_results/',
+#                    cumulative=True)
+#
+# out_ds = nc.Dataset(glob.glob('./tests/test_results/*_345_*.nc')[0], 'r')
+# val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
+#
+# check_function(val_ds, out_ds, 'TEST 2: Forecast inputs, auto timestep')
+#
+# # TEST 3: Forecast inputs, 1 hour timestep
+# create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
+#                    'tests/test_vpu/345',
+#                    'tests/test_results/',
+#                    vpu_name='custom_vpu',
+#                    cumulative=True,
+#                    timestep=datetime.timedelta(hours=3),
+#                    file_label='file_label')
+#
+# out_ds = nc.Dataset(glob.glob('./tests/test_results/*_custom_vpu_*_file_label.nc')[0], 'r')
+# val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
+#
+# check_function(val_ds, out_ds, 'TEST 3: Forecast inputs, auto timestep')
```
Review comment: The tests for TEST 2 and TEST 3 have been commented out. Please provide context for this change. Will these tests be updated or replaced to accommodate the new features?
