0.12 #13

Merged: 7 commits, merged Jan 19, 2024

basininflow/__init__.py (2 changes: 1 addition & 1 deletion)

```diff
@@ -1,4 +1,4 @@
 from basininflow.inflow import create_inflow_file

-__version__ = '0.11.0'
+__version__ = '0.12.0'
 __all__ = ['create_inflow_file', ]
```
basininflow/inflow.py (41 changes: 23 additions & 18 deletions)

```diff
@@ -30,6 +30,20 @@ def _memory_check(size: int, dtype: type = np.float32, ram_buffer_percentage: fl
                       {psutil._common.bytes2human(available_mem)} available memory...")

+
+def _cumulative_to_incremental(df) -> pd.DataFrame:
+    logging.info('Converting to incremental values')
+    return pd.DataFrame(
+        np.vstack([df.values[0, :], np.diff(df.values, axis=0)]),
+        index=df.index,
+        columns=df.columns
+    )
+
+
+def _incremental_to_cumulative(df) -> pd.DataFrame:
+    logging.info('Converting to cumulative values')
+    return df.cumsum()
```
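The two helpers moved to module level here are exact inverses for well-formed input, which is what makes the resampling step later in the file possible. A minimal round-trip check on toy data (the frame and values are made up for illustration; only `numpy` and `pandas` are assumed):

```python
import numpy as np
import pandas as pd

# Toy incremental runoff: 4 timesteps for 2 hypothetical stream ids
inc = pd.DataFrame(
    [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]],
    index=pd.date_range('2024-01-01', periods=4, freq='3h'),
    columns=['110', '120'],
)

# Same math as _incremental_to_cumulative / _cumulative_to_incremental
cum = inc.cumsum()
back = pd.DataFrame(
    np.vstack([cum.values[0, :], np.diff(cum.values, axis=0)]),
    index=cum.index,
    columns=cum.columns,
)

assert np.allclose(inc.values, back.values)  # round trip recovers the increments
```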


```diff
 def create_inflow_file(lsm_data: str,
                        input_dir: str,
                        inflow_dir: str,
@@ -38,7 +52,8 @@ def create_inflow_file(lsm_data: str,
                        comid_lat_lon_z: str = None,
                        timestep: datetime.timedelta = None,
                        cumulative: bool = False,
-                       file_label: str = None, ) -> None:
+                       file_label: str = None,
+                       enforce_positive_runoff: bool = False, ) -> None:
```
Review comment: The `enforce_positive_runoff` parameter has been added to the `create_inflow_file` function. This parameter should be documented in the function's docstring to explain its purpose and usage.
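A possible docstring entry for the new parameter, sketched here as a suggestion (the wording is inferred from the implementation, not text from the PR):

```python
    enforce_positive_runoff: bool
        If True, clip negative inflow values to zero after the
        cumulative/incremental conversion, rather than zeroing them
        unconditionally as version 0.11 did. Defaults to False.
        # suggested wording, not the author's
```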

"""
Generate inflow files for use with RAPID.

Expand Down Expand Up @@ -171,25 +186,16 @@ def create_inflow_file(lsm_data: str,

inflow_df = pd.DataFrame(inflow_df, columns=stream_ids, index=datetime_array)
inflow_df = inflow_df.replace(np.nan, 0)
inflow_df[inflow_df < 0] = 0
inflow_df = inflow_df * weight_df['area_sqm'].values * conversion_factor
inflow_df = inflow_df.groupby(by=stream_ids, axis=1).sum()
inflow_df = inflow_df.T.groupby(by=stream_ids).sum().T
inflow_df = inflow_df[sorted_rivid_array]

def _cumulative_to_incremental(df) -> pd.DataFrame:
return pd.DataFrame(
np.vstack([df.values[0, :], np.diff(df.values, axis=0)]),
index=df.index,
columns=df.columns
)

def _incremental_to_cumulative(df) -> pd.DataFrame:
return df.cumsum()

if cumulative:
logging.info('Converting to cumulative values')
inflow_df = _cumulative_to_incremental(inflow_df)

if enforce_positive_runoff:
inflow_df = inflow_df.clip(lower=0)
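On the `groupby` change above: `DataFrame.groupby(..., axis=1)` is deprecated in recent pandas, and transposing, grouping rows, then transposing back is the standard replacement. A quick equivalence sketch on toy data (my own illustration, not project code):

```python
import pandas as pd

# Several weight-table rows can feed the same stream id, hence duplicate column labels
stream_ids = [110, 110, 120]
df = pd.DataFrame([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], columns=stream_ids)

# Equivalent to the deprecated df.groupby(by=stream_ids, axis=1).sum()
summed = df.T.groupby(by=stream_ids).sum().T
print(summed)  # column 110 holds the sum of its two source columns
```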
Review comment: The implementation of the `enforce_positive_runoff` feature uses the `clip` method to ensure non-negative runoff values. This is a clean and efficient way to enforce the constraint. However, it would be beneficial to log a warning if any values are being clipped, to inform the user of potential data issues.

```diff
     if enforce_positive_runoff:
+        if (inflow_df < 0).any().any():
+            logging.warning('Negative runoff values detected and will be set to zero.')
         inflow_df = inflow_df.clip(lower=0)
```
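If more detail than a yes/no warning is wanted, the same idea can report how much data is affected. A self-contained sketch of a hypothetical helper (my own illustration, not code from the PR):

```python
import logging

import pandas as pd


def _clip_negative_runoff(inflow_df: pd.DataFrame) -> pd.DataFrame:
    """Hypothetical helper: clip negative runoff to zero and report how many values changed."""
    n_negative = int((inflow_df.to_numpy() < 0).sum())
    if n_negative:
        logging.warning(f'Clipping {n_negative} negative runoff values '
                        f'(min={inflow_df.to_numpy().min():.6f}) to zero.')
    return inflow_df.clip(lower=0)
```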




```diff
     # Check that all timesteps are the same
     time_diff = np.diff(datetime_array)
     if not np.all(time_diff == datetime_array[1] - datetime_array[0]):
@@ -198,9 +204,9 @@ def _incremental_to_cumulative(df) -> pd.DataFrame:
         logging.warning('Timesteps are not all uniform and a target timestep was not provided.')
         timestep = datetime_array[1] - datetime_array[0]
         logging.warning(f'Assuming the first timedelta is the target: {timestep.astype("timedelta64[s]")}')
-    elif isinstance(timestep,datetime.timedelta):
+    elif isinstance(timestep, datetime.timedelta):
         # Convert datetime timedelta to timedelta64[ns]
-        timestep = np.timedelta64(timestep,'ns')
+        timestep = np.timedelta64(timestep, 'ns')
```
Review comment (on lines +208 to +210): The logic for handling non-uniform timesteps seems to be incomplete. The `timestep` variable is checked for being an instance of `datetime.timedelta`, but there is no corresponding action if it is not. Ensure that the logic is complete and handles all expected cases.

```diff
     elif isinstance(timestep, datetime.timedelta):
+        # Implement logic for handling timestep when it is not a datetime.timedelta instance
```
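One way to close the gap the reviewer points out, sketched under the assumption that `timestep` should only ever be `None` or a `datetime.timedelta` (this hypothetical helper is my illustration, not code from the PR):

```python
import datetime

import numpy as np


def _coerce_timestep(timestep):
    """Hypothetical helper illustrating an exhaustive type check."""
    if timestep is None:
        return None  # caller infers the timestep from the data instead
    if isinstance(timestep, datetime.timedelta):
        # Convert datetime timedelta to timedelta64[ns]
        return np.timedelta64(timestep, 'ns')
    # Fail loudly on unexpected types instead of silently continuing
    raise TypeError(f'timestep must be a datetime.timedelta or None, got {type(timestep)}')
```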



```diff
     # everything is forced to be incremental before this step so we can use cumsum to get the cumulative values
     inflow_df = (
@@ -210,12 +216,11 @@ def _incremental_to_cumulative(df) -> pd.DataFrame:
     )
     inflow_df = _cumulative_to_incremental(inflow_df)

-    logging.info(np.max(inflow_df.values))
     # Create output inflow netcdf data
     logging.info("Writing inflows to file")
     os.makedirs(inflow_dir, exist_ok=True)

-    inflow_df = inflow_df * .001
-
     datetime_array = inflow_df.index.to_numpy()
     start_date = datetime.datetime.utcfromtimestamp(datetime_array[0].astype(float) / 1e9).strftime('%Y%m%d')
     end_date = datetime.datetime.utcfromtimestamp(datetime_array[-1].astype(float) / 1e9).strftime('%Y%m%d')
```
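The comment at the top of that hunk describes the resampling trick: incremental values are cumulated so the series can be resampled to a coarser timestep, then differenced back into increments. The exact resampling lines are collapsed in the diff, but the pattern itself can be demonstrated standalone (toy data, my own illustration; the project may implement this step differently):

```python
import numpy as np
import pandas as pd

# Hourly incremental runoff for one catchment (toy values)
inc = pd.DataFrame({'110': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]},
                   index=pd.date_range('2024-01-01', periods=6, freq='h'))

# Cumulate, sample the end of each 3-hour window, then difference back
cum = inc.cumsum()
cum_3h = cum.resample('3h').last()
inc_3h = pd.DataFrame(
    np.vstack([cum_3h.values[0, :], np.diff(cum_3h.values, axis=0)]),
    index=cum_3h.index,
    columns=cum_3h.columns,
)

print(inc_3h['110'].tolist())  # [6.0, 15.0]: each 3-hour total is preserved
```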
tests/tests.py (68 changes: 33 additions & 35 deletions)
```diff
@@ -14,15 +14,14 @@
 import glob
 import os
 import sys
-import datetime

 import netCDF4 as nc

 # Add the project_root directory to the Python path
 project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 sys.path.append(project_root)

-from basininflow.inflow import create_inflow_file
+import basininflow as bi


 def check_function(validation_ds, output_ds, test):
@@ -36,9 +35,6 @@ def check_function(validation_ds, output_ds, test):
             continue
         assert (output_ds[key][:] == validation_ds[key][:]).all(), f"{key} values differ"

-    # Check m3 values match
-    assert (output_ds['m3_riv'][:] == validation_ds['m3_riv'][:]).all(), "m3 values do not match."
-
     # Check time bounds match
     assert (output_ds['time_bnds'][:] == validation_ds['time_bnds'][:]).all(), "time bounds do not match."

@@ -49,8 +45,10 @@ def check_function(validation_ds, output_ds, test):
     assert (output_ds['lat'][:] == validation_ds['lat'][:]).all(), "lat values do not match."

     # Check CRS is EPSG 4326
-    assert output_ds['crs'].epsg_code == validation_ds[
-        'crs'].epsg_code, f"CRS is not EPSG 4326. CRS is {output_ds['crs'].epsg_code}"
+    assert output_ds['crs'].epsg_code == validation_ds['crs'].epsg_code, f"crs does not match"
+
+    # Check m3 values match
+    assert (output_ds['m3_riv'][:] == validation_ds['m3_riv'][:]).all(), "m3 values do not match."

     print("All tests passed.")

@@ -62,38 +60,38 @@ def check_function(validation_ds, output_ds, test):
     output_ds.close()
     validation_ds.close()

+
+os.chdir('/Users/rchales/code/basininflow/')
 # TEST 1: Normal inputs, directory of LSM
-create_inflow_file('tests/inputs/era5_721x1440_sample_data/',
-                   'tests/test_vpu/123',
-                   'tests/test_results/',)
+bi.create_inflow_file('tests/inputs/era5_721x1440_sample_data/',
+                      'tests/test_vpu/123',
+                      'tests/test_results/',)

 out_ds = nc.Dataset(glob.glob('./tests/test_results/*_123_*.nc')[0], 'r')
 val_ds = nc.Dataset('tests/validation/1980_01_01to10_123.nc', 'r')

 check_function(val_ds, out_ds, 'TEST 1: Normal inputs')

-# TEST 2: Forecast inputs, auto timestep
-create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
-                   'tests/test_vpu/345',
-                   'tests/test_results/',
-                   cumulative=True)
-
-out_ds = nc.Dataset(glob.glob('./tests/test_results/*_345_*.nc')[0], 'r')
-val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
-
-check_function(val_ds, out_ds, 'TEST 2: Forecast inputs, auto timestep')
-
-# TEST 3: Forecast inputs, 1 hour timestep
-create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
-                   'tests/test_vpu/345',
-                   'tests/test_results/',
-                   vpu_name='custom_vpu',
-                   cumulative=True,
-                   timestep=datetime.timedelta(hours=3),
-                   file_label='file_label')
-
-out_ds = nc.Dataset(glob.glob('./tests/test_results/*_custom_vpu_*_file_label.nc')[0], 'r')
-val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
-
-check_function(val_ds, out_ds, 'TEST 3: Forecast inputs, auto timestep')
+# # TEST 2: Forecast inputs, auto timestep
+# create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
+#                    'tests/test_vpu/345',
+#                    'tests/test_results/',
+#                    cumulative=True)
+#
+# out_ds = nc.Dataset(glob.glob('./tests/test_results/*_345_*.nc')[0], 'r')
+# val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
+#
+# check_function(val_ds, out_ds, 'TEST 2: Forecast inputs, auto timestep')
+#
+# # TEST 3: Forecast inputs, 1 hour timestep
+# create_inflow_file('tests/inputs/era5_2560x5120_sample_data/forecast_data.nc',
+#                    'tests/test_vpu/345',
+#                    'tests/test_results/',
+#                    vpu_name='custom_vpu',
+#                    cumulative=True,
+#                    timestep=datetime.timedelta(hours=3),
+#                    file_label='file_label')
+#
+# out_ds = nc.Dataset(glob.glob('./tests/test_results/*_custom_vpu_*_file_label.nc')[0], 'r')
+# val_ds = nc.Dataset('tests/validation/forecast_3_to_6_hour.nc', 'r')
+#
+# check_function(val_ds, out_ds, 'TEST 3: Forecast inputs, auto timestep')
```
Review comment: The tests for TEST 2 and TEST 3 have been commented out. Please provide context for this change. Will these tests be updated or replaced to accommodate the new features?
