Add more tests to dynamic surrogates (#206)

* new branch of dynamic_surrogates for PR * remove figures in FE case study * add header and correct the test data directory * update test files * update test files * update test files * update test files * update test files * update test files * update test files * some changes according to the review * updates * add jupyter notebook * use pathlib, add jupyternotebook and test utils * modified jupyter notebook * 'merge_3_case_studies__version_update_1' * update_for_milestone * delete_prototyped_model * update_tests * remove_unnecessary_tests * fix_header * delete_unnecessary_file_for_v1 * updata_notebook_and_codes * remove old notebook * update CI workflow and notebook * update notebook * solve setup conflicts * add more tests to surrogate related classes * solve notebook failures * fix errors in tests * fix pathlib failure --------- Co-authored-by: Ludovico Bianchi <lbianchi@lbl.gov>
gmlc-dispatches · Jun 27, 2023 · a2a7bb9 · a2a7bb9
1 parent e1771b4
commit a2a7bb9
Show file tree

Hide file tree

Showing 6 changed files with 109 additions and 17 deletions.
diff --git a/dispatches/workflow/train_market_surrogates/dynamic/Time_Series_Clustering.py b/dispatches/workflow/train_market_surrogates/dynamic/Time_Series_Clustering.py
@@ -21,7 +21,7 @@
 import matplotlib.pyplot as plt
 from tslearn.clustering import TimeSeriesKMeans
 from tslearn.utils import to_time_series_dataset
-
+from dispatches.workflow.train_market_surrogates.dynamic.Simulation_Data import SimulationData
 
 plt.rcParams["figure.figsize"] = (12,9)
 
@@ -93,10 +93,10 @@ def simulation_data(self, value):
         Returns:
             None
         '''
-        
-        if not isinstance(value, object):
+
+        if not isinstance(value, SimulationData):
             raise TypeError(
-                f"The simulation_data must be an object, but {type(value)} is given."
+                f"The simulation_data must be created from SimulationData."
             )
         self._simulation_data = value
 
@@ -218,7 +218,7 @@ def filter_opt(self, value):
 
         if not isinstance(value, bool):
             raise TypeError(
-                f"filter_opt must be bool, but {type(value)} is given"
+                f"Filter_opt must be bool, but {type(value)} is given"
             )
 
         self._filter_opt = value

diff --git a/dispatches/workflow/train_market_surrogates/dynamic/Train_Market_Surrogates.ipynb b/dispatches/workflow/train_market_surrogates/dynamic/Train_Market_Surrogates.ipynb
@@ -35,9 +35,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from Simulation_Data import SimulationData\n",
-    "from Train_NN_Surrogates import TrainNNSurrogates\n",
-    "from Time_Series_Clustering import TimeSeriesClustering\n",
+    "from dispatches.workflow.train_market_surrogates.dynamic.Simulation_Data import SimulationData\n",
+    "from dispatches.workflow.train_market_surrogates.dynamic.Train_NN_Surrogates import TrainNNSurrogates\n",
+    "from dispatches.workflow.train_market_surrogates.dynamic.Time_Series_Clustering import TimeSeriesClustering\n",
     "from dispatches_data.api import path\n",
     "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
@@ -371,10 +371,10 @@
      "text": [
       "Making NN Predictions...\n",
       "Evaluate on test data\n",
-      "2/2 [==============================] - 0s 4ms/step - loss: 0.0857\n",
-      "0.08574607968330383\n",
-      "2/2 [==============================] - 0s 2ms/step\n",
-      "The R2 of dispatch surrogate validation is [0.998719440157763, 0.9196373502907574, 0.9753977486468058, 0.9299650474944201, 0.9526091940837623, 0.9745306328784323, 0.7702591151474634, 0.9796533447012646, 0.9164874951813002, 0.8800394823542108, 0.9844977579100551, 0.9933069133141726, 0.9547329781120841, 0.9041499695371183, 0.9529481992062682, 0.9885302918745016, 0.9405889439394363, 0.9922014559222286, 0.9626928318745459, 0.9724886481659819, 0.6666589071264845, 0.9505532118905716, 0.9278975194643461, 0.9120099955189791, 0.8293439309428298, 0.835064688362013, 0.9868603177780476, 0.9646843914651182, 0.7163130065596263, 0.933469158650037, 0.911631319217196, 0.983073033605537]\n"
+      "2/2 [==============================] - 0s 1ms/step - loss: 0.0825\n",
+      "0.08251975476741791\n",
+      "2/2 [==============================] - 0s 1ms/step\n",
+      "The R2 of dispatch surrogate validation is [0.9989807572567754, 0.9195106356137187, 0.9782616859946545, 0.9482914871901337, 0.9534049294253228, 0.9748667211817922, 0.7623754710380003, 0.9813145200458163, 0.932079755000075, 0.88119231733833, 0.9866646654042197, 0.9901543189902491, 0.9550007170260163, 0.9041126851595744, 0.9585682137492805, 0.9910971153220763, 0.9360610980088924, 0.9919506493271321, 0.964832898638907, 0.9727630565191323, 0.7106237287845176, 0.9576939969602464, 0.9302283326232771, 0.9101037421779389, 0.827072154228201, 0.8399282348457692, 0.9896055965549824, 0.9633592380884979, 0.7089304789077371, 0.9313112338903713, 0.9112210271674019, 0.9862573525188446]\n"
      ]
     }
    ],

diff --git a/dispatches/workflow/train_market_surrogates/dynamic/Train_NN_Surrogates.py b/dispatches/workflow/train_market_surrogates/dynamic/Train_NN_Surrogates.py
@@ -26,6 +26,7 @@
 from tensorflow import keras
 from tensorflow.keras import layers
 from tensorflow.keras.optimizers import Adam
+from dispatches.workflow.train_market_surrogates.dynamic.Simulation_Data import SimulationData
 
 
 class TrainNNSurrogates:
@@ -93,9 +94,9 @@ def simulation_data(self, value):
             None
         '''
 
-        if not isinstance(value, object):
+        if not isinstance(value, SimulationData):
             raise TypeError(
-                f"The simulation_data must be an object, but {type(value)} is given."
+                f"The simulation_data must be created from SimulationData."
             )
         self._simulation_data = value
 
@@ -133,7 +134,7 @@ def data_file(self, value):
             None
         '''
 
-        if not isinstance(value, str):
+        if not (isinstance(value, str) or isinstance(value, pathlib.WindowsPath) or isinstance(value, pathlib.PosixPath)):
             raise TypeError(
                 f"The data_file must be str or object, but {type(value)} is given."
             )
@@ -175,7 +176,7 @@ def filter_opt(self, value):
 
         if not isinstance(value, bool):
             raise TypeError(
-                f"filter_opt must be bool, but {type(value)} is given"
+                f"Filter_opt must be bool, but {type(value)} is given"
             )
 
         self._filter_opt = value

diff --git a/dispatches/workflow/train_market_surrogates/dynamic/tests/test_Simulation_Data.py b/dispatches/workflow/train_market_surrogates/dynamic/tests/test_Simulation_Data.py
@@ -28,6 +28,7 @@
 from dispatches.workflow.train_market_surrogates.dynamic.Simulation_Data import SimulationData
 
 
+
 def _get_data_path(file_name: str, package: str = "dispatches.workflow.train_market_surrogates.dynamic.tests.data") -> Path:
     with resources.path(package, file_name) as p:
         return Path(p)
@@ -104,6 +105,34 @@ def test_create_SimulationData(sample_simulation_data, sample_input_data_NE, num
     assert simulation_data.case_type == case_type_NE
 
 
+@pytest.mark.unit
+def test_invalid_num_sims(sample_simulation_data, sample_input_data_NE, num_sims, case_type_NE):
+    non_int_sims = "10"  # a str instead of an int, so a TypeError is expected
+    with pytest.raises(TypeError, match=r".*The number of clustering years must be positive integer,*"):
+        SimulationData(sample_simulation_data, sample_input_data_NE, non_int_sims, case_type_NE)
+
+
+@pytest.mark.unit
+def test_invalid_num_sims_2(sample_simulation_data, sample_input_data_NE, num_sims, case_type_NE):
+    negative_sims = -1  # an int, but not positive, so a ValueError is expected
+    with pytest.raises(ValueError, match=r".*The number of simulation years must be positive integer,*"):
+        SimulationData(sample_simulation_data, sample_input_data_NE, negative_sims, case_type_NE)
+
+
+@pytest.mark.unit
+def test_invalid_case_type_not_str(sample_simulation_data, sample_input_data_NE, num_sims, case_type_NE):
+    case_type = ["NE"]  # a list, not a str; NOTE(review): confirm SimulationData raises ValueError (not TypeError) for a wrong type
+    with pytest.raises(ValueError, match=r".*The value of case_type must be str*"):
+        SimulationData(sample_simulation_data, sample_input_data_NE, num_sims, case_type)
+
+
+@pytest.mark.unit
+def test_invalid_case_type_value(sample_simulation_data, sample_input_data_NE, num_sims, case_type_NE):
+    case_type = "BE"  # a str, but not one of the case types named in the expected message
+    with pytest.raises(ValueError, match=r".*The case_type must be one of 'RE','NE' or 'FE',*"):
+        SimulationData(sample_simulation_data, sample_input_data_NE, num_sims, case_type)
+
+
 @pytest.mark.unit
 def test_read_data_to_array(base_simulationdata_NE):
     dispatch_array, index = base_simulationdata_NE._read_data_to_array()
@@ -159,6 +188,12 @@ def test_read_RE_pmax(base_simulationdata_RE):
     )
 
 
+@pytest.mark.unit
+def test_invalid_RE_gen_name(base_simulationdata_RE):
+    with pytest.raises(NameError, match=r".*wind generator name*"):
+        base_simulationdata_RE._read_RE_pmax(wind_gen="111_WIND_1")  # expected to be rejected with NameError
+
+
 @pytest.mark.unit
 def test_read_FE_pmax(base_simulationdata_FE):
     test_pmax = base_simulationdata_FE._read_FE_pmax()
@@ -222,4 +257,5 @@ def test_read_wind_data(base_simulationdata_RE):
 
     pyo_unittest.assertStructuredAlmostEqual(
         first=data_shape, second=expect_shape
-    )
+    )
+
diff --git a/dispatches/workflow/train_market_surrogates/dynamic/tests/test_Time_Series_Clustering.py b/dispatches/workflow/train_market_surrogates/dynamic/tests/test_Time_Series_Clustering.py
@@ -143,6 +143,40 @@ def test_create_TimeSeriesClustering(base_simulationdata_NE, num_clusters, filte
     assert tsc.metric == metric
 
 
+@pytest.mark.unit
+def test_create_RE_with_filter(base_simulationdata_RE, num_clusters, filter_opt, metric):
+    with pytest.raises(TypeError, match=r".*cannot have set the filter_opt to*"):
+        TimeSeriesClustering(base_simulationdata_RE, num_clusters, filter_opt, metric)  # RE data with this filter_opt must fail
+
+
+@pytest.mark.unit
+def test_invalid_simulation_data(base_simulationdata_NE, num_clusters, filter_opt, metric):
+    not_simulation_data = "simulation_data"  # a plain str, not a SimulationData instance
+    with pytest.raises(TypeError, match=r".*The simulation_data must be created from SimulationData.*"):
+        TimeSeriesClustering(not_simulation_data, num_clusters, filter_opt, metric)
+
+
+@pytest.mark.unit
+def test_invalid_metric(base_simulationdata_NE, num_clusters, filter_opt, metric):
+    unknown_metric = "abc"  # neither "euclidean" nor "dtw", the two names in the expected message
+    with pytest.raises(ValueError, match=r".*The metric must be one of euclidean or dtw, but*"):
+        TimeSeriesClustering(base_simulationdata_NE, num_clusters, filter_opt, unknown_metric)
+
+
+@pytest.mark.unit
+def test_invalid_num_clusters(base_simulationdata_NE, num_clusters, filter_opt, metric):
+    non_int_clusters = "123"  # digits in a str are still not an int
+    with pytest.raises(TypeError, match=r".*Number of clusters must be integer, but*"):
+        TimeSeriesClustering(base_simulationdata_NE, non_int_clusters, filter_opt, metric)
+
+
+@pytest.mark.unit
+def test_invalid_filter_opt(base_simulationdata_NE, num_clusters, filter_opt, metric):
+    non_bool_filter = "True"  # the str "True" is not the bool True
+    with pytest.raises(TypeError, match=r".*Filter_opt must be bool, but*"):
+        TimeSeriesClustering(base_simulationdata_NE, num_clusters, non_bool_filter, metric)
+
+
 @pytest.mark.unit
 def test_transform_data_NE(base_timeseriesclustering_NE):
     train_data = base_timeseriesclustering_NE._transform_data()

diff --git a/dispatches/workflow/train_market_surrogates/dynamic/tests/test_Train_NN_Surrogates.py b/dispatches/workflow/train_market_surrogates/dynamic/tests/test_Train_NN_Surrogates.py
@@ -79,6 +79,27 @@ def base_NNtrainer(base_simulationdata, data_file, filter_opt):
     return TrainNNSurrogates(base_simulationdata, str(data_file), filter_opt)
 
 
+@pytest.mark.unit
+def test_invalid_simulation_data(base_simulationdata, data_file, filter_opt):
+    not_simulation_data = "simulation_data"  # a plain str, not a SimulationData instance
+    with pytest.raises(TypeError, match=r".*The simulation_data must be created from SimulationData.*"):
+        TrainNNSurrogates(not_simulation_data, data_file, filter_opt)
+
+
+@pytest.mark.unit
+def test_invalid_data_file(base_simulationdata, data_file, filter_opt):
+    non_path_data_file = 123  # an int, neither a str nor a pathlib path
+    with pytest.raises(TypeError, match=r".*The data_file must be str or object, but*"):
+        TrainNNSurrogates(base_simulationdata, non_path_data_file, filter_opt)
+
+
+@pytest.mark.unit
+def test_invalid_filter_opt(base_simulationdata, data_file, filter_opt):
+    non_bool_filter = "True"  # the str "True" is not the bool True
+    with pytest.raises(TypeError, match=r".*Filter_opt must be bool, but*"):
+        TrainNNSurrogates(base_simulationdata, data_file, non_bool_filter)
+
+
 @pytest.mark.unit
 def test_create_TrainNNSurrogates(base_simulationdata, data_file, filter_opt):
     NNtrainer = TrainNNSurrogates(base_simulationdata, str(data_file), filter_opt)