openclimatefix · peterdudfield · Feb 20, 2024 · Feb 20, 2024 · Feb 20, 2024 · Feb 20, 2024
diff --git a/exp_reports/015_recent_power/readme.md b/exp_reports/015_recent_power/readme.md
@@ -0,0 +1,24 @@
+# Recent Power
+
+The idea is to add some recent power values.
+The model currently uses only the average power over the last 30 minutes. It would be good to give the actual values as well.
+
+We used `uk_pv.py` configuration, but removed the satellite input. We used 6 recent power values. 
+
+We want to A/B test and see the difference. The total MAE over all horizons is:
+
+|  | Model | Model with recent power |
+|---------|-------|-------------------------|
+| test   | 0.140 ± 0.020      | 0.122 ± 0.017                     |
+| train   |0.192 ± 0.027       |  0.182 ± 0.026                    |
+
+And per horizon, for the test set only (the test set is 2020-01-01 to 2021-11-00):
+
+| Horizon | MAE  | MAE with recent power |
+|---------|------|--------|
+| 0-15    | 0.14 | 0.12   |
+| 15-30   | 0.17 | 0.17   |
+| 30-45   | 0.19 | 0.19   |
+| 45-60   | 0.21 | 0.21   |
+
+So adding the recent power values only makes a difference for the first (0-15) forecast horizon.
diff --git a/psp/models/multi.py b/psp/models/multi.py
@@ -47,12 +47,16 @@ def get_features_without_pv(self, x: X, is_training: bool = False) -> Features:
         # List of features derived from 'pv'
         pv_derived_features = ["recent_power", "h_max", "h_median", "h_mean"]
         nan_pv_derived_features = ["recent_power_nan", "h_max_nan", "h_median_nan", "h_mean_nan"]
-        for feature in pv_derived_features:
+        recent_power_values_features = [f for f in features if f.startswith("recent_power_values")]
+        recent_power_values_nans = [
+            f for f in features if (f.startswith("recent_power_values")) and ("isnan" in f)
+        ]
+        for feature in pv_derived_features + recent_power_values_features:
             if feature in features:
                 # Set the value to NaN
                 features[feature] = np.full_like(features[feature], np.nan)
 
-        for feature in nan_pv_derived_features:
+        for feature in nan_pv_derived_features + recent_power_values_nans:
             if feature in features:
                 # Set the value to 1
                 features[feature] = np.full_like(features[feature], 1)

diff --git a/psp/models/recent_history.py b/psp/models/recent_history.py
@@ -141,6 +141,7 @@ def __init__(
         satellite_dropout: float = 0.1,
         satellite_tolerance: Optional[float] = None,
         satellite_patch_size: float = 0.25,
+        n_recent_power_values: int = 0,
     ):
         """
         Arguments:
@@ -200,6 +201,7 @@ def __init__(
         self._satellite_dropout = satellite_dropout
         self._satellite_tolerance = satellite_tolerance
         self._satellite_patch_size = satellite_patch_size
+        self._n_recent_power_values = n_recent_power_values
 
         self.set_data_sources(
             pv_data_source=pv_data_source,
@@ -432,7 +434,6 @@ def _get_features(self, x: X, is_training: bool) -> Features:
 
         # add another section here for getting the satellite data
         if self._satellite_data_sources is not None:
-
             # add the forecast horizon to the features. This is because the satellite data is
             # only available for the current time step, but not as a forecast, compared to NWP
             # which are available at all timesteps
@@ -457,7 +458,6 @@ def _get_features(self, x: X, is_training: bool) -> Features:
                 ):
                     satellite_data = None
                 else:
-
                     if self._satellite_patch_size > 0:
                         satellite_data = source.get(
                             now=x.ts,
@@ -527,6 +527,33 @@ def _get_features(self, x: X, is_training: bool) -> Features:
         scalar_features["recent_power"] = 0.0 if recent_power_nan else recent_power
         scalar_features["recent_power_nan"] = recent_power_nan * 1.0
 
+        # recent power values
+        recent_power_values = data.sel(
+            ts=slice(x.ts - timedelta(minutes=recent_power_minutes), x.ts)
+        ).values
+
+        # make sure recent power values is the right length
+        if not hasattr(self, "_n_recent_power_values"):
+            self._n_recent_power_values = 0
+        if len(recent_power_values) < self._n_recent_power_values:
+            recent_power_values = np.pad(
+                recent_power_values,
+                (0, self._n_recent_power_values - len(recent_power_values)),
+                "constant",
+                constant_values=np.nan,
+            )
+        elif len(recent_power_values) > self._n_recent_power_values:
+            recent_power_values = recent_power_values[
+                len(recent_power_values) - self._n_recent_power_values :
+            ]
 def get_features_without_pv(self, x: X, is_training: bool = False) -> Features: 
 def get_features_without_pv(self, x: X, is_training: bool = False) -> Features: 
+
+        if self._normalize_features:
+            recent_power_values = safe_div(recent_power_values, poa_global_now * capacity)
+
+        for i, value in enumerate(recent_power_values):
+            scalar_features[f"recent_power_values_{i}"] = value
+            scalar_features[f"recent_power_values_{i}_isnan"] = np.isnan(value) * 1.0
+
         if self._version >= 2:
             scalar_features["poa_global_now_is_zero"] = poa_global_now == 0.0
 

diff --git a/psp/scripts/eval_model.py b/psp/scripts/eval_model.py
@@ -270,6 +270,14 @@ def main(
 
     df = pd.DataFrame.from_records(error_rows)
 
+    # print out the mae per horizon
+    mae_per_horizon = {}
+    for horizon in model_config.horizons:
+
+        mae = df[df["horizon"] == horizon[0]]["error"].abs().mean()
+        mae_per_horizon[horizon[0]] = mae
+        print(f"MAE for horizon {horizon[0]}: {mae:.2f}")
+
     exp_name = exp_name or dt.datetime.now().isoformat()
 
     output_dir = exp_root / (new_exp_name or exp_name)