pola-rs · ritchie46 · Aug 22, 2024 · Aug 18, 2024 · Aug 18, 2024 · ritchie46
@@ -79,11 +79,15 @@ pub fn rolling_corr(x: Expr, y: Expr, options: RollingCovOptions) -> Expr {
         ..Default::default()
     };
 
+    let non_null_mask = when(x.clone().is_not_null().and(y.clone().is_not_null()))
+        .then(lit(1.0))
+        .otherwise(lit(Null {}));
+
     let mean_x_y = (x.clone() * y.clone()).rolling_mean(rolling_options.clone());
-    let mean_x = x.clone().rolling_mean(rolling_options.clone());
-    let mean_y = y.clone().rolling_mean(rolling_options.clone());
-    let var_x = x.clone().rolling_var(rolling_options.clone());
-    let var_y = y.clone().rolling_var(rolling_options);
+    let mean_x = (x.clone() * non_null_mask.clone()).rolling_mean(rolling_options.clone());
+    let mean_y = (y.clone() * non_null_mask.clone()).rolling_mean(rolling_options.clone());
+    let var_x = (x.clone() * non_null_mask.clone()).rolling_var(rolling_options.clone());
+    let var_y = (y.clone() * non_null_mask.clone()).rolling_var(rolling_options);
 
     let rolling_options_count = RollingOptionsFixedWindow {
         window_size: options.window_size as usize,
@@ -110,9 +114,13 @@ pub fn rolling_cov(x: Expr, y: Expr, options: RollingCovOptions) -> Expr {
         ..Default::default()
     };
 
+    let non_null_mask = when(x.clone().is_not_null().and(y.clone().is_not_null()))
+        .then(lit(1.0))
+        .otherwise(lit(Null {}));
+
     let mean_x_y = (x.clone() * y.clone()).rolling_mean(rolling_options.clone());
-    let mean_x = x.clone().rolling_mean(rolling_options.clone());
-    let mean_y = y.clone().rolling_mean(rolling_options);
+    let mean_x = (x.clone() * non_null_mask.clone()).rolling_mean(rolling_options.clone());
+    let mean_y = (y.clone() * non_null_mask.clone()).rolling_mean(rolling_options);
     let rolling_options_count = RollingOptionsFixedWindow {
         window_size: options.window_size as usize,
         min_periods: 0,

@@ -589,6 +589,36 @@ def test_rolling_cov_corr() -> None:
     assert res["corr"][:2] == [None] * 2
 
 
+def test_rolling_cov_corr_nulls() -> None:  # nulls must not skew rolling corr/cov
+    df1 = pl.DataFrame(  # reference frame: five rows, no nulls
+        {"a": [1.06, 1.07, 0.93, 0.78, 0.85], "lag_a": [1.0, 1.06, 1.07, 0.93, 0.78]}
+    )
+    df2 = pl.DataFrame(  # df1's five rows preceded by one row whose lag_a is None
+        {
+            "a": [1.0, 1.06, 1.07, 0.93, 0.78, 0.85],
+            "lag_a": [None, 1.0, 1.06, 1.07, 0.93, 0.78],
+        }
+    )
+
+    val_1 = df1.select(  # last rolling_corr value on the null-free frame
+        pl.rolling_corr("a", "lag_a", window_size=10, min_periods=5, ddof=1).tail(1)
+    ).item()
+    val_2 = df2.select(  # same expression on the frame with the leading null
+        pl.rolling_corr("a", "lag_a", window_size=10, min_periods=5, ddof=1).tail(1)
+    ).item()
+
+    assert val_1 == val_2  # the extra null row must not change the result
+
+    val_1 = df1.select(  # repeat the same comparison for rolling_cov
+        pl.rolling_cov("a", "lag_a", window_size=10, min_periods=5, ddof=1).tail(1)
+    ).item()
+    val_2 = df2.select(
+        pl.rolling_cov("a", "lag_a", window_size=10, min_periods=5, ddof=1).tail(1)
+    ).item()
+
+    assert val_1 == val_2  # NOTE(review): exact float ==; consider pytest.approx
+
+
 @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
 def test_rolling_empty_window_9406(time_unit: TimeUnit) -> None:
     datecol = pl.Series(