diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 571e2dc2692b0..63517f532f7e5 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -271,6 +271,8 @@ Bug Fixes - Bug in non-monotonic ``Index.union`` may preserve ``name`` incorrectly (:issue:`7458`) - Bug in ``DatetimeIndex.intersection`` doesn't preserve timezone (:issue:`4690`) +- Bug in ``rolling_var`` where a window larger than the array would raise an error (:issue:`7297`) + - Bug with last plotted timeseries dictating ``xlim`` (:issue:`2960`) - Bug with ``secondary_y`` axis not being considered for timeseries ``xlim`` (:issue:`3490`) diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 431ef97debae6..2a07272acd0e8 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -1173,6 +1173,10 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1): minp = _check_minp(win, minp, N) + # Check for windows larger than array, addresses #7297 + win = min(win, N) + + # Over the first window, observations can only be added, never removed for i from 0 <= i < win: val = input[i] @@ -1196,23 +1200,27 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1): output[i] = val + # After the first window, observations can both be added and removed for i from win <= i < N: val = input[i] prev = input[i - win] if val == val: if prev == prev: + # Adding one observation and removing another one delta = val - prev prev -= mean_x mean_x += delta / nobs val -= mean_x ssqdm_x += (val + prev) * delta else: + # Adding one observation and not removing any nobs += 1 delta = (val - mean_x) mean_x += delta / nobs ssqdm_x += delta * (val - mean_x) elif prev == prev: + # Adding no new observation, but removing one nobs -= 1 if nobs: delta = (prev - mean_x) @@ -1221,6 +1229,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1): else: mean_x = 0 ssqdm_x = 0 + # Variance is unchanged if no observation is added or removed if nobs >= minp: #pathological case diff --git 
a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 6cd187ddf8981..8f20a4d421045 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -366,7 +366,8 @@ def _check_ndarray(self, func, static_comp, window=50, preserve_nan=True, has_center=True, fill_value=None, - test_stable=False): + test_stable=False, + test_window=True): result = func(self.arr, window) assert_almost_equal(result[-1], @@ -429,6 +430,27 @@ def _check_ndarray(self, func, static_comp, window=50, assert_almost_equal(result[-1], static_comp(self.arr[-50:] + 1e9)) + # Test window larger than array, #7297 + if test_window: + if has_min_periods: + for minp in (0, len(self.arr)-1, len(self.arr)): + result = func(self.arr, len(self.arr)+1, min_periods=minp) + expected = func(self.arr, len(self.arr), min_periods=minp) + nan_mask = np.isnan(result) + self.assertTrue(np.array_equal(nan_mask, + np.isnan(expected))) + nan_mask = ~nan_mask + assert_almost_equal(result[nan_mask], expected[nan_mask]) + else: + result = func(self.arr, len(self.arr)+1) + expected = func(self.arr, len(self.arr)) + nan_mask = np.isnan(result) + self.assertTrue(np.array_equal(nan_mask, np.isnan(expected))) + nan_mask = ~nan_mask + assert_almost_equal(result[nan_mask], expected[nan_mask]) + + + def _check_structures(self, func, static_comp, has_min_periods=True, has_time_rule=True,