Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Error in rolling_var if window is larger than array, fixes #7297 #7572

Merged
merged 1 commit into from
Jul 2, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/v0.14.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ Bug Fixes
- Bug in non-monotonic ``Index.union`` may preserve ``name`` incorrectly (:issue:`7458`)
- Bug in ``DatetimeIndex.intersection`` doesn't preserve timezone (:issue:`4690`)

- Bug in ``rolling_var`` where a window larger than the array would raise an error (:issue:`7297`)

- Bug with last plotted timeseries dictating ``xlim`` (:issue:`2960`)
- Bug with ``secondary_y`` axis not being considered for timeseries ``xlim`` (:issue:`3490`)

Expand Down
9 changes: 9 additions & 0 deletions pandas/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1173,6 +1173,10 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):

minp = _check_minp(win, minp, N)

# Check for windows larger than array, addresses #7297
win = min(win, N)

# Over the first window, observations can only be added, never removed
for i from 0 <= i < win:
val = input[i]

Expand All @@ -1196,23 +1200,27 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):

output[i] = val

# After the first window, observations can both be added and removed
for i from win <= i < N:
val = input[i]
prev = input[i - win]

if val == val:
if prev == prev:
# Adding one observation and removing another one
delta = val - prev
prev -= mean_x
mean_x += delta / nobs
val -= mean_x
ssqdm_x += (val + prev) * delta
else:
# Adding one observation and not removing any
nobs += 1
delta = (val - mean_x)
mean_x += delta / nobs
ssqdm_x += delta * (val - mean_x)
elif prev == prev:
# Adding no new observation, but removing one
nobs -= 1
if nobs:
delta = (prev - mean_x)
Expand All @@ -1221,6 +1229,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
else:
mean_x = 0
ssqdm_x = 0
# Variance is unchanged if no observation is added or removed

if nobs >= minp:
#pathological case
Expand Down
24 changes: 23 additions & 1 deletion pandas/stats/tests/test_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,8 @@ def _check_ndarray(self, func, static_comp, window=50,
preserve_nan=True,
has_center=True,
fill_value=None,
test_stable=False):
test_stable=False,
test_window=True):

result = func(self.arr, window)
assert_almost_equal(result[-1],
Expand Down Expand Up @@ -429,6 +430,27 @@ def _check_ndarray(self, func, static_comp, window=50,
assert_almost_equal(result[-1],
static_comp(self.arr[-50:] + 1e9))

# Test window larger than array, #7297
if test_window:
if has_min_periods:
for minp in (0, len(self.arr)-1, len(self.arr)):
result = func(self.arr, len(self.arr)+1, min_periods=minp)
expected = func(self.arr, len(self.arr), min_periods=minp)
nan_mask = np.isnan(result)
self.assertTrue(np.array_equal(nan_mask,
np.isnan(expected)))
nan_mask = ~nan_mask
assert_almost_equal(result[nan_mask], expected[nan_mask])
else:
result = func(self.arr, len(self.arr)+1)
expected = func(self.arr, len(self.arr))
nan_mask = np.isnan(result)
self.assertTrue(np.array_equal(nan_mask, np.isnan(expected)))
nan_mask = ~nan_mask
assert_almost_equal(result[nan_mask], expected[nan_mask])




def _check_structures(self, func, static_comp,
has_min_periods=True, has_time_rule=True,
Expand Down