
PERF: release gil for ewma_time #37389

Merged · 12 commits · Nov 4, 2020
48 changes: 28 additions & 20 deletions pandas/_libs/window/aggregations.pyx
@@ -1,14 +1,13 @@
# cython: boundscheck=False, wraparound=False, cdivision=True

import cython
from cython import Py_ssize_t

from libcpp.deque cimport deque

import numpy as np

cimport numpy as cnp
from numpy cimport float32_t, float64_t, int64_t, ndarray, uint8_t
from numpy cimport float32_t, float64_t, int64_t, ndarray

cnp.import_array()

@@ -1369,7 +1368,7 @@ def roll_weighted_var(float64_t[:] values, float64_t[:] weights,
# ----------------------------------------------------------------------
# Exponentially weighted moving average

def ewma_time(ndarray[float64_t] vals, int minp, ndarray[int64_t] times,
def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times,
int64_t halflife):
"""
Compute exponentially-weighted moving average using halflife and time
@@ -1387,30 +1386,39 @@ def ewma_time(ndarray[float64_t] vals, int minp, ndarray[int64_t] times,
ndarray
"""
cdef:
Py_ssize_t i, num_not_nan = 0, N = len(vals)
Py_ssize_t i, j, num_not_nan = 0, N = len(vals)
bint is_not_nan
float64_t last_result
ndarray[uint8_t] mask = np.zeros(N, dtype=np.uint8)
ndarray[float64_t] weights, observations, output = np.empty(N, dtype=np.float64)
float64_t last_result, weights_dot, weights_sum, weight
float64_t[:] times_float
float64_t[:] observations = np.zeros(N, dtype=float)
float64_t[:] times_masked = np.zeros(N, dtype=float)
ndarray[float64_t] output = np.empty(N, dtype=float)
float64_t[:] output_view = output

if N == 0:
return output

times_float = times.astype(float)
last_result = vals[0]

for i in range(N):
is_not_nan = vals[i] == vals[i]
num_not_nan += is_not_nan
if is_not_nan:
mask[i] = 1
weights = 0.5 ** ((times[i] - times[mask.view(np.bool_)]) / halflife)
observations = vals[mask.view(np.bool_)]
last_result = np.sum(weights * observations) / np.sum(weights)

if num_not_nan >= minp:
output[i] = last_result
else:
output[i] = NaN
with nogil:
for i in range(N):
is_not_nan = vals[i] == vals[i]
num_not_nan += is_not_nan
if is_not_nan:
times_masked[num_not_nan-1] = times_float[i]
observations[num_not_nan-1] = vals[i]

weights_sum = 0
weights_dot = 0
for j in range(num_not_nan):
weight = 0.5 ** ((times_float[i] - times_masked[j]) / halflife)
weights_sum += weight
weights_dot += weight * observations[j]
Member:
It looks like you've reimplemented the dot product because np.sum can't be called inside a nogil block? Unless there's a real perf benefit, I'd rather avoid this.

Member Author:
You are right. I'll play with it more to see if I can find the root cause of the inconsistent performance.
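
For context on this exchange, a minimal standalone sketch (not part of the PR's diff) of the pattern being discussed: NumPy reductions such as np.sum acquire the GIL, so inside a with nogil block the weighted sum has to be accumulated with an explicit loop over typed memoryviews. The function and variable names below are illustrative only.

# Hypothetical sketch, not from the PR: a nogil-safe weighted mean over
# typed memoryviews, mirroring the weights_sum/weights_dot loop above.
from cython import Py_ssize_t
from numpy cimport float64_t

cdef float64_t weighted_mean(const float64_t[:] obs,
                             const float64_t[:] weights) nogil:
    cdef:
        Py_ssize_t j
        Py_ssize_t n = obs.shape[0]
        float64_t w_sum = 0.0
        float64_t w_dot = 0.0
    for j in range(n):
        # plain C loop: no Python/NumPy calls, so the GIL can stay released
        w_sum += weights[j]
        w_dot += weights[j] * obs[j]
    # caller is expected to guarantee at least one nonzero weight
    return w_dot / w_sum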


last_result = weights_dot / weights_sum

output_view[i] = last_result if num_not_nan >= minp else NaN

return output
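
For reference, a hedged usage sketch of the public pandas entry point that exercises this kernel (assuming pandas >= 1.1, where ewm accepts a timedelta-like halflife together with times). Each past observation j contributes with weight 0.5 ** ((t_i - t_j) / halflife), matching the loop above.

# Usage sketch (assumed public API, not part of this diff): time-based EWMA.
import pandas as pd

times = pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-05"])
s = pd.Series([0.0, 1.0, 2.0])
# halflife is given as a timedelta string because `times` is supplied
result = s.ewm(halflife="1 day", times=times).mean()
print(result)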
