Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERF: release gil for ewma_time #37389

Merged
merged 12 commits into from
Nov 4, 2020
49 changes: 29 additions & 20 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
# cython: boundscheck=False, wraparound=False, cdivision=True

import cython
from cython import Py_ssize_t

from libcpp.deque cimport deque

import numpy as np

cimport numpy as cnp
from numpy cimport float32_t, float64_t, int64_t, ndarray, uint8_t
from numpy cimport float32_t, float64_t, int64_t, ndarray

cnp.import_array()

Expand Down Expand Up @@ -1398,7 +1397,7 @@ def roll_weighted_var(float64_t[:] values, float64_t[:] weights,
# ----------------------------------------------------------------------
# Exponentially weighted moving average

def ewma_time(ndarray[float64_t] vals, int minp, ndarray[int64_t] times,
def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times,
int64_t halflife):
"""
Compute exponentially-weighted moving average using halflife and time
Expand All @@ -1416,30 +1415,40 @@ def ewma_time(ndarray[float64_t] vals, int minp, ndarray[int64_t] times,
ndarray
"""
cdef:
Py_ssize_t i, num_not_nan = 0, N = len(vals)
Py_ssize_t i, j, num_not_nan = 0, N = len(vals)
bint is_not_nan
float64_t last_result
ndarray[uint8_t] mask = np.zeros(N, dtype=np.uint8)
ndarray[float64_t] weights, observations, output = np.empty(N, dtype=np.float64)
float64_t last_result, weights_dot, weights_sum, weight, halflife_float
float64_t[:] times_float
float64_t[:] observations = np.zeros(N, dtype=float)
float64_t[:] times_masked = np.zeros(N, dtype=float)
ndarray[float64_t] output = np.empty(N, dtype=float)
fangchenli marked this conversation as resolved.
Show resolved Hide resolved

if N == 0:
return output

halflife_float = <float64_t>halflife
times_float = times.astype(float)
last_result = vals[0]

for i in range(N):
is_not_nan = vals[i] == vals[i]
num_not_nan += is_not_nan
if is_not_nan:
mask[i] = 1
weights = 0.5 ** ((times[i] - times[mask.view(np.bool_)]) / halflife)
fangchenli marked this conversation as resolved.
Show resolved Hide resolved
observations = vals[mask.view(np.bool_)]
last_result = np.sum(weights * observations) / np.sum(weights)

if num_not_nan >= minp:
output[i] = last_result
else:
output[i] = NaN
with nogil:
for i in range(N):
is_not_nan = vals[i] == vals[i]
num_not_nan += is_not_nan
if is_not_nan:
times_masked[num_not_nan-1] = times_float[i]
observations[num_not_nan-1] = vals[i]

weights_sum = 0
weights_dot = 0
for j in range(num_not_nan):
weight = 0.5 ** (
(times_float[i] - times_masked[j]) / halflife_float)
weights_sum += weight
weights_dot += weight * observations[j]

last_result = weights_dot / weights_sum

output[i] = last_result if num_not_nan >= minp else NaN
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

return output

Expand Down