Skip to content

Commit

Permalink
use memoryviews instead of ndarrays (pandas-dev#22147)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and dberenbaum committed Aug 3, 2018
1 parent d2aaf00 commit 3f1f3b2
Show file tree
Hide file tree
Showing 12 changed files with 100 additions and 98 deletions.
8 changes: 4 additions & 4 deletions pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import cython

import numpy as np
from numpy cimport ndarray, uint8_t, uint32_t, uint64_t
from numpy cimport uint8_t, uint32_t, uint64_t

from util cimport _checknull
from cpython cimport (PyBytes_Check,
Expand All @@ -17,7 +17,7 @@ DEF dROUNDS = 4


@cython.boundscheck(False)
def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
def hash_object_array(object[:] arr, object key, object encoding='utf8'):
"""
Parameters
----------
Expand All @@ -37,7 +37,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
"""
cdef:
Py_ssize_t i, l, n
ndarray[uint64_t] result
uint64_t[:] result
bytes data, k
uint8_t *kb
uint64_t *lens
Expand Down Expand Up @@ -89,7 +89,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):

free(vecs)
free(lens)
return result
return result.base # .base to retrieve underlying np.ndarray


cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil:
Expand Down
11 changes: 5 additions & 6 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
# cython: profile=False
cimport cython
from cython cimport Py_ssize_t

from cpython cimport PyFloat_Check, PyUnicode_Check
Expand Down Expand Up @@ -37,8 +36,7 @@ from tslibs.np_datetime import OutOfBoundsDatetime
from tslibs.parsing import parse_datetime_string

from tslibs.timedeltas cimport cast_from_unit
from tslibs.timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
treat_tz_as_pytz, get_dst_info)
from tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info
from tslibs.conversion cimport (tz_convert_single, _TSObject,
convert_datetime_to_tsobject,
get_datetime64_nanos,
Expand Down Expand Up @@ -77,8 +75,7 @@ cdef inline object create_time_from_ts(
return time(dts.hour, dts.min, dts.sec, dts.us)


def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
box="datetime"):
def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"):
"""
Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp
Expand All @@ -102,7 +99,9 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,

cdef:
Py_ssize_t i, n = len(arr)
ndarray[int64_t] trans, deltas
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t pos
npy_datetimestruct dts
object dt
int64_t value, delta
Expand Down
50 changes: 27 additions & 23 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def ensure_datetime64ns(ndarray arr, copy=True):
"""
cdef:
Py_ssize_t i, n = arr.size
ndarray[int64_t] ivalues, iresult
int64_t[:] ivalues, iresult
NPY_DATETIMEUNIT unit
npy_datetimestruct dts

Expand Down Expand Up @@ -139,7 +139,7 @@ def ensure_timedelta64ns(ndarray arr, copy=True):
return arr.astype(TD_DTYPE, copy=copy)


def datetime_to_datetime64(ndarray[object] values):
def datetime_to_datetime64(object[:] values):
"""
Convert ndarray of datetime-like objects to int64 array representing
nanosecond timestamps.
Expand All @@ -156,7 +156,7 @@ def datetime_to_datetime64(ndarray[object] values):
cdef:
Py_ssize_t i, n = len(values)
object val, inferred_tz = None
ndarray[int64_t] iresult
int64_t[:] iresult
npy_datetimestruct dts
_TSObject _ts

Expand Down Expand Up @@ -525,7 +525,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
Sets obj.tzinfo inplace, alters obj.dts inplace.
"""
cdef:
ndarray[int64_t] trans, deltas
ndarray[int64_t] trans
int64_t[:] deltas
int64_t local_val
Py_ssize_t pos

Expand Down Expand Up @@ -631,15 +632,16 @@ cdef inline int64_t[:] _tz_convert_dst(ndarray[int64_t] values, tzinfo tz,
cdef:
Py_ssize_t n = len(values)
Py_ssize_t i, j, pos
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] tt, trans, deltas
ndarray[Py_ssize_t] posn
int64_t[:] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] tt, trans
int64_t[:] deltas
Py_ssize_t[:] posn
int64_t v

trans, deltas, typ = get_dst_info(tz)
if not to_utc:
# We add `offset` below instead of subtracting it
deltas = -1 * deltas
deltas = -1 * np.array(deltas, dtype='i8')

tt = values[values != NPY_NAT]
if not len(tt):
Expand Down Expand Up @@ -728,7 +730,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
converted: int64
"""
cdef:
ndarray[int64_t] trans, deltas
int64_t[:] deltas
Py_ssize_t pos
int64_t v, offset, utc_date
npy_datetimestruct dts
Expand Down Expand Up @@ -756,7 +758,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
else:
# Convert UTC to other timezone
arr = np.array([utc_date])
# Note: at least with cython 0.28.3, doing a looking `[0]` in the next
# Note: at least with cython 0.28.3, doing a lookup `[0]` in the next
# line is sensitive to the declared return type of _tz_convert_dst;
# if it is declared as returning ndarray[int64_t], a compile-time error
# is raised.
Expand All @@ -781,10 +783,9 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
"""

cdef:
ndarray[int64_t] utc_dates, tt, result, trans, deltas
ndarray[int64_t] utc_dates, result
Py_ssize_t i, j, pos, n = len(vals)
int64_t v, offset, delta
npy_datetimestruct dts
int64_t v

if len(vals) == 0:
return np.array([], dtype=np.int64)
Expand Down Expand Up @@ -843,7 +844,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
localized : ndarray[int64_t]
"""
cdef:
ndarray[int64_t] trans, deltas, idx_shifted
ndarray[int64_t] trans
int64_t[:] deltas, idx_shifted
ndarray ambiguous_array
Py_ssize_t i, idx, pos, ntrans, n = len(vals)
int64_t *tdata
Expand Down Expand Up @@ -1069,7 +1071,7 @@ def normalize_date(object dt):

@cython.wraparound(False)
@cython.boundscheck(False)
def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None):
def normalize_i8_timestamps(int64_t[:] stamps, tz=None):
"""
Normalize each of the (nanosecond) timestamps in the given array by
rounding down to the beginning of the day (i.e. midnight). If `tz`
Expand All @@ -1087,7 +1089,7 @@ def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None):
cdef:
Py_ssize_t i, n = len(stamps)
npy_datetimestruct dts
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
int64_t[:] result = np.empty(n, dtype=np.int64)

if tz is not None:
tz = maybe_get_tz(tz)
Expand All @@ -1101,12 +1103,12 @@ def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None):
dt64_to_dtstruct(stamps[i], &dts)
result[i] = _normalized_stamp(&dts)

return result
return result.base # .base to access underlying np.ndarray


@cython.wraparound(False)
@cython.boundscheck(False)
cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
cdef int64_t[:] _normalize_local(int64_t[:] stamps, object tz):
"""
Normalize each of the (nanosecond) timestamps in the given array by
rounding down to the beginning of the day (i.e. midnight) for the
Expand All @@ -1123,8 +1125,9 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
"""
cdef:
Py_ssize_t n = len(stamps)
ndarray[int64_t] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] trans, deltas
int64_t[:] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t[:] pos
npy_datetimestruct dts
int64_t delta
Expand Down Expand Up @@ -1190,7 +1193,7 @@ cdef inline int64_t _normalized_stamp(npy_datetimestruct *dts) nogil:
return dtstruct_to_dt64(dts)


def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
def is_date_array_normalized(int64_t[:] stamps, tz=None):
"""
Check if all of the given (nanosecond) timestamps are normalized to
midnight, i.e. hour == minute == second == 0. If the optional timezone
Expand All @@ -1206,8 +1209,9 @@ def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
is_normalized : bool True if all stamps are normalized
"""
cdef:
Py_ssize_t i, n = len(stamps)
ndarray[int64_t] trans, deltas
Py_ssize_t pos, i, n = len(stamps)
ndarray[int64_t] trans
int64_t[:] deltas
npy_datetimestruct dts
int64_t local_val, delta

Expand Down
5 changes: 2 additions & 3 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,7 @@ def build_field_sarray(ndarray[int64_t] dtindex):

@cython.wraparound(False)
@cython.boundscheck(False)
def get_date_name_field(ndarray[int64_t] dtindex, object field,
object locale=None):
def get_date_name_field(int64_t[:] dtindex, object field, object locale=None):
"""
Given an int64-based datetime index, return array of strings of date
name based on requested field (e.g. weekday_name)
Expand Down Expand Up @@ -134,7 +133,7 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field,


@cython.wraparound(False)
def get_start_end_field(ndarray[int64_t] dtindex, object field,
def get_start_end_field(int64_t[:] dtindex, object field,
object freqstr=None, int month_kw=12):
"""
Given an int64-based datetime index return array of indicators
Expand Down
37 changes: 18 additions & 19 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ from cpython.datetime cimport datetime
import time

import numpy as np
from numpy cimport ndarray

# Avoid import from outside _libs
if sys.version_info.major == 2:
Expand Down Expand Up @@ -381,11 +380,11 @@ cpdef object _get_rule_month(object source, object default='DEC'):
# Parsing for type-inference


def try_parse_dates(ndarray[object] values, parser=None,
def try_parse_dates(object[:] values, parser=None,
dayfirst=False, default=None):
cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result

n = len(values)
result = np.empty(n, dtype='O')
Expand Down Expand Up @@ -420,15 +419,15 @@ def try_parse_dates(ndarray[object] values, parser=None,
# raise if passed parser and it failed
raise

return result
return result.base # .base to access underlying ndarray


def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
def try_parse_date_and_time(object[:] dates, object[:] times,
date_parser=None, time_parser=None,
dayfirst=False, default=None):
cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result

n = len(dates)
if len(times) != n:
Expand Down Expand Up @@ -457,14 +456,14 @@ def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
result[i] = datetime(d.year, d.month, d.day,
t.hour, t.minute, t.second)

return result
return result.base # .base to access underlying ndarray


def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
ndarray[object] days):
def try_parse_year_month_day(object[:] years, object[:] months,
object[:] days):
cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result

n = len(years)
if len(months) != n or len(days) != n:
Expand All @@ -474,19 +473,19 @@ def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
for i in range(n):
result[i] = datetime(int(years[i]), int(months[i]), int(days[i]))

return result
return result.base # .base to access underlying ndarray


def try_parse_datetime_components(ndarray[object] years,
ndarray[object] months,
ndarray[object] days,
ndarray[object] hours,
ndarray[object] minutes,
ndarray[object] seconds):
def try_parse_datetime_components(object[:] years,
object[:] months,
object[:] days,
object[:] hours,
object[:] minutes,
object[:] seconds):

cdef:
Py_ssize_t i, n
ndarray[object] result
object[:] result
int secs
double float_secs
double micros
Expand All @@ -509,7 +508,7 @@ def try_parse_datetime_components(ndarray[object] years,
int(hours[i]), int(minutes[i]), secs,
int(micros))

return result
return result.base # .base to access underlying ndarray


# ----------------------------------------------------------------------
Expand Down
Loading

0 comments on commit 3f1f3b2

Please sign in to comment.