From 2ec957b0c2e87ae06eba6b4c9421088ce2ad4f19 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 12 Sep 2018 04:33:43 -0700 Subject: [PATCH] pythonize cython code (#22638) --- .coveragerc | 30 ------------------ pandas/_libs/algos.pyx | 6 ++-- pandas/_libs/hashing.pyx | 16 ++-------- pandas/_libs/index.pyx | 15 ++++----- pandas/_libs/internals.pyx | 47 +++++++++++++++++------------ pandas/_libs/interval.pyx | 2 +- pandas/_libs/lib.pyx | 42 ++++++++++++-------------- pandas/_libs/missing.pyx | 13 ++++---- pandas/_libs/ops.pyx | 11 +++---- pandas/_libs/parsers.pyx | 14 ++++----- pandas/_libs/properties.pyx | 2 +- pandas/_libs/reduction.pyx | 2 +- pandas/_libs/sparse.pyx | 2 +- pandas/_libs/testing.pyx | 7 +++++ pandas/_libs/tslib.pyx | 8 ++--- pandas/_libs/tslibs/ccalendar.pyx | 4 +-- pandas/_libs/tslibs/conversion.pyx | 4 +-- pandas/_libs/tslibs/fields.pyx | 4 +-- pandas/_libs/tslibs/frequencies.pyx | 4 +-- pandas/_libs/tslibs/nattype.pyx | 1 - pandas/_libs/tslibs/np_datetime.pyx | 4 +-- pandas/_libs/tslibs/offsets.pyx | 4 +-- pandas/_libs/tslibs/parsing.pyx | 2 +- pandas/_libs/tslibs/period.pyx | 3 +- pandas/_libs/tslibs/resolution.pyx | 2 +- pandas/_libs/tslibs/strptime.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 6 ++-- pandas/_libs/tslibs/timezones.pyx | 2 +- pandas/_libs/window.pyx | 4 +-- pandas/_libs/writers.pyx | 14 +++++---- setup.cfg | 30 ++++++++++++++++++ 31 files changed, 152 insertions(+), 155 deletions(-) delete mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 13baa100b84b7..0000000000000 --- a/.coveragerc +++ /dev/null @@ -1,30 +0,0 @@ -# .coveragerc to control coverage.py -[run] -branch = False -omit = */tests/* -plugins = Cython.Coverage - -[report] -# Regexes for lines to exclude from consideration -exclude_lines = - # Have to re-enable the standard pragma - pragma: no cover - - # Don't complain about missing debug-only code: - def __repr__ - if self\.debug - - # Don't complain if tests don't hit defensive assertion code: - raise AssertionError - raise NotImplementedError - AbstractMethodError - - # Don't complain if non-runnable code isn't run: - if 0: - if __name__ == .__main__.: - -ignore_errors = False -show_missing = True - -[html] -directory = coverage_html_report diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 249033b8636bd..415e7026e09c8 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from libc.stdlib cimport malloc, free from libc.string cimport memmove @@ -114,7 +114,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr): @cython.wraparound(False) @cython.boundscheck(False) -def is_lexsorted(list list_of_arrays): +def is_lexsorted(list_of_arrays: list) -> bint: cdef: Py_ssize_t i Py_ssize_t n, nlevels diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 88b4d97de492c..c2305c8f3ff00 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -3,7 +3,6 @@ # at https://github.com/veorq/SipHash import cython -from cpython cimport PyBytes_Check, PyUnicode_Check from libc.stdlib cimport malloc, free import numpy as np @@ -44,6 +43,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): char **vecs char *cdata object val + list datas = [] k = key.encode(encoding) kb = k @@ -57,12 +57,11 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): vecs = malloc(n * sizeof(char *)) lens = malloc(n * sizeof(uint64_t)) - cdef list datas = [] for i in range(n): val = arr[i] - if PyBytes_Check(val): + if isinstance(val, bytes): data = val - elif PyUnicode_Check(val): + elif isinstance(val, unicode): data = val.encode(encoding) elif val is None or is_nan(val): # null, stringify and encode @@ -132,15 +131,6 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1, v2[0] = _rotl(v2[0], 32) -# TODO: This appears unused; remove? -cpdef uint64_t siphash(bytes data, bytes key) except? 0: - if len(key) != 16: - raise ValueError("key should be a 16-byte bytestring, " - "got {key} (len {klen})" - .format(key=key, klen=len(key))) - return low_level_siphash(data, len(data), key) - - @cython.cdivision(True) cdef uint64_t low_level_siphash(uint8_t* data, size_t datalen, uint8_t* key) nogil: diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index d5846f2b42378..562c1ba218141 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -1,10 +1,7 @@ # -*- coding: utf-8 -*- from datetime import datetime, timedelta, date -cimport cython - -from cpython cimport PyTuple_Check, PyList_Check -from cpython.slice cimport PySlice_Check +import cython import numpy as np cimport numpy as cnp @@ -30,15 +27,15 @@ cdef int64_t iNaT = util.get_nat() cdef inline bint is_definitely_invalid_key(object val): - if PyTuple_Check(val): + if isinstance(val, tuple): try: hash(val) except TypeError: return True # we have a _data, means we are a NDFrame - return (PySlice_Check(val) or util.is_array(val) - or PyList_Check(val) or hasattr(val, '_data')) + return (isinstance(val, slice) or util.is_array(val) + or isinstance(val, list) or hasattr(val, '_data')) cpdef get_value_at(ndarray arr, object loc, object tz=None): @@ -88,7 +85,7 @@ cdef class IndexEngine: void* data_ptr loc = self.get_loc(key) - if PySlice_Check(loc) or util.is_array(loc): + if isinstance(loc, slice) or util.is_array(loc): return arr[loc] else: return get_value_at(arr, loc, tz=tz) @@ -640,7 +637,7 @@ cdef class BaseMultiIndexCodesEngine: def get_loc(self, object key): if is_definitely_invalid_key(key): raise TypeError("'{key}' is an invalid key".format(key=key)) - if not PyTuple_Check(key): + if not isinstance(key, tuple): raise KeyError(key) try: indices = [0 if checknull(v) else lev.get_loc(v) + 1 diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 996570dae3302..681530ed494d7 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -1,10 +1,9 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from cpython cimport PyObject -from cpython.slice cimport PySlice_Check cdef extern from "Python.h": Py_ssize_t PY_SSIZE_T_MAX @@ -30,14 +29,15 @@ cdef class BlockPlacement: cdef bint _has_slice, _has_array, _is_known_slice_like def __init__(self, val): - cdef slice slc + cdef: + slice slc self._as_slice = None self._as_array = None self._has_slice = False self._has_array = False - if PySlice_Check(val): + if isinstance(val, slice): slc = slice_canonize(val) if slc.start != slc.stop: @@ -55,7 +55,8 @@ cdef class BlockPlacement: self._has_array = True def __str__(self): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() if s is not None: v = self._as_slice else: @@ -66,15 +67,17 @@ cdef class BlockPlacement: __repr__ = __str__ def __len__(self): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() if s is not None: return slice_len(s) else: return len(self._as_array) def __iter__(self): - cdef slice s = self._ensure_has_slice() - cdef Py_ssize_t start, stop, step, _ + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t start, stop, step, _ if s is not None: start, stop, step, _ = slice_get_indices_ex(s) return iter(range(start, stop, step)) @@ -83,7 +86,8 @@ cdef class BlockPlacement: @property def as_slice(self): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() if s is None: raise TypeError('Not slice-like') else: @@ -91,7 +95,8 @@ cdef class BlockPlacement: @property def indexer(self): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() if s is not None: return s else: @@ -103,7 +108,8 @@ cdef class BlockPlacement: @property def as_array(self): - cdef Py_ssize_t start, stop, end, _ + cdef: + Py_ssize_t start, stop, end, _ if not self._has_array: start, stop, step, _ = slice_get_indices_ex(self._as_slice) self._as_array = np.arange(start, stop, step, @@ -113,17 +119,19 @@ cdef class BlockPlacement: @property def is_slice_like(self): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() return s is not None def __getitem__(self, loc): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() if s is not None: val = slice_getitem(s, loc) else: val = self._as_array[loc] - if not PySlice_Check(val) and val.ndim == 0: + if not isinstance(val, slice) and val.ndim == 0: return val return BlockPlacement(val) @@ -139,8 +147,9 @@ cdef class BlockPlacement: [o.as_array for o in others])) cdef iadd(self, other): - cdef slice s = self._ensure_has_slice() - cdef Py_ssize_t other_int, start, stop, step, l + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t other_int, start, stop, step, l if isinstance(other, int) and s is not None: other_int = other @@ -184,7 +193,7 @@ cdef class BlockPlacement: return self._as_slice -cdef slice_canonize(slice s): +cdef slice slice_canonize(slice s): """ Convert slice to canonical bounded form. """ @@ -282,7 +291,7 @@ def slice_getitem(slice slc not None, ind): s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) - if PySlice_Check(ind): + if isinstance(ind, slice): ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, s_len) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index d8e2e8eb4b4ea..82261094022fb 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -271,7 +271,7 @@ cdef class Interval(IntervalMixin): return ((self.left < key if self.open_left else self.left <= key) and (key < self.right if self.open_right else key <= self.right)) - def __richcmp__(self, other, int op): + def __richcmp__(self, other, op: int): if hasattr(other, 'ndim'): # let numpy (or IntervalIndex) handle vectorization return NotImplemented diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 6b425d7022ecd..0b9793a6ef97a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2,14 +2,10 @@ from decimal import Decimal import sys -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from cpython cimport (Py_INCREF, PyTuple_SET_ITEM, - PyList_Check, - PyString_Check, - PyBytes_Check, - PyUnicode_Check, PyTuple_New, Py_EQ, PyObject_RichCompareBool) @@ -91,13 +87,14 @@ def values_from_object(object obj): @cython.wraparound(False) @cython.boundscheck(False) -def memory_usage_of_objects(object[:] arr): +def memory_usage_of_objects(arr: object[:]) -> int64_t: """ return the memory usage of an object array in bytes, does not include the actual bytes of the pointers """ - cdef: - Py_ssize_t i, n - int64_t size = 0 + i: Py_ssize_t + n: Py_ssize_t + size: int64_t + size = 0 n = len(arr) for i in range(n): size += arr[i].__sizeof__() @@ -127,7 +124,7 @@ def is_scalar(val: object) -> bint: return (cnp.PyArray_IsAnyScalar(val) # As of numpy-1.9, PyArray_IsAnyScalar misses bytearrays on Py3. - or PyBytes_Check(val) + or isinstance(val, bytes) # We differ from numpy (as of 1.10), which claims that None is # not scalar in np.isscalar(). or val is None @@ -140,7 +137,7 @@ def is_scalar(val: object) -> bint: or util.is_offset_object(val)) -def item_from_zerodim(object val): +def item_from_zerodim(val: object) -> object: """ If the value is a zerodim array, return the item it contains. @@ -359,7 +356,7 @@ def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): return rev_indexer -def has_infs_f4(ndarray[float32_t] arr): +def has_infs_f4(ndarray[float32_t] arr) -> bint: cdef: Py_ssize_t i, n = len(arr) float32_t inf, neginf, val @@ -374,7 +371,7 @@ def has_infs_f4(ndarray[float32_t] arr): return False -def has_infs_f8(ndarray[float64_t] arr): +def has_infs_f8(ndarray[float64_t] arr) -> bint: cdef: Py_ssize_t i, n = len(arr) float64_t inf, neginf, val @@ -530,7 +527,8 @@ def clean_index_list(list obj): for i in range(n): v = obj[i] - if not (PyList_Check(v) or util.is_array(v) or hasattr(v, '_data')): + if not (isinstance(v, list) or + util.is_array(v) or hasattr(v, '_data')): all_arrays = 0 break @@ -1120,7 +1118,7 @@ def infer_dtype(object value, bint skipna=False): .format(typ=type(value))) else: - if not PyList_Check(value): + if not isinstance(value, list): value = list(value) from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike) @@ -1209,15 +1207,15 @@ def infer_dtype(object value, bint skipna=False): if is_bool_array(values, skipna=skipna): return 'boolean' - elif PyString_Check(val): + elif isinstance(val, str): if is_string_array(values, skipna=skipna): return 'string' - elif PyUnicode_Check(val): + elif isinstance(val, unicode): if is_unicode_array(values, skipna=skipna): return 'unicode' - elif PyBytes_Check(val): + elif isinstance(val, bytes): if is_bytes_array(values, skipna=skipna): return 'bytes' @@ -1474,7 +1472,7 @@ cpdef bint is_float_array(ndarray values): cdef class StringValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: - return PyString_Check(value) + return isinstance(value, str) cdef inline bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.str_) @@ -1490,7 +1488,7 @@ cpdef bint is_string_array(ndarray values, bint skipna=False): cdef class UnicodeValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: - return PyUnicode_Check(value) + return isinstance(value, unicode) cdef inline bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.unicode_) @@ -1506,7 +1504,7 @@ cdef bint is_unicode_array(ndarray values, bint skipna=False): cdef class BytesValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: - return PyBytes_Check(value) + return isinstance(value, bytes) cdef inline bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.bytes_) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index c787cc61e8773..2590a30c57f33 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- -from cpython cimport PyFloat_Check, PyComplex_Check - -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np cimport numpy as cnp @@ -23,8 +21,9 @@ cdef int64_t NPY_NAT = util.get_nat() cdef inline bint _check_all_nulls(object val): """ utility to check if a value is any type of null """ - cdef bint res - if PyFloat_Check(val) or PyComplex_Check(val): + res: bint + + if isinstance(val, (float, complex)): res = val != val elif val is NaT: res = 1 @@ -117,7 +116,7 @@ cpdef bint checknull_old(object val): cdef inline bint _check_none_nan_inf_neginf(object val): try: - return val is None or (PyFloat_Check(val) and + return val is None or (isinstance(val, float) and (val != val or val == INF or val == NEGINF)) except ValueError: return False diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index a194f1588e231..e21bce177b38b 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -1,12 +1,11 @@ # -*- coding: utf-8 -*- import operator -from cpython cimport (PyFloat_Check, PyBool_Check, - PyObject_RichCompareBool, +from cpython cimport (PyObject_RichCompareBool, Py_EQ, Py_NE, Py_LT, Py_LE, Py_GT, Py_GE) -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np from numpy cimport ndarray, uint8_t, import_array @@ -272,7 +271,7 @@ def maybe_convert_bool(ndarray[object] arr, for i in range(n): val = arr[i] - if PyBool_Check(val): + if isinstance(val, bool): if val is True: result[i] = 1 else: @@ -281,7 +280,7 @@ def maybe_convert_bool(ndarray[object] arr, result[i] = 1 elif val in false_vals: result[i] = 0 - elif PyFloat_Check(val): + elif isinstance(val, float): result[i] = UINT8_MAX na_count += 1 else: diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 91faed678192f..e3df391c5c45d 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -10,12 +10,12 @@ from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE from libc.stdlib cimport free from libc.string cimport strncpy, strlen, strcasecmp -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from cpython cimport (PyObject, PyBytes_FromString, - PyBytes_AsString, PyBytes_Check, - PyUnicode_Check, PyUnicode_AsUTF8String, + PyBytes_AsString, + PyUnicode_AsUTF8String, PyErr_Occurred, PyErr_Fetch) from cpython.ref cimport Py_XDECREF @@ -1341,9 +1341,9 @@ cdef object _false_values = [b'False', b'FALSE', b'false'] def _ensure_encoded(list lst): cdef list result = [] for x in lst: - if PyUnicode_Check(x): + if isinstance(x, unicode): x = PyUnicode_AsUTF8String(x) - elif not PyBytes_Check(x): + elif not isinstance(x, bytes): x = asbytes(x) result.append(x) @@ -2046,7 +2046,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL: val = values[i] # None creeps in sometimes, which isn't possible here - if not PyBytes_Check(val): + if not isinstance(val, bytes): raise ValueError('Must be all encoded bytes') k = kh_put_str(table, PyBytes_AsString(val), &ret) diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx index 0f2900619fdb6..6e4c0c62b0dd8 100644 --- a/pandas/_libs/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from cython cimport Py_ssize_t +from cython import Py_ssize_t from cpython cimport ( PyDict_Contains, PyDict_GetItem, PyDict_SetItem) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index d87a590730fd6..681ea2c6295f2 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from distutils.version import LooseVersion -from cython cimport Py_ssize_t +from cython import Py_ssize_t from cpython cimport Py_INCREF from libc.stdlib cimport malloc, free diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 7f5990ce5d65c..2993114a668bb 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -2,7 +2,7 @@ import operator import sys -cimport cython +import cython import numpy as np cimport numpy as cnp diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index ab7f3c3de2131..10f68187938c0 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -22,24 +22,30 @@ cdef NUMERIC_TYPES = ( np.float64, ) + cdef bint is_comparable_as_number(obj): return isinstance(obj, NUMERIC_TYPES) + cdef bint isiterable(obj): return hasattr(obj, '__iter__') + cdef bint has_length(obj): return hasattr(obj, '__len__') + cdef bint is_dictlike(obj): return hasattr(obj, 'keys') and hasattr(obj, '__getitem__') + cdef bint decimal_almost_equal(double desired, double actual, int decimal): # Code from # http://docs.scipy.org/doc/numpy/reference/generated # /numpy.testing.assert_almost_equal.html return abs(desired - actual) < (0.5 * 10.0 ** -decimal) + cpdef assert_dict_equal(a, b, bint compare_keys=True): assert is_dictlike(a) and is_dictlike(b), ( "Cannot compare dict objects, one or both is not dict-like" @@ -56,6 +62,7 @@ cpdef assert_dict_equal(a, b, bint compare_keys=True): return True + cpdef assert_almost_equal(a, b, check_less_precise=False, bint check_dtype=True, diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 93fae695d51fd..16fea0615f199 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -from cython cimport Py_ssize_t - -from cpython cimport PyFloat_Check, PyUnicode_Check +from cython import Py_ssize_t from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyDateTime_CheckExact, @@ -601,7 +599,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT continue - if PyUnicode_Check(val) and PY2: + if isinstance(val, unicode) and PY2: val = val.encode('utf-8') try: @@ -740,7 +738,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', # set as nan except if its a NaT if checknull_with_nat(val): - if PyFloat_Check(val): + if isinstance(val, float): oresult[i] = np.nan else: oresult[i] = NaT diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index ec54c023290b3..7d58b43e5d460 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -4,8 +4,8 @@ Cython implementations of functions resembling the stdlib calendar module """ -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from numpy cimport int64_t, int32_t diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fe664cf03b0b9..d7eef546befbd 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np cimport numpy as cnp diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 9cbad8acabff1..684344ceb9002 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -4,8 +4,8 @@ Functions for accessing attributes of Timestamp/datetime64/datetime-like objects and arrays """ -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np cimport numpy as cnp diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 70a3f3f410636..c555fce9dd007 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -321,7 +321,7 @@ cpdef object get_freq(object freq): # ---------------------------------------------------------------------- # Frequency comparison -cpdef bint is_subperiod(source, target): +def is_subperiod(source, target) -> bint: """ Returns True if downsampling is possible between source and target frequencies @@ -374,7 +374,7 @@ cpdef bint is_subperiod(source, target): return source in {'N'} -cpdef bint is_superperiod(source, target): +def is_superperiod(source, target) -> bint: """ Returns True if upsampling is possible between source and target frequencies diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 08d9128ff660c..fd8486f690745 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from cpython cimport ( - PyFloat_Check, PyComplex_Check, PyObject_RichCompare, Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index f0aa6389fba56..e0ecfc24804a9 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from cpython cimport (Py_EQ, Py_NE, Py_GE, Py_GT, Py_LT, Py_LE, - PyUnicode_Check, PyUnicode_AsASCIIString) + PyUnicode_AsASCIIString) from cpython.datetime cimport (datetime, date, PyDateTime_IMPORT, @@ -175,7 +175,7 @@ cdef inline int _string_to_dts(object val, npy_datetimestruct* dts, int result char *tmp - if PyUnicode_Check(val): + if isinstance(val, unicode): val = PyUnicode_AsASCIIString(val) tmp = val diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 8c53fabffdbeb..4d611f89bca9c 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import time from cpython.datetime cimport (PyDateTime_IMPORT, diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 6ee6c4b9d9026..3887957aeefd4 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -537,7 +537,7 @@ except (ImportError, AttributeError): pass -def _format_is_iso(f): +def _format_is_iso(f) -> bint: """ Does format match the iso8601 set that can be handled by the C parser? Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index f68b6d8fdef57..43dc415bfd464 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -2397,7 +2397,6 @@ class Period(_Period): # ('T', 5) but may be passed in as a string like '5T' # ordinal is the period offset from the gregorian proleptic epoch - cdef _Period self if freq is not None: @@ -2495,7 +2494,7 @@ cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day, minute, second, 0, 0, base) -def quarter_to_myear(int year, int quarter, freq): +def quarter_to_myear(year: int, quarter: int, freq): """ A quarterly frequency defines a "year" which may not coincide with the calendar-year. Find the calendar-year and calendar-month associated diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 4e3350395400c..4acffdea78f55 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from cython cimport Py_ssize_t +from cython import Py_ssize_t import numpy as np from numpy cimport ndarray, int64_t, int32_t diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index d472320cfdb12..46a1145009857 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -20,7 +20,7 @@ except: except: from _dummy_thread import allocate_lock as _thread_allocate_lock -from cython cimport Py_ssize_t +from cython import Py_ssize_t import pytz diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index b84c1a753215a..9b13ef5982396 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -6,9 +6,9 @@ import warnings import sys cdef bint PY3 = (sys.version_info[0] >= 3) -from cython cimport Py_ssize_t +from cython import Py_ssize_t -from cpython cimport PyUnicode_Check, Py_NE, Py_EQ, PyObject_RichCompare +from cpython cimport Py_NE, Py_EQ, PyObject_RichCompare import numpy as np cimport numpy as cnp @@ -281,7 +281,7 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: cdef inline _decode_if_necessary(object ts): # decode ts if necessary - if not PyUnicode_Check(ts) and not PY3: + if not isinstance(ts, unicode) and not PY3: ts = str(ts).decode('utf-8') return ts diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 36ec499c7335c..b7e4de81da35c 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from cython cimport Py_ssize_t +from cython import Py_ssize_t # dateutil compat from dateutil.tz import ( diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index b25fb47065fdd..d4b61b8611b68 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- # cython: boundscheck=False, wraparound=False, cdivision=True -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from libcpp.deque cimport deque from libc.stdlib cimport malloc, free diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 8e55ffad8d231..9af12cbec1e9c 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from cpython cimport PyBytes_GET_SIZE, PyUnicode_GET_SIZE @@ -36,9 +36,10 @@ def write_csv_rows(list data, ndarray data_index, cols : ndarray writer : object """ - cdef int N, j, i, ncols - cdef list rows - cdef object val + cdef: + int N, j, i, ncols + list rows + object val # In crude testing, N>100 yields little marginal improvement N = 100 @@ -157,8 +158,9 @@ def string_array_replace_from_nan_rep( Replace the values in the array with 'replacement' if they are 'nan_rep'. Return the same array. """ + cdef: + int length = arr.shape[0], i = 0 - cdef int length = arr.shape[0], i = 0 if replace is None: replace = np.nan diff --git a/setup.cfg b/setup.cfg index c4e3243d824e5..5fc0236066b93 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,3 +40,33 @@ markers = high_memory: mark a test as a high-memory only doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL addopts = --strict-data-files + + +[coverage:run] +branch = False +omit = */tests/* +plugins = Cython.Coverage + +[coverage:report] +ignore_errors = False +show_missing = True +# Regexes for lines to exclude from consideration +exclude_lines = + # Have to re-enable the standard pragma + pragma: no cover + + # Don't complain about missing debug-only code: + def __repr__ + if self\.debug + + # Don't complain if tests don't hit defensive assertion code: + raise AssertionError + raise NotImplementedError + AbstractMethodError + + # Don't complain if non-runnable code isn't run: + if 0: + if __name__ == .__main__.: + +[coverage:html] +directory = coverage_html_report