Skip to content

Commit

Permalink
ENH: add method use_inf_as_null to core.common (GH pandas-dev#1919)
Browse files Browse the repository at this point in the history
  • Loading branch information
aflaxman committed Oct 9, 2012
1 parent fe12c8a commit 7b79b66
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 1 deletion.
76 changes: 76 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,58 @@ def isnull(obj):
return _isnull_ndarraylike(obj)
else:
return obj is None
# Keep a second name for the isnull implementation defined above, so that
# use_inf_as_null(False) can rebind ``isnull`` back to it after a swap.
isnull_new = isnull

def isnull_old(obj):
    '''
    Null detector that also counts infinities as missing.

    Stand-in for numpy.isnan / -numpy.isfinite that can be used on object
    arrays. None, NaN, INF and -INF are all treated as null.

    Parameters
    ----------
    obj: ndarray or object value

    Returns
    -------
    boolean ndarray or boolean
    '''
    # Scalars are answered directly by the compiled helper.
    if lib.isscalar(obj):
        return lib.checknull_old(obj)

    # Function-scope import, kept after the scalar fast path
    # (presumably to avoid a circular import -- TODO confirm).
    from pandas.core.generic import PandasObject

    # The ndarray test has to come before the PandasObject test.
    if isinstance(obj, np.ndarray):
        return _isnull_ndarraylike_old(obj)

    if isinstance(obj, PandasObject):
        # TODO: optimize for DataFrame, etc.
        return obj.apply(isnull_old)

    if isinstance(obj, list) or hasattr(obj, '__array__'):
        return _isnull_ndarraylike_old(obj)

    # Anything else is null only if it is None itself.
    return obj is None

def use_inf_as_null(flag):
    '''
    Choose which replacement for numpy.isnan / -numpy.isfinite the
    module-level name ``isnull`` refers to.

    Parameters
    ----------
    flag: bool
        Truthy means treat None, NaN, INF, -INF as null (old way),
        falsy means None and NaN are null, but INF, -INF are not null
        (new way).

    Returns
    -------
    None

    Notes
    -----
    The switch is made by rebinding ``isnull`` in this module's global
    namespace. Code that looks the name up on the module at call time sees
    the change; a name bound earlier with ``from ... import isnull`` keeps
    referring to whichever function it captured.

    This approach to setting global module values is discussed and
    approved here:
    * http://stackoverflow.com/questions/4859217/programmatically-creating-variables-in-python/4859312#4859312
    '''
    # Plain truthiness instead of ``flag == True``: with the equality test,
    # truthy non-bool flags (e.g. 2 or "yes") silently fell through to the
    # new-style behaviour.
    globals()['isnull'] = isnull_old if flag else isnull_new


def _isnull_ndarraylike(obj):
from pandas import Series
Expand All @@ -90,6 +142,30 @@ def _isnull_ndarraylike(obj):
result = -np.isfinite(obj)
return result

def _isnull_ndarraylike_old(obj):
from pandas import Series
values = np.asarray(obj)

if values.dtype.kind in ('O', 'S', 'U'):
# Working around NumPy ticket 1542
shape = values.shape

if values.dtype.kind in ('S', 'U'):
result = np.zeros(values.shape, dtype=bool)
else:
result = np.empty(shape, dtype=bool)
vec = lib.isnullobj_old(values.ravel())
result[:] = vec.reshape(shape)

if isinstance(obj, Series):
result = Series(result, index=obj.index, copy=False)
elif values.dtype == np.dtype('M8[ns]'):
# this is the NaT pattern
result = values.view('i8') == lib.iNaT
else:
result = -np.isfinite(obj)
return result

def notnull(obj):
'''
Replacement for numpy.isfinite / -numpy.isnan which is suitable
Expand Down
41 changes: 41 additions & 0 deletions pandas/src/tseries.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,18 @@ cpdef checknull(object val):
else:
return util._checknull(val)

cpdef checknull_old(object val):
    '''
    Scalar null test under the "old" rules, where infinities count as null.

    Float and complex objects are null when they are NaN, INF or NEGINF;
    datetime64 objects when their value equals NPY_NAT; _NaT instances
    always; arrays never. Everything else is delegated to util._checknull.
    '''
    if util.is_float_object(val) or util.is_complex_object(val):
        # ``val != val`` is the NaN test; the two equality checks add the
        # infinities.
        return val != val or val == INF or val == NEGINF

    if util.is_datetime64_object(val):
        return get_datetime64_value(val) == NPY_NAT

    if isinstance(val, _NaT):
        return True

    if is_array(val):
        return False

    return util._checknull(val)


def isscalar(object val):
    '''
    Return True if val is a scalar according to np.isscalar, is None, or is
    a _Timestamp instance.
    '''
    if np.isscalar(val):
        return True
    return val is None or isinstance(val, _Timestamp)
Expand All @@ -207,6 +219,19 @@ def isnullobj(ndarray[object] arr):
result[i] = util._checknull(arr[i])
return result.view(np.bool_)

@cython.wraparound(False)
@cython.boundscheck(False)
def isnullobj_old(ndarray[object] arr):
    '''
    Build a boolean null mask for a 1-D object array under the "old" rules
    (None, NaN, INF and -INF are null).

    Parameters
    ----------
    arr: 1-D ndarray of object dtype

    Returns
    -------
    boolean ndarray with one entry per element of arr
    '''
    cdef Py_ssize_t i, n
    cdef ndarray[uint8_t] result

    n = len(arr)
    # Filled as uint8 and reinterpreted as bool on return.
    result = np.zeros(n, dtype=np.uint8)
    for i in range(n):
        # Same scalar predicate as isnullobj2d_old, so 1-D and 2-D object
        # arrays agree on what counts as null (infinities included).
        if checknull_old(arr[i]):
            result[i] = 1
    return result.view(np.bool_)


@cython.wraparound(False)
@cython.boundscheck(False)
Expand All @@ -224,6 +249,22 @@ def isnullobj2d(ndarray[object, ndim=2] arr):
result[i, j] = 1
return result.view(np.bool_)

@cython.wraparound(False)
@cython.boundscheck(False)
def isnullobj2d_old(ndarray[object, ndim=2] arr):
    '''
    Build a boolean null mask for a 2-D object array, using checknull_old
    as the per-element test.

    Parameters
    ----------
    arr: 2-D ndarray of object dtype

    Returns
    -------
    boolean ndarray with the same shape as arr
    '''
    cdef Py_ssize_t row, col, nrows, ncols
    cdef object item
    cdef ndarray[uint8_t, ndim=2] mask

    nrows, ncols = (<object> arr).shape
    # Starts all-zero (not null); only null positions are written.
    mask = np.zeros((nrows, ncols), dtype=np.uint8)
    for row in range(nrows):
        for col in range(ncols):
            item = arr[row, col]
            if checknull_old(item):
                mask[row, col] = 1
    # Reinterpret the uint8 buffer as booleans.
    return mask.view(np.bool_)

def list_to_object_array(list obj):
'''
Convert list to object ndarray. Seriously can't believe I had to write this
Expand Down
6 changes: 6 additions & 0 deletions pandas/src/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -68,5 +68,11 @@ cdef inline bint _checknull(object val):
except ValueError:
return False

cdef inline bint _checknull_old(object val):
    # "Old"-style scalar null test: None, NaN, INF and -INF are null.
    # Without the two infinity comparisons this would be indistinguishable
    # from the new-style _checknull.
    try:
        return bool(val is None or val != val or
                    val == float('inf') or val == float('-inf'))
    except ValueError:
        # Raised when a comparison yields something with no single truth
        # value (e.g. a multi-element array); such values are not null.
        return False

cdef inline bint _checknan(object val):
    # Arrays are never reported as NaN; for anything else, a value that is
    # unequal to itself is NaN.
    if cnp.PyArray_Check(val):
        return False
    return val != val
10 changes: 9 additions & 1 deletion pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import unittest

from pandas import Series, DataFrame, date_range, DatetimeIndex
from pandas.core.common import notnull, isnull
from pandas.core.common import notnull, isnull, use_inf_as_null
import pandas.core.common as com
import pandas.util.testing as tm

Expand All @@ -18,9 +18,17 @@ def test_notnull():
assert notnull(1.)
assert not notnull(None)
assert not notnull(np.NaN)

use_inf_as_null(False)
assert notnull(np.inf)
assert notnull(-np.inf)

use_inf_as_null(True)
assert not notnull(np.inf)
assert not notnull(-np.inf)



float_series = Series(np.random.randn(5))
obj_series = Series(np.random.randn(5), dtype=object)
assert(isinstance(notnull(float_series), Series))
Expand Down

0 comments on commit 7b79b66

Please sign in to comment.