Skip to content

Commit

Permalink
Merge pull request #3333 from jreback/series_perf
Browse files Browse the repository at this point in the history
PERF: series construction perf enhancements, use a fast path based on dt...
  • Loading branch information
jreback committed Apr 12, 2013
2 parents 9764ea6 + c54848f commit 174196d
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 7 deletions.
5 changes: 5 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ class AmbiguousIndexError(PandasError, KeyError):
pass


# dtypes for which _possibly_castable() returns False: nanosecond
# datetime64/timedelta64, object, and the fixed-width signed/unsigned
# integer types.
_POSSIBLY_CAST_DTYPES = set(map(np.dtype, [
    'M8[ns]', 'm8[ns]', 'O',
    'int8', 'uint8',
    'int16', 'uint16',
    'int32', 'uint32',
    'int64', 'uint64',
]))

def isnull(obj):
'''
Detect missing values (NaN in numeric arrays, None/NaN in object arrays)
Expand Down Expand Up @@ -1038,6 +1040,9 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):

return values

def _possibly_castable(arr):
    """
    Return True when ``arr.dtype`` is NOT one of the dtypes listed in
    ``_POSSIBLY_CAST_DTYPES``, and False when it is.

    Parameters
    ----------
    arr : object exposing a ``dtype`` attribute (e.g. an ndarray)

    Returns
    -------
    bool
    """
    # note the inversion: membership in the set yields False
    in_cast_set = arr.dtype in _POSSIBLY_CAST_DTYPES
    return not in_cast_set

def _possibly_convert_platform(values):
""" try to do platform conversion, allow ndarray or list here """

Expand Down
19 changes: 12 additions & 7 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3196,7 +3196,6 @@ def remove_na(arr):
"""
return arr[notnull(arr)]


def _sanitize_array(data, index, dtype=None, copy=False,
raise_cast_failure=False):

Expand All @@ -3208,7 +3207,13 @@ def _sanitize_array(data, index, dtype=None, copy=False,
else:
data = data.copy()

def _try_cast(arr):
def _try_cast(arr, take_fast_path):

# perf shortcut as this is the most common case
if take_fast_path:
if com._possibly_castable(arr) and not copy and dtype is None:
return arr

try:
arr = com._possibly_cast_to_datetime(arr, dtype)
subarr = pa.array(arr, dtype=dtype, copy=copy)
Expand All @@ -3227,7 +3232,7 @@ def _try_cast(arr):
# possibility of nan -> garbage
if com.is_float_dtype(data.dtype) and com.is_integer_dtype(dtype):
if not isnull(data).any():
subarr = _try_cast(data)
subarr = _try_cast(data, True)
elif copy:
subarr = data.copy()
else:
Expand All @@ -3239,17 +3244,17 @@ def _try_cast(arr):
elif raise_cast_failure:
raise TypeError('Cannot cast datetime64 to %s' % dtype)
else:
subarr = _try_cast(data)
subarr = _try_cast(data, True)
else:
subarr = _try_cast(data)
subarr = _try_cast(data, True)

if copy:
subarr = data.copy()

elif isinstance(data, list) and len(data) > 0:
if dtype is not None:
try:
subarr = _try_cast(data)
subarr = _try_cast(data, False)
except Exception:
if raise_cast_failure: # pragma: no cover
raise
Expand All @@ -3262,7 +3267,7 @@ def _try_cast(arr):
subarr = com._possibly_cast_to_datetime(subarr, dtype)

else:
subarr = _try_cast(data)
subarr = _try_cast(data, False)

# scalar like
if subarr.ndim == 0:
Expand Down

0 comments on commit 174196d

Please sign in to comment.