Skip to content

Commit

Permalink
PERF: use fast-paths for dtype checks (#34118)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored May 11, 2020
1 parent 9080d30 commit 08a0cb0
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 26 deletions.
18 changes: 13 additions & 5 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,16 @@ class IntervalArray(IntervalMixin, ExtensionArray):
can_hold_na = True
_na_value = _fill_value = np.nan

def __new__(cls, data, closed=None, dtype=None, copy=False, verify_integrity=True):
def __new__(
cls,
data,
closed=None,
dtype=None,
copy: bool = False,
verify_integrity: bool = True,
):

if isinstance(data, ABCSeries) and is_interval_dtype(data):
if isinstance(data, ABCSeries) and is_interval_dtype(data.dtype):
data = data._values

if isinstance(data, (cls, ABCIntervalIndex)):
Expand Down Expand Up @@ -569,8 +576,8 @@ def __eq__(self, other):

# determine the dtype of the elements we want to compare
if isinstance(other, Interval):
other_dtype = "interval"
elif not is_categorical_dtype(other):
other_dtype = pandas_dtype("interval")
elif not is_categorical_dtype(other.dtype):
other_dtype = other.dtype
else:
# for categorical defer to categories for dtype
Expand Down Expand Up @@ -674,7 +681,8 @@ def astype(self, dtype, copy=True):
array : ExtensionArray or ndarray
ExtensionArray or NumPy ndarray with 'dtype' for its dtype.
"""
dtype = pandas_dtype(dtype)
if dtype is not None:
dtype = pandas_dtype(dtype)
if is_interval_dtype(dtype):
if dtype == self.dtype:
return self.copy() if copy else self
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def __reduce__(self):
def astype(self, dtype, copy=True):
with rewrite_exception("IntervalArray", type(self).__name__):
new_values = self._values.astype(dtype, copy=copy)
if is_interval_dtype(new_values):
if is_interval_dtype(new_values.dtype):
return self._shallow_copy(new_values)
return Index.astype(self, dtype, copy=copy)

Expand Down
34 changes: 19 additions & 15 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1079,10 +1079,10 @@ def _maybe_coerce_merge_keys(self):
if (len(lk) and not len(rk)) or (not len(lk) and len(rk)):
continue

lk_is_cat = is_categorical_dtype(lk)
rk_is_cat = is_categorical_dtype(rk)
lk_is_object = is_object_dtype(lk)
rk_is_object = is_object_dtype(rk)
lk_is_cat = is_categorical_dtype(lk.dtype)
rk_is_cat = is_categorical_dtype(rk.dtype)
lk_is_object = is_object_dtype(lk.dtype)
rk_is_object = is_object_dtype(rk.dtype)

# if either left or right is a categorical
# then they must match exactly in categories & ordered
Expand All @@ -1105,12 +1105,12 @@ def _maybe_coerce_merge_keys(self):
# kinds to proceed, eg. int64 and int8, int and float
# further if we are object, but we infer to
# the same, then proceed
if is_numeric_dtype(lk) and is_numeric_dtype(rk):
if is_numeric_dtype(lk.dtype) and is_numeric_dtype(rk.dtype):
if lk.dtype.kind == rk.dtype.kind:
continue

# check whether ints and floats
elif is_integer_dtype(rk) and is_float_dtype(lk):
elif is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype):
if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():
warnings.warn(
"You are merging on int and float "
Expand All @@ -1120,7 +1120,7 @@ def _maybe_coerce_merge_keys(self):
)
continue

elif is_float_dtype(rk) and is_integer_dtype(lk):
elif is_float_dtype(rk.dtype) and is_integer_dtype(lk.dtype):
if not (rk == rk.astype(lk.dtype))[~np.isnan(rk)].all():
warnings.warn(
"You are merging on int and float "
Expand All @@ -1140,14 +1140,14 @@ def _maybe_coerce_merge_keys(self):
# incompatible dtypes GH 9780, GH 15800

# bool values are coerced to object
elif (lk_is_object and is_bool_dtype(rk)) or (
is_bool_dtype(lk) and rk_is_object
elif (lk_is_object and is_bool_dtype(rk.dtype)) or (
is_bool_dtype(lk.dtype) and rk_is_object
):
pass

# object values are allowed to be merged
elif (lk_is_object and is_numeric_dtype(rk)) or (
is_numeric_dtype(lk) and rk_is_object
elif (lk_is_object and is_numeric_dtype(rk.dtype)) or (
is_numeric_dtype(lk.dtype) and rk_is_object
):
inferred_left = lib.infer_dtype(lk, skipna=False)
inferred_right = lib.infer_dtype(rk, skipna=False)
Expand All @@ -1167,13 +1167,17 @@ def _maybe_coerce_merge_keys(self):
raise ValueError(msg)

# datetimelikes must match exactly
elif needs_i8_conversion(lk) and not needs_i8_conversion(rk):
elif needs_i8_conversion(lk.dtype) and not needs_i8_conversion(rk.dtype):
raise ValueError(msg)
elif not needs_i8_conversion(lk) and needs_i8_conversion(rk):
elif not needs_i8_conversion(lk.dtype) and needs_i8_conversion(rk.dtype):
raise ValueError(msg)
elif is_datetime64tz_dtype(lk) and not is_datetime64tz_dtype(rk):
elif is_datetime64tz_dtype(lk.dtype) and not is_datetime64tz_dtype(
rk.dtype
):
raise ValueError(msg)
elif not is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
elif not is_datetime64tz_dtype(lk.dtype) and is_datetime64tz_dtype(
rk.dtype
):
raise ValueError(msg)

elif lk_is_object and rk_is_object:
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,22 +461,22 @@ def _coerce_to_type(x):
"""
dtype = None

if is_datetime64tz_dtype(x):
if is_datetime64tz_dtype(x.dtype):
dtype = x.dtype
elif is_datetime64_dtype(x):
elif is_datetime64_dtype(x.dtype):
x = to_datetime(x)
dtype = np.dtype("datetime64[ns]")
elif is_timedelta64_dtype(x):
elif is_timedelta64_dtype(x.dtype):
x = to_timedelta(x)
dtype = np.dtype("timedelta64[ns]")
elif is_bool_dtype(x):
elif is_bool_dtype(x.dtype):
# GH 20303
x = x.astype(np.int64)
# To support cut and qcut for IntegerArray we convert to float dtype.
# Will properly support in the future.
# https://github.com/pandas-dev/pandas/pull/31290
# https://github.com/pandas-dev/pandas/issues/31389
elif is_extension_array_dtype(x) and is_integer_dtype(x):
elif is_extension_array_dtype(x.dtype) and is_integer_dtype(x.dtype):
x = x.to_numpy(dtype=np.float64, na_value=np.nan)

if dtype is not None:
Expand Down

0 comments on commit 08a0cb0

Please sign in to comment.