Skip to content

Commit

Permalink
BUG: to_dict should return a native datetime object for NumPy backed …
Browse files Browse the repository at this point in the history
…dataframes (#37571)
  • Loading branch information
arw2019 authored Nov 4, 2020
1 parent 0297710 commit e0699ca
Show file tree
Hide file tree
Showing 11 changed files with 97 additions and 66 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ Numeric
Conversion
^^^^^^^^^^

-
- Fixed bug in :meth:`DataFrame.to_dict` with ``orient='records'`` so that it now returns Python native datetime objects for datetimelike columns (:issue:`21256`)
-

Strings
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
construct_1d_arraylike_from_scalar,
find_common_type,
infer_dtype_from_scalar,
maybe_box_datetimelike,
)
from pandas.core.dtypes.common import (
is_array_like,
Expand Down Expand Up @@ -805,7 +806,7 @@ def _get_val_at(self, loc):
return self.fill_value
else:
val = self.sp_values[sp_loc]
val = com.maybe_box_datetimelike(val, self.sp_values.dtype)
val = maybe_box_datetimelike(val, self.sp_values.dtype)
return val

def take(self, indices, allow_fill=False, fill_value=None) -> "SparseArray":
Expand Down
35 changes: 1 addition & 34 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@

from collections import abc, defaultdict
import contextlib
from datetime import datetime, timedelta
from functools import partial
import inspect
from typing import Any, Collection, Iterable, Iterator, List, Union, cast
import warnings

import numpy as np

from pandas._libs import lib, tslibs
from pandas._libs import lib
from pandas._typing import AnyArrayLike, Scalar, T
from pandas.compat.numpy import np_version_under1p18

Expand Down Expand Up @@ -78,21 +77,6 @@ def consensus_name_attr(objs):
return name


def maybe_box_datetimelike(value, dtype=None):
    """
    Box a datetime-like scalar as a Timestamp or Timedelta.

    Parameters
    ----------
    value : scalar
        Candidate value. Anything that is not datetime-like is returned as-is.
    dtype : dtype, optional
        When this compares equal to ``object`` no boxing is performed.

    Returns
    -------
    scalar
        ``Timestamp`` for ``np.datetime64``/``datetime`` input, ``Timedelta``
        for ``np.timedelta64``/``timedelta`` input, otherwise ``value``
        unchanged.
    """
    if dtype == object:
        # With object dtype the data is not datetime64/timedelta64-backed,
        # so datetimelike scalars are deliberately left unboxed.
        return value

    if isinstance(value, (np.datetime64, datetime)):
        return tslibs.Timestamp(value)
    if isinstance(value, (np.timedelta64, timedelta)):
        return tslibs.Timedelta(value)
    return value


def is_bool_indexer(key: Any) -> bool:
"""
Check whether `key` is a valid boolean indexer.
Expand Down Expand Up @@ -347,23 +331,6 @@ def apply_if_callable(maybe_callable, obj, **kwargs):
return maybe_callable


def dict_compat(d):
    """
    Convert a datetimelike-keyed dict to a Timestamp-keyed dict.

    Parameters
    ----------
    d : dict-like object
        Mapping whose keys are each passed through
        ``maybe_box_datetimelike``; values are carried over untouched.

    Returns
    -------
    dict
        New dict with the converted keys.
    """
    return {maybe_box_datetimelike(key): value for key, value in d.items()}


def standardize_mapping(into):
"""
Helper function to standardize a supplied mapping.
Expand Down
43 changes: 42 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
Optional,
Sequence,
Expand All @@ -19,7 +20,7 @@

import numpy as np

from pandas._libs import lib, tslib
from pandas._libs import lib, tslib, tslibs
from pandas._libs.tslibs import (
NaT,
OutOfBoundsDatetime,
Expand Down Expand Up @@ -134,6 +135,30 @@ def is_nested_object(obj) -> bool:
return False


def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scalar:
    """
    Cast scalar to Timestamp or Timedelta if scalar is datetime-like
    and dtype is not object.

    Parameters
    ----------
    value : scalar
        Candidate value. Anything that is not datetime-like is returned as-is.
    dtype : Dtype, optional
        When this compares equal to ``object`` the value is returned unboxed.

    Returns
    -------
    scalar
        ``Timestamp`` for ``np.datetime64``/``datetime`` input, ``Timedelta``
        for ``np.timedelta64``/``timedelta`` input, otherwise ``value``
        unchanged.
    """
    if dtype == object:
        # Object dtype: leave datetimelike scalars exactly as given.
        return value

    if isinstance(value, (np.datetime64, datetime)):
        return tslibs.Timestamp(value)
    if isinstance(value, (np.timedelta64, timedelta)):
        return tslibs.Timedelta(value)
    return value


def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]):
"""
try to cast to the specified dtype (e.g. convert back to bool/int
Expand Down Expand Up @@ -791,6 +816,22 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
return dtype, val


def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]:
    """
    Convert datetimelike-keyed dicts to a Timestamp-keyed dict.

    Parameters
    ----------
    d : dict-like object
        Mapping whose keys are each passed through
        ``maybe_box_datetimelike``; values are carried over untouched.

    Returns
    -------
    dict
        New dict with the converted keys.
    """
    return {maybe_box_datetimelike(key): value for key, value in d.items()}


def infer_dtype_from_array(
arr, pandas_dtype: bool = False
) -> Tuple[DtypeObj, ArrayLike]:
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
find_common_type,
infer_dtype_from_scalar,
invalidate_string_dtypes,
maybe_box_datetimelike,
maybe_cast_to_datetime,
maybe_casted_values,
maybe_convert_platform,
Expand Down Expand Up @@ -1538,15 +1539,15 @@ def to_dict(self, orient="dict", into=dict):
(
"data",
[
list(map(com.maybe_box_datetimelike, t))
list(map(maybe_box_datetimelike, t))
for t in self.itertuples(index=False, name=None)
],
),
)
)

elif orient == "series":
return into_c((k, com.maybe_box_datetimelike(v)) for k, v in self.items())
return into_c((k, maybe_box_datetimelike(v)) for k, v in self.items())

elif orient == "records":
columns = self.columns.tolist()
Expand All @@ -1555,7 +1556,7 @@ def to_dict(self, orient="dict", into=dict):
for row in self.itertuples(index=False, name=None)
)
return [
into_c((k, com.maybe_box_datetimelike(v)) for k, v in row.items())
into_c((k, maybe_box_datetimelike(v)) for k, v in row.items())
for row in rows
]

Expand Down
5 changes: 3 additions & 2 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pandas.core.dtypes.cast import (
find_common_type,
infer_dtype_from_scalar,
maybe_box_datetimelike,
maybe_downcast_to_dtype,
)
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -1193,8 +1194,8 @@ def interval_range(
IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
closed='both', dtype='interval[int64]')
"""
start = com.maybe_box_datetimelike(start)
end = com.maybe_box_datetimelike(end)
start = maybe_box_datetimelike(start)
end = maybe_box_datetimelike(end)
endpoint = start if start is not None else end

if freq is None and com.any_none(periods, start, end):
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
find_common_type,
infer_dtype_from,
infer_dtype_from_scalar,
maybe_box_datetimelike,
maybe_downcast_numeric,
maybe_downcast_to_dtype,
maybe_infer_dtype_type,
Expand Down Expand Up @@ -843,7 +844,7 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
if isna(s):
return ~mask

s = com.maybe_box_datetimelike(s)
s = maybe_box_datetimelike(s)
return compare_or_regex_search(self.values, s, regex, mask)

# Calculate the mask once, prior to the call of comp
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pandas.core.dtypes.cast import (
construct_1d_arraylike_from_scalar,
construct_1d_ndarray_preserving_na,
dict_compat,
maybe_cast_to_datetime,
maybe_convert_platform,
maybe_infer_to_datetimelike,
Expand Down Expand Up @@ -346,7 +347,7 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]):
oindex = index.astype("O")

if isinstance(index, (ABCDatetimeIndex, ABCTimedeltaIndex)):
val = com.dict_compat(val)
val = dict_compat(val)
else:
val = dict(val)
val = lib.fast_multiget(val, oindex._values, default=np.nan)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/dtypes/cast/test_dict_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import numpy as np

from pandas.core.dtypes.cast import dict_compat

from pandas import Timestamp


def test_dict_compat():
    """Check that dict_compat boxes datetime64 keys and leaves other keys alone."""
    data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2}
    data_unchanged = {1: 2, 3: 4, 5: 6}
    expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2}
    # datetime64 keys are converted to Timestamp keys
    assert dict_compat(data_datetime64) == expected
    # already-boxed keys round-trip
    assert dict_compat(expected) == expected
    # non-datetimelike keys pass through untouched
    assert dict_compat(data_unchanged) == data_unchanged
37 changes: 25 additions & 12 deletions pandas/tests/frame/methods/test_to_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,17 +257,30 @@ def test_to_dict_wide(self):
assert result == expected

def test_to_dict_orient_dtype(self):
# GH#22620
# Input Data
input_data = {"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["X", "Y", "Z"]}
df = DataFrame(input_data)
# Expected Dtypes
expected = {"a": int, "b": float, "c": str}
# Extracting dtypes out of to_dict operation
for df_dict in df.to_dict("records"):
result = {
"a": type(df_dict["a"]),
"b": type(df_dict["b"]),
"c": type(df_dict["c"]),
# GH22620 & GH21256

df = DataFrame(
{
"bool": [True, True, False],
"datetime": [
datetime(2018, 1, 1),
datetime(2019, 2, 2),
datetime(2020, 3, 3),
],
"float": [1.0, 2.0, 3.0],
"int": [1, 2, 3],
"str": ["X", "Y", "Z"],
}
)

expected = {
"int": int,
"float": float,
"str": str,
"datetime": Timestamp,
"bool": bool,
}

for df_dict in df.to_dict("records"):
result = {col: type(df_dict[col]) for col in list(df.columns)}
assert result == expected
11 changes: 1 addition & 10 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pandas.compat.numpy import np_version_under1p17

import pandas as pd
from pandas import Series, Timestamp
from pandas import Series
import pandas._testing as tm
from pandas.core import ops
import pandas.core.common as com
Expand Down Expand Up @@ -109,15 +109,6 @@ def test_maybe_match_name(left, right, expected):
assert ops.common._maybe_match_name(left, right) == expected


def test_dict_compat():
    """Check that com.dict_compat boxes datetime64 keys and leaves other keys alone."""
    data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2}
    data_unchanged = {1: 2, 3: 4, 5: 6}
    expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2}
    # datetime64 keys are converted to Timestamp keys
    assert com.dict_compat(data_datetime64) == expected
    # already-boxed keys round-trip
    assert com.dict_compat(expected) == expected
    # non-datetimelike keys pass through untouched
    assert com.dict_compat(data_unchanged) == data_unchanged


def test_standardize_mapping():
# No uninitialized defaultdicts
msg = r"to_dict\(\) only accepts initialized defaultdicts"
Expand Down

0 comments on commit e0699ca

Please sign in to comment.