From 1a7256a2737e05c5f2faf3d405565ec5dba4f2aa Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Dec 2020 19:46:04 -0800 Subject: [PATCH 1/5] REF: helpers for sanitize_array --- pandas/core/construction.py | 48 +++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index a8ca457cdf2a7..248963ca3c859 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -508,11 +508,7 @@ def sanitize_array( # the result that we want elif subarr.ndim == 1: - if index is not None: - - # a 1-element ndarray - if len(subarr) != len(index) and len(subarr) == 1: - subarr = subarr.repeat(len(index)) + subarr = _maybe_repeat(subarr, index) elif subarr.ndim > 1: if isinstance(data, np.ndarray): @@ -521,16 +517,7 @@ def sanitize_array( subarr = com.asarray_tuplesafe(data, dtype=dtype) if not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)): - # This is to prevent mixed-type Series getting all casted to - # NumPy string type, e.g. NaN --> '-1#IND'. - if issubclass(subarr.dtype.type, str): - # GH#16605 - # If not empty convert the data to dtype - # GH#19853: If data is a scalar, subarr has already the result - if not lib.is_scalar(data): - if not np.all(isna(data)): - data = np.array(data, dtype=dtype, copy=False) - subarr = np.array(data, dtype=object, copy=copy) + subarr = _sanitize_str_dtypes(subarr, data, dtype, copy) is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(dtype) if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype: @@ -541,6 +528,37 @@ def sanitize_array( return subarr +def _sanitize_str_dtypes( + result: np.ndarray, data, dtype: Optional[DtypeObj], copy: bool +) -> np.ndarray: + """ + Ensure we have a dtype that is supported by pandas. + """ + + # This is to prevent mixed-type Series getting all casted to + # NumPy string type, e.g. NaN --> '-1#IND'. + if issubclass(result.dtype.type, str): + # GH#16605 + # If not empty convert the data to dtype + # GH#19853: If data is a scalar, result has already the result + if not lib.is_scalar(data): + if not np.all(isna(data)): + data = np.array(data, dtype=dtype, copy=False) + result = np.array(data, dtype=object, copy=copy) + return result + + +def _maybe_repeat(arr: ArrayLike, index: Optional[Index]) -> ArrayLike: + """ + If we have a length-1 array and an index describing how long we expect + the result to be, repeat the array. + """ + if index is not None: + if 1 == len(arr) != len(index): + arr = arr.repeat(len(index)) + return arr + + def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool): """ Convert input to numpy ndarray and optionally cast to a given dtype. From e68b9a28e8528fb1e3819eefc9e58468163ef984 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Dec 2020 20:21:58 -0800 Subject: [PATCH 2/5] REF: helper for ensuring ndim=1 --- pandas/core/construction.py | 46 ++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 248963ca3c859..7190e71889fe8 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -496,25 +496,7 @@ def sanitize_array( else: subarr = _try_cast(data, dtype, copy, raise_cast_failure) - # scalar like, GH - if getattr(subarr, "ndim", 0) == 0: - if isinstance(data, list): # pragma: no cover - subarr = np.array(data, dtype=object) - elif index is not None: - subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype) - - else: - return subarr.item() - - # the result that we want - elif subarr.ndim == 1: - subarr = _maybe_repeat(subarr, index) - - elif subarr.ndim > 1: - if isinstance(data, np.ndarray): - raise ValueError("Data must be 1-dimensional") - else: - subarr = com.asarray_tuplesafe(data, dtype=dtype) + subarr = _sanitize_ndim(subarr, data, dtype, index) if not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)): subarr = _sanitize_str_dtypes(subarr, data, dtype, copy) @@ -528,6 +510,32 @@ def sanitize_array( return subarr +def _sanitize_ndim(result, data, dtype: Optional[DtypeObj], index: Optional[Index]): + """ + Ensure we have a 1-dimensional result array. + """ + # scalar like, GH???? + if getattr(result, "ndim", 0) == 0: + if isinstance(data, list): # pragma: no cover + result = np.array(data, dtype=object) + elif index is not None: + result = construct_1d_arraylike_from_scalar(data, len(index), dtype) + + else: # FIXME: not what we want! + return result.item() + + elif result.ndim == 1: + # the result that we want + result = _maybe_repeat(result, index) + + elif result.ndim > 1: + if isinstance(data, np.ndarray): + raise ValueError("Data must be 1-dimensional") + else: + result = com.asarray_tuplesafe(data, dtype=dtype) + return result + + def _sanitize_str_dtypes( result: np.ndarray, data, dtype: Optional[DtypeObj], copy: bool ) -> np.ndarray: From e1d5f28257054129dfad8781db497ec6a077a3c1 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Dec 2020 10:11:17 -0800 Subject: [PATCH 3/5] REF: catch scalar-like up-front --- pandas/core/construction.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 7190e71889fe8..8a4d6231370ef 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -449,6 +449,11 @@ def sanitize_array( # extract ndarray or ExtensionArray, ensure we have no PandasArray data = extract_array(data, extract_numpy=True) + if isinstance(data, np.ndarray) and data.ndim == 0: + if dtype is None: + dtype = data.dtype + data = lib.item_from_zerodim(data) + # GH#846 if isinstance(data, np.ndarray): @@ -462,7 +467,7 @@ def sanitize_array( else: subarr = np.array(data, copy=False) else: - # we will try to copy be-definition here + # we will try to copy by-definition here subarr = _try_cast(data, dtype, copy, raise_cast_failure) elif isinstance(data, ABCExtensionArray): @@ -491,8 +496,12 @@ def sanitize_array( # GH#16804 arr = np.arange(data.start, data.stop, data.step, dtype="int64") subarr = _try_cast(arr, dtype, copy, raise_cast_failure) - elif lib.is_scalar(data) and index is not None and dtype is not None: + + elif not is_list_like(data): + if index is None: + raise ValueError("index must be specified when data is not list-like") subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype) + else: subarr = _try_cast(data, dtype, copy, raise_cast_failure) @@ -510,19 +519,14 @@ def sanitize_array( return subarr -def _sanitize_ndim(result, data, dtype: Optional[DtypeObj], index: Optional[Index]): +def _sanitize_ndim( + result: ArrayLike, data, dtype: Optional[DtypeObj], index: Optional[Index] +) -> ArrayLike: """ Ensure we have a 1-dimensional result array. """ - # scalar like, GH???? if getattr(result, "ndim", 0) == 0: - if isinstance(data, list): # pragma: no cover - result = np.array(data, dtype=object) - elif index is not None: - result = construct_1d_arraylike_from_scalar(data, len(index), dtype) - - else: # FIXME: not what we want! - return result.item() + raise ValueError("result should be arraylike with ndim > 0") elif result.ndim == 1: # the result that we want From 25507046be243d915088c14b885b654cf4915755 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Dec 2020 10:31:39 -0800 Subject: [PATCH 4/5] update docstring --- pandas/core/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 8a4d6231370ef..3dc7acc6cf0b5 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -577,7 +577,7 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo Parameters ---------- - arr : ndarray, scalar, list, tuple, iterator (catchall) + arr : ndarray, list, tuple, iterator (catchall) Excludes: ExtensionArray, Series, Index. dtype : np.dtype, ExtensionDtype or None copy : bool From a08bbb5b2689628a02321b03251f21f533a9f529 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Dec 2020 12:21:38 -0800 Subject: [PATCH 5/5] catch OOB --- pandas/core/dtypes/cast.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index abcc60a15c641..034fd927a8017 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1543,7 +1543,10 @@ def construct_1d_arraylike_from_scalar( """ if dtype is None: - dtype, value = infer_dtype_from_scalar(value, pandas_dtype=True) + try: + dtype, value = infer_dtype_from_scalar(value, pandas_dtype=True) + except OutOfBoundsDatetime: + dtype = np.dtype(object) if is_extension_array_dtype(dtype): cls = dtype.construct_array_type()