Skip to content

Commit

Permalink
searchsorted, repeat broken off from pandas-dev#24024 (pandas-dev#24461)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and Pingviinituutti committed Feb 28, 2019
1 parent 0553b8b commit 5a9816f
Show file tree
Hide file tree
Showing 6 changed files with 249 additions and 0 deletions.
135 changes: 135 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas._libs.tslibs.timestamps import (
RoundTo, maybe_integer_op_deprecated, round_nsint64)
import pandas.compat as compat
from pandas.compat.numpy import function as nv
from pandas.errors import (
AbstractMethodError, NullFrequencyError, PerformanceWarning)
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
Expand Down Expand Up @@ -82,6 +83,79 @@ def _get_attributes_dict(self):
"""
return {k: getattr(self, k, None) for k in self._attributes}

@property
def _scalar_type(self):
    # type: () -> Union[type, Tuple[type]]
    """
    The scalar type associated with this datetime-like array.

    * PeriodArray : Period
    * DatetimeArray : Timestamp
    * TimedeltaArray : Timedelta

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.
    """
    raise AbstractMethodError(self)

def _scalar_from_string(self, value):
    # type: (str) -> Union[Period, Timestamp, Timedelta, NaTType]
    """
    Construct a scalar type from a string.

    Parameters
    ----------
    value : str

    Returns
    -------
    Period, Timestamp, or Timedelta, or NaT
        Whatever the type of ``self._scalar_type`` is.

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.

    Notes
    -----
    This should call ``self._check_compatible_with`` before
    unboxing the result.
    """
    raise AbstractMethodError(self)

def _unbox_scalar(self, value):
    # type: (Union[Period, Timestamp, Timedelta, NaTType]) -> int
    """
    Unbox the integer value of a scalar `value`.

    Parameters
    ----------
    value : Union[Period, Timestamp, Timedelta]

    Returns
    -------
    int

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.

    Examples
    --------
    >>> self._unbox_scalar(Timedelta('10s'))  # doctest: +SKIP
    10000000000
    """
    raise AbstractMethodError(self)

def _check_compatible_with(self, other):
    # type: (Union[Period, Timestamp, Timedelta, NaTType]) -> None
    """
    Verify that `self` and `other` are compatible.

    * DatetimeArray verifies that the timezones (if any) match
    * PeriodArray verifies that the freq matches
    * TimedeltaArray has no verification

    In each case, NaT is considered compatible.

    Parameters
    ----------
    other

    Raises
    ------
    Exception
        Raised by the concrete implementations when `other` is not
        compatible. This base implementation always raises
        AbstractMethodError.
    """
    raise AbstractMethodError(self)


class DatelikeOps(object):
"""
Expand Down Expand Up @@ -515,6 +589,67 @@ def _values_for_factorize(self):
def _from_factorized(cls, values, original):
    """
    Build a new ``cls`` from ``values``, reusing the dtype of ``original``.

    NOTE(review): the decorator for this method (presumably ``@classmethod``)
    is outside the visible hunk -- confirm against the full file.
    """
    return cls(values, dtype=original.dtype)

def _values_for_argsort(self):
    """Return the values used for argsort: the backing ``self._data``."""
    return self._data

# ------------------------------------------------------------------
# Additional array methods
# These are not part of the EA API, but we implement them because
# pandas assumes they're there.

def searchsorted(self, value, side='left', sorter=None):
    """
    Find indices where elements should be inserted to maintain order.

    Find the indices into a sorted array `self` such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `self` would be preserved.

    Parameters
    ----------
    value : array_like
        Values to insert into `self`. A string is first converted with
        ``self._scalar_from_string``.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index. If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array_like, optional
        Optional array of integer indices that sort `self` into ascending
        order. They are typically the result of ``np.argsort``.

    Returns
    -------
    indices : array of ints
        Array of insertion points with the same shape as `value`.

    Raises
    ------
    ValueError
        If `value` (after string conversion) is neither an instance of
        ``self._scalar_type`` or ``type(self)`` nor NA according to
        ``isna``.
    """
    if isinstance(value, compat.string_types):
        value = self._scalar_from_string(value)

    own_type = type(self)
    recognized = isinstance(value, (self._scalar_type, own_type))
    if not recognized and not isna(value):
        message = "Unexpected type for 'value': {valtype}"
        raise ValueError(message.format(valtype=type(value)))

    self._check_compatible_with(value)

    # Arrays of our own type already expose their integer representation;
    # anything else is a scalar (or NA) that has to be unboxed.
    if isinstance(value, own_type):
        key = value.asi8
    else:
        key = self._unbox_scalar(value)

    return self.asi8.searchsorted(key, side=side, sorter=sorter)

def repeat(self, repeats, *args, **kwargs):
    """
    Repeat elements of an array.

    Parameters
    ----------
    repeats
        Forwarded unchanged to ``self._data.repeat``.
    *args, **kwargs
        Only checked through ``nv.validate_repeat``; they are not
        forwarded to the underlying data.

    Returns
    -------
    type(self)
        New array built from the repeated data with ``self.dtype``.

    See Also
    --------
    numpy.ndarray.repeat
    """
    nv.validate_repeat(args, kwargs)
    constructor = type(self)
    repeated = self._data.repeat(repeats)
    return constructor(repeated, dtype=self.dtype)

# ------------------------------------------------------------------
# Null Handling

Expand Down
21 changes: 21 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin,
_data
"""
_typ = "datetimearray"
_scalar_type = Timestamp

# define my properties & methods for delegation
_bool_ops = ['is_month_start', 'is_month_end',
Expand Down Expand Up @@ -347,6 +348,26 @@ def _generate_range(cls, start, end, periods, freq, tz=None,

return cls._simple_new(index.asi8, freq=freq, tz=tz)

# -----------------------------------------------------------------
# DatetimeLike Interface

def _unbox_scalar(self, value):
    """
    Return the integer ``.value`` of a Timestamp or NaT.

    Non-NA values are first passed to ``self._check_compatible_with``.

    Raises
    ------
    ValueError
        If `value` is neither an instance of ``self._scalar_type``
        nor NaT.
    """
    acceptable = isinstance(value, self._scalar_type) or value is NaT
    if not acceptable:
        raise ValueError("'value' should be a Timestamp.")

    missing = isna(value)
    if not missing:
        self._check_compatible_with(value)
    return value.value

def _scalar_from_string(self, value):
    """Build a Timestamp from `value`, passing this array's ``tz``."""
    own_tz = self.tz
    return Timestamp(value, tz=own_tz)

def _check_compatible_with(self, other):
    """
    Check that the timezone of `other` matches this array's timezone.

    NaT is always accepted.

    Raises
    ------
    ValueError
        If ``timezones.tz_compare`` reports that ``self.tz`` and
        ``other.tz`` differ.
    """
    if other is not NaT and not timezones.tz_compare(self.tz, other.tz):
        template = "Timezones don't match. '{own} != {other}'"
        raise ValueError(template.format(own=self.tz, other=other.tz))

# -----------------------------------------------------------------
# Descriptive Properties

Expand Down
22 changes: 22 additions & 0 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin,
__array_priority__ = 1000
_attributes = ["freq"]
_typ = "periodarray" # ABCPeriodArray
_scalar_type = Period

# Names others delegate to us
_other_ops = []
Expand Down Expand Up @@ -240,7 +241,28 @@ def _generate_range(cls, start, end, periods, freq, fields):

return subarr, freq

# -----------------------------------------------------------------
# DatetimeLike Interface

def _unbox_scalar(self, value):
    # type: (Union[Period, NaTType]) -> int
    """
    Return the integer representation of a Period or NaT.

    NaT yields its ``.value``; a Period yields its ``.ordinal`` after
    being passed to ``self._check_compatible_with``.

    Raises
    ------
    ValueError
        If `value` is neither NaT nor an instance of ``self._scalar_type``.
    """
    if value is NaT:
        return value.value

    if not isinstance(value, self._scalar_type):
        message = "'value' should be a Period. Got '{val}' instead."
        raise ValueError(message.format(val=value))

    if not isna(value):
        self._check_compatible_with(value)
    return value.ordinal

def _scalar_from_string(self, value):
    # type: (str) -> Period
    """Build a Period from `value`, passing this array's ``freq``."""
    own_freq = self.freq
    return Period(value, freq=own_freq)

def _check_compatible_with(self, other):
    """
    Check that ``other.freqstr`` equals this array's ``freqstr``.

    NaT is always accepted. On a mismatch the error is raised by
    ``_raise_on_incompatible(self, other)``.
    """
    if other is not NaT and self.freqstr != other.freqstr:
        _raise_on_incompatible(self, other)

Expand Down
17 changes: 17 additions & 0 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def wrapper(self, other):

class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps):
_typ = "timedeltaarray"
_scalar_type = Timedelta
__array_priority__ = 1000
# define my properties & methods for delegation
_other_ops = []
Expand Down Expand Up @@ -221,6 +222,22 @@ def _generate_range(cls, start, end, periods, freq, closed=None):

return cls._simple_new(index, freq=freq)

# ----------------------------------------------------------------
# DatetimeLike Interface

def _unbox_scalar(self, value):
    """
    Return the integer ``.value`` of a Timedelta or NaT.

    The value is passed to ``self._check_compatible_with`` before
    unboxing.

    Raises
    ------
    ValueError
        If `value` is neither an instance of ``self._scalar_type``
        nor NaT.
    """
    acceptable = isinstance(value, self._scalar_type) or value is NaT
    if not acceptable:
        raise ValueError("'value' should be a Timedelta.")

    self._check_compatible_with(value)
    return value.value

def _scalar_from_string(self, value):
    """Build a Timedelta from the string `value`."""
    parsed = Timedelta(value)
    return parsed

def _check_compatible_with(self, other):
    """Accept any `other`: there is nothing to validate here."""
    return None

# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ def repeat(self, repeats, axis=None):
nv.validate_repeat(tuple(), dict(axis=axis))
freq = self.freq if is_period_dtype(self) else None
return self._shallow_copy(self.asi8.repeat(repeats), freq=freq)
# TODO: dispatch to _eadata

@Appender(_index_shared_docs['where'] % _index_doc_kwargs)
def where(self, cond, other=None):
Expand Down
53 changes: 53 additions & 0 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import numpy as np
import pytest

import pandas.compat as compat

import pandas as pd
from pandas.core.arrays import (
DatetimeArrayMixin as DatetimeArray, PeriodArray,
Expand Down Expand Up @@ -129,6 +131,57 @@ def test_concat_same_type(self):

tm.assert_index_equal(self.index_cls(result), expected)

def test_unbox_scalar(self):
    """Unboxing an element or NaT gives an integer; a string is rejected."""
    day_values = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
    arr = self.array_cls(day_values, freq='D')

    unboxed = arr._unbox_scalar(arr[0])
    assert isinstance(unboxed, (int, compat.long))

    unboxed = arr._unbox_scalar(pd.NaT)
    assert isinstance(unboxed, (int, compat.long))

    with pytest.raises(ValueError):
        arr._unbox_scalar('foo')

def test_check_compatible_with(self):
    """An array accepts its own element, a slice of itself, and NaT."""
    day_values = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
    arr = self.array_cls(day_values, freq='D')

    # None of these calls is expected to raise.
    for candidate in (arr[0], arr[:1], pd.NaT):
        arr._check_compatible_with(candidate)

def test_scalar_from_string(self):
    """The string form of an element parses back to an equal scalar."""
    day_values = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
    arr = self.array_cls(day_values, freq='D')

    as_text = str(arr[0])
    parsed = arr._scalar_from_string(as_text)
    assert parsed == arr[0]

def test_searchsorted(self):
    """searchsorted handles scalars, arrays of its own type, and NaT."""
    day_values = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
    arr = self.array_cls(day_values, freq='D')

    # scalar lookups
    position = arr.searchsorted(arr[1])
    assert position == 1

    position = arr.searchsorted(arr[2], side="right")
    assert position == 3

    # lookups with an array of the same type
    window = arr[1:3]
    positions = arr.searchsorted(window)
    tm.assert_numpy_array_equal(positions,
                                np.array([1, 2], dtype=np.int64))

    positions = arr.searchsorted(window, side="right")
    tm.assert_numpy_array_equal(positions,
                                np.array([2, 3], dtype=np.int64))

    # Following numpy convention, NaT goes at the beginning
    # (unlike NaN which goes at the end)
    position = arr.searchsorted(pd.NaT)
    assert position == 0


class TestDatetimeArray(SharedTests):
index_cls = pd.DatetimeIndex
Expand Down

0 comments on commit 5a9816f

Please sign in to comment.