Skip to content

Commit

Permalink
Some performance testing
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Sep 25, 2017
1 parent 7a35c4b commit 2a1a064
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 19 deletions.
44 changes: 35 additions & 9 deletions asv_bench/benchmarks/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,9 +356,17 @@ def setup(self):

self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000))
self.s2 = self.s.str.replace(':\\S+$', '')
self.dup_numeric_data = Series([1000] * 100000)
self.dup_string_data = ['2013-01-01'] * 100000
self.dup_datetime_data = [dt.datetime(2010, 1, 1)] * 100000
self.dup_numeric_data_10_5 = Series([1000] * 100000)
self.dup_string_data_10_5 = ['2013-01-01 01:00:00'] * 100000
self.dup_datetime_data_10_5 = [dt.datetime(2010, 1, 1)] * 100000

self.dup_numeric_data_10_3 = Series([1000] * 100)
self.dup_string_data_10_3 = ['2013-01-01 01:00:00'] * 100
self.dup_datetime_data_10_3 = [dt.datetime(2010, 1, 1)] * 100

self.dup_numeric_data_10_7 = Series([1000] * 10**7)
self.dup_string_data_10_7 = ['2013-01-01 01:00:00'] * 10**7
self.dup_datetime_data_10_7 = [dt.datetime(2010, 1, 1)] * 10**7

def time_format_YYYYMMDD(self):
    """Benchmark parsing the pre-built YYYYMMDD strings with an explicit format."""
    date_strings = self.stringsD
    to_datetime(date_strings, format='%Y%m%d')
Expand All @@ -384,14 +392,32 @@ def time_format_exact(self):
def time_format_no_exact(self):
    """Benchmark non-exact format matching (format is a prefix of some values)."""
    strings = self.s
    to_datetime(strings, format='%d%b%y', exact=False)

def time_cache_dup_numeric_data(self):
to_datetime(self.dup_numeric_data, unit='s')
def time_cache_dup_numeric_data_10_3(self):
    """Benchmark to_datetime caching over 10**3 duplicated epoch seconds."""
    epochs = self.dup_numeric_data_10_3
    to_datetime(epochs, unit='s')

def time_cache_dup_datetime_data_10_3(self):
    """Benchmark to_datetime caching over 10**3 duplicated datetime objects."""
    dates = self.dup_datetime_data_10_3
    to_datetime(dates)

def time_cache_dup_string_data_10_3(self):
    """Benchmark to_datetime caching over 10**3 duplicated date strings."""
    strings = self.dup_string_data_10_3
    to_datetime(strings)

def time_cache_dup_numeric_data_10_5(self):
    """Benchmark to_datetime caching over 10**5 duplicated epoch seconds."""
    epochs = self.dup_numeric_data_10_5
    to_datetime(epochs, unit='s')

def time_cache_dup_datetime_data_10_5(self):
    """Benchmark to_datetime caching over 10**5 duplicated datetime objects."""
    dates = self.dup_datetime_data_10_5
    to_datetime(dates)

def time_cache_dup_string_data_10_5(self):
    """Benchmark to_datetime caching over 10**5 duplicated date strings."""
    strings = self.dup_string_data_10_5
    to_datetime(strings)

def time_cache_dup_numeric_data_10_7(self):
    """Benchmark to_datetime caching over 10**7 duplicated epoch seconds."""
    epochs = self.dup_numeric_data_10_7
    to_datetime(epochs, unit='s')

def time_cache_dup_datetime_data(self):
    """Benchmark to_datetime caching over duplicated datetime objects."""
    dates = self.dup_datetime_data
    to_datetime(dates)
def time_cache_dup_datetime_data_10_7(self):
    """Benchmark to_datetime caching over 10**7 duplicated datetime objects."""
    dates = self.dup_datetime_data_10_7
    to_datetime(dates)

def time_cache_dup_string_data(self):
    """Benchmark to_datetime caching over duplicated date strings."""
    strings = self.dup_string_data
    to_datetime(strings)
def time_cache_dup_string_data_10_7(self):
    """Benchmark to_datetime caching over 10**7 duplicated date strings."""
    strings = self.dup_string_data_10_7
    to_datetime(strings)


class Offsets(object):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def __new__(cls, data=None,
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
is_integer_dtype(data)):
data = tools.to_datetime(data, dayfirst=dayfirst,
yearfirst=yearfirst)
yearfirst=yearfirst, cache=False)

if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):

Expand Down
18 changes: 9 additions & 9 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,

def _convert_listlike(arg, box, format, name=None, tz=tz):


import pdb; pdb.set_trace()
if isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype='O')

Expand Down Expand Up @@ -525,16 +525,16 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
if cache and is_list_like(arg):
# Create a cache only if there are more than 10k values and the user
# passes in datestrings
min_cache_threshold = 10**5
if len(arg) >= min_cache_threshold and is_string_dtype(arg):
#min_cache_threshold = 10**5
#if len(arg) >= min_cache_threshold and is_string_dtype(arg):
# unique currently cannot determine dates that are out of bounds
# recursion errors with datetime
unique_dates = algorithms.unique(arg)
# Essentially they need to all be the same value
if len(unique_dates) == 1:
from pandas import Series
cache_data = _convert_listlike(unique_dates, True, format)
convert_cache = Series(cache_data, index=unique_dates)
unique_dates = algorithms.unique(arg)
# Essentially they need to all be the same value
if len(unique_dates) != len(arg):
from pandas import Series
cache_data = _convert_listlike(unique_dates, False, format)
convert_cache = Series(cache_data, index=unique_dates)

if isinstance(arg, tslib.Timestamp):
result = arg
Expand Down

0 comments on commit 2a1a064

Please sign in to comment.