From 89ce37fb80882cf44e530f861b8cad9ada4ed9b8 Mon Sep 17 00:00:00 2001
From: Matt Roeschke
Date: Mon, 25 Sep 2017 16:55:25 -0700
Subject: [PATCH] Add asvs, modify tests for caches

---
 asv_bench/benchmarks/timeseries.py           |  53 +--
 pandas/core/tools/datetimes.py               |  21 +-
 pandas/tests/indexes/datetimes/test_tools.py | 475 +++++++++++--------
 3 files changed, 308 insertions(+), 241 deletions(-)

diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py
index a1cc822b58f2b..d9d1b48f420e6 100644
--- a/asv_bench/benchmarks/timeseries.py
+++ b/asv_bench/benchmarks/timeseries.py
@@ -346,27 +346,21 @@ class ToDatetime(object):

     def setup(self):
         self.rng = date_range(start='1/1/2000', periods=10000, freq='D')
-        self.stringsD = Series((((self.rng.year * 10000) + (self.rng.month * 100)) + self.rng.day), dtype=np.int64).apply(str)
+        self.stringsD = Series(self.rng.strftime('%Y%m%d'))

         self.rng = date_range(start='1/1/2000', periods=20000, freq='H')
-        self.strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in self.rng]
-        self.strings_nosep = [x.strftime('%Y%m%d %H:%M:%S') for x in self.rng]
+        self.strings = self.rng.strftime('%Y-%m-%d %H:%M:%S').tolist()
+        self.strings_nosep = self.rng.strftime('%Y%m%d %H:%M:%S').tolist()
         self.strings_tz_space = [x.strftime('%Y-%m-%d %H:%M:%S') + ' -0800'
                                  for x in self.rng]

         self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000))
         self.s2 = self.s.str.replace(':\\S+$', '')

-        self.dup_numeric_data_10_5 = Series([1000] * 100000)
-        self.dup_string_data_10_5 = ['2013-01-01 01:00:00'] * 100000
-        self.dup_datetime_data_10_5 = [dt.datetime(2010, 1, 1)] * 100000
-        self.dup_numeric_data_10_3 = Series([1000] * 100)
-        self.dup_string_data_10_3 = ['2013-01-01 01:00:00'] * 100
-        self.dup_datetime_data_10_3 = [dt.datetime(2010, 1, 1)] * 100
-
-        self.dup_numeric_data_10_7 = Series([1000] * 10**7)
-        self.dup_string_data_10_7 = ['2013-01-01 01:00:00'] * 10**7
-        self.dup_datetime_data_10_7 = [dt.datetime(2010, 1, 1)] * 10**7
+        self.unique_numeric_seconds = range(10000)
+        self.dup_numeric_seconds = [1000] * 10000
+        self.dup_string_dates = ['2000-02-11'] * 10000
+        self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * 10000

     def time_format_YYYYMMDD(self):
         to_datetime(self.stringsD, format='%Y%m%d')
@@ -392,32 +386,23 @@ def time_format_exact(self):
     def time_format_no_exact(self):
         to_datetime(self.s, format='%d%b%y', exact=False)

-    def time_cache_dup_numeric_data_10_3(self):
-        to_datetime(self.dup_numeric_data_10_3, unit='s')
-
-    def time_cache_dup_datetime_data_10_3(self):
-        to_datetime(self.dup_datetime_data_10_3)
-
-    def time_cache_dup_string_data_10_3(self):
-        to_datetime(self.dup_string_data_10_3)
-
-    def time_cache_dup_numeric_data_10_5(self):
-        to_datetime(self.dup_numeric_data_10_5, unit='s')
+    def time_cache_with_unique_seconds_and_unit(self):
+        to_datetime(self.unique_numeric_seconds, unit='s')

-    def time_cache_dup_datetime_data_10_5(self):
-        to_datetime(self.dup_datetime_data_10_5)
+    def time_cache_with_dup_seconds_and_unit(self):
+        to_datetime(self.dup_numeric_seconds, unit='s')

-    def time_cache_dup_string_data_10_5(self):
-        to_datetime(self.dup_string_data_10_5)
+    def time_cache_with_dup_string_dates(self):
+        to_datetime(self.dup_string_dates)

-    def time_cache_dup_numeric_data_10_7(self):
-        to_datetime(self.dup_numeric_data_10_7, unit='s')
+    def time_cache_with_dup_string_dates_and_format(self):
+        to_datetime(self.dup_string_dates, format='%Y-%m-%d')

-    def time_cache_dup_datetime_data_10_7(self):
-        to_datetime(self.dup_datetime_data_10_7)
+    def time_cache_with_dup_string_tzoffset_dates(self):
+        to_datetime(self.dup_string_with_tz)

-    def time_cache_dup_string_data_10_7(self):
-        to_datetime(self.dup_string_data_10_7)
+    def time_cache_with_dup_string_tzoffset_dates_and_format(self):
+        to_datetime(self.dup_string_with_tz, format='%Y-%m-%d %H:%M:%S%z')


 class Offsets(object):

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 0b138c8b9cd98..87dc9b012d51d 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -261,7 +261,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
         origin.

        .. versionadded: 0.20.0

-    cache_datetime : boolean, default False
+    cache : boolean, default False
        If True, use a cache of unique, converted dates to apply the datetime
        conversion. Produces significant speed-ups when parsing duplicate dates.
@@ -355,7 +355,6 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,

     def _convert_listlike(arg, box, format, name=None, tz=tz):

-        import pdb; pdb.set_trace()
         if isinstance(arg, (list, tuple)):
             arg = np.array(arg, dtype='O')

@@ -523,18 +522,12 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):

     convert_cache = None
     if cache and is_list_like(arg):
-        # Create a cache only if there are more than 10k values and the user
-        # passes in datestrings
-        #min_cache_threshold = 10**5
-        #if len(arg) >= min_cache_threshold and is_string_dtype(arg):
-        # unique currently cannot determine dates that are out of bounds
-        # recurison errors with datetime
-        unique_dates = algorithms.unique(arg)
-        # Essentially they need to all be the same value
-        if len(unique_dates) != len(arg):
-            from pandas import Series
-            cache_data = _convert_listlike(unique_dates, False, format)
-            convert_cache = Series(cache_data, index=unique_dates)
+        if len(arg) >= 1000:
+            unique_dates = algorithms.unique(arg)
+            if len(unique_dates) != len(arg):
+                from pandas import Series
+                cache_dates = _convert_listlike(unique_dates, False, format)
+                convert_cache = Series(cache_dates, index=unique_dates)

     if isinstance(arg, tslib.Timestamp):
         result = arg

diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index 383b61bf13e99..203dbe066bf30 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -27,7 +27,8 @@

 class TestTimeConversionFormats(object):

-    def test_to_datetime_format(self):
+    @pytest.mark.parametrize('cache', [True, False])
+    def test_to_datetime_format(self, cache):
         values = ['1/1/2000', '1/2/2000', '1/3/2000']

         results1 = [Timestamp('20000101'), Timestamp('20000201'),
@@ -42,7 +43,7 @@ def test_to_datetime_format(self):
                 (values[2], (results1[2], results2[2]))]:

             for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']):
-                result = to_datetime(vals, format=fmt)
+                result = to_datetime(vals, format=fmt, cache=cache)
                 expected = expecteds[i]

                 if isinstance(expected, Series):
@@ -52,14 +53,15 @@ def test_to_datetime_format(self):
                 else:
                     tm.assert_index_equal(result, expected)

-    def test_to_datetime_format_YYYYMMDD(self):
+    @pytest.mark.parametrize('cache', [True, False])
+    def test_to_datetime_format_YYYYMMDD(self, cache):
         s = Series([19801222, 19801222] + [19810105] * 5)
         expected = Series([Timestamp(x) for x in s.apply(str)])

-        result = to_datetime(s, format='%Y%m%d')
+        result = to_datetime(s, format='%Y%m%d', cache=cache)
         assert_series_equal(result, expected)

-        result = to_datetime(s.apply(str), format='%Y%m%d')
+        result = to_datetime(s.apply(str), format='%Y%m%d',
cache=cache) assert_series_equal(result, expected) # with NaT @@ -68,44 +70,48 @@ def test_to_datetime_format_YYYYMMDD(self): expected[2] = np.nan s[2] = np.nan - result = to_datetime(s, format='%Y%m%d') + result = to_datetime(s, format='%Y%m%d', cache=cache) assert_series_equal(result, expected) # string with NaT s = s.apply(str) s[2] = 'nat' - result = to_datetime(s, format='%Y%m%d') + result = to_datetime(s, format='%Y%m%d', cache=cache) assert_series_equal(result, expected) # coercion # GH 7930 s = Series([20121231, 20141231, 99991231]) - result = pd.to_datetime(s, format='%Y%m%d', errors='ignore') + result = pd.to_datetime(s, format='%Y%m%d', errors='ignore', + cache=cache) expected = Series([datetime(2012, 12, 31), datetime(2014, 12, 31), datetime(9999, 12, 31)], dtype=object) tm.assert_series_equal(result, expected) - result = pd.to_datetime(s, format='%Y%m%d', errors='coerce') + result = pd.to_datetime(s, format='%Y%m%d', errors='coerce', + cache=cache) expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]') assert_series_equal(result, expected) - # GH 10178 - def test_to_datetime_format_integer(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_integer(self, cache): + # GH 10178 s = Series([2000, 2001, 2002]) expected = Series([Timestamp(x) for x in s.apply(str)]) - result = to_datetime(s, format='%Y') + result = to_datetime(s, format='%Y', cache=cache) assert_series_equal(result, expected) s = Series([200001, 200105, 200206]) expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) ]) - result = to_datetime(s, format='%Y%m') + result = to_datetime(s, format='%Y%m', cache=cache) assert_series_equal(result, expected) - def test_to_datetime_format_microsecond(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_microsecond(self, cache): # these are locale dependent lang, _ = locale.getlocale() @@ -113,11 +119,12 @@ def test_to_datetime_format_microsecond(self): val = '01-{}-2011 00:00:01.978'.format(month_abbr) format = '%d-%b-%Y %H:%M:%S.%f' - result = to_datetime(val, format=format) + result = to_datetime(val, format=format, cache=cache) exp = datetime.strptime(val, format) assert result == exp - def test_to_datetime_format_time(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_time(self, cache): data = [ ['01/10/2010 15:20', '%m/%d/%Y %H:%M', Timestamp('2010-01-10 15:20')], @@ -133,9 +140,10 @@ def test_to_datetime_format_time(self): # Timestamp('2010-01-10 09:12:56')] ] for s, format, dt in data: - assert to_datetime(s, format=format) == dt + assert to_datetime(s, format=format, cache=cache) == dt - def test_to_datetime_with_non_exact(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_non_exact(self, cache): # GH 10834 tm._skip_if_has_locale() @@ -146,12 +154,13 @@ def test_to_datetime_with_non_exact(self): s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00', '19MAY11 00:00:00Z']) - result = to_datetime(s, format='%d%b%y', exact=False) + result = to_datetime(s, format='%d%b%y', exact=False, cache=cache) expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False), - format='%d%b%y') + format='%d%b%y', cache=cache) assert_series_equal(result, expected) - def test_parse_nanoseconds_with_formula(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parse_nanoseconds_with_formula(self, cache): # GH8989 # trunctaing the nanoseconds when a format was provided @@ -160,44 +169,48 @@ def 
test_parse_nanoseconds_with_formula(self): "2012-01-01 09:00:00.001", "2012-01-01 09:00:00.001000", "2012-01-01 09:00:00.001000000", ]: - expected = pd.to_datetime(v) - result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f") + expected = pd.to_datetime(v, cache=cache) + result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f", + cache=cache) assert result == expected - def test_to_datetime_format_weeks(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_weeks(self, cache): data = [ ['2009324', '%Y%W%w', Timestamp('2009-08-13')], ['2013020', '%Y%U%w', Timestamp('2013-01-13')] ] for s, format, dt in data: - assert to_datetime(s, format=format) == dt + assert to_datetime(s, format=format, cache=cache) == dt class TestToDatetime(object): - def test_to_datetime_dt64s(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_dt64s(self, cache): in_bound_dts = [ np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] for dt in in_bound_dts: - assert pd.to_datetime(dt) == Timestamp(dt) + assert pd.to_datetime(dt, cache=cache) == Timestamp(dt) oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] for dt in oob_dts: pytest.raises(ValueError, pd.to_datetime, dt, errors='raise') pytest.raises(ValueError, Timestamp, dt) - assert pd.to_datetime(dt, errors='coerce') is NaT + assert pd.to_datetime(dt, errors='coerce', cache=cache) is NaT - def test_to_datetime_array_of_dt64s(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_array_of_dt64s(self, cache): dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] # Assuming all datetimes are in bounds, to_datetime() returns # an array that is equal to Timestamp() parsing tm.assert_numpy_array_equal( - pd.to_datetime(dts, box=False), + pd.to_datetime(dts, box=False, cache=cache), np.array([Timestamp(x).asm8 for x in dts]) ) @@ -208,7 +221,8 @@ def test_to_datetime_array_of_dt64s(self): errors='raise') tm.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='coerce'), + pd.to_datetime(dts_with_oob, box=False, errors='coerce', + cache=cache), np.array( [ Timestamp(dts_with_oob[0]).asm8, @@ -223,20 +237,22 @@ def test_to_datetime_array_of_dt64s(self): # are converted to their .item(), which depending on the version of # numpy is either a python datetime.datetime or datetime.date tm.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='ignore'), + pd.to_datetime(dts_with_oob, box=False, errors='ignore', + cache=cache), np.array( [dt.item() for dt in dts_with_oob], dtype='O' ) ) - def test_to_datetime_tz(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_tz(self, cache): # xref 8260 # uniform returns a DatetimeIndex arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] - result = pd.to_datetime(arr) + result = pd.to_datetime(arr, cache=cache) expected = DatetimeIndex( ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') tm.assert_index_equal(result, expected) @@ -244,9 +260,10 @@ def test_to_datetime_tz(self): # mixed tzs will raise arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] - pytest.raises(ValueError, lambda: pd.to_datetime(arr)) + pytest.raises(ValueError, lambda: pd.to_datetime(arr, cache=cache)) - def test_to_datetime_tz_pytz(self): + @pytest.mark.parametrize('cache', [True, False]) + def 
test_to_datetime_tz_pytz(self, cache): # see gh-8260 us_eastern = pytz.timezone('US/Eastern') arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, @@ -254,18 +271,20 @@ def test_to_datetime_tz_pytz(self): us_eastern.localize(datetime(year=2000, month=6, day=1, hour=3, minute=0))], dtype=object) - result = pd.to_datetime(arr, utc=True) + result = pd.to_datetime(arr, utc=True, cache=cache) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('cache', [True, False]) @pytest.mark.parametrize("init_constructor, end_constructor, test_method", [(Index, DatetimeIndex, tm.assert_index_equal), (list, DatetimeIndex, tm.assert_index_equal), (np.array, DatetimeIndex, tm.assert_index_equal), (Series, Series, tm.assert_series_equal)]) def test_to_datetime_utc_true(self, + cache, init_constructor, end_constructor, test_method): @@ -276,39 +295,47 @@ def test_to_datetime_utc_true(self, result = pd.to_datetime(init_constructor(data), format='%Y%m%d %H%M%S', - utc=True) + utc=True, + cache=cache) expected = end_constructor(expected_data) test_method(result, expected) # Test scalar case as well for scalar, expected in zip(data, expected_data): - result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True) + result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True, + cache=cache) assert result == expected - def test_to_datetime_utc_true_with_series_single_value(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_utc_true_with_series_single_value(self, cache): # GH 15760 UTC=True with Series ts = 1.5e18 - result = pd.to_datetime(pd.Series([ts]), utc=True) + result = pd.to_datetime(pd.Series([ts]), utc=True, cache=cache) expected = pd.Series([pd.Timestamp(ts, tz='utc')]) tm.assert_series_equal(result, expected) - def test_to_datetime_utc_true_with_series_tzaware_string(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_utc_true_with_series_tzaware_string(self, cache): ts = '2013-01-01 00:00:00-01:00' expected_ts = '2013-01-01 01:00:00' data = pd.Series([ts] * 3) - result = pd.to_datetime(data, utc=True) + result = pd.to_datetime(data, utc=True, cache=cache) expected = pd.Series([pd.Timestamp(expected_ts, tz='utc')] * 3) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('cache', [True, False]) @pytest.mark.parametrize('date, dtype', [('2013-01-01 01:00:00', 'datetime64[ns]'), ('2013-01-01 01:00:00', 'datetime64[ns, UTC]')]) - def test_to_datetime_utc_true_with_series_datetime_ns(self, date, dtype): + def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date, + dtype): expected = pd.Series([pd.Timestamp('2013-01-01 01:00:00', tz='UTC')]) - result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True) + result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True, + cache=cache) tm.assert_series_equal(result, expected) - def test_to_datetime_tz_psycopg2(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_tz_psycopg2(self, cache): # xref 8260 try: @@ -323,7 +350,7 @@ def test_to_datetime_tz_psycopg2(self): datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], dtype=object) - result = pd.to_datetime(arr, errors='coerce', utc=True) + result = pd.to_datetime(arr, errors='coerce', utc=True, cache=cache) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) @@ -336,32 
+363,35 @@ def test_to_datetime_tz_psycopg2(self): assert is_datetime64_ns_dtype(i) # tz coerceion - result = pd.to_datetime(i, errors='coerce') + result = pd.to_datetime(i, errors='coerce', cache=cache) tm.assert_index_equal(result, i) - result = pd.to_datetime(i, errors='coerce', utc=True) + result = pd.to_datetime(i, errors='coerce', utc=True, cache=cache) expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], dtype='datetime64[ns, UTC]') tm.assert_index_equal(result, expected) - def test_datetime_bool(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_datetime_bool(self, cache): # GH13176 with pytest.raises(TypeError): to_datetime(False) - assert to_datetime(False, errors="coerce") is NaT - assert to_datetime(False, errors="ignore") is False + assert to_datetime(False, errors="coerce", cache=cache) is NaT + assert to_datetime(False, errors="ignore", cache=cache) is False with pytest.raises(TypeError): to_datetime(True) - assert to_datetime(True, errors="coerce") is NaT - assert to_datetime(True, errors="ignore") is True + assert to_datetime(True, errors="coerce", cache=cache) is NaT + assert to_datetime(True, errors="ignore", cache=cache) is True with pytest.raises(TypeError): - to_datetime([False, datetime.today()]) + to_datetime([False, datetime.today()], cache=cache) with pytest.raises(TypeError): - to_datetime(['20130101', True]) + to_datetime(['20130101', True], cache=cache) tm.assert_index_equal(to_datetime([0, False, NaT, 0.0], - errors="coerce"), - DatetimeIndex([to_datetime(0), NaT, - NaT, to_datetime(0)])) + errors="coerce", cache=cache), + DatetimeIndex([to_datetime(0, cache=cache), + NaT, + NaT, + to_datetime(0, cache=cache)])) def test_datetime_invalid_datatype(self): # GH13176 @@ -406,71 +436,77 @@ def test_to_datetime_cache_scalar(self): class TestToDatetimeUnit(object): - def test_unit(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit(self, cache): # GH 11758 # test proper behavior with erros with pytest.raises(ValueError): - to_datetime([1], unit='D', format='%Y%m%d') + to_datetime([1], unit='D', format='%Y%m%d', cache=cache) values = [11111111, 1, 1.0, tslib.iNaT, NaT, np.nan, 'NaT', ''] - result = to_datetime(values, unit='D', errors='ignore') + result = to_datetime(values, unit='D', errors='ignore', cache=cache) expected = Index([11111111, Timestamp('1970-01-02'), Timestamp('1970-01-02'), NaT, NaT, NaT, NaT, NaT], dtype=object) tm.assert_index_equal(result, expected) - result = to_datetime(values, unit='D', errors='coerce') + result = to_datetime(values, unit='D', errors='coerce', cache=cache) expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) with pytest.raises(tslib.OutOfBoundsDatetime): - to_datetime(values, unit='D', errors='raise') + to_datetime(values, unit='D', errors='raise', cache=cache) values = [1420043460000, tslib.iNaT, NaT, np.nan, 'NaT'] - result = to_datetime(values, errors='ignore', unit='s') + result = to_datetime(values, errors='ignore', unit='s', cache=cache) expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object) tm.assert_index_equal(result, expected) - result = to_datetime(values, errors='coerce', unit='s') + result = to_datetime(values, errors='coerce', unit='s', cache=cache) expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) with pytest.raises(tslib.OutOfBoundsDatetime): - to_datetime(values, errors='raise', unit='s') + to_datetime(values, errors='raise', 
unit='s', cache=cache) # if we have a string, then we raise a ValueError # and NOT an OutOfBoundsDatetime for val in ['foo', Timestamp('20130101')]: try: - to_datetime(val, errors='raise', unit='s') + to_datetime(val, errors='raise', unit='s', cache=cache) except tslib.OutOfBoundsDatetime: raise AssertionError("incorrect exception raised") except ValueError: pass - def test_unit_consistency(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_consistency(self, cache): # consistency of conversions expected = Timestamp('1970-05-09 14:25:11') - result = pd.to_datetime(11111111, unit='s', errors='raise') + result = pd.to_datetime(11111111, unit='s', errors='raise', + cache=cache) assert result == expected assert isinstance(result, Timestamp) - result = pd.to_datetime(11111111, unit='s', errors='coerce') + result = pd.to_datetime(11111111, unit='s', errors='coerce', + cache=cache) assert result == expected assert isinstance(result, Timestamp) - result = pd.to_datetime(11111111, unit='s', errors='ignore') + result = pd.to_datetime(11111111, unit='s', errors='ignore', + cache=cache) assert result == expected assert isinstance(result, Timestamp) - def test_unit_with_numeric(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_with_numeric(self, cache): # GH 13180 # coercions from floats/ints are ok @@ -479,10 +515,10 @@ def test_unit_with_numeric(self): arr1 = [1.434692e+18, 1.432766e+18] arr2 = np.array(arr1).astype('int64') for errors in ['ignore', 'raise', 'coerce']: - result = pd.to_datetime(arr1, errors=errors) + result = pd.to_datetime(arr1, errors=errors, cache=cache) tm.assert_index_equal(result, expected) - result = pd.to_datetime(arr2, errors=errors) + result = pd.to_datetime(arr2, errors=errors, cache=cache) tm.assert_index_equal(result, expected) # but we want to make sure that we are coercing @@ -491,7 +527,7 @@ def test_unit_with_numeric(self): '2015-06-19 05:33:20', '2015-05-27 22:33:20']) arr = ['foo', 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) expected = DatetimeIndex(['2015-06-19 05:33:20', @@ -499,31 +535,33 @@ def test_unit_with_numeric(self): 'NaT', 'NaT']) arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT'] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) - def test_unit_mixed(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_mixed(self, cache): # mixed integers/datetimes expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT']) arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) with pytest.raises(ValueError): - pd.to_datetime(arr, errors='raise') + pd.to_datetime(arr, errors='raise', cache=cache) expected = DatetimeIndex(['NaT', 'NaT', '2013-01-01']) arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) with pytest.raises(ValueError): - pd.to_datetime(arr, errors='raise') + pd.to_datetime(arr, errors='raise', cache=cache) - def test_dataframe(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dataframe(self, cache): df = DataFrame({'year': [2015, 2016], 
'month': [2, 3], @@ -537,19 +575,20 @@ def test_dataframe(self): result = to_datetime({'year': df['year'], 'month': df['month'], - 'day': df['day']}) + 'day': df['day']}, cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:0:00')]) assert_series_equal(result, expected) # dict-like - result = to_datetime(df[['year', 'month', 'day']].to_dict()) + result = to_datetime(df[['year', 'month', 'day']].to_dict(), + cache=cache) assert_series_equal(result, expected) # dict but with constructable df2 = df[['year', 'month', 'day']].to_dict() df2['month'] = 2 - result = to_datetime(df2) + result = to_datetime(df2, cache=cache) expected2 = Series([Timestamp('20150204 00:00:00'), Timestamp('20160205 00:0:00')]) assert_series_equal(result, expected2) @@ -570,7 +609,8 @@ def test_dataframe(self): ] for d in units: - result = to_datetime(df[list(d.keys())].rename(columns=d)) + result = to_datetime(df[list(d.keys())].rename(columns=d), + cache=cache) expected = Series([Timestamp('20150204 06:58:10'), Timestamp('20160305 07:59:11')]) assert_series_equal(result, expected) @@ -585,13 +625,13 @@ def test_dataframe(self): 'us': 'us', 'ns': 'ns'} - result = to_datetime(df.rename(columns=d)) + result = to_datetime(df.rename(columns=d), cache=cache) expected = Series([Timestamp('20150204 06:58:10.001002003'), Timestamp('20160305 07:59:11.001002003')]) assert_series_equal(result, expected) # coerce back to int - result = to_datetime(df.astype(str)) + result = to_datetime(df.astype(str), cache=cache) assert_series_equal(result, expected) # passing coerce @@ -602,8 +642,8 @@ def test_dataframe(self): msg = ("cannot assemble the datetimes: time data .+ does not " "match format '%Y%m%d' \(match\)") with tm.assert_raises_regex(ValueError, msg): - to_datetime(df2) - result = to_datetime(df2, errors='coerce') + to_datetime(df2, cache=cache) + result = to_datetime(df2, errors='coerce', cache=cache) expected = Series([Timestamp('20150204 00:00:00'), NaT]) assert_series_equal(result, expected) @@ -614,7 +654,7 @@ def test_dataframe(self): with tm.assert_raises_regex(ValueError, msg): df2 = df.copy() df2['foo'] = 1 - to_datetime(df2) + to_datetime(df2, cache=cache) # not enough msg = ('to assemble mappings requires at least that \[year, month, ' @@ -625,7 +665,7 @@ def test_dataframe(self): ['month', 'day'], ['year', 'day', 'second']]: with tm.assert_raises_regex(ValueError, msg): - to_datetime(df[c]) + to_datetime(df[c], cache=cache) # duplicates msg = 'cannot assemble with duplicate keys' @@ -634,7 +674,7 @@ def test_dataframe(self): 'day': [4, 5]}) df2.columns = ['year', 'year', 'day'] with tm.assert_raises_regex(ValueError, msg): - to_datetime(df2) + to_datetime(df2, cache=cache) df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], @@ -642,16 +682,17 @@ def test_dataframe(self): 'hour': [4, 5]}) df2.columns = ['year', 'month', 'day', 'day'] with tm.assert_raises_regex(ValueError, msg): - to_datetime(df2) + to_datetime(df2, cache=cache) - def test_dataframe_dtypes(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dataframe_dtypes(self, cache): # #13451 df = DataFrame({'year': [2015, 2016], 'month': [2, 3], 'day': [4, 5]}) # int16 - result = to_datetime(df.astype('int16')) + result = to_datetime(df.astype('int16'), cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:00:00')]) assert_series_equal(result, expected) @@ -659,7 +700,7 @@ def test_dataframe_dtypes(self): # mixed dtypes df['month'] = df['month'].astype('int8') df['day'] = 
df['day'].astype('int8') - result = to_datetime(df) + result = to_datetime(df, cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:00:00')]) assert_series_equal(result, expected) @@ -669,18 +710,19 @@ def test_dataframe_dtypes(self): 'month': [1.5, 1], 'day': [1, 1]}) with pytest.raises(ValueError): - to_datetime(df) + to_datetime(df, cache=cache) class TestToDatetimeMisc(object): - def test_index_to_datetime(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_index_to_datetime(self, cache): idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = idx.to_datetime() - expected = DatetimeIndex(pd.to_datetime(idx.values)) + expected = DatetimeIndex(pd.to_datetime(idx.values, cache=cache)) tm.assert_index_equal(result, expected) with tm.assert_produces_warning(FutureWarning, @@ -691,17 +733,19 @@ def test_index_to_datetime(self): expected = DatetimeIndex([today]) tm.assert_index_equal(result, expected) - def test_to_datetime_iso8601(self): - result = to_datetime(["2012-01-01 00:00:00"]) + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_iso8601(self, cache): + result = to_datetime(["2012-01-01 00:00:00"], cache=cache) exp = Timestamp("2012-01-01 00:00:00") assert result[0] == exp - result = to_datetime(['20121001']) # bad iso 8601 + result = to_datetime(['20121001'], cache=cache) # bad iso 8601 exp = Timestamp('2012-10-01') assert result[0] == exp - def test_to_datetime_default(self): - rs = to_datetime('2001') + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_default(self, cache): + rs = to_datetime('2001', cache=cache) xp = datetime(2001, 1, 1) assert rs == xp @@ -711,71 +755,80 @@ def test_to_datetime_default(self): # pytest.raises(ValueError, to_datetime('01-13-2012', # dayfirst=True)) - def test_to_datetime_on_datetime64_series(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_on_datetime64_series(self, cache): # #2699 s = Series(date_range('1/1/2000', periods=10)) - result = to_datetime(s) + result = to_datetime(s, cache=cache) assert result[0] == s[0] - def test_to_datetime_with_space_in_series(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_space_in_series(self, cache): # GH 6428 s = Series(['10/18/2006', '10/18/2008', ' ']) - pytest.raises(ValueError, lambda: to_datetime(s, errors='raise')) - result_coerce = to_datetime(s, errors='coerce') + pytest.raises(ValueError, lambda: to_datetime(s, + errors='raise', + cache=cache)) + result_coerce = to_datetime(s, errors='coerce', cache=cache) expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), NaT]) tm.assert_series_equal(result_coerce, expected_coerce) - result_ignore = to_datetime(s, errors='ignore') + result_ignore = to_datetime(s, errors='ignore', cache=cache) tm.assert_series_equal(result_ignore, s) - def test_to_datetime_with_apply(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_apply(self, cache): # this is only locale tested with US/None locales tm._skip_if_has_locale() # GH 5195 # with a format and coerce a single item to_datetime fails td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3]) - expected = pd.to_datetime(td, format='%b %y') - result = td.apply(pd.to_datetime, format='%b %y') + expected = pd.to_datetime(td, format='%b %y', cache=cache) + result = td.apply(pd.to_datetime, format='%b %y', cache=cache) 
assert_series_equal(result, expected) td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) pytest.raises(ValueError, lambda: pd.to_datetime(td, format='%b %y', - errors='raise')) + errors='raise', + cache=cache)) pytest.raises(ValueError, lambda: td.apply(pd.to_datetime, format='%b %y', - errors='raise')) - expected = pd.to_datetime(td, format='%b %y', errors='coerce') + errors='raise', cache=cache)) + expected = pd.to_datetime(td, format='%b %y', errors='coerce', + cache=cache) result = td.apply( - lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) + lambda x: pd.to_datetime(x, format='%b %y', errors='coerce', + cache=cache)) assert_series_equal(result, expected) - def test_to_datetime_types(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_types(self, cache): # empty string - result = to_datetime('') + result = to_datetime('', cache=cache) assert result is NaT - result = to_datetime(['', '']) + result = to_datetime(['', ''], cache=cache) assert isna(result).all() # ints result = Timestamp(0) - expected = to_datetime(0) + expected = to_datetime(0, cache=cache) assert result == expected # GH 3888 (strings) - expected = to_datetime(['2012'])[0] - result = to_datetime('2012') + expected = to_datetime(['2012'], cache=cache)[0] + result = to_datetime('2012', cache=cache) assert result == expected # array = ['2012','20120101','20120101 12:01:01'] array = ['20120101', '20120101 12:01:01'] - expected = list(to_datetime(array)) + expected = list(to_datetime(array, cache=cache)) result = lmap(Timestamp, array) tm.assert_almost_equal(result, expected) @@ -784,13 +837,15 @@ def test_to_datetime_types(self): # expected = to_datetime('2012') # assert result == expected - def test_to_datetime_unprocessable_input(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_unprocessable_input(self, cache): # GH 4928 tm.assert_numpy_array_equal( - to_datetime([1, '1'], errors='ignore'), + to_datetime([1, '1'], errors='ignore', cache=cache), np.array([1, '1'], dtype='O') ) - pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise') + pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise', + cache=cache) def test_to_datetime_other_datetime64_units(self): # 5/25/2012 @@ -826,7 +881,8 @@ def test_to_datetime_overflow(self): with pytest.raises(OverflowError): date_range(start='1/1/1700', freq='B', periods=100000) - def test_string_na_nat_conversion(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_string_na_nat_conversion(self, cache): # GH #999, #858 from pandas.compat import parse_date @@ -844,7 +900,7 @@ def test_string_na_nat_conversion(self): result = tslib.array_to_datetime(strings) tm.assert_almost_equal(result, expected) - result2 = to_datetime(strings) + result2 = to_datetime(strings, cache=cache) assert isinstance(result2, DatetimeIndex) tm.assert_numpy_array_equal(result, result2.values) @@ -852,22 +908,25 @@ def test_string_na_nat_conversion(self): # GH 10636, default is now 'raise' pytest.raises(ValueError, - lambda: to_datetime(malformed, errors='raise')) + lambda: to_datetime(malformed, errors='raise', + cache=cache)) - result = to_datetime(malformed, errors='ignore') + result = to_datetime(malformed, errors='ignore', cache=cache) tm.assert_numpy_array_equal(result, malformed) - pytest.raises(ValueError, to_datetime, malformed, errors='raise') + pytest.raises(ValueError, to_datetime, malformed, errors='raise', + cache=cache) idx = ['a', 'b', 'c', 'd', 'e'] series = Series(['1/1/2000', np.nan, 
'1/3/2000', np.nan, '1/5/2000'], index=idx, name='foo') - dseries = Series([to_datetime('1/1/2000'), np.nan, - to_datetime('1/3/2000'), np.nan, - to_datetime('1/5/2000')], index=idx, name='foo') + dseries = Series([to_datetime('1/1/2000', cache=cache), np.nan, + to_datetime('1/3/2000', cache=cache), np.nan, + to_datetime('1/5/2000', cache=cache)], + index=idx, name='foo') - result = to_datetime(series) - dresult = to_datetime(dseries) + result = to_datetime(series, cache=cache) + dresult = to_datetime(dseries, cache=cache) expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) for i in range(5): @@ -875,7 +934,7 @@ def test_string_na_nat_conversion(self): if isna(x): expected[i] = tslib.iNaT else: - expected[i] = to_datetime(x) + expected[i] = to_datetime(x, cache=cache) assert_series_equal(result, expected, check_names=False) assert result.name == 'foo' @@ -883,26 +942,29 @@ def test_string_na_nat_conversion(self): assert_series_equal(dresult, expected, check_names=False) assert dresult.name == 'foo' - def test_dti_constructor_numpy_timeunits(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dti_constructor_numpy_timeunits(self, cache): # GH 9114 - base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT']) + base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT'], + cache=cache) for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']: values = base.values.astype(dtype) tm.assert_index_equal(DatetimeIndex(values), base) - tm.assert_index_equal(to_datetime(values), base) + tm.assert_index_equal(to_datetime(values, cache=cache), base) - def test_dayfirst(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dayfirst(self, cache): # GH 5917 arr = ['10/02/2014', '11/02/2014', '12/02/2014'] expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11), datetime(2014, 2, 12)]) idx1 = DatetimeIndex(arr, dayfirst=True) idx2 = DatetimeIndex(np.array(arr), dayfirst=True) - idx3 = to_datetime(arr, dayfirst=True) - idx4 = to_datetime(np.array(arr), dayfirst=True) + idx3 = to_datetime(arr, dayfirst=True, cache=cache) + idx4 = to_datetime(np.array(arr), dayfirst=True, cache=cache) idx5 = DatetimeIndex(Index(arr), dayfirst=True) idx6 = DatetimeIndex(Series(arr), dayfirst=True) tm.assert_index_equal(expected, idx1) @@ -1000,7 +1062,8 @@ def test_guess_datetime_format_for_array(self): class TestToDatetimeInferFormat(object): - def test_to_datetime_infer_datetime_format_consistent_format(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_consistent_format(self, cache): s = pd.Series(pd.date_range('20000101', periods=50, freq='H')) test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f', @@ -1009,85 +1072,107 @@ def test_to_datetime_infer_datetime_format_consistent_format(self): for test_format in test_formats: s_as_dt_strings = s.apply(lambda x: x.strftime(test_format)) - with_format = pd.to_datetime(s_as_dt_strings, format=test_format) + with_format = pd.to_datetime(s_as_dt_strings, format=test_format, + cache=cache) no_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=False) + infer_datetime_format=False, + cache=cache) yes_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=True) + infer_datetime_format=True, + cache=cache) # Whether the format is explicitly passed, it is inferred, or # it is not inferred, the results should all be the same tm.assert_series_equal(with_format, no_infer) 
tm.assert_series_equal(no_infer, yes_infer) - def test_to_datetime_infer_datetime_format_inconsistent_format(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_inconsistent_format(self, + cache): s = pd.Series(np.array(['01/01/2011 00:00:00', '01-02-2011 00:00:00', '2011-01-03T00:00:00'])) # When the format is inconsistent, infer_datetime_format should just # fallback to the default parsing - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011'])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) - def test_to_datetime_infer_datetime_format_series_with_nans(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_series_with_nans(self, cache): s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan, '01/03/2011 00:00:00', np.nan])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) + + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_series_starting_with_nans(self, + cache): s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) - def test_to_datetime_iso8601_noleading_0s(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_iso8601_noleading_0s(self, cache): # GH 11871 s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3']) expected = pd.Series([pd.Timestamp('2014-01-01'), pd.Timestamp('2014-02-02'), pd.Timestamp('2015-03-03')]) - tm.assert_series_equal(pd.to_datetime(s), expected) - tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected) + tm.assert_series_equal(pd.to_datetime(s, cache=cache), expected) + tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d', + cache=cache), expected) class TestDaysInMonth(object): # tests for issue #10154 - def test_day_not_in_month_coerce(self): - assert isna(to_datetime('2015-02-29', errors='coerce')) + @pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_coerce(self, cache): + assert isna(to_datetime('2015-02-29', errors='coerce', cache=cache)) assert isna(to_datetime('2015-02-29', format="%Y-%m-%d", - errors='coerce')) + errors='coerce', cache=cache)) assert isna(to_datetime('2015-02-32', format="%Y-%m-%d", - errors='coerce')) + errors='coerce', cache=cache)) assert isna(to_datetime('2015-04-31', format="%Y-%m-%d", - errors='coerce')) + errors='coerce', cache=cache)) - def test_day_not_in_month_raise(self): + 
@pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_raise(self, cache): pytest.raises(ValueError, to_datetime, '2015-02-29', - errors='raise') + errors='raise', cache=cache) pytest.raises(ValueError, to_datetime, '2015-02-29', - errors='raise', format="%Y-%m-%d") + errors='raise', format="%Y-%m-%d", cache=cache) pytest.raises(ValueError, to_datetime, '2015-02-32', - errors='raise', format="%Y-%m-%d") + errors='raise', format="%Y-%m-%d", cache=cache) pytest.raises(ValueError, to_datetime, '2015-04-31', - errors='raise', format="%Y-%m-%d") + errors='raise', format="%Y-%m-%d", cache=cache) - def test_day_not_in_month_ignore(self): - assert to_datetime('2015-02-29', errors='ignore') == '2015-02-29' + @pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_ignore(self, cache): + assert to_datetime('2015-02-29', errors='ignore', + cache=cache) == '2015-02-29' assert to_datetime('2015-02-29', errors='ignore', - format="%Y-%m-%d") == '2015-02-29' + format="%Y-%m-%d", cache=cache) == '2015-02-29' assert to_datetime('2015-02-32', errors='ignore', - format="%Y-%m-%d") == '2015-02-32' + format="%Y-%m-%d", cache=cache) == '2015-02-32' assert to_datetime('2015-04-31', errors='ignore', - format="%Y-%m-%d") == '2015-04-31' + format="%Y-%m-%d", cache=cache) == '2015-04-31' class TestDatetimeParsingWrappers(object): @@ -1107,7 +1192,8 @@ def test_does_not_convert_mixed_integer(self): for good_date_string in good_date_strings: assert tslib._does_string_look_like_datetime(good_date_string) - def test_parsers(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers(self, cache): # https://github.com/dateutil/dateutil/issues/217 import dateutil @@ -1167,11 +1253,11 @@ def test_parsers(self): for date_str, expected in compat.iteritems(cases): result1, _, _ = tools.parse_time_string(date_str, yearfirst=yearfirst) - result2 = to_datetime(date_str, yearfirst=yearfirst) - result3 = to_datetime([date_str], yearfirst=yearfirst) + result2 = to_datetime(date_str, yearfirst=yearfirst, cache=cache) + result3 = to_datetime([date_str], yearfirst=yearfirst, cache=cache) # result5 is used below result4 = to_datetime(np.array([date_str], dtype=object), - yearfirst=yearfirst) + yearfirst=yearfirst, cache=cache) result6 = DatetimeIndex([date_str], yearfirst=yearfirst) # result7 is used below result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst) @@ -1193,7 +1279,7 @@ def test_parsers(self): # NaT result1, _, _ = tools.parse_time_string('NaT') - result2 = to_datetime('NaT') + result2 = to_datetime('NaT', cache=cache) result3 = Timestamp('NaT') result4 = DatetimeIndex(['NaT'])[0] assert result1 is tslib.NaT @@ -1207,7 +1293,8 @@ def test_parsers_quarter_invalid(self): for case in cases: pytest.raises(ValueError, tools.parse_time_string, case) - def test_parsers_dayfirst_yearfirst(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers_dayfirst_yearfirst(self, cache): # OK # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00 @@ -1291,7 +1378,7 @@ def test_parsers_dayfirst_yearfirst(self): assert result2 == expected result3 = to_datetime(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) + yearfirst=yearfirst, cache=cache) result4 = DatetimeIndex([date_str], dayfirst=dayfirst, yearfirst=yearfirst)[0] @@ -1300,15 +1387,16 @@ def test_parsers_dayfirst_yearfirst(self): assert result3 == expected assert result4 == expected - def test_parsers_timestring(self): + 
@pytest.mark.parametrize('cache', [True, False]) + def test_parsers_timestring(self, cache): # must be the same as dateutil result cases = {'10:15': (parse('10:15'), datetime(1, 1, 1, 10, 15)), '9:05': (parse('9:05'), datetime(1, 1, 1, 9, 5))} for date_str, (exp_now, exp_def) in compat.iteritems(cases): result1, _, _ = tools.parse_time_string(date_str) - result2 = to_datetime(date_str) - result3 = to_datetime([date_str]) + result2 = to_datetime(date_str, cache=cache) + result3 = to_datetime([date_str], cache=cache) result4 = Timestamp(date_str) result5 = DatetimeIndex([date_str])[0] # parse time string return time string based on default date @@ -1383,9 +1471,10 @@ def test_parsers_quarterly_with_freq(self): result, _, _ = tools.parse_time_string(date_str, freq=freq) assert result == exp - def test_parsers_timezone_minute_offsets_roundtrip(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers_timezone_minute_offsets_roundtrip(self, cache): # GH11708 - base = to_datetime("2013-01-01 00:00:00") + base = to_datetime("2013-01-01 00:00:00", cache=cache) dt_strings = [ ('2013-01-01 05:45+0545', "Asia/Katmandu", @@ -1396,7 +1485,7 @@ def test_parsers_timezone_minute_offsets_roundtrip(self): ] for dt_string, tz, dt_string_repr in dt_strings: - dt_time = to_datetime(dt_string) + dt_time = to_datetime(dt_string, cache=cache) assert base == dt_time converted_time = dt_time.tz_localize('UTC').tz_convert(tz) assert dt_string_repr == repr(converted_time)
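

Usage note (illustrative): a minimal sketch of how the `cache` keyword exercised by the benchmarks and tests above is expected to behave once this change is applied. The sample data below is assumed for illustration only; the calls shown (`pd.to_datetime` with `cache=`, `Index.unique`, `Series.map`) are existing pandas APIs, and the cache only engages for list-like inputs of at least 1000 elements that contain duplicates.

    import pandas as pd

    # Input dominated by repeated date strings -- the case the cache targets.
    dates = ['2000-02-11', '2000-02-12'] * 5000

    # cache=True parses each unique string once and maps the converted values
    # back onto the full input; the result must match the uncached path.
    with_cache = pd.to_datetime(dates, cache=True)
    no_cache = pd.to_datetime(dates, cache=False)
    assert with_cache.equals(no_cache)

    # The caching strategy itself, roughly what _convert_listlike does above:
    # a Series keyed by the unique raw values, looked up instead of re-parsing.
    unique_dates = pd.Index(dates).unique()
    convert_cache = pd.Series(pd.to_datetime(unique_dates), index=unique_dates)
    mapped = pd.Series(dates).map(convert_cache)

The new asv cases (`time_cache_with_dup_string_dates` and friends) time exactly this duplicate-heavy path against the all-unique path covered by `time_cache_with_unique_seconds_and_unit`.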