From 7f67ac9eed344223cafdc3ddd10a3ed30947c911 Mon Sep 17 00:00:00 2001
From: Matt Roeschke
Date: Wed, 6 Sep 2017 07:50:42 -0700
Subject: [PATCH 01/25] move cache into convert_listlike

---
 pandas/core/tools/datetimes.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index ae8aa275b2bae..b4f4fffe1ba8f 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -38,7 +38,8 @@ def _guess_datetime_format_for_array(arr, **kwargs):

 def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
 utc=None, box=True, format=None, exact=True,
- unit=None, infer_datetime_format=False, origin='unix'):
+ unit=None, infer_datetime_format=False, origin='unix',
+ cache=True):
 """
 Convert argument to datetime.

@@ -111,7 +112,11 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
 origin.

 .. versionadded: 0.20.0
+ cache_datetime : boolean, default True
+ If True, use a cache of unique, converted dates to apply the datetime
+ conversion. Produces significant speed-ups when parsing duplicate dates.

+ .. versionadded: 0.20.2
 Returns
 -------
 ret : datetime if parsing succeeded.
@@ -201,6 +206,16 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,

 def _convert_listlike(arg, box, format, name=None, tz=tz):

+ datetime_cache = None
+ if cache and is_list_like(arg) and not isinstance(arg, DatetimeIndex):
+ unique_dates = algorithms.unique(arg)
+ if len(unique_dates) != len(arg):
+ datetime_cache = Series(pd.to_datetime(unique_dates,
+ errors=errors, dayfirst=dayfirst,
+ yearfirst=yearfirst, utc=utc, box=box, format=format,
+ exact=exact, unit=unit,
+ infer_datetime_format=infer_datetime_format,
+ origin=origin, cache=False), index=unique_dates)

 if isinstance(arg, (list, tuple)):
 arg = np.array(arg, dtype='O')

From a7c65f75757aa823c9250938140b47d00f8acfd1 Mon Sep 17 00:00:00 2001
From: Matt Roeschke
Date: Sat, 9 Sep 2017 10:58:07 -0700
Subject: [PATCH 02/25] Move cache down the stack, explore threshold to
 trigger cache

---
 pandas/core/tools/datetimes.py | 46 +++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index b4f4fffe1ba8f..6a3fbf8176aed 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -206,16 +206,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,

 def _convert_listlike(arg, box, format, name=None, tz=tz):

- datetime_cache = None
- if cache and is_list_like(arg) and not isinstance(arg, DatetimeIndex):
- unique_dates = algorithms.unique(arg)
- if len(unique_dates) != len(arg):
- datetime_cache = Series(pd.to_datetime(unique_dates,
- errors=errors, dayfirst=dayfirst,
- yearfirst=yearfirst, utc=utc, box=box, format=format,
- exact=exact, unit=unit,
- infer_datetime_format=infer_datetime_format,
- origin=origin, cache=False), index=unique_dates)
+
 if isinstance(arg, (list, tuple)):
 arg = np.array(arg, dtype='O')

@@ -381,18 +372,43 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
 arg = np.asarray(arg)
 arg = arg + offset

+ convert_cache = None
+ if cache and is_list_like(arg) and not isinstance(arg, DatetimeIndex):
+ # unique currently cannot determine dates that are out of bounds
+ # use the cache only if the data is a string and there are more than 10**5 values
+ unique_dates = algorithms.unique(arg)
+ if len(unique_dates) != len(arg):
+ from pandas import Series
+
cache_data = _convert_listlike(unique_dates, True, format) + convert_cache = Series(cache_data, index=unique_dates) + if isinstance(arg, tslib.Timestamp): result = arg elif isinstance(arg, ABCSeries): - from pandas import Series - values = _convert_listlike(arg._values, True, format) - result = Series(values, index=arg.index, name=arg.name) + if convert_cache is not None: + result = arg.map(convert_cache) + else: + from pandas import Series + values = _convert_listlike(arg._values, True, format) + result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): - result = _convert_listlike(arg, box, format, name=arg.name) + if convert_cache is not None: + from pandas import Series + result = Series(arg).map(convert_cache).values + if box: + result = DatetimeIndex(result, tz=tz, name=arg.name) + else: + result = _convert_listlike(arg, box, format, name=arg.name) elif is_list_like(arg): - result = _convert_listlike(arg, box, format) + if convert_cache is not None: + from pandas import Series + result = Series(arg).map(convert_cache).values + if box: + result = DatetimeIndex(result, tz=tz) + else: + result = _convert_listlike(arg, box, format) else: result = _convert_listlike(np.array([arg]), box, format)[0] From 243349aaef0ecf83c477efce26d62493c1a347e3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 10 Sep 2017 18:42:38 -0700 Subject: [PATCH 03/25] Add more cache conditions --- asv_bench/benchmarks/timeseries.py | 8 +++++ pandas/core/tools/datetimes.py | 22 +++++++++----- pandas/tests/indexes/datetimes/test_tools.py | 32 ++++++++++++++++++++ 3 files changed, 54 insertions(+), 8 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 779fc0bd20964..0a95ebab8c9a4 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -356,6 +356,8 @@ def setup(self): self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000)) self.s2 = self.s.str.replace(':\\S+$', '') + self.numeric_data = Series([range(100000)]) + self.datetime_data = [dt.datetime(2010, 1, 1)] * 100000 def time_format_YYYYMMDD(self): to_datetime(self.stringsD, format='%Y%m%d') @@ -381,6 +383,12 @@ def time_format_exact(self): def time_format_no_exact(self): to_datetime(self.s, format='%d%b%y', exact=False) + def time_cache_numeric_data(self): + to_datetime(self.numeric_data) + + def time_cache_datetime_data(self): + to_datetime(self.datetime_data) + class Offsets(object): goal_time = 0.2 diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 6a3fbf8176aed..102f20a746db6 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -21,7 +21,8 @@ is_float, is_list_like, is_scalar, - is_numeric_dtype) + is_numeric_dtype, + is_string_dtype) from pandas.core.dtypes.generic import ( ABCIndexClass, ABCSeries, ABCDataFrame) @@ -373,14 +374,19 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): arg = arg + offset convert_cache = None - if cache and is_list_like(arg) and not isinstance(arg, DatetimeIndex): + if cache and is_list_like(arg): + # Create a cache only if there are more than 10k values and the user + # passes in datestrings + min_cache_threshold = 10**5 + if len(arg) >= min_cache_threshold and is_string_dtype(arg): # unique currently cannot determine dates that are out of bounds - # use the cache only if the data is a string and there are more than 
10**5 values - unique_dates = algorithms.unique(arg) - if len(unique_dates) != len(arg): - from pandas import Series - cache_data = _convert_listlike(unique_dates, True, format) - convert_cache = Series(cache_data, index=unique_dates) + # recurison errors with datetime + unique_dates = algorithms.unique(arg) + # Essentially they need to all be the same value + if len(unique_dates) == 1: + from pandas import Series + cache_data = _convert_listlike(unique_dates, True, format) + convert_cache = Series(cache_data, index=unique_dates) if isinstance(arg, tslib.Timestamp): result = arg diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 8205b4fde217b..01ae747c35278 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -371,6 +371,38 @@ def test_datetime_invalid_datatype(self): pd.to_datetime(bool) with pytest.raises(TypeError): pd.to_datetime(pd.to_datetime) + + @pytest.mark.parametrize("utc", [True, None]) + @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None]) + @pytest.mark.parametrize("box", [True, False]) + @pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index]) + def test_to_datetime_cache(self, utc, format, box, constructor): + date = '20130101 00:00:00' + test_dates = [date] * 10**5 + data = constructor(test_dates) + result = pd.to_datetime(data, utc=utc, format=format, box=box) + expected = pd.to_datetime(data, utc=utc, format=format, box=box, + cache=False) + if box: + tm.assert_index_equal(result, expected) + else: + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("utc", [True, None]) + @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None]) + def test_to_datetime_cache_series(self, utc, format): + date = '20130101 00:00:00' + test_dates = [date] * 10**5 + data = pd.Series(test_dates) + result = pd.to_datetime(data, utc=utc, format=format, cache=True) + expected = pd.to_datetime(data, utc=utc, format=format) + tm.assert_series_equal(result, expected) + + def test_to_datetime_cache_scalar(self): + date = '20130101 00:00:00' + result = pd.to_datetime(date, cache=True) + expected = pd.Timestamp('20130101 00:00:00') + assert result == expected @pytest.mark.parametrize('date, format', [('2017-20', '%Y-%W'), From d154a6dd72d058129253b6a75018b0a85ade0c15 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 10 Sep 2017 22:54:14 -0700 Subject: [PATCH 04/25] Add some benchmarks --- asv_bench/benchmarks/timeseries.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 0a95ebab8c9a4..e084d18a68ed3 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -356,8 +356,9 @@ def setup(self): self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000)) self.s2 = self.s.str.replace(':\\S+$', '') - self.numeric_data = Series([range(100000)]) - self.datetime_data = [dt.datetime(2010, 1, 1)] * 100000 + self.dup_numeric_data = Series([1000] * 100000) + self.dup_string_data = ['2013-01-01'] * 100000 + self.dup_datetime_data = [dt.datetime(2010, 1, 1)] * 100000 def time_format_YYYYMMDD(self): to_datetime(self.stringsD, format='%Y%m%d') @@ -383,11 +384,14 @@ def time_format_exact(self): def time_format_no_exact(self): to_datetime(self.s, format='%d%b%y', exact=False) - def time_cache_numeric_data(self): - to_datetime(self.numeric_data) + def time_cache_dup_numeric_data(self): + 
to_datetime(self.dup_numeric_data, unit='s') - def time_cache_datetime_data(self): - to_datetime(self.datetime_data) + def time_cache_dup_datetime_data(self): + to_datetime(self.dup_datetime_data) + + def time_cache_dup_string_data(self): + to_datetime(self.dup_string_data) class Offsets(object): From b5e71d25474f7ee3ee054dfba7abbdfb647a248d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 13 Sep 2017 21:27:52 -0700 Subject: [PATCH 05/25] Some performance testing --- asv_bench/benchmarks/timeseries.py | 44 ++++++++++++++++++++++++------ pandas/core/indexes/datetimes.py | 2 +- pandas/core/tools/datetimes.py | 18 ++++++------ 3 files changed, 45 insertions(+), 19 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index e084d18a68ed3..a1cc822b58f2b 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -356,9 +356,17 @@ def setup(self): self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000)) self.s2 = self.s.str.replace(':\\S+$', '') - self.dup_numeric_data = Series([1000] * 100000) - self.dup_string_data = ['2013-01-01'] * 100000 - self.dup_datetime_data = [dt.datetime(2010, 1, 1)] * 100000 + self.dup_numeric_data_10_5 = Series([1000] * 100000) + self.dup_string_data_10_5 = ['2013-01-01 01:00:00'] * 100000 + self.dup_datetime_data_10_5 = [dt.datetime(2010, 1, 1)] * 100000 + + self.dup_numeric_data_10_3 = Series([1000] * 100) + self.dup_string_data_10_3 = ['2013-01-01 01:00:00'] * 100 + self.dup_datetime_data_10_3 = [dt.datetime(2010, 1, 1)] * 100 + + self.dup_numeric_data_10_7 = Series([1000] * 10**7) + self.dup_string_data_10_7 = ['2013-01-01 01:00:00'] * 10**7 + self.dup_datetime_data_10_7 = [dt.datetime(2010, 1, 1)] * 10**7 def time_format_YYYYMMDD(self): to_datetime(self.stringsD, format='%Y%m%d') @@ -384,14 +392,32 @@ def time_format_exact(self): def time_format_no_exact(self): to_datetime(self.s, format='%d%b%y', exact=False) - def time_cache_dup_numeric_data(self): - to_datetime(self.dup_numeric_data, unit='s') + def time_cache_dup_numeric_data_10_3(self): + to_datetime(self.dup_numeric_data_10_3, unit='s') + + def time_cache_dup_datetime_data_10_3(self): + to_datetime(self.dup_datetime_data_10_3) + + def time_cache_dup_string_data_10_3(self): + to_datetime(self.dup_string_data_10_3) + + def time_cache_dup_numeric_data_10_5(self): + to_datetime(self.dup_numeric_data_10_5, unit='s') + + def time_cache_dup_datetime_data_10_5(self): + to_datetime(self.dup_datetime_data_10_5) + + def time_cache_dup_string_data_10_5(self): + to_datetime(self.dup_string_data_10_5) + + def time_cache_dup_numeric_data_10_7(self): + to_datetime(self.dup_numeric_data_10_7, unit='s') - def time_cache_dup_datetime_data(self): - to_datetime(self.dup_datetime_data) + def time_cache_dup_datetime_data_10_7(self): + to_datetime(self.dup_datetime_data_10_7) - def time_cache_dup_string_data(self): - to_datetime(self.dup_string_data) + def time_cache_dup_string_data_10_7(self): + to_datetime(self.dup_string_data_10_7) class Offsets(object): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 78869de318dce..dd1c7306d2c26 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -339,7 +339,7 @@ def __new__(cls, data=None, if not (is_datetime64_dtype(data) or is_datetimetz(data) or is_integer_dtype(data)): data = tools.to_datetime(data, dayfirst=dayfirst, - yearfirst=yearfirst) + yearfirst=yearfirst, cache=False) if issubclass(data.dtype.type, np.datetime64) or 
is_datetimetz(data): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 102f20a746db6..165c438608493 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -207,7 +207,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, def _convert_listlike(arg, box, format, name=None, tz=tz): - + import pdb; pdb.set_trace() if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -377,16 +377,16 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): if cache and is_list_like(arg): # Create a cache only if there are more than 10k values and the user # passes in datestrings - min_cache_threshold = 10**5 - if len(arg) >= min_cache_threshold and is_string_dtype(arg): + #min_cache_threshold = 10**5 + #if len(arg) >= min_cache_threshold and is_string_dtype(arg): # unique currently cannot determine dates that are out of bounds # recurison errors with datetime - unique_dates = algorithms.unique(arg) - # Essentially they need to all be the same value - if len(unique_dates) == 1: - from pandas import Series - cache_data = _convert_listlike(unique_dates, True, format) - convert_cache = Series(cache_data, index=unique_dates) + unique_dates = algorithms.unique(arg) + # Essentially they need to all be the same value + if len(unique_dates) != len(arg): + from pandas import Series + cache_data = _convert_listlike(unique_dates, False, format) + convert_cache = Series(cache_data, index=unique_dates) if isinstance(arg, tslib.Timestamp): result = arg From fb2e831009ed57c9affd368d3630bb0b40d082a3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 25 Sep 2017 16:55:25 -0700 Subject: [PATCH 06/25] Add asvs, modify tests for caches --- asv_bench/benchmarks/timeseries.py | 53 +-- pandas/core/tools/datetimes.py | 21 +- pandas/tests/indexes/datetimes/test_tools.py | 459 +++++++++++-------- 3 files changed, 299 insertions(+), 234 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index a1cc822b58f2b..d9d1b48f420e6 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -346,27 +346,21 @@ class ToDatetime(object): def setup(self): self.rng = date_range(start='1/1/2000', periods=10000, freq='D') - self.stringsD = Series((((self.rng.year * 10000) + (self.rng.month * 100)) + self.rng.day), dtype=np.int64).apply(str) + self.stringsD = Series(self.rng.strftime('%Y%m%d')) self.rng = date_range(start='1/1/2000', periods=20000, freq='H') - self.strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in self.rng] - self.strings_nosep = [x.strftime('%Y%m%d %H:%M:%S') for x in self.rng] + self.strings = self.rng.strftime('%Y-%m-%d %H:%M:%S').tolist() + self.strings_nosep = self.rng.strftime('%Y%m%d %H:%M:%S').tolist() self.strings_tz_space = [x.strftime('%Y-%m-%d %H:%M:%S') + ' -0800' for x in self.rng] self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000)) self.s2 = self.s.str.replace(':\\S+$', '') - self.dup_numeric_data_10_5 = Series([1000] * 100000) - self.dup_string_data_10_5 = ['2013-01-01 01:00:00'] * 100000 - self.dup_datetime_data_10_5 = [dt.datetime(2010, 1, 1)] * 100000 - self.dup_numeric_data_10_3 = Series([1000] * 100) - self.dup_string_data_10_3 = ['2013-01-01 01:00:00'] * 100 - self.dup_datetime_data_10_3 = [dt.datetime(2010, 1, 1)] * 100 - - self.dup_numeric_data_10_7 = Series([1000] * 10**7) - self.dup_string_data_10_7 = ['2013-01-01 01:00:00'] * 10**7 - self.dup_datetime_data_10_7 = [dt.datetime(2010, 1, 1)] * 10**7 + 
self.unique_numeric_seconds = range(10000)
+ self.dup_numeric_seconds = [1000] * 10000
+ self.dup_string_dates = ['2000-02-11'] * 10000
+ self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * 10000

 def time_format_YYYYMMDD(self):
 to_datetime(self.stringsD, format='%Y%m%d')
@@ -392,32 +386,23 @@ def time_format_exact(self):
 def time_format_no_exact(self):
 to_datetime(self.s, format='%d%b%y', exact=False)

- def time_cache_dup_numeric_data_10_3(self):
- to_datetime(self.dup_numeric_data_10_3, unit='s')
-
- def time_cache_dup_datetime_data_10_3(self):
- to_datetime(self.dup_datetime_data_10_3)
-
- def time_cache_dup_string_data_10_3(self):
- to_datetime(self.dup_string_data_10_3)
-
- def time_cache_dup_numeric_data_10_5(self):
- to_datetime(self.dup_numeric_data_10_5, unit='s')
+ def time_cache_with_unique_seconds_and unit(self):
+ to_datetime(self.unique_numeric_seconds, unit='s')

- def time_cache_dup_datetime_data_10_5(self):
- to_datetime(self.dup_datetime_data_10_5)
+ def time_cache_with_dup_seconds_and_unit(self):
+ to_datetime(self.dup_numeric_seconds, unit='s')

- def time_cache_dup_string_data_10_5(self):
- to_datetime(self.dup_string_data_10_5)
+ def time_cache_with_dup_string_dates(self):
+ to_datetime(self.dup_string_dates)

- def time_cache_dup_numeric_data_10_7(self):
- to_datetime(self.dup_numeric_data_10_7, unit='s')
+ def time_cache_with_dup_string_dates_and_format(self):
+ to_datetime(self.dup_string_dates, format='%Y-%m-%d')

- def time_cache_dup_datetime_data_10_7(self):
- to_datetime(self.dup_datetime_data_10_7)
+ def time_cache_with_dup_string_tzoffset_dates(self):
+ to_datetime(self.dup_string_with_tz)

- def time_cache_dup_string_data_10_7(self):
- to_datetime(self.dup_string_data_10_7)
+ def time_cache_with_dup_string_tzoffset_dates_and_format(self):
+ to_datetim(self.dup_string_with_tz, format='%Y-%m-%d %H:%M:%S%z')

 class Offsets(object):

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 102f20a746db6..ac29c7144556c 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -113,7 +113,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
 origin.

 .. versionadded: 0.20.0
- cache_datetime : boolean, default True
+ cache : boolean, default True
 If True, use a cache of unique, converted dates to apply the datetime
 conversion. Produces significant speed-ups when parsing duplicate dates.
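
The strategy this docstring describes is easier to see outside the diff. What follows is a minimal, self-contained sketch of the same unique-then-map idea using only public pandas API; cached_to_datetime is an illustrative name, not the function being patched, whose real logic lives inside _convert_listlike:

import numpy as np
import pandas as pd

def cached_to_datetime(values):
    # values: list-like of date strings, possibly with many repeats
    arr = np.asarray(values, dtype=object)
    unique_vals = pd.unique(arr)
    if len(unique_vals) < len(arr):
        # parse each distinct value exactly once...
        cache = pd.Series(pd.to_datetime(unique_vals), index=unique_vals)
        # ...then broadcast the parsed Timestamps back onto the full input
        return pd.DatetimeIndex(pd.Series(arr).map(cache))
    # all values distinct: a cache would only add an extra unique() pass
    return pd.to_datetime(arr)

On an input like ['2013-01-01 00:00:00'] * 10**5 this performs one string parse instead of 10**5, which is where the speed-up promised above comes from.
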
@@ -207,7 +207,6 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, def _convert_listlike(arg, box, format, name=None, tz=tz): - import pdb; pdb.set_trace() if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -375,18 +374,12 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): convert_cache = None if cache and is_list_like(arg): - # Create a cache only if there are more than 10k values and the user - # passes in datestrings - #min_cache_threshold = 10**5 - #if len(arg) >= min_cache_threshold and is_string_dtype(arg): - # unique currently cannot determine dates that are out of bounds - # recurison errors with datetime - unique_dates = algorithms.unique(arg) - # Essentially they need to all be the same value - if len(unique_dates) != len(arg): - from pandas import Series - cache_data = _convert_listlike(unique_dates, False, format) - convert_cache = Series(cache_data, index=unique_dates) + if len(arg) >= 1000: + unique_dates = algorithms.unique(arg) + if len(unique_dates) != len(arg): + from pandas import Series + cache_dates = _convert_listlike(unique_dates, False, format) + convert_cache = Series(cache_dates, index=unique_dates) if isinstance(arg, tslib.Timestamp): result = arg diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 01ae747c35278..1a099fe2d1505 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -28,7 +28,8 @@ class TestTimeConversionFormats(object): - def test_to_datetime_format(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format(self, cache): values = ['1/1/2000', '1/2/2000', '1/3/2000'] results1 = [Timestamp('20000101'), Timestamp('20000201'), @@ -43,7 +44,7 @@ def test_to_datetime_format(self): (values[2], (results1[2], results2[2]))]: for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']): - result = to_datetime(vals, format=fmt) + result = to_datetime(vals, format=fmt, cache=cache) expected = expecteds[i] if isinstance(expected, Series): @@ -53,14 +54,15 @@ def test_to_datetime_format(self): else: tm.assert_index_equal(result, expected) - def test_to_datetime_format_YYYYMMDD(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_YYYYMMDD(self, cache): s = Series([19801222, 19801222] + [19810105] * 5) expected = Series([Timestamp(x) for x in s.apply(str)]) - result = to_datetime(s, format='%Y%m%d') + result = to_datetime(s, format='%Y%m%d', cache=cache) assert_series_equal(result, expected) - result = to_datetime(s.apply(str), format='%Y%m%d') + result = to_datetime(s.apply(str), format='%Y%m%d', cache=cache) assert_series_equal(result, expected) # with NaT @@ -69,44 +71,48 @@ def test_to_datetime_format_YYYYMMDD(self): expected[2] = np.nan s[2] = np.nan - result = to_datetime(s, format='%Y%m%d') + result = to_datetime(s, format='%Y%m%d', cache=cache) assert_series_equal(result, expected) # string with NaT s = s.apply(str) s[2] = 'nat' - result = to_datetime(s, format='%Y%m%d') + result = to_datetime(s, format='%Y%m%d', cache=cache) assert_series_equal(result, expected) # coercion # GH 7930 s = Series([20121231, 20141231, 99991231]) - result = pd.to_datetime(s, format='%Y%m%d', errors='ignore') + result = pd.to_datetime(s, format='%Y%m%d', errors='ignore', + cache=cache) expected = Series([datetime(2012, 12, 31), datetime(2014, 12, 31), datetime(9999, 12, 31)], dtype=object) tm.assert_series_equal(result, expected) - result = pd.to_datetime(s, 
format='%Y%m%d', errors='coerce') + result = pd.to_datetime(s, format='%Y%m%d', errors='coerce', + cache=cache) expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]') assert_series_equal(result, expected) - # GH 10178 - def test_to_datetime_format_integer(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_integer(self, cache): + # GH 10178 s = Series([2000, 2001, 2002]) expected = Series([Timestamp(x) for x in s.apply(str)]) - result = to_datetime(s, format='%Y') + result = to_datetime(s, format='%Y', cache=cache) assert_series_equal(result, expected) s = Series([200001, 200105, 200206]) expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) ]) - result = to_datetime(s, format='%Y%m') + result = to_datetime(s, format='%Y%m', cache=cache) assert_series_equal(result, expected) - def test_to_datetime_format_microsecond(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_microsecond(self, cache): # these are locale dependent lang, _ = locale.getlocale() @@ -114,11 +120,12 @@ def test_to_datetime_format_microsecond(self): val = '01-{}-2011 00:00:01.978'.format(month_abbr) format = '%d-%b-%Y %H:%M:%S.%f' - result = to_datetime(val, format=format) + result = to_datetime(val, format=format, cache=cache) exp = datetime.strptime(val, format) assert result == exp - def test_to_datetime_format_time(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_time(self, cache): data = [ ['01/10/2010 15:20', '%m/%d/%Y %H:%M', Timestamp('2010-01-10 15:20')], @@ -134,9 +141,10 @@ def test_to_datetime_format_time(self): # Timestamp('2010-01-10 09:12:56')] ] for s, format, dt in data: - assert to_datetime(s, format=format) == dt + assert to_datetime(s, format=format, cache=cache) == dt - def test_to_datetime_with_non_exact(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_non_exact(self, cache): # GH 10834 tm._skip_if_has_locale() @@ -147,12 +155,13 @@ def test_to_datetime_with_non_exact(self): s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00', '19MAY11 00:00:00Z']) - result = to_datetime(s, format='%d%b%y', exact=False) + result = to_datetime(s, format='%d%b%y', exact=False, cache=cache) expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False), - format='%d%b%y') + format='%d%b%y', cache=cache) assert_series_equal(result, expected) - def test_parse_nanoseconds_with_formula(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parse_nanoseconds_with_formula(self, cache): # GH8989 # trunctaing the nanoseconds when a format was provided @@ -161,44 +170,48 @@ def test_parse_nanoseconds_with_formula(self): "2012-01-01 09:00:00.001", "2012-01-01 09:00:00.001000", "2012-01-01 09:00:00.001000000", ]: - expected = pd.to_datetime(v) - result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f") + expected = pd.to_datetime(v, cache=cache) + result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f", + cache=cache) assert result == expected - def test_to_datetime_format_weeks(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_weeks(self, cache): data = [ ['2009324', '%Y%W%w', Timestamp('2009-08-13')], ['2013020', '%Y%U%w', Timestamp('2013-01-13')] ] for s, format, dt in data: - assert to_datetime(s, format=format) == dt + assert to_datetime(s, format=format, cache=cache) == dt class TestToDatetime(object): - def test_to_datetime_dt64s(self): + @pytest.mark.parametrize('cache', [True, False]) 
+ def test_to_datetime_dt64s(self, cache): in_bound_dts = [ np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] for dt in in_bound_dts: - assert pd.to_datetime(dt) == Timestamp(dt) + assert pd.to_datetime(dt, cache=cache) == Timestamp(dt) oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] for dt in oob_dts: pytest.raises(ValueError, pd.to_datetime, dt, errors='raise') pytest.raises(ValueError, Timestamp, dt) - assert pd.to_datetime(dt, errors='coerce') is NaT + assert pd.to_datetime(dt, errors='coerce', cache=cache) is NaT - def test_to_datetime_array_of_dt64s(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_array_of_dt64s(self, cache): dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] # Assuming all datetimes are in bounds, to_datetime() returns # an array that is equal to Timestamp() parsing tm.assert_numpy_array_equal( - pd.to_datetime(dts, box=False), + pd.to_datetime(dts, box=False, cache=cache), np.array([Timestamp(x).asm8 for x in dts]) ) @@ -209,7 +222,8 @@ def test_to_datetime_array_of_dt64s(self): errors='raise') tm.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='coerce'), + pd.to_datetime(dts_with_oob, box=False, errors='coerce', + cache=cache), np.array( [ Timestamp(dts_with_oob[0]).asm8, @@ -224,20 +238,22 @@ def test_to_datetime_array_of_dt64s(self): # are converted to their .item(), which depending on the version of # numpy is either a python datetime.datetime or datetime.date tm.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='ignore'), + pd.to_datetime(dts_with_oob, box=False, errors='ignore', + cache=cache), np.array( [dt.item() for dt in dts_with_oob], dtype='O' ) ) - def test_to_datetime_tz(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_tz(self, cache): # xref 8260 # uniform returns a DatetimeIndex arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] - result = pd.to_datetime(arr) + result = pd.to_datetime(arr, cache=cache) expected = DatetimeIndex( ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') tm.assert_index_equal(result, expected) @@ -245,9 +261,10 @@ def test_to_datetime_tz(self): # mixed tzs will raise arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] - pytest.raises(ValueError, lambda: pd.to_datetime(arr)) + pytest.raises(ValueError, lambda: pd.to_datetime(arr, cache=cache)) - def test_to_datetime_tz_pytz(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_tz_pytz(self, cache): # see gh-8260 us_eastern = pytz.timezone('US/Eastern') arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, @@ -255,18 +272,20 @@ def test_to_datetime_tz_pytz(self): us_eastern.localize(datetime(year=2000, month=6, day=1, hour=3, minute=0))], dtype=object) - result = pd.to_datetime(arr, utc=True) + result = pd.to_datetime(arr, utc=True, cache=cache) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('cache', [True, False]) @pytest.mark.parametrize("init_constructor, end_constructor, test_method", [(Index, DatetimeIndex, tm.assert_index_equal), (list, DatetimeIndex, tm.assert_index_equal), (np.array, DatetimeIndex, tm.assert_index_equal), (Series, Series, tm.assert_series_equal)]) def 
test_to_datetime_utc_true(self, + cache, init_constructor, end_constructor, test_method): @@ -277,39 +296,47 @@ def test_to_datetime_utc_true(self, result = pd.to_datetime(init_constructor(data), format='%Y%m%d %H%M%S', - utc=True) + utc=True, + cache=cache) expected = end_constructor(expected_data) test_method(result, expected) # Test scalar case as well for scalar, expected in zip(data, expected_data): - result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True) + result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True, + cache=cache) assert result == expected - def test_to_datetime_utc_true_with_series_single_value(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_utc_true_with_series_single_value(self, cache): # GH 15760 UTC=True with Series ts = 1.5e18 - result = pd.to_datetime(pd.Series([ts]), utc=True) + result = pd.to_datetime(pd.Series([ts]), utc=True, cache=cache) expected = pd.Series([pd.Timestamp(ts, tz='utc')]) tm.assert_series_equal(result, expected) - def test_to_datetime_utc_true_with_series_tzaware_string(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_utc_true_with_series_tzaware_string(self, cache): ts = '2013-01-01 00:00:00-01:00' expected_ts = '2013-01-01 01:00:00' data = pd.Series([ts] * 3) - result = pd.to_datetime(data, utc=True) + result = pd.to_datetime(data, utc=True, cache=cache) expected = pd.Series([pd.Timestamp(expected_ts, tz='utc')] * 3) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('cache', [True, False]) @pytest.mark.parametrize('date, dtype', [('2013-01-01 01:00:00', 'datetime64[ns]'), ('2013-01-01 01:00:00', 'datetime64[ns, UTC]')]) - def test_to_datetime_utc_true_with_series_datetime_ns(self, date, dtype): + def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date, + dtype): expected = pd.Series([pd.Timestamp('2013-01-01 01:00:00', tz='UTC')]) - result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True) + result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True, + cache=cache) tm.assert_series_equal(result, expected) - def test_to_datetime_tz_psycopg2(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_tz_psycopg2(self, cache): # xref 8260 try: @@ -324,7 +351,7 @@ def test_to_datetime_tz_psycopg2(self): datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], dtype=object) - result = pd.to_datetime(arr, errors='coerce', utc=True) + result = pd.to_datetime(arr, errors='coerce', utc=True, cache=cache) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) @@ -337,32 +364,35 @@ def test_to_datetime_tz_psycopg2(self): assert is_datetime64_ns_dtype(i) # tz coerceion - result = pd.to_datetime(i, errors='coerce') + result = pd.to_datetime(i, errors='coerce', cache=cache) tm.assert_index_equal(result, i) - result = pd.to_datetime(i, errors='coerce', utc=True) + result = pd.to_datetime(i, errors='coerce', utc=True, cache=cache) expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], dtype='datetime64[ns, UTC]') tm.assert_index_equal(result, expected) - def test_datetime_bool(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_datetime_bool(self, cache): # GH13176 with pytest.raises(TypeError): to_datetime(False) - assert to_datetime(False, errors="coerce") is NaT - assert to_datetime(False, errors="ignore") is False + assert to_datetime(False, errors="coerce", cache=cache) is NaT + assert to_datetime(False, errors="ignore", cache=cache) 
is False with pytest.raises(TypeError): to_datetime(True) - assert to_datetime(True, errors="coerce") is NaT - assert to_datetime(True, errors="ignore") is True + assert to_datetime(True, errors="coerce", cache=cache) is NaT + assert to_datetime(True, errors="ignore", cache=cache) is True with pytest.raises(TypeError): - to_datetime([False, datetime.today()]) + to_datetime([False, datetime.today()], cache=cache) with pytest.raises(TypeError): - to_datetime(['20130101', True]) + to_datetime(['20130101', True], cache=cache) tm.assert_index_equal(to_datetime([0, False, NaT, 0.0], - errors="coerce"), - DatetimeIndex([to_datetime(0), NaT, - NaT, to_datetime(0)])) + errors="coerce", cache=cache), + DatetimeIndex([to_datetime(0, cache=cache), + NaT, + NaT, + to_datetime(0, cache=cache)])) def test_datetime_invalid_datatype(self): # GH13176 @@ -421,71 +451,77 @@ def test_week_without_day_and_calendar_year(self, date, format): class TestToDatetimeUnit(object): - def test_unit(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit(self, cache): # GH 11758 # test proper behavior with erros with pytest.raises(ValueError): - to_datetime([1], unit='D', format='%Y%m%d') + to_datetime([1], unit='D', format='%Y%m%d', cache=cache) values = [11111111, 1, 1.0, tslib.iNaT, NaT, np.nan, 'NaT', ''] - result = to_datetime(values, unit='D', errors='ignore') + result = to_datetime(values, unit='D', errors='ignore', cache=cache) expected = Index([11111111, Timestamp('1970-01-02'), Timestamp('1970-01-02'), NaT, NaT, NaT, NaT, NaT], dtype=object) tm.assert_index_equal(result, expected) - result = to_datetime(values, unit='D', errors='coerce') + result = to_datetime(values, unit='D', errors='coerce', cache=cache) expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) with pytest.raises(tslib.OutOfBoundsDatetime): - to_datetime(values, unit='D', errors='raise') + to_datetime(values, unit='D', errors='raise', cache=cache) values = [1420043460000, tslib.iNaT, NaT, np.nan, 'NaT'] - result = to_datetime(values, errors='ignore', unit='s') + result = to_datetime(values, errors='ignore', unit='s', cache=cache) expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object) tm.assert_index_equal(result, expected) - result = to_datetime(values, errors='coerce', unit='s') + result = to_datetime(values, errors='coerce', unit='s', cache=cache) expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) with pytest.raises(tslib.OutOfBoundsDatetime): - to_datetime(values, errors='raise', unit='s') + to_datetime(values, errors='raise', unit='s', cache=cache) # if we have a string, then we raise a ValueError # and NOT an OutOfBoundsDatetime for val in ['foo', Timestamp('20130101')]: try: - to_datetime(val, errors='raise', unit='s') + to_datetime(val, errors='raise', unit='s', cache=cache) except tslib.OutOfBoundsDatetime: raise AssertionError("incorrect exception raised") except ValueError: pass - def test_unit_consistency(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_consistency(self, cache): # consistency of conversions expected = Timestamp('1970-05-09 14:25:11') - result = pd.to_datetime(11111111, unit='s', errors='raise') + result = pd.to_datetime(11111111, unit='s', errors='raise', + cache=cache) assert result == expected assert isinstance(result, Timestamp) - result = pd.to_datetime(11111111, unit='s', errors='coerce') + result = pd.to_datetime(11111111, 
unit='s', errors='coerce', + cache=cache) assert result == expected assert isinstance(result, Timestamp) - result = pd.to_datetime(11111111, unit='s', errors='ignore') + result = pd.to_datetime(11111111, unit='s', errors='ignore', + cache=cache) assert result == expected assert isinstance(result, Timestamp) - def test_unit_with_numeric(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_with_numeric(self, cache): # GH 13180 # coercions from floats/ints are ok @@ -494,10 +530,10 @@ def test_unit_with_numeric(self): arr1 = [1.434692e+18, 1.432766e+18] arr2 = np.array(arr1).astype('int64') for errors in ['ignore', 'raise', 'coerce']: - result = pd.to_datetime(arr1, errors=errors) + result = pd.to_datetime(arr1, errors=errors, cache=cache) tm.assert_index_equal(result, expected) - result = pd.to_datetime(arr2, errors=errors) + result = pd.to_datetime(arr2, errors=errors, cache=cache) tm.assert_index_equal(result, expected) # but we want to make sure that we are coercing @@ -506,7 +542,7 @@ def test_unit_with_numeric(self): '2015-06-19 05:33:20', '2015-05-27 22:33:20']) arr = ['foo', 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) expected = DatetimeIndex(['2015-06-19 05:33:20', @@ -514,31 +550,33 @@ def test_unit_with_numeric(self): 'NaT', 'NaT']) arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT'] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) - def test_unit_mixed(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_mixed(self, cache): # mixed integers/datetimes expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT']) arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) with pytest.raises(ValueError): - pd.to_datetime(arr, errors='raise') + pd.to_datetime(arr, errors='raise', cache=cache) expected = DatetimeIndex(['NaT', 'NaT', '2013-01-01']) arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) with pytest.raises(ValueError): - pd.to_datetime(arr, errors='raise') + pd.to_datetime(arr, errors='raise', cache=cache) - def test_dataframe(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dataframe(self, cache): df = DataFrame({'year': [2015, 2016], 'month': [2, 3], @@ -552,19 +590,20 @@ def test_dataframe(self): result = to_datetime({'year': df['year'], 'month': df['month'], - 'day': df['day']}) + 'day': df['day']}, cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:0:00')]) assert_series_equal(result, expected) # dict-like - result = to_datetime(df[['year', 'month', 'day']].to_dict()) + result = to_datetime(df[['year', 'month', 'day']].to_dict(), + cache=cache) assert_series_equal(result, expected) # dict but with constructable df2 = df[['year', 'month', 'day']].to_dict() df2['month'] = 2 - result = to_datetime(df2) + result = to_datetime(df2, cache=cache) expected2 = Series([Timestamp('20150204 00:00:00'), Timestamp('20160205 00:0:00')]) assert_series_equal(result, expected2) @@ -585,7 +624,8 @@ def test_dataframe(self): ] for d in units: - result = 
to_datetime(df[list(d.keys())].rename(columns=d)) + result = to_datetime(df[list(d.keys())].rename(columns=d), + cache=cache) expected = Series([Timestamp('20150204 06:58:10'), Timestamp('20160305 07:59:11')]) assert_series_equal(result, expected) @@ -600,13 +640,13 @@ def test_dataframe(self): 'us': 'us', 'ns': 'ns'} - result = to_datetime(df.rename(columns=d)) + result = to_datetime(df.rename(columns=d), cache=cache) expected = Series([Timestamp('20150204 06:58:10.001002003'), Timestamp('20160305 07:59:11.001002003')]) assert_series_equal(result, expected) # coerce back to int - result = to_datetime(df.astype(str)) + result = to_datetime(df.astype(str), cache=cache) assert_series_equal(result, expected) # passing coerce @@ -617,8 +657,8 @@ def test_dataframe(self): msg = ("cannot assemble the datetimes: time data .+ does not " "match format '%Y%m%d' \(match\)") with tm.assert_raises_regex(ValueError, msg): - to_datetime(df2) - result = to_datetime(df2, errors='coerce') + to_datetime(df2, cache=cache) + result = to_datetime(df2, errors='coerce', cache=cache) expected = Series([Timestamp('20150204 00:00:00'), NaT]) assert_series_equal(result, expected) @@ -629,7 +669,7 @@ def test_dataframe(self): with tm.assert_raises_regex(ValueError, msg): df2 = df.copy() df2['foo'] = 1 - to_datetime(df2) + to_datetime(df2, cache=cache) # not enough msg = ('to assemble mappings requires at least that \[year, month, ' @@ -640,7 +680,7 @@ def test_dataframe(self): ['month', 'day'], ['year', 'day', 'second']]: with tm.assert_raises_regex(ValueError, msg): - to_datetime(df[c]) + to_datetime(df[c], cache=cache) # duplicates msg = 'cannot assemble with duplicate keys' @@ -649,7 +689,7 @@ def test_dataframe(self): 'day': [4, 5]}) df2.columns = ['year', 'year', 'day'] with tm.assert_raises_regex(ValueError, msg): - to_datetime(df2) + to_datetime(df2, cache=cache) df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], @@ -657,16 +697,17 @@ def test_dataframe(self): 'hour': [4, 5]}) df2.columns = ['year', 'month', 'day', 'day'] with tm.assert_raises_regex(ValueError, msg): - to_datetime(df2) + to_datetime(df2, cache=cache) - def test_dataframe_dtypes(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dataframe_dtypes(self, cache): # #13451 df = DataFrame({'year': [2015, 2016], 'month': [2, 3], 'day': [4, 5]}) # int16 - result = to_datetime(df.astype('int16')) + result = to_datetime(df.astype('int16'), cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:00:00')]) assert_series_equal(result, expected) @@ -674,7 +715,7 @@ def test_dataframe_dtypes(self): # mixed dtypes df['month'] = df['month'].astype('int8') df['day'] = df['day'].astype('int8') - result = to_datetime(df) + result = to_datetime(df, cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:00:00')]) assert_series_equal(result, expected) @@ -684,18 +725,19 @@ def test_dataframe_dtypes(self): 'month': [1.5, 1], 'day': [1, 1]}) with pytest.raises(ValueError): - to_datetime(df) + to_datetime(df, cache=cache) class TestToDatetimeMisc(object): - def test_index_to_datetime(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_index_to_datetime(self, cache): idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = idx.to_datetime() - expected = DatetimeIndex(pd.to_datetime(idx.values)) + expected = DatetimeIndex(pd.to_datetime(idx.values, cache=cache)) tm.assert_index_equal(result, 
expected) with tm.assert_produces_warning(FutureWarning, @@ -706,17 +748,19 @@ def test_index_to_datetime(self): expected = DatetimeIndex([today]) tm.assert_index_equal(result, expected) - def test_to_datetime_iso8601(self): - result = to_datetime(["2012-01-01 00:00:00"]) + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_iso8601(self, cache): + result = to_datetime(["2012-01-01 00:00:00"], cache=cache) exp = Timestamp("2012-01-01 00:00:00") assert result[0] == exp - result = to_datetime(['20121001']) # bad iso 8601 + result = to_datetime(['20121001'], cache=cache) # bad iso 8601 exp = Timestamp('2012-10-01') assert result[0] == exp - def test_to_datetime_default(self): - rs = to_datetime('2001') + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_default(self, cache): + rs = to_datetime('2001', cache=cache) xp = datetime(2001, 1, 1) assert rs == xp @@ -726,71 +770,80 @@ def test_to_datetime_default(self): # pytest.raises(ValueError, to_datetime('01-13-2012', # dayfirst=True)) - def test_to_datetime_on_datetime64_series(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_on_datetime64_series(self, cache): # #2699 s = Series(date_range('1/1/2000', periods=10)) - result = to_datetime(s) + result = to_datetime(s, cache=cache) assert result[0] == s[0] - def test_to_datetime_with_space_in_series(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_space_in_series(self, cache): # GH 6428 s = Series(['10/18/2006', '10/18/2008', ' ']) - pytest.raises(ValueError, lambda: to_datetime(s, errors='raise')) - result_coerce = to_datetime(s, errors='coerce') + pytest.raises(ValueError, lambda: to_datetime(s, + errors='raise', + cache=cache)) + result_coerce = to_datetime(s, errors='coerce', cache=cache) expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), NaT]) tm.assert_series_equal(result_coerce, expected_coerce) - result_ignore = to_datetime(s, errors='ignore') + result_ignore = to_datetime(s, errors='ignore', cache=cache) tm.assert_series_equal(result_ignore, s) - def test_to_datetime_with_apply(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_apply(self, cache): # this is only locale tested with US/None locales tm._skip_if_has_locale() # GH 5195 # with a format and coerce a single item to_datetime fails td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3]) - expected = pd.to_datetime(td, format='%b %y') - result = td.apply(pd.to_datetime, format='%b %y') + expected = pd.to_datetime(td, format='%b %y', cache=cache) + result = td.apply(pd.to_datetime, format='%b %y', cache=cache) assert_series_equal(result, expected) td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) pytest.raises(ValueError, lambda: pd.to_datetime(td, format='%b %y', - errors='raise')) + errors='raise', + cache=cache)) pytest.raises(ValueError, lambda: td.apply(pd.to_datetime, format='%b %y', - errors='raise')) - expected = pd.to_datetime(td, format='%b %y', errors='coerce') + errors='raise', cache=cache)) + expected = pd.to_datetime(td, format='%b %y', errors='coerce', + cache=cache) result = td.apply( - lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) + lambda x: pd.to_datetime(x, format='%b %y', errors='coerce', + cache=cache)) assert_series_equal(result, expected) - def test_to_datetime_types(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_types(self, cache): # empty string - result = to_datetime('') + result 
= to_datetime('', cache=cache) assert result is NaT - result = to_datetime(['', '']) + result = to_datetime(['', ''], cache=cache) assert isna(result).all() # ints result = Timestamp(0) - expected = to_datetime(0) + expected = to_datetime(0, cache=cache) assert result == expected # GH 3888 (strings) - expected = to_datetime(['2012'])[0] - result = to_datetime('2012') + expected = to_datetime(['2012'], cache=cache)[0] + result = to_datetime('2012', cache=cache) assert result == expected # array = ['2012','20120101','20120101 12:01:01'] array = ['20120101', '20120101 12:01:01'] - expected = list(to_datetime(array)) + expected = list(to_datetime(array, cache=cache)) result = lmap(Timestamp, array) tm.assert_almost_equal(result, expected) @@ -799,13 +852,15 @@ def test_to_datetime_types(self): # expected = to_datetime('2012') # assert result == expected - def test_to_datetime_unprocessable_input(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_unprocessable_input(self, cache): # GH 4928 tm.assert_numpy_array_equal( - to_datetime([1, '1'], errors='ignore'), + to_datetime([1, '1'], errors='ignore', cache=cache), np.array([1, '1'], dtype='O') ) - pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise') + pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise', + cache=cache) def test_to_datetime_other_datetime64_units(self): # 5/25/2012 @@ -841,7 +896,8 @@ def test_to_datetime_overflow(self): with pytest.raises(OverflowError): date_range(start='1/1/1700', freq='B', periods=100000) - def test_string_na_nat_conversion(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_string_na_nat_conversion(self, cache): # GH #999, #858 from pandas.compat import parse_date @@ -859,7 +915,7 @@ def test_string_na_nat_conversion(self): result = tslib.array_to_datetime(strings) tm.assert_almost_equal(result, expected) - result2 = to_datetime(strings) + result2 = to_datetime(strings, cache=cache) assert isinstance(result2, DatetimeIndex) tm.assert_numpy_array_equal(result, result2.values) @@ -867,22 +923,25 @@ def test_string_na_nat_conversion(self): # GH 10636, default is now 'raise' pytest.raises(ValueError, - lambda: to_datetime(malformed, errors='raise')) + lambda: to_datetime(malformed, errors='raise', + cache=cache)) - result = to_datetime(malformed, errors='ignore') + result = to_datetime(malformed, errors='ignore', cache=cache) tm.assert_numpy_array_equal(result, malformed) - pytest.raises(ValueError, to_datetime, malformed, errors='raise') + pytest.raises(ValueError, to_datetime, malformed, errors='raise', + cache=cache) idx = ['a', 'b', 'c', 'd', 'e'] series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, '1/5/2000'], index=idx, name='foo') - dseries = Series([to_datetime('1/1/2000'), np.nan, - to_datetime('1/3/2000'), np.nan, - to_datetime('1/5/2000')], index=idx, name='foo') + dseries = Series([to_datetime('1/1/2000', cache=cache), np.nan, + to_datetime('1/3/2000', cache=cache), np.nan, + to_datetime('1/5/2000', cache=cache)], + index=idx, name='foo') - result = to_datetime(series) - dresult = to_datetime(dseries) + result = to_datetime(series, cache=cache) + dresult = to_datetime(dseries, cache=cache) expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) for i in range(5): @@ -890,7 +949,7 @@ def test_string_na_nat_conversion(self): if isna(x): expected[i] = tslib.iNaT else: - expected[i] = to_datetime(x) + expected[i] = to_datetime(x, cache=cache) assert_series_equal(result, expected, check_names=False) assert result.name == 'foo' @@ 
-898,26 +957,29 @@ def test_string_na_nat_conversion(self): assert_series_equal(dresult, expected, check_names=False) assert dresult.name == 'foo' - def test_dti_constructor_numpy_timeunits(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dti_constructor_numpy_timeunits(self, cache): # GH 9114 - base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT']) + base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT'], + cache=cache) for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']: values = base.values.astype(dtype) tm.assert_index_equal(DatetimeIndex(values), base) - tm.assert_index_equal(to_datetime(values), base) + tm.assert_index_equal(to_datetime(values, cache=cache), base) - def test_dayfirst(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dayfirst(self, cache): # GH 5917 arr = ['10/02/2014', '11/02/2014', '12/02/2014'] expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11), datetime(2014, 2, 12)]) idx1 = DatetimeIndex(arr, dayfirst=True) idx2 = DatetimeIndex(np.array(arr), dayfirst=True) - idx3 = to_datetime(arr, dayfirst=True) - idx4 = to_datetime(np.array(arr), dayfirst=True) + idx3 = to_datetime(arr, dayfirst=True, cache=cache) + idx4 = to_datetime(np.array(arr), dayfirst=True, cache=cache) idx5 = DatetimeIndex(Index(arr), dayfirst=True) idx6 = DatetimeIndex(Series(arr), dayfirst=True) tm.assert_index_equal(expected, idx1) @@ -952,7 +1014,8 @@ def test_guess_datetime_format_for_array(self): class TestToDatetimeInferFormat(object): - def test_to_datetime_infer_datetime_format_consistent_format(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_consistent_format(self, cache): s = pd.Series(pd.date_range('20000101', periods=50, freq='H')) test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f', @@ -961,90 +1024,113 @@ def test_to_datetime_infer_datetime_format_consistent_format(self): for test_format in test_formats: s_as_dt_strings = s.apply(lambda x: x.strftime(test_format)) - with_format = pd.to_datetime(s_as_dt_strings, format=test_format) + with_format = pd.to_datetime(s_as_dt_strings, format=test_format, + cache=cache) no_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=False) + infer_datetime_format=False, + cache=cache) yes_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=True) + infer_datetime_format=True, + cache=cache) # Whether the format is explicitly passed, it is inferred, or # it is not inferred, the results should all be the same tm.assert_series_equal(with_format, no_infer) tm.assert_series_equal(no_infer, yes_infer) - def test_to_datetime_infer_datetime_format_inconsistent_format(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_inconsistent_format(self, + cache): s = pd.Series(np.array(['01/01/2011 00:00:00', '01-02-2011 00:00:00', '2011-01-03T00:00:00'])) # When the format is inconsistent, infer_datetime_format should just # fallback to the default parsing - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011'])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, 
infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) - def test_to_datetime_infer_datetime_format_series_with_nans(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_series_with_nans(self, cache): s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan, '01/03/2011 00:00:00', np.nan])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) + + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_series_starting_with_nans(self, + cache): s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) - def test_to_datetime_iso8601_noleading_0s(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_iso8601_noleading_0s(self, cache): # GH 11871 s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3']) expected = pd.Series([pd.Timestamp('2014-01-01'), pd.Timestamp('2014-02-02'), pd.Timestamp('2015-03-03')]) - tm.assert_series_equal(pd.to_datetime(s), expected) - tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected) + tm.assert_series_equal(pd.to_datetime(s, cache=cache), expected) + tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d', + cache=cache), expected) class TestDaysInMonth(object): # tests for issue #10154 - def test_day_not_in_month_coerce(self): - assert isna(to_datetime('2015-02-29', errors='coerce')) + @pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_coerce(self, cache): + assert isna(to_datetime('2015-02-29', errors='coerce', cache=cache)) assert isna(to_datetime('2015-02-29', format="%Y-%m-%d", - errors='coerce')) + errors='coerce', cache=cache)) assert isna(to_datetime('2015-02-32', format="%Y-%m-%d", - errors='coerce')) + errors='coerce', cache=cache)) assert isna(to_datetime('2015-04-31', format="%Y-%m-%d", - errors='coerce')) + errors='coerce', cache=cache)) - def test_day_not_in_month_raise(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_raise(self, cache): pytest.raises(ValueError, to_datetime, '2015-02-29', - errors='raise') + errors='raise', cache=cache) pytest.raises(ValueError, to_datetime, '2015-02-29', - errors='raise', format="%Y-%m-%d") + errors='raise', format="%Y-%m-%d", cache=cache) pytest.raises(ValueError, to_datetime, '2015-02-32', - errors='raise', format="%Y-%m-%d") + errors='raise', format="%Y-%m-%d", cache=cache) pytest.raises(ValueError, to_datetime, '2015-04-31', - errors='raise', format="%Y-%m-%d") + errors='raise', format="%Y-%m-%d", cache=cache) - def test_day_not_in_month_ignore(self): - assert to_datetime('2015-02-29', errors='ignore') == '2015-02-29' + @pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_ignore(self, cache): + assert to_datetime('2015-02-29', errors='ignore', + cache=cache) == 
'2015-02-29' assert to_datetime('2015-02-29', errors='ignore', - format="%Y-%m-%d") == '2015-02-29' + format="%Y-%m-%d", cache=cache) == '2015-02-29' assert to_datetime('2015-02-32', errors='ignore', - format="%Y-%m-%d") == '2015-02-32' + format="%Y-%m-%d", cache=cache) == '2015-02-32' assert to_datetime('2015-04-31', errors='ignore', - format="%Y-%m-%d") == '2015-04-31' + format="%Y-%m-%d", cache=cache) == '2015-04-31' class TestDatetimeParsingWrappers(object): - def test_parsers(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers(self, cache): # https://github.com/dateutil/dateutil/issues/217 import dateutil @@ -1108,7 +1194,7 @@ def test_parsers(self): result3 = to_datetime([date_str], yearfirst=yearfirst) # result5 is used below result4 = to_datetime(np.array([date_str], dtype=object), - yearfirst=yearfirst) + yearfirst=yearfirst, cache=cache) result6 = DatetimeIndex([date_str], yearfirst=yearfirst) # result7 is used below result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst) @@ -1222,7 +1308,7 @@ def test_parsers_dayfirst_yearfirst(self): assert result2 == expected result3 = to_datetime(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) + yearfirst=yearfirst, cache=cache) result4 = DatetimeIndex([date_str], dayfirst=dayfirst, yearfirst=yearfirst)[0] @@ -1231,7 +1317,8 @@ def test_parsers_dayfirst_yearfirst(self): assert result3 == expected assert result4 == expected - def test_parsers_timestring(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers_timestring(self, cache): # must be the same as dateutil result cases = {'10:15': (parse('10:15'), datetime(1, 1, 1, 10, 15)), '9:05': (parse('9:05'), datetime(1, 1, 1, 9, 5))} @@ -1288,7 +1375,7 @@ def test_parsers_time(self): def test_parsers_timezone_minute_offsets_roundtrip(self): # GH11708 - base = to_datetime("2013-01-01 00:00:00") + base = to_datetime("2013-01-01 00:00:00", cache=cache) dt_strings = [ ('2013-01-01 05:45+0545', "Asia/Katmandu", @@ -1299,7 +1386,7 @@ def test_parsers_timezone_minute_offsets_roundtrip(self): ] for dt_string, tz, dt_string_repr in dt_strings: - dt_time = to_datetime(dt_string) + dt_time = to_datetime(dt_string, cache=cache) assert base == dt_time converted_time = dt_time.tz_localize('UTC').tz_convert(tz) assert dt_string_repr == repr(converted_time) From 33c79d3218cdf61006ccfdffdd717d72ef0218eb Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 25 Sep 2017 17:51:07 -0700 Subject: [PATCH 07/25] Fix asv errors and condition --- asv_bench/benchmarks/timeseries.py | 15 ++++++--------- pandas/core/tools/datetimes.py | 13 ++++++------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index d9d1b48f420e6..1ae3601320e1e 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -386,23 +386,20 @@ def time_format_exact(self): def time_format_no_exact(self): to_datetime(self.s, format='%d%b%y', exact=False) - def time_cache_with_unique_seconds_and unit(self): - to_datetime(self.unique_numeric_seconds, unit='s') + def time_cache_with_unique_seconds_and_unit(self): + to_datetime(self.unique_numeric_seconds, unit='s') def time_cache_with_dup_seconds_and_unit(self): - to_datetime(self.dup_numeric_seconds, unit='s') + to_datetime(self.dup_numeric_seconds, unit='s') def time_cache_with_dup_string_dates(self): - to_datetime(self.dup_string_dates) + to_datetime(self.dup_string_dates) def time_cache_with_dup_string_dates_and_format(self): - 
to_datetime(self.dup_string_dates, format='%Y-%m-%d') + to_datetime(self.dup_string_dates, format='%Y-%m-%d') def time_cache_with_dup_string_tzoffset_dates(self): - to_datetime(self.dup_string_with_tz) - - def time_cache_with_dup_string_tzoffset_dates_and_format(self): - to_datetim(self.dup_string_with_tz, format='%Y-%m-%d %H:%M:%S%z') + to_datetime(self.dup_string_with_tz) class Offsets(object): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index ac29c7144556c..da4a37b041a8f 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -373,13 +373,12 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): arg = arg + offset convert_cache = None - if cache and is_list_like(arg): - if len(arg) >= 1000: - unique_dates = algorithms.unique(arg) - if len(unique_dates) != len(arg): - from pandas import Series - cache_dates = _convert_listlike(unique_dates, False, format) - convert_cache = Series(cache_dates, index=unique_dates) + if cache and is_list_like(arg) and len(arg) >= 1000: + unique_dates = algorithms.unique(arg) + if len(unique_dates) != len(arg): + from pandas import Series + cache_dates = _convert_listlike(unique_dates, False, format) + convert_cache = Series(cache_dates, index=unique_dates) if isinstance(arg, tslib.Timestamp): result = arg From dcaafb68d742d17a89d1eba5bed7433a6bdfdeba Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 25 Sep 2017 18:19:39 -0700 Subject: [PATCH 08/25] Pep8 fixes --- pandas/tests/indexes/datetimes/test_tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 1a099fe2d1505..704b2055b41d7 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -222,7 +222,7 @@ def test_to_datetime_array_of_dt64s(self, cache): errors='raise') tm.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='coerce', + pd.to_datetime(dts_with_oob, box=False, errors='coerce', cache=cache), np.array( [ @@ -401,7 +401,7 @@ def test_datetime_invalid_datatype(self): pd.to_datetime(bool) with pytest.raises(TypeError): pd.to_datetime(pd.to_datetime) - + @pytest.mark.parametrize("utc", [True, None]) @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None]) @pytest.mark.parametrize("box", [True, False]) @@ -1069,8 +1069,8 @@ def test_to_datetime_infer_datetime_format_series_with_nans(self, cache): cache=cache)) @pytest.mark.parametrize('cache', [True, False]) - def test_to_datetime_infer_datetime_format_series_starting_with_nans(self, - cache): + def test_to_datetime_infer_datetime_format_series_start_with_nans(self, + cache): s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) From 04df9d9f5740e9856c5a40df536462ce3d9420ce Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 25 Sep 2017 20:53:58 -0700 Subject: [PATCH 09/25] Remove unused import --- pandas/core/tools/datetimes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index da4a37b041a8f..235f9360d1287 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -21,8 +21,7 @@ is_float, is_list_like, is_scalar, - is_numeric_dtype, - is_string_dtype) + is_numeric_dtype) from pandas.core.dtypes.generic import ( ABCIndexClass, ABCSeries, ABCDataFrame) From 34b468fa263f75c1cfe7b794e2a9e116c87d84f4 Mon Sep 
17 00:00:00 2001 From: Matt Roeschke Date: Tue, 26 Sep 2017 12:21:08 -0700 Subject: [PATCH 10/25] Wrap cache logic in a function --- pandas/core/tools/datetimes.py | 57 +++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 235f9360d1287..1018ac57ab870 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -116,7 +116,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, If True, use a cache of unique, converted dates to apply the datetime conversion. Produces signficant speed-ups when parsing duplicate date. - .. versionadded: 0.20.2 + .. versionadded: 0.21.0 Returns ------- ret : datetime if parsing succeeded. @@ -310,6 +310,28 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): except (ValueError, TypeError): raise e + def _maybe_convert_cache(arg, cache): + """Try to convert the datetimelike arg using + a cache of converted dates. + + arg: datetimelike arg from to_datetime + cache: bool whether to convert using a cache + + Result: + Series of converted datetime arg or + None if the conversion failed + """ + if cache and is_list_like(arg) and len(arg) >= 1000: + unique_dates = algorithms.unique(arg) + if len(unique_dates) != len(arg): + from pandas import Series + cache_dates = _convert_listlike(unique_dates, False, format) + convert_cache = Series(cache_dates, index=unique_dates) + if not isinstance(arg, Series): + arg = Series(arg) + return arg.map(convert_cache) + return None + if arg is None: return None @@ -371,41 +393,32 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): arg = np.asarray(arg) arg = arg + offset - convert_cache = None - if cache and is_list_like(arg) and len(arg) >= 1000: - unique_dates = algorithms.unique(arg) - if len(unique_dates) != len(arg): - from pandas import Series - cache_dates = _convert_listlike(unique_dates, False, format) - convert_cache = Series(cache_dates, index=unique_dates) - if isinstance(arg, tslib.Timestamp): result = arg elif isinstance(arg, ABCSeries): - if convert_cache is not None: - result = arg.map(convert_cache) - else: + result = _maybe_convert_cache(arg, cache) + if result is None: from pandas import Series values = _convert_listlike(arg._values, True, format) result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): - if convert_cache is not None: - from pandas import Series - result = Series(arg).map(convert_cache).values + result = _maybe_convert_cache(arg, cache) + if result is None: + result = _convert_listlike(arg, box, format, name=arg.name) + else: + result = result.values if box: result = DatetimeIndex(result, tz=tz, name=arg.name) - else: - result = _convert_listlike(arg, box, format, name=arg.name) elif is_list_like(arg): - if convert_cache is not None: - from pandas import Series - result = Series(arg).map(convert_cache).values + result = _maybe_convert_cache(arg, cache) + if result is None: + result = _convert_listlike(arg, box, format) + else: + result = result.values if box: result = DatetimeIndex(result, tz=tz) - else: - result = _convert_listlike(arg, box, format) else: result = _convert_listlike(np.array([arg]), box, format)[0] From d287cc66c7165d4adbae583bb2129e0455463d7a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 26 Sep 2017 13:00:43 -0700 Subject: [PATCH 11/25] Fix Series test 
--- pandas/core/tools/datetimes.py | 11 ++++++----- pandas/tests/indexes/datetimes/test_tools.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 1018ac57ab870..dc0d6fbf6d61f 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -310,7 +310,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): except (ValueError, TypeError): raise e - def _maybe_convert_cache(arg, cache): + def _maybe_convert_cache(arg, cache, tz): """Try to convert the datetimelike arg using a cache of converted dates. @@ -325,7 +325,8 @@ def _maybe_convert_cache(arg, cache): unique_dates = algorithms.unique(arg) if len(unique_dates) != len(arg): from pandas import Series - cache_dates = _convert_listlike(unique_dates, False, format) + cache_dates = _convert_listlike(unique_dates, True, format, + tz=tz) convert_cache = Series(cache_dates, index=unique_dates) if not isinstance(arg, Series): arg = Series(arg) @@ -396,7 +397,7 @@ def _maybe_convert_cache(arg, cache): if isinstance(arg, tslib.Timestamp): result = arg elif isinstance(arg, ABCSeries): - result = _maybe_convert_cache(arg, cache) + result = _maybe_convert_cache(arg, cache, tz) if result is None: from pandas import Series values = _convert_listlike(arg._values, True, format) @@ -404,7 +405,7 @@ def _maybe_convert_cache(arg, cache): elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): - result = _maybe_convert_cache(arg, cache) + result = _maybe_convert_cache(arg, cache, tz) if result is None: result = _convert_listlike(arg, box, format, name=arg.name) else: @@ -412,7 +413,7 @@ def _maybe_convert_cache(arg, cache): if box: result = DatetimeIndex(result, tz=tz, name=arg.name) elif is_list_like(arg): - result = _maybe_convert_cache(arg, cache) + result = _maybe_convert_cache(arg, cache, tz) if result is None: result = _convert_listlike(arg, box, format) else: diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 704b2055b41d7..47bb1c2c6529b 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -425,7 +425,7 @@ def test_to_datetime_cache_series(self, utc, format): test_dates = [date] * 10**5 data = pd.Series(test_dates) result = pd.to_datetime(data, utc=utc, format=format, cache=True) - expected = pd.to_datetime(data, utc=utc, format=format) + expected = pd.to_datetime(data, utc=utc, format=format, cache=False) tm.assert_series_equal(result, expected) def test_to_datetime_cache_scalar(self): From 1bf4c9dd0ab8be7094d935a9ddc889e81e153ae6 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 26 Sep 2017 13:36:29 -0700 Subject: [PATCH 12/25] Add whatsnew and small documentation fix --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/tools/datetimes.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4c460eeb85b82..9b0d9168c5704 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -999,6 +999,7 @@ Performance Improvements - :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) - Improved performance of the :class:`CategoricalIndex` for data that is already categorical dtype (:issue:`17513`) - Improved performance of :meth:`RangeIndex.min` and :meth:`RangeIndex.max` by using 
``RangeIndex`` properties to perform the computations (:issue:`17607`) +- Added a keyword argument, `cache`, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue: `11665`) .. _whatsnew_0210.docs: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index dc0d6fbf6d61f..1ea1735e11314 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -114,7 +114,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, .. versionadded: 0.20.0 cache : boolean, default False If True, use a cache of unique, converted dates to apply the datetime - conversion. Produces significant speed-ups when parsing duplicate date. + conversion. Produces significant speed-ups when parsing duplicate dates. .. versionadded: 0.21.1 Returns From 3ffdd461e3f02742aedc089ca5f33d4f24d5ef76 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 26 Sep 2017 13:39:03 -0700 Subject: [PATCH 13/25] pep 8 fixes --- pandas/core/tools/datetimes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 1ea1735e11314..6009a194ac285 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -311,14 +311,14 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): raise e def _maybe_convert_cache(arg, cache, tz): - """Try to convert the datetimelike arg using + """Try to convert the datetimelike arg using a cache of converted dates. - + arg: datetimelike arg from to_datetime cache: bool whether to convert using a cache - Result: - Series of converted datetime arg or + Returns: + Series of converted datetime arg or None if the conversion failed """ if cache and is_list_like(arg) and len(arg) >= 1000: from pandas import Series cache_dates = _convert_listlike(unique_dates, True, format, tz=tz) convert_cache = Series(cache_dates, index=unique_dates) From a093b88d98fab606f06d1e9a7d2fcf17d00d1434 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 9 Oct 2017 16:26:55 -0700 Subject: [PATCH 14/25] Move box logic into maybe_convert_cache --- pandas/core/tools/datetimes.py | 62 ++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 6009a194ac285..408a96b20ea73 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -112,7 +112,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, origin. .. versionadded: 0.20.0 - cache : boolean, default False + cache : boolean, default True If True, use a cache of unique, converted dates to apply the datetime conversion. Produces significant speed-ups when parsing duplicate dates. @@ -310,16 +310,32 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): except (ValueError, TypeError): raise e - def _maybe_convert_cache(arg, cache, tz): - """Try to convert the datetimelike arg using - a cache of converted dates. - - arg: datetimelike arg from to_datetime - cache: bool whether to convert using a cache - - Returns: - Series of converted datetime arg or - None if the conversion failed + def _maybe_convert_cache(arg, cache, box, format, name=None, tz=tz): + """ + Try to convert the datetimelike arg using + a cache of converted dates. 
+ + Parameters + ---------- + arg : integer, float, string, datetime, list, tuple, 1-d array, Series + Datetime argument to convert + cache : boolean + If True, try to convert the dates with a cache + If False, short circuit and return None + Flag whether to cache the converted dates + box : boolean + If True, return a DatetimeIndex + if False, return an ndarray of values + tz : String or None + 'utc' if UTC=True was passed else None + name : String, default None + DatetimeIndex name + Returns + ------- + Series if original argument was a Series + DatetimeIndex if box=True and original argument was not a Series + ndarray if box=False and original argument was not a Series + None if the conversion failed """ if cache and is_list_like(arg) and len(arg) >= 1000: unique_dates = algorithms.unique(arg) @@ -328,9 +344,13 @@ def _maybe_convert_cache(arg, cache, tz): cache_dates = _convert_listlike(unique_dates, True, format, tz=tz) convert_cache = Series(cache_dates, index=unique_dates) - if not isinstance(arg, Series): - arg = Series(arg) - return arg.map(convert_cache) + result = Series(arg, name=name).map(convert_cache) + if isinstance(arg, Series): + return result + elif box: + return DatetimeIndex(result, name=name) + else: + return result.values return None if arg is None: @@ -397,7 +417,7 @@ def _maybe_convert_cache(arg, cache, tz): if isinstance(arg, tslib.Timestamp): result = arg elif isinstance(arg, ABCSeries): - result = _maybe_convert_cache(arg, cache, tz) + result = _maybe_convert_cache(arg, cache, box, format, name=arg.name) if result is None: from pandas import Series values = _convert_listlike(arg._values, True, format) @@ -405,21 +425,13 @@ def _maybe_convert_cache(arg, cache, tz): elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): - result = _maybe_convert_cache(arg, cache, tz) + result = _maybe_convert_cache(arg, cache, box, format, name=arg.name) if result is None: result = _convert_listlike(arg, box, format, name=arg.name) - else: - result = result.values - if box: - result = DatetimeIndex(result, tz=tz, name=arg.name) elif is_list_like(arg): - result = _maybe_convert_cache(arg, cache, tz) + result = _maybe_convert_cache(arg, cache, box, format) if result is None: result = _convert_listlike(arg, box, format) - else: - result = result.values - if box: - result = DatetimeIndex(result, tz=tz) else: result = _convert_listlike(np.array([arg]), box, format)[0] From d1fc211936d45d6c7028e9bda2c8725ab1b115f3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 19 Oct 2017 22:53:20 -0700 Subject: [PATCH 15/25] Use quicker unique check --- pandas/core/tools/datetimes.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 408a96b20ea73..1eca26ad9cb91 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -338,8 +338,10 @@ def _maybe_convert_cache(arg, cache, box, format, name=None, tz=tz): None if the conversion failed """ if cache and is_list_like(arg) and len(arg) >= 1000: - unique_dates = algorithms.unique(arg) - if len(unique_dates) != len(arg): + # Perform a quicker unique check + from pandas import Index + if not Index(arg).is_unique: + unique_dates = algorithms.unique(arg) from pandas import Series cache_dates = _convert_listlike(unique_dates, True, format, tz=tz) From 9486df3ef84cf87cad98c18181e9853def28c649 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 4 
Nov 2017 00:31:50 -0700 Subject: [PATCH 16/25] Move caching function outside to_datetime --- pandas/core/indexes/datetimes.py | 2 +- pandas/core/tools/datetimes.py | 90 +++++++++----------- pandas/tests/indexes/datetimes/test_tools.py | 6 +- 3 files changed, 43 insertions(+), 55 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index dd1c7306d2c26..78869de318dce 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -339,7 +339,7 @@ def __new__(cls, data=None, if not (is_datetime64_dtype(data) or is_datetimetz(data) or is_integer_dtype(data)): data = tools.to_datetime(data, dayfirst=dayfirst, - yearfirst=yearfirst, cache=False) + yearfirst=yearfirst) if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 1eca26ad9cb91..fd96d819ed201 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -35,11 +35,35 @@ def _guess_datetime_format_for_array(arr, **kwargs): if len(non_nan_elements): return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs) +def _maybe_cache(arg, format, cache, tz, _convert_listlike): + """Create a cache of unique dates from an array of dates""" + from pandas import Series + cache_array = Series() + if cache: + # Perform a quicker unique check + from pandas import Index + if not Index(arg).is_unique: + unique_dates = algorithms.unique(arg) + cache_dates = _convert_listlike(unique_dates, True, format, + tz=tz) + cache_array = Series(cache_dates, index=unique_dates) + return cache_array + +def _convert_and_box_cache(arg, cache_array, box, name=None): + """Convert array of dates with a cache and box the result""" + from pandas import Series + from pandas.core.indexes.datetimes import DatetimeIndex + result = Series(arg).map(cache_array) + if box: + result = DatetimeIndex(result, name=name) + else: + result = result.values + return result def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, unit=None, infer_datetime_format=False, origin='unix', - cache=True): + cache=False): """ Convert argument to datetime. @@ -310,51 +334,6 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): except (ValueError, TypeError): raise e - def _maybe_convert_cache(arg, cache, box, format, name=None, tz=tz): - """ - Try to convert the datetimelike arg using - a cache of converted dates. 
- - Parameters - ---------- - arg : integer, float, string, datetime, list, tuple, 1-d array, Series - Datetime argument to convert - cache : boolean - If True, try to convert the dates with a cache - If False, short circuit and return None - Flag whether to cache the converted dates - box : boolean - If True, return a DatetimeIndex - if False, return an ndarray of values - tz : String or None - 'utc' if UTC=True was passed else None - name : String, default None - DatetimeIndex name - Returns - ------- - Series if original argument was a Series - DatetimeIndex if box=True and original argument was not a Series - ndarray if box=False and original argument was not a Series - None if the conversion failed - """ - if cache and is_list_like(arg) and len(arg) >= 1000: - # Perform a quicker unique check - from pandas import Index - if not Index(arg).is_unique: - unique_dates = algorithms.unique(arg) - from pandas import Series - cache_dates = _convert_listlike(unique_dates, True, format, - tz=tz) - convert_cache = Series(cache_dates, index=unique_dates) - result = Series(arg, name=name).map(convert_cache) - if isinstance(arg, Series): - return result - elif box: - return DatetimeIndex(result, name=name) - else: - return result.values - return None - if arg is None: return None @@ -419,20 +398,27 @@ def _maybe_convert_cache(arg, cache, box, format, name=None, tz=tz): if isinstance(arg, tslib.Timestamp): result = arg elif isinstance(arg, ABCSeries): - result = _maybe_convert_cache(arg, cache, box, format, name=arg.name) - if result is None: + cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) + if not cache_array.empty: + result = arg.map(cache_array) + else: from pandas import Series values = _convert_listlike(arg._values, True, format) result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): - result = _maybe_convert_cache(arg, cache, box, format, name=arg.name) - if result is None: + cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) + if not cache_array.empty: + result = _convert_and_box_cache(arg, cache_array, box, + name=arg.name) + else: result = _convert_listlike(arg, box, format, name=arg.name) elif is_list_like(arg): - result = _maybe_convert_cache(arg, cache, box, format) - if result is None: + cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) + if not cache_array.empty: + result = _convert_and_box_cache(arg, cache_array, box) + else: result = _convert_listlike(arg, box, format) else: result = _convert_listlike(np.array([arg]), box, format)[0] diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 47bb1c2c6529b..ede85a1d97bfd 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1224,7 +1224,8 @@ def test_parsers(self, cache): assert result3 is tslib.NaT assert result4 is tslib.NaT - def test_parsers_dayfirst_yearfirst(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers_dayfirst_yearfirst(self, cache): # OK # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00 @@ -1373,7 +1374,8 @@ def test_parsers_time(self): assert isinstance(res, list) assert res == expected_arr - def test_parsers_timezone_minute_offsets_roundtrip(self): + @pytest.mark.parametrize('cache', [True, 
False]) + def test_parsers_timezone_minute_offsets_roundtrip(self, cache): # GH11708 base = to_datetime("2013-01-01 00:00:00", cache=cache) dt_strings = [ From d059d44706729f06545859ada4a6efecb2d894b1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 4 Nov 2017 22:55:22 -0700 Subject: [PATCH 17/25] Pass most tests --- pandas/core/tools/datetimes.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index fd96d819ed201..9f53cf5b91e51 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -49,15 +49,17 @@ def _maybe_cache(arg, format, cache, tz, _convert_listlike): cache_array = Series(cache_dates, index=unique_dates) return cache_array -def _convert_and_box_cache(arg, cache_array, box, name=None): +def _convert_and_box_cache(arg, cache_array, box, errors, tz, name=None): """Convert array of dates with a cache and box the result""" from pandas import Series from pandas.core.indexes.datetimes import DatetimeIndex result = Series(arg).map(cache_array) if box: - result = DatetimeIndex(result, name=name) - else: - result = result.values + if errors == 'ignore': + from pandas import Index + result = Index(result) + else: + result = DatetimeIndex(result, tz=tz, name=name) return result def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, @@ -410,14 +412,14 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): elif isinstance(arg, ABCIndexClass): cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) if not cache_array.empty: - result = _convert_and_box_cache(arg, cache_array, box, + result = _convert_and_box_cache(arg, cache_array, box, errors, tz, name=arg.name) else: result = _convert_listlike(arg, box, format, name=arg.name) elif is_list_like(arg): cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) if not cache_array.empty: - result = _convert_and_box_cache(arg, cache_array, box) + result = _convert_and_box_cache(arg, cache_array, box, errors, tz) else: result = _convert_listlike(arg, box, format) else: From 02ab4f38105204d9dc323e71331336ef8fa4982f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 5 Nov 2017 12:36:42 -0800 Subject: [PATCH 18/25] Skip test related to GH 18111, lint --- doc/source/whatsnew/v0.21.0.txt | 1 - doc/source/whatsnew/v0.21.1.txt | 2 +- pandas/core/tools/datetimes.py | 9 ++++++--- pandas/tests/indexes/datetimes/test_tools.py | 6 ++++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9b0d9168c5704..4c460eeb85b82 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -999,7 +999,6 @@ Performance Improvements - :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) - Improved performance of the :class:`CategoricalIndex` for data that is already categorical dtype (:issue:`17513`) - Improved performance of :meth:`RangeIndex.min` and :meth:`RangeIndex.max` by using ``RangeIndex`` properties to perform the computations (:issue:`17607`) -- Added a keyword argument, `cache`, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue: `11665`) .. 
_whatsnew_0210.docs: diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 185f08514641f..9534d582591b2 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -39,7 +39,7 @@ Deprecations Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- +- Added a keyword argument, `cache`, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue: `11665`) - - diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9f53cf5b91e51..2aecdf35c67c0 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -35,6 +35,7 @@ def _guess_datetime_format_for_array(arr, **kwargs): if len(non_nan_elements): return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs) + def _maybe_cache(arg, format, cache, tz, _convert_listlike): """Create a cache of unique dates from an array of dates""" from pandas import Series @@ -49,6 +50,7 @@ def _maybe_cache(arg, format, cache, tz, _convert_listlike): cache_array = Series(cache_dates, index=unique_dates) return cache_array + def _convert_and_box_cache(arg, cache_array, box, errors, tz, name=None): """Convert array of dates with a cache and box the result""" from pandas import Series @@ -62,6 +64,7 @@ def _convert_and_box_cache(arg, cache_array, box, errors, tz, name=None): result = DatetimeIndex(result, tz=tz, name=name) return result + def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, unit=None, infer_datetime_format=False, origin='unix', @@ -138,11 +141,11 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, origin. .. versionadded: 0.20.0 - cache : boolean, default True - If True, use a cache of unique, converted dates to apply the datetime + cache : boolean, default False + If False, use a cache of unique, converted dates to apply the datetime conversion. Produces significant speed-ups when parsing duplicate dates. - .. versionadded: 0.21.0 + .. versionadded: 0.21.1 Returns ------- ret : datetime if parsing succeeded. diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 704b2055b41d7..cf2f2f56e55cb 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -372,7 +372,10 @@ def test_to_datetime_tz_psycopg2(self, cache): dtype='datetime64[ns, UTC]') tm.assert_index_equal(result, expected) - @pytest.mark.parametrize('cache', [True, False]) + bool_skip = pytest.mark.skipif(True, reason="GH 18111") + + @pytest.mark.parametrize('cache', [pytest.param(True, marks=bool_skip), + False]) def test_datetime_bool(self, cache): # GH13176 with pytest.raises(TypeError): @@ -450,7 +453,6 @@ def test_week_without_day_and_calendar_year(self, date, format): class TestToDatetimeUnit(object): - @pytest.mark.parametrize('cache', [True, False]) def test_unit(self, cache): # GH 11758 From 82f36d39f00b0f670661ae3c3d93768440ce2503 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 5 Nov 2017 16:42:48 -0800 Subject: [PATCH 19/25] Update docstring --- pandas/core/tools/datetimes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 2aecdf35c67c0..aa2dd088999ee 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -143,7 +143,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, .. 
versionadded: 0.20.0 cache : boolean, default False If False, use a cache of unique, converted dates to apply the datetime - conversion. Produces significant speed-ups when parsing duplicate dates. + conversion. May produce significant speed-up when parsing duplicate date + strings, especially ones with timezone offsets. .. versionadded: 0.21.1 Returns From 76547e1631094680c3d7c34a2ee1dd79be9d20aa Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 7 Nov 2017 20:44:17 -0800 Subject: [PATCH 20/25] adjust imports, docs and move whatsnew --- doc/source/whatsnew/v0.21.1.txt | 2 +- doc/source/whatsnew/v0.22.0.txt | 2 +- pandas/core/tools/datetimes.py | 8 +++----- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 9534d582591b2..4483cb90a6f48 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -39,7 +39,7 @@ Deprecations Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Added a keyword argument, `cache`, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue: `11665`) +- - - diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 61679b14a8592..712119caae6f2 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -70,7 +70,7 @@ Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`) -- +- Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`) - .. _whatsnew_0220.docs: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index aa2dd088999ee..8e2894d8d917e 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -53,12 +53,10 @@ def _maybe_cache(arg, format, cache, tz, _convert_listlike): def _convert_and_box_cache(arg, cache_array, box, errors, tz, name=None): """Convert array of dates with a cache and box the result""" - from pandas import Series - from pandas.core.indexes.datetimes import DatetimeIndex + from pandas import Series, DatetimeIndex, Index result = Series(arg).map(cache_array) if box: if errors == 'ignore': - from pandas import Index result = Index(result) else: result = DatetimeIndex(result, tz=tz, name=name) @@ -142,11 +140,11 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, .. versionadded: 0.20.0 cache : boolean, default False - If False, use a cache of unique, converted dates to apply the datetime + If True, use a cache of unique, converted dates to apply the datetime conversion. May produce significant speed-up when parsing duplicate date strings, especially ones with timezone offsets. - .. versionadded: 0.21.1 + .. versionadded: 0.22.0 Returns ------- ret : datetime if parsing succeeded. 
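The two helpers the series has now settled on split the work cleanly: _maybe_cache parses each unique value once into a lookup Series, and _convert_and_box_cache maps the full argument through that Series before boxing the result. For readers following along, here is a minimal standalone sketch of the same map-based technique outside the pandas internals; the name parse_with_cache is illustrative only, and the sketch ignores the error-handling, timezone, box, and length-threshold details the real helpers carry:

    import numpy as np
    import pandas as pd

    def parse_with_cache(arg):
        # Parse each distinct value once; reuse the result for every duplicate.
        unique_dates = pd.unique(np.asarray(arg, dtype=object))
        if len(unique_dates) == len(arg):
            # No duplicates: building a cache would only add overhead.
            return pd.to_datetime(arg)
        cache = pd.Series(pd.to_datetime(unique_dates), index=unique_dates)
        # Series.map does a hash lookup per element instead of a re-parse.
        return pd.DatetimeIndex(pd.Series(arg).map(cache))

    print(parse_with_cache(['2013-01-01', '2013-01-02'] * 3))

For n values containing u unique dates, the expensive string parsing runs u times rather than n; the remaining cost is a single vectorized map.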
From 590c9cc0547ef553394cb31afacad6ed49bbf26c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 7 Nov 2017 20:45:57 -0800 Subject: [PATCH 21/25] Remove whitespace --- doc/source/whatsnew/v0.21.1.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 4483cb90a6f48..185f08514641f 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -39,7 +39,7 @@ Deprecations Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- +- - - From 9a985acb1557b88ab8cc8fe4f4cecf60811212dc Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 8 Nov 2017 22:42:48 -0800 Subject: [PATCH 22/25] Address comments --- asv_bench/benchmarks/timeseries.py | 35 ++++++++++++++------ pandas/core/tools/datetimes.py | 33 ++++++++++++++++-- pandas/tests/indexes/datetimes/test_tools.py | 7 ++-- 3 files changed, 59 insertions(+), 16 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 1ae3601320e1e..9614a63332609 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -386,20 +386,35 @@ def time_format_exact(self): def time_format_no_exact(self): to_datetime(self.s, format='%d%b%y', exact=False) - def time_cache_with_unique_seconds_and_unit(self): - to_datetime(self.unique_numeric_seconds, unit='s') + def time_cache_true_with_unique_seconds_and_unit(self): + to_datetime(self.unique_numeric_seconds, unit='s', cache=True) - def time_cache_with_dup_seconds_and_unit(self): - to_datetime(self.dup_numeric_seconds, unit='s') + def time_cache_false_with_unique_seconds_and_unit(self): + to_datetime(self.unique_numeric_seconds, unit='s', cache=False) - def time_cache_with_dup_string_dates(self): - to_datetime(self.dup_string_dates) + def time_cache_true_with_dup_seconds_and_unit(self): + to_datetime(self.dup_numeric_seconds, unit='s', cache=True) - def time_cache_with_dup_string_dates_and_format(self): - to_datetime(self.dup_string_dates, format='%Y-%m-%d') + def time_cache_false_with_dup_seconds_and_unit(self): + to_datetime(self.dup_numeric_seconds, unit='s', cache=False) - def time_cache_with_dup_string_tzoffset_dates(self): - to_datetime(self.dup_string_with_tz) + def time_cache_true_with_dup_string_dates(self): + to_datetime(self.dup_string_dates, cache=True) + + def time_cache_false_with_dup_string_dates(self): + to_datetime(self.dup_string_dates, cache=False) + + def time_cache_true_with_dup_string_dates_and_format(self): + to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=True) + + def time_cache_false_with_dup_string_dates_and_format(self): + to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=False) + + def time_cache_true_with_dup_string_tzoffset_dates(self): + to_datetime(self.dup_string_with_tz, cache=True) + + def time_cache_false_with_dup_string_tzoffset_dates(self): + to_datetime(self.dup_string_with_tz, cache=False) class Offsets(object): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 8e2894d8d917e..95e0c8b552ee1 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -37,7 +37,21 @@ def _guess_datetime_format_for_array(arr, **kwargs): def _maybe_cache(arg, format, cache, tz, _convert_listlike): - """Create a cache of unique dates from an array of dates""" + """ + Create a cache of unique dates from an array of dates + + Parameters + ---------- + arg : integer, float, string, datetime, list, tuple, 1-d array of dates + format : string, 
strftime to parse time + cache: boolean, whether to convert with cache + tz: string, timezone of the dates + _convert_listlike: function, conversion function to apply on dates + + Returns + ------- + cache_array: Series, cache of converted, unique dates, can be empty + """ from pandas import Series cache_array = Series() if cache: @@ -52,7 +66,22 @@ def _maybe_cache(arg, format, cache, tz, _convert_listlike): def _convert_and_box_cache(arg, cache_array, box, errors, tz, name=None): - """Convert array of dates with a cache and box the result""" + """ + Convert array of dates with a cache and box the result + + Parameters + ---------- + arg : integer, float, string, datetime, list, tuple, 1-d array of dates + cache_array: Series, cache of converted, unique dates + box: boolean, True boxes result as an Index-like + errors: string, 'ignore' plus box=True will convert result to Index + tz: string, timezone of the dates + name: string, default None. name for a DatetimeIndex + + Returns + ------- + result: Index-like if box=True else array-like of converted dates + """ from pandas import Series, DatetimeIndex, Index result = Series(arg).map(cache_array) if box: diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index cf2f2f56e55cb..1772c0c098b5d 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -372,10 +372,9 @@ def test_to_datetime_tz_psycopg2(self, cache): dtype='datetime64[ns, UTC]') tm.assert_index_equal(result, expected) - bool_skip = pytest.mark.skipif(True, reason="GH 18111") - - @pytest.mark.parametrize('cache', [pytest.param(True, marks=bool_skip), - False]) + @pytest.mark.parametrize('cache', + [pytest.param(True, marks=pytest.mark.skipif(True, reason="GH 18111")), + False]) def test_datetime_bool(self, cache): # GH13176 with pytest.raises(TypeError): From 85a1f2da24eff47358ec90f8ee5fc803639d0933 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 8 Nov 2017 22:58:35 -0800 Subject: [PATCH 23/25] Lint fix --- pandas/tests/indexes/datetimes/test_tools.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 1772c0c098b5d..abaef39cb58ab 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -372,9 +372,11 @@ def test_to_datetime_tz_psycopg2(self, cache): dtype='datetime64[ns, UTC]') tm.assert_index_equal(result, expected) - @pytest.mark.parametrize('cache', - [pytest.param(True, marks=pytest.mark.skipif(True, reason="GH 18111")), - False]) + @pytest.mark.parametrize( + 'cache', + [pytest.param(True, + marks=pytest.mark.skipif(True, reason="GH 18111")), + False]) def test_datetime_bool(self, cache): # GH13176 with pytest.raises(TypeError): From 49f5850148ea5f0904bb2817e2dbc0eca99d3516 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 9 Nov 2017 20:07:41 -0800 Subject: [PATCH 24/25] Move docs and adjust test --- pandas/core/tools/datetimes.py | 55 ++++++++++++-------- pandas/tests/indexes/datetimes/test_tools.py | 3 +- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 95e0c8b552ee1..029b166307936 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -36,21 +36,26 @@ def _guess_datetime_format_for_array(arr, **kwargs): return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs) 
-def _maybe_cache(arg, format, cache, tz, _convert_listlike): +def _maybe_cache(arg, format, cache, tz, convert_listlike): """ Create a cache of unique dates from an array of dates Parameters ---------- - arg : integer, float, string, datetime, list, tuple, 1-d array of dates - format : string, strftime to parse time - cache: boolean, whether to convert with cache - tz: string, timezone of the dates - _convert_listlike: function, conversion function to apply on dates + arg : integer, float, string, datetime, list, tuple, 1-d array, Series + format : string + Strftime format to parse time + cache : boolean + True attempts to create a cache of converted values + tz : string + Timezone of the dates + convert_listlike : function + Conversion function to apply on dates Returns ------- - cache_array: Series, cache of converted, unique dates, can be empty + cache_array : Series + Cache of converted, unique dates. Can be empty """ from pandas import Series cache_array = Series() @@ -59,37 +64,43 @@ def _maybe_cache(arg, format, cache, tz, _convert_listlike): from pandas import Index if not Index(arg).is_unique: unique_dates = algorithms.unique(arg) - cache_dates = _convert_listlike(unique_dates, True, format, - tz=tz) + cache_dates = convert_listlike(unique_dates, True, format, tz=tz) cache_array = Series(cache_dates, index=unique_dates) return cache_array -def _convert_and_box_cache(arg, cache_array, box, errors, tz, name=None): +def _convert_and_box_cache(arg, cache_array, box, errors, name=None): """ Convert array of dates with a cache and box the result Parameters ---------- - arg : integer, float, string, datetime, list, tuple, 1-d array of dates - cache_array: Series, cache of converted, unique dates - box: boolean, True boxes result as an Index-like - errors: string, 'ignore' plus box=True will convert result to Index - tz: string, timezone of the dates - name: string, default None. 
name for a DatetimeIndex + arg : integer, float, string, datetime, list, tuple, 1-d array, Series + cache_array : Series + Cache of converted, unique dates + box : boolean + True boxes result as an Index-like, False returns an ndarray + errors : string + 'ignore' plus box=True will convert result to Index + name : string, default None + Name for a DatetimeIndex Returns ------- - result: Index-like if box=True else array-like of converted dates + result : datetime of converted dates + Returns: + + - Index-like if box=True + - ndarray if box=False """ from pandas import Series, DatetimeIndex, Index result = Series(arg).map(cache_array) if box: if errors == 'ignore': - result = Index(result) + return Index(result) else: - result = DatetimeIndex(result, tz=tz, name=name) - return result + return DatetimeIndex(result, name=name) + return result.values def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, @@ -443,14 +454,14 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): elif isinstance(arg, ABCIndexClass): cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) if not cache_array.empty: - result = _convert_and_box_cache(arg, cache_array, box, errors, tz, + result = _convert_and_box_cache(arg, cache_array, box, errors, name=arg.name) else: result = _convert_listlike(arg, box, format, name=arg.name) elif is_list_like(arg): cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) if not cache_array.empty: - result = _convert_and_box_cache(arg, cache_array, box, errors, tz) + result = _convert_and_box_cache(arg, cache_array, box, errors) else: result = _convert_listlike(arg, box, format) else: diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index abaef39cb58ab..307184cb34e27 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -414,7 +414,8 @@ def test_to_datetime_cache(self, utc, format, box, constructor): date = '20130101 00:00:00' test_dates = [date] * 10**5 data = constructor(test_dates) - result = pd.to_datetime(data, utc=utc, format=format, box=box) + result = pd.to_datetime(data, utc=utc, format=format, box=box, + cache=True) expected = pd.to_datetime(data, utc=utc, format=format, box=box, cache=False) if box: From 07fa22d12302252607cbe2edc3bc31f3a144515e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 10 Nov 2017 18:50:21 -0800 Subject: [PATCH 25/25] Lint --- pandas/core/tools/datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 029b166307936..19f7e459d0725 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -87,10 +87,10 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None): Returns ------- - result : datetime of converted dates + result : datetime of converted dates Returns: - - Index-like if box=True + - Index-like if box=True - ndarray if box=False """ from pandas import Series, DatetimeIndex, Index
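Taken together, the series leaves to_datetime with an opt-in cache keyword. A usage sketch mirroring the test_to_datetime_cache test added earlier, runnable against a pandas build that includes this branch; the invariant the tests assert is that cached and uncached results are identical, with the cached path only changing the speed:

    import pandas as pd
    import pandas.util.testing as tm

    # 10**5 copies of one timestamp string: with cache=True the string is
    # parsed once and the parsed value is reused for every duplicate.
    data = pd.Series(['20130101 00:00:00'] * 10**5)
    result = pd.to_datetime(data, format='%Y%m%d %H:%M:%S', cache=True)
    expected = pd.to_datetime(data, format='%Y%m%d %H:%M:%S', cache=False)
    tm.assert_series_equal(result, expected)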