googleapis · tswast · Nov 10, 2021 · Sep 9, 2021 · Sep 9, 2021 · Sep 9, 2021
diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst
@@ -95,3 +95,94 @@ and load it into a new table:
    :dedent: 4
    :start-after: [START bigquery_load_table_dataframe]
    :end-before: [END bigquery_load_table_dataframe]
+
+Pandas date and time arrays used for BigQuery DATE and TIME columns
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When BigQuery DATE [#date]_ and TIME data are loaded into Pandas,
+BigQuery-supplied date and time series are used.
+
+Date and time series support comparison and the basic statistics `min`,
+`max`, and `median`.
+
+Date series
+-----------
+
+Date series are created when loading BigQuery DATE data [#date]_, but
+they can also be created directly from `datetime.date` data or date strings:
+
+.. literalinclude:: ../samples/snippets/pandas_date_and_time.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_bqdate_create]
+   :end-before: [END bigquery_bqdate_create]
+
+The data type name for BigQuery-supplied date series is `bqdate`.  You
+need to import `db_dtypes` so that this data type is registered
+with pandas.
+
+You can convert date series to date-time series using `astype("datetime64")`:
+
+.. literalinclude:: ../samples/snippets/pandas_date_and_time.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_bqdate_as_datetime]
+   :end-before: [END bigquery_bqdate_as_datetime]
+
+You can subtract one date series from another to get a `timedelta64` series:
+
+.. literalinclude:: ../samples/snippets/pandas_date_and_time.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_bqdate_sub]
+   :end-before: [END bigquery_bqdate_sub]
+
+You can also add and subtract Pandas date offsets, either as scalars or as arrays:
+
+.. literalinclude:: ../samples/snippets/pandas_date_and_time.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_bqdate_do]
+   :end-before: [END bigquery_bqdate_do]
+
+Time series
+-----------
+
+Time series are created when loading BigQuery TIME data, but
+they can also be created directly from `datetime.time` data or time strings:
+
+.. literalinclude:: ../samples/snippets/pandas_date_and_time.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_bqtime_create]
+   :end-before: [END bigquery_bqtime_create]
+
+The data type name for BigQuery-supplied time series is `bqtime`.
+
+You can convert time series to time-delta series using `astype("timedelta64")`:
+
+.. literalinclude:: ../samples/snippets/pandas_date_and_time.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_bqtime_as_timedelta]
+   :end-before: [END bigquery_bqtime_as_timedelta]
+
+This lets you combine dates and times to create date-time data:
+
+.. literalinclude:: ../samples/snippets/pandas_date_and_time.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_combine_bqdate_bqtime]
+   :end-before: [END bigquery_combine_bqdate_bqtime]
+
+You can also add date series and time series directly, without converting first:
+
+.. literalinclude:: ../samples/snippets/pandas_date_and_time.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_combine2_bqdate_bqtime]
+   :end-before: [END bigquery_combine2_bqdate_bqtime]
+
+.. [#date] Dates outside the range of nanosecond-precision timestamps
+           (roughly years 1678 to 2262) can't be represented using
+           BigQuery-supplied date series and are loaded as object series.
diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
@@ -25,9 +25,12 @@
     import pandas
 except ImportError:  # pragma: NO COVER
     pandas = None
+    date_dtype_name = time_dtype_name = ""  # Use '' rather than None because pytype
 else:
     import numpy
 
+    from db_dtypes import date_dtype_name, time_dtype_name
+
 import pyarrow
 import pyarrow.parquet
 
@@ -85,6 +88,8 @@ def _to_wkb(v):
     "FLOAT64": "float64",
     "INT64": "Int64",
     "INTEGER": "Int64",
+    "DATE": date_dtype_name,
+    "TIME": time_dtype_name,
 }
 _PANDAS_DTYPE_TO_BQ = {
     "bool": "BOOLEAN",
@@ -102,6 +107,8 @@ def _to_wkb(v):
     "uint16": "INTEGER",
     "uint32": "INTEGER",
     "geometry": "GEOGRAPHY",
+    date_dtype_name: "DATE",
+    time_dtype_name: "TIME",
 }
 
 

diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py
@@ -1481,7 +1481,6 @@ def to_dataframe(
         dtypes: Dict[str, Any] = None,
         progress_bar_type: str = None,
         create_bqstorage_client: bool = True,
-        date_as_object: bool = True,
         max_results: Optional[int] = None,
         geography_as_object: bool = False,
     ) -> "pandas.DataFrame":
@@ -1524,12 +1523,6 @@ def to_dataframe(
 
                 .. versionadded:: 1.24.0
 
-            date_as_object (Optional[bool]):
-                If ``True`` (default), cast dates to objects. If ``False``, convert
-                to datetime64[ns] dtype.
-
-                .. versionadded:: 1.26.0
-
             max_results (Optional[int]):
                 Maximum number of rows to include in the result. No limit by default.
 
@@ -1563,7 +1556,6 @@ def to_dataframe(
             dtypes=dtypes,
             progress_bar_type=progress_bar_type,
             create_bqstorage_client=create_bqstorage_client,
-            date_as_object=date_as_object,
             geography_as_object=geography_as_object,
         )
 
@@ -1576,7 +1568,6 @@ def to_geodataframe(
         dtypes: Dict[str, Any] = None,
         progress_bar_type: str = None,
         create_bqstorage_client: bool = True,
-        date_as_object: bool = True,
         max_results: Optional[int] = None,
         geography_column: Optional[str] = None,
     ) -> "geopandas.GeoDataFrame":
@@ -1619,12 +1610,6 @@ def to_geodataframe(
 
                 .. versionadded:: 1.24.0
 
-            date_as_object (Optional[bool]):
-                If ``True`` (default), cast dates to objects. If ``False``, convert
-                to datetime64[ns] dtype.
-
-                .. versionadded:: 1.26.0
-
             max_results (Optional[int]):
                 Maximum number of rows to include in the result. No limit by default.
 
@@ -1657,7 +1642,6 @@ def to_geodataframe(
             dtypes=dtypes,
             progress_bar_type=progress_bar_type,
             create_bqstorage_client=create_bqstorage_client,
-            date_as_object=date_as_object,
             geography_column=geography_column,
         )
 

diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py
@@ -28,6 +28,8 @@
     import pandas
 except ImportError:  # pragma: NO COVER
     pandas = None
+else:
+    from db_dtypes import DateArray, date_dtype_name
 
 import pyarrow
 
@@ -1872,7 +1874,6 @@ def to_dataframe(
         dtypes: Dict[str, Any] = None,
         progress_bar_type: str = None,
         create_bqstorage_client: bool = True,
-        date_as_object: bool = True,
         geography_as_object: bool = False,
     ) -> "pandas.DataFrame":
         """Create a pandas DataFrame by loading all pages of a query.
@@ -1922,12 +1923,6 @@ def to_dataframe(
 
                 .. versionadded:: 1.24.0
 
-            date_as_object (Optional[bool]):
-                If ``True`` (default), cast dates to objects. If ``False``, convert
-                to datetime64[ns] dtype.
-
-                .. versionadded:: 1.26.0
-
             geography_as_object (Optional[bool]):
                 If ``True``, convert GEOGRAPHY data to :mod:`shapely`
                 geometry objects. If ``False`` (default), don't cast
@@ -1973,36 +1968,43 @@ def to_dataframe(
             self.schema
         )
 
+        # When converting date or timestamp values to nanosecond precision, the result
+        # can be out of pyarrow bounds. To avoid the error when converting to
+        # Pandas, we set the date_as_object or timestamp_as_object parameter to True,
+        # if necessary.
+        date_as_object = not all(
+            self.__can_cast_timestamp_ns(col)
+            for col in record_batch
+            if str(col.type).startswith("date")
+        )
+        if date_as_object:
+            default_dtypes = {
+                name: type_
+                for name, type_ in default_dtypes.items()
+                if type_ != _pandas_helpers.date_dtype_name
+            }
+
         # Let the user-defined dtypes override the default ones.
         # https://stackoverflow.com/a/26853961/101923
         dtypes = {**default_dtypes, **dtypes}
 
-        # When converting timestamp values to nanosecond precision, the result
-        # can be out of pyarrow bounds. To avoid the error when converting to
-        # Pandas, we set the timestamp_as_object parameter to True, if necessary.
-        types_to_check = {
-            pyarrow.timestamp("us"),
-            pyarrow.timestamp("us", tz=datetime.timezone.utc),
-        }
-
-        for column in record_batch:
-            if column.type in types_to_check:
-                try:
-                    column.cast("timestamp[ns]")
-                except pyarrow.lib.ArrowInvalid:
-                    timestamp_as_object = True
-                    break
-        else:
-            timestamp_as_object = False
-
-        extra_kwargs = {"timestamp_as_object": timestamp_as_object}
+        timestamp_as_object = not all(
+            self.__can_cast_timestamp_ns(col)
+            for col in record_batch
+            if str(col.type).startswith("timestamp")
+        )
 
         df = record_batch.to_pandas(
-            date_as_object=date_as_object, integer_object_nulls=True, **extra_kwargs
+            date_as_object=date_as_object,
+            timestamp_as_object=timestamp_as_object,
+            integer_object_nulls=True,
         )
 
         for column in dtypes:
-            df[column] = pandas.Series(df[column], dtype=dtypes[column])
+            data = df[column]
+            if dtypes[column] == date_dtype_name:
+                data = DateArray(data.to_numpy(copy=False), copy=False)
+            df[column] = pandas.Series(data, dtype=dtypes[column], copy=False)
 
         if geography_as_object:
             for field in self.schema:
@@ -2011,6 +2013,15 @@ def to_dataframe(
 
         return df
 
+    @staticmethod
+    def __can_cast_timestamp_ns(column):  # True if column casts to timestamp[ns]
+        try:
+            column.cast("timestamp[ns]")  # result unused; only success matters
+        except pyarrow.lib.ArrowInvalid:  # cast rejected (e.g. out of ns bounds)
+            return False
+        else:
+            return True
+
     # If changing the signature of this method, make sure to apply the same
     # changes to job.QueryJob.to_geodataframe()
     def to_geodataframe(
@@ -2019,7 +2030,6 @@ def to_geodataframe(
         dtypes: Dict[str, Any] = None,
         progress_bar_type: str = None,
         create_bqstorage_client: bool = True,
-        date_as_object: bool = True,
         geography_column: Optional[str] = None,
     ) -> "geopandas.GeoDataFrame":
         """Create a GeoPandas GeoDataFrame by loading all pages of a query.
@@ -2067,10 +2077,6 @@ def to_geodataframe(
 
                 This argument does nothing if ``bqstorage_client`` is supplied.
 
-            date_as_object (Optional[bool]):
-                If ``True`` (default), cast dates to objects. If ``False``, convert
-                to datetime64[ns] dtype.
-
             geography_column (Optional[str]):
                 If there are more than one GEOGRAPHY column,
                 identifies which one to use to construct a geopandas
@@ -2126,7 +2132,6 @@ def to_geodataframe(
             dtypes,
             progress_bar_type,
             create_bqstorage_client,
-            date_as_object,
             geography_as_object=True,
         )
 
@@ -2183,7 +2188,6 @@ def to_dataframe(
         dtypes=None,
         progress_bar_type=None,
         create_bqstorage_client=True,
-        date_as_object=True,
         geography_as_object=False,
     ) -> "pandas.DataFrame":
         """Create an empty dataframe.
@@ -2193,7 +2197,6 @@ def to_dataframe(
             dtypes (Any): Ignored. Added for compatibility with RowIterator.
             progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
             create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
-            date_as_object (bool): Ignored. Added for compatibility with RowIterator.
 
         Returns:
             pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
@@ -2208,7 +2211,6 @@ def to_geodataframe(
         dtypes=None,
         progress_bar_type=None,
         create_bqstorage_client=True,
-        date_as_object=True,
         geography_column: Optional[str] = None,
     ) -> "pandas.DataFrame":
         """Create an empty dataframe.
@@ -2218,7 +2220,6 @@ def to_geodataframe(
             dtypes (Any): Ignored. Added for compatibility with RowIterator.
             progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
             create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
-            date_as_object (bool): Ignored. Added for compatibility with RowIterator.
 
         Returns:
             pandas.DataFrame: An empty :class:`~pandas.DataFrame`.