From 0626fb20966fe445e4bd4c4c579343b5f865e586 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sun, 19 Nov 2023 10:25:10 -0500 Subject: [PATCH] fix(backends): ensure that returned date results are actually proper date values BREAKING CHANGE: Columns with Ibis `date` types are now returned as object dtype containing `datetime.date` objects when executing with the pandas backend. --- ci/make_geography_db.py | 5 +- ibis/backends/base/sql/registry/literal.py | 2 +- ibis/backends/clickhouse/compiler/values.py | 14 +- ibis/backends/flink/registry.py | 2 +- ibis/backends/flink/utils.py | 31 ++-- .../pandas/tests/execution/test_cast.py | 2 +- .../pandas/tests/execution/test_temporal.py | 4 +- ibis/backends/polars/__init__.py | 2 +- ibis/backends/polars/tests/conftest.py | 4 +- ibis/backends/polars/tests/test_udf.py | 2 +- ibis/backends/pyspark/__init__.py | 5 +- ibis/backends/sqlite/tests/test_types.py | 6 +- ibis/backends/tests/test_generic.py | 2 +- ibis/backends/tests/test_map.py | 5 - ibis/backends/tests/test_param.py | 5 +- ibis/backends/tests/test_temporal.py | 132 +++++++++++++----- ibis/backends/tests/test_uuid.py | 2 +- ibis/expr/api.py | 10 +- ibis/expr/types/generic.py | 4 +- ibis/formats/numpy.py | 3 +- ibis/formats/pandas.py | 21 ++- ibis/formats/tests/test_numpy.py | 9 +- ibis/formats/tests/test_pandas.py | 2 +- 23 files changed, 179 insertions(+), 95 deletions(-) diff --git a/ci/make_geography_db.py b/ci/make_geography_db.py index 6c7a3bdba4ca..e98551b143a8 100755 --- a/ci/make_geography_db.py +++ b/ci/make_geography_db.py @@ -56,10 +56,7 @@ "independence": lambda row: toolz.assoc( row, "independence_date", - datetime.datetime.strptime( - row["independence_date"], - "%Y-%m-%d", - ).date(), + datetime.datetime.fromisoformat(row["independence_date"]).date(), ) } diff --git a/ibis/backends/base/sql/registry/literal.py b/ibis/backends/base/sql/registry/literal.py index 10bad209cdf2..b31aec6fac45 100644 --- a/ibis/backends/base/sql/registry/literal.py +++ b/ibis/backends/base/sql/registry/literal.py @@ -62,7 +62,7 @@ def _interval_literal_format(translator, op): def _date_literal_format(translator, op): value = op.value if isinstance(value, datetime.date): - value = value.strftime("%Y-%m-%d") + value = value.isoformat() return repr(value) diff --git a/ibis/backends/clickhouse/compiler/values.py b/ibis/backends/clickhouse/compiler/values.py index bc10a2e82266..ea0d77cf1d39 100644 --- a/ibis/backends/clickhouse/compiler/values.py +++ b/ibis/backends/clickhouse/compiler/values.py @@ -346,18 +346,14 @@ def _literal(op, *, value, dtype, **kw): return interval(value, unit=dtype.resolution.upper()) elif dtype.is_timestamp(): - funcname = "makeDateTime" + funcname = "parseDateTime" + if micros := value.microsecond: funcname += "64" - args = [ - value.year, - value.month, - value.day, - value.hour, - value.minute, - value.second, - ] + funcname += "BestEffort" + + args = [value.isoformat()] if micros % 1000: args.append(micros) diff --git a/ibis/backends/flink/registry.py b/ibis/backends/flink/registry.py index 97fca3ff65c0..aa15829d5509 100644 --- a/ibis/backends/flink/registry.py +++ b/ibis/backends/flink/registry.py @@ -66,7 +66,7 @@ def _cast(translator: ExprTranslator, op: ops.generic.Cast) -> str: arg_translated = f"FROM_UNIXTIME({arg_translated})" if to.timezone: - return f"TO_TIMESTAMP(CONVERT_TZ(CAST({arg_translated} AS STRING), 'UTC+0', '{to.timezone}'))" + return f"TO_TIMESTAMP(CONVERT_TZ(CAST({arg_translated} AS STRING), 'UTC', 
{to.timezone!r}))" else: return f"TO_TIMESTAMP({arg_translated})" elif to.is_date(): diff --git a/ibis/backends/flink/utils.py b/ibis/backends/flink/utils.py index c27bdbf6737e..6f828efbbeaa 100644 --- a/ibis/backends/flink/utils.py +++ b/ibis/backends/flink/utils.py @@ -8,7 +8,7 @@ import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.backends.flink.datatypes import FlinkType -from ibis.common.temporal import IntervalUnit +from ibis.common.temporal import IntervalUnit, normalize_timezone from ibis.util import convert_unit # For details on what precisions Flink SQL interval types support, see @@ -264,7 +264,7 @@ def translate_literal(op: ops.Literal) -> str: return f"x'{value.hex()}'" elif dtype.is_date(): if isinstance(value, datetime.date): - value = value.strftime("%Y-%m-%d") + value = value.isoformat() return repr(value) elif dtype.is_numeric(): if math.isnan(value): @@ -285,15 +285,24 @@ def translate_literal(op: ops.Literal) -> str: return f"CAST({value} AS {FlinkType.from_ibis(dtype)!s})" elif dtype.is_timestamp(): # TODO(chloeh13q): support timestamp with local timezone - if isinstance(value, datetime.datetime): - fmt = "%Y-%m-%d %H:%M:%S" - # datetime.datetime only supports resolution up to microseconds, even - # though Flink supports fractional precision up to 9 digits. We will - # need to use numpy or pandas datetime types for higher resolutions. - if value.microsecond: - fmt += ".%f" - return "TIMESTAMP " + repr(value.strftime(fmt)) - raise NotImplementedError(f"No translation rule for timestamp {value}") + assert isinstance(value, datetime.datetime) + # datetime.datetime only supports resolution up to microseconds, even + # though Flink supports fractional precision up to 9 digits. We will + # need to use numpy or pandas datetime types for higher resolutions. 
+ # + if dtype.timezone is not None: + value = value.astimezone(normalize_timezone("UTC")) + + # remove timezone information without altering the ISO output + # except for removing the UTC offset + # + # format to ISO 8601 without the T character + value = value.replace(tzinfo=None).isoformat(sep=" ") + + if (tz := dtype.timezone) is not None: + return f"TO_TIMESTAMP(CONVERT_TZ({value!r}, 'UTC', {tz!r}))" + else: + return f"TIMESTAMP {value!r}" elif dtype.is_time(): return f"TIME '{value}'" elif dtype.is_interval(): diff --git a/ibis/backends/pandas/tests/execution/test_cast.py b/ibis/backends/pandas/tests/execution/test_cast.py index 36fb67869a69..bc2d8a60f974 100644 --- a/ibis/backends/pandas/tests/execution/test_cast.py +++ b/ibis/backends/pandas/tests/execution/test_cast.py @@ -154,7 +154,7 @@ def test_timestamp_with_timezone_is_inferred_correctly(t, df): def test_cast_date(t, df, column): expr = t[column].cast("date") result = expr.execute() - expected = df[column].dt.normalize().dt.tz_localize(None) + expected = df[column].dt.normalize().dt.tz_localize(None).dt.date tm.assert_series_equal(result, expected) diff --git a/ibis/backends/pandas/tests/execution/test_temporal.py b/ibis/backends/pandas/tests/execution/test_temporal.py index 9a3a5f327d5a..cd9a1e98384b 100644 --- a/ibis/backends/pandas/tests/execution/test_temporal.py +++ b/ibis/backends/pandas/tests/execution/test_temporal.py @@ -66,7 +66,7 @@ def test_timestamp_functions(case_func, expected_func): def test_cast_datetime_strings_to_date(t, df, column): expr = t[column].cast("date") result = expr.execute() - expected = pd.to_datetime(df[column]).dt.normalize().dt.tz_localize(None) + expected = pd.to_datetime(df[column]).dt.normalize().dt.tz_localize(None).dt.date tm.assert_series_equal(result, expected) @@ -103,7 +103,7 @@ def test_cast_integer_to_date(t, df): expr = t.plain_int64.cast("date") result = expr.execute() expected = pd.Series( - pd.to_datetime(df.plain_int64.values, unit="D").values, + pd.to_datetime(df.plain_int64.values, unit="D").date, index=df.index, name="plain_int64", ) diff --git a/ibis/backends/polars/__init__.py b/ibis/backends/polars/__init__.py index 091bd3c29ad7..15b2fcca6120 100644 --- a/ibis/backends/polars/__init__.py +++ b/ibis/backends/polars/__init__.py @@ -420,7 +420,7 @@ def execute( else: assert isinstance(expr, ir.Column), type(expr) if expr.type().is_temporal(): - return df.to_pandas().iloc[:, 0] + return expr.__pandas_result__(df.to_pandas()) else: # note: skip frame-construction overhead return df.to_series().to_pandas() diff --git a/ibis/backends/polars/tests/conftest.py b/ibis/backends/polars/tests/conftest.py index a337ea16fbac..68ad32342853 100644 --- a/ibis/backends/polars/tests/conftest.py +++ b/ibis/backends/polars/tests/conftest.py @@ -33,10 +33,10 @@ def connect(*, tmpdir, worker_id, **kw): @classmethod def assert_series_equal(cls, left, right, *args, **kwargs) -> None: - check_dtype = not ( + check_dtype = kwargs.pop("check_dtype", True) and not ( issubclass(left.dtype.type, np.timedelta64) and issubclass(right.dtype.type, np.timedelta64) - ) and kwargs.pop("check_dtype", True) + ) return super().assert_series_equal( left, right, *args, **kwargs, check_dtype=check_dtype ) diff --git a/ibis/backends/polars/tests/test_udf.py b/ibis/backends/polars/tests/test_udf.py index 9d9aa696ff78..49580b8b28fe 100644 --- a/ibis/backends/polars/tests/test_udf.py +++ b/ibis/backends/polars/tests/test_udf.py @@ -46,7 +46,7 @@ def test_multiple_argument_udf(alltypes): result = expr.execute() df = 
alltypes[["smallint_col", "int_col"]].execute() - expected = (df.smallint_col + df.int_col).astype("int32") + expected = df.smallint_col + df.int_col tm.assert_series_equal(result, expected.rename("tmp")) diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index 3f00b64675b8..755ddbe3d653 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -232,10 +232,7 @@ def execute(self, expr: ir.Expr, **kwargs: Any) -> Any: table_expr = expr.as_table() df = self.compile(table_expr, **kwargs).toPandas() - # TODO: remove the extra conversion - return expr.__pandas_result__( - PySparkPandasData.convert_table(df, table_expr.schema()) - ) + return expr.__pandas_result__(df) def _fully_qualified_name(self, name, database): if is_fully_qualified(name): diff --git a/ibis/backends/sqlite/tests/test_types.py b/ibis/backends/sqlite/tests/test_types.py index 8d2fbec6a834..dc2978332c38 100644 --- a/ibis/backends/sqlite/tests/test_types.py +++ b/ibis/backends/sqlite/tests/test_types.py @@ -1,6 +1,7 @@ from __future__ import annotations import sqlite3 +from datetime import date import pandas as pd import pytest @@ -86,10 +87,7 @@ def test_type_map(db): assert t.schema() == expected_schema res = t.filter(t.str_col == "a").execute() sol = pd.DataFrame( - { - "str_col": ["a"], - "date_col": pd.Series(["2022-01-01"], dtype="M8[ns]"), - } + {"str_col": ["a"], "date_col": pd.Series([date(2022, 1, 1)], dtype="object")} ) assert res.equals(sol) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 5c2b40652128..9423ff2a7db7 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -817,7 +817,7 @@ def test_int_column(alltypes): assert result.dtype == np.int8 -@pytest.mark.notimpl(["druid", "oracle", "exasol"]) +@pytest.mark.notimpl(["druid", "oracle"]) @pytest.mark.never( ["bigquery", "sqlite", "snowflake"], reason="backend only implements int64" ) diff --git a/ibis/backends/tests/test_map.py b/ibis/backends/tests/test_map.py index c25ce14d8cfc..19ec3e71fa21 100644 --- a/ibis/backends/tests/test_map.py +++ b/ibis/backends/tests/test_map.py @@ -226,11 +226,6 @@ def test_literal_map_get_broadcast(backend, alltypes, df): param(["a", "b"], ["1", "2"], id="int"), ], ) -@pytest.mark.notyet( - ["flink"], - raises=AssertionError, - reason="got list of tuples instead; requires PyFlink compatibility with PyArrow 13", -) def test_map_construct_dict(con, keys, values): expr = ibis.map(keys, values) result = con.execute(expr.name("tmp")) diff --git a/ibis/backends/tests/test_param.py b/ibis/backends/tests/test_param.py index 61b6c7ea17e7..7a15de8e0cff 100644 --- a/ibis/backends/tests/test_param.py +++ b/ibis/backends/tests/test_param.py @@ -225,7 +225,10 @@ def test_scalar_param_date(backend, alltypes, value): ) df = base.execute() expected = ( - df.loc[df.date_col.dt.normalize() == pd.Timestamp(value).normalize()] + df.loc[ + pd.to_datetime(df.date_col).dt.normalize().dt.date + == pd.Timestamp(value).normalize().date() + ] .sort_values("id") .reset_index(drop=True) .drop(columns=["date_col"]) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index f4fd52582b6c..4eed80ce057d 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -43,11 +43,14 @@ ArrowInvalid = None try: + from clickhouse_connect.driver.exceptions import ( + DatabaseError as ClickhouseDatabaseError, + ) from clickhouse_connect.driver.exceptions import 
( InternalError as ClickhouseOperationalError, ) except ImportError: - ClickhouseOperationalError = None + ClickhouseOperationalError = ClickhouseDatabaseError = None try: from impala.error import ( @@ -64,12 +67,16 @@ except ImportError: Py4JJavaError = None - try: from pyexasol.exceptions import ExaQueryError except ImportError: ExaQueryError = None +try: + from pyspark.sql.utils import IllegalArgumentException +except ImportError: + IllegalArgumentException = None + @pytest.mark.parametrize("attr", ["year", "month", "day"]) @pytest.mark.parametrize( @@ -707,9 +714,9 @@ def test_date_truncate(backend, alltypes, df, unit): unit = PANDAS_UNITS.get(unit, unit) try: - expected = df.timestamp_col.dt.floor(unit) + expected = df.timestamp_col.dt.floor(unit).dt.date except ValueError: - expected = df.timestamp_col.dt.to_period(unit).dt.to_timestamp() + expected = df.timestamp_col.dt.to_period(unit).dt.to_timestamp().dt.date result = expr.execute() expected = backend.default_series_rename(expected) @@ -1017,10 +1024,14 @@ def convert_to_offset(x): warnings.simplefilter( "ignore", category=(UserWarning, pd.errors.PerformanceWarning) ) - expected = pd.to_datetime(df.date_string_col) + offset + expected = ( + pd.to_datetime(df.date_string_col) + .add(offset) + .map(lambda ts: ts.normalize().date(), na_action="ignore") + ) expected = backend.default_series_rename(expected) - backend.assert_series_equal(result, expected.map(lambda ts: ts.normalize())) + backend.assert_series_equal(result, expected) date_value = pd.Timestamp("2017-12-31") @@ -1119,7 +1130,12 @@ def convert_to_offset(x): ), param( lambda t, _: t.timestamp_col.date() + ibis.interval(days=4), - lambda t, _: t.timestamp_col.dt.floor("d") + pd.Timedelta(days=4), + lambda t, _: ( + t.timestamp_col.dt.floor("d") + .add(pd.Timedelta(days=4)) + .dt.normalize() + .dt.date + ), id="date-add-interval", marks=[ pytest.mark.notimpl( @@ -1131,7 +1147,12 @@ def convert_to_offset(x): ), param( lambda t, _: t.timestamp_col.date() - ibis.interval(days=14), - lambda t, _: t.timestamp_col.dt.floor("d") - pd.Timedelta(days=14), + lambda t, _: ( + t.timestamp_col.dt.floor("d") + .sub(pd.Timedelta(days=14)) + .dt.normalize() + .dt.date + ), id="date-subtract-interval", marks=[ pytest.mark.notimpl( @@ -1229,7 +1250,9 @@ def test_temporal_binop(backend, con, alltypes, df, expr_fn, expected_fn): result = con.execute(expr) expected = backend.default_series_rename(expected) - backend.assert_series_equal(result, expected.astype(result.dtype)) + backend.assert_series_equal( + result, expected.astype(result.dtype), check_dtype=False + ) plus = lambda t, td: t.timestamp_col + pd.Timedelta(td) @@ -1624,6 +1647,7 @@ def test_interval_add_cast_column(backend, alltypes, df): .dt.normalize() .add(df.bigint_col.astype("timedelta64[D]")) .rename("tmp") + .dt.date ) backend.assert_series_equal(result, expected.astype(result.dtype)) @@ -2200,11 +2224,6 @@ def test_timestamp_literal(con, backend): ", , , )" ), ) -@pytest.mark.notimpl( - ["flink"], - "https://github.com/ibis-project/ibis/pull/6920/files#r1372453059", - raises=AssertionError, -) @pytest.mark.notimpl(["exasol"], raises=ExaQueryError) def test_timestamp_with_timezone_literal(con, timezone, expected): expr = ibis.timestamp(2022, 2, 4, 16, 20, 0).cast(dt.Timestamp(timezone=timezone)) @@ -2511,7 +2530,7 @@ def test_date_column_from_iso(backend, con, alltypes, df): result = con.execute(expr.name("tmp")) golden = df.year.astype(str) + "-" + df.month.astype(str).str.rjust(2, "0") + "-13" - actual = 
result.dt.strftime("%Y-%m-%d") + actual = result.map(datetime.date.isoformat) backend.assert_series_equal(golden.rename("tmp"), actual.rename("tmp")) @@ -2976,7 +2995,7 @@ def test_timestamp_bucket_offset(backend, offset_mins): backend.assert_series_equal(res, sol) -_NO_SQLGLOT_DIALECT = {"pandas", "dask", "druid", "flink", "datafusion", "polars"} +_NO_SQLGLOT_DIALECT = ("pandas", "dask", "druid", "flink", "datafusion", "polars") no_sqlglot_dialect = sorted( param(backend, marks=pytest.mark.xfail) for backend in _NO_SQLGLOT_DIALECT ) @@ -3002,6 +3021,11 @@ def test_temporal_literal_sql(value, dialect, snapshot): snapshot.assert_match(sql, "out.sql") +no_time_type = pytest.mark.xfail( + raises=NotImplementedError, reason="no time type support" +) + + @pytest.mark.parametrize( "dialect", [ @@ -3011,24 +3035,9 @@ def test_temporal_literal_sql(value, dialect, snapshot): ), *no_sqlglot_dialect, *[ - param( - "impala", - marks=pytest.mark.xfail( - raises=NotImplementedError, reason="no time type support" - ), - ), - param( - "clickhouse", - marks=pytest.mark.xfail( - raises=NotImplementedError, reason="no time type support" - ), - ), - param( - "oracle", - marks=pytest.mark.xfail( - raises=NotImplementedError, reason="no time type support" - ), - ), + param("impala", marks=no_time_type), + param("clickhouse", marks=no_time_type), + param("oracle", marks=no_time_type), ], ], ) @@ -3038,3 +3047,58 @@ def test_time_literal_sql(dialect, snapshot, micros): expr = ibis.literal(value) sql = ibis.to_sql(expr, dialect=dialect) snapshot.assert_match(sql, "out.sql") + + +@pytest.mark.notimpl(["druid"], raises=sa.exc.CompileError, reason="no date support") +@pytest.mark.parametrize( + "value", + [ + param("2017-12-31", id="simple"), + param( + "9999-01-02", + marks=[ + pytest.mark.broken( + ["clickhouse"], + raises=AssertionError, + reason="clickhouse doesn't support dates after 2149-06-06", + ), + pytest.mark.notyet(["datafusion"], raises=Exception), + ], + id="large", + ), + param( + "0001-07-17", + id="small", + marks=[ + pytest.mark.broken( + ["clickhouse"], + raises=AssertionError, + reason="clickhouse doesn't support dates before the UNIX epoch", + ), + pytest.mark.notyet(["datafusion"], raises=Exception), + pytest.mark.notyet(["pyspark"], raises=IllegalArgumentException), + ], + ), + param( + "2150-01-01", + marks=pytest.mark.broken(["clickhouse"], raises=AssertionError), + id="medium", + ), + ], +) +@pytest.mark.parametrize( + "func", + [ + param(lambda x: x, id="identity"), + param(datetime.date.fromisoformat, id="fromstring"), + ], +) +def test_date_scalar(con, value, func): + expr = ibis.date(func(value)).name("tmp") + + result = con.execute(expr) + + assert not isinstance(result, datetime.datetime) + assert isinstance(result, datetime.date) + + assert result == datetime.date.fromisoformat(value) diff --git a/ibis/backends/tests/test_uuid.py b/ibis/backends/tests/test_uuid.py index 8a7ad0695bcb..eac109b68a89 100644 --- a/ibis/backends/tests/test_uuid.py +++ b/ibis/backends/tests/test_uuid.py @@ -39,7 +39,7 @@ "mssql": TEST_UUID, "dask": TEST_UUID, "oracle": TEST_UUID, - "flink": RAW_TEST_UUID, + "flink": TEST_UUID, "exasol": TEST_UUID, } diff --git a/ibis/expr/api.py b/ibis/expr/api.py index 17463734e94c..b0e9ba4704fd 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -802,19 +802,19 @@ def date(value_or_year, month=None, day=None, /): Create a date scalar from a string >>> ibis.date("2023-01-02") - Timestamp('2023-01-02 00:00:00') + datetime.date(2023, 1, 2) Create a date scalar from year, 
month, and day >>> ibis.date(2023, 1, 2) - Timestamp('2023-01-02 00:00:00') + datetime.date(2023, 1, 2) Create a date column from year, month, and day - >>> t = ibis.memtable({"y": [2001, 2002], "m": [1, 3], "d": [2, 4]}) - >>> ibis.date(t.y, t.m, t.d).name("date") + >>> t = ibis.memtable(dict(year=[2001, 2002], month=[1, 3], day=[2, 4])) + >>> ibis.date(t.year, t.month, t.day).name("my_date") ┏━━━━━━━━━━━━┓ - ┃ date ┃ + ┃ my_date ┃ ┡━━━━━━━━━━━━┩ │ date │ ├────────────┤ diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index 8f187b507289..4959141de202 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -1249,7 +1249,9 @@ def __pyarrow_result__( return data_mapper.convert_scalar(table[0][0], self.type()) def __pandas_result__(self, df: pd.DataFrame) -> Any: - return df.iat[0, 0] + from ibis.formats.pandas import PandasData + + return PandasData.convert_scalar(df, self.type()) def as_table(self) -> ir.Table: """Promote the scalar expression to a table. diff --git a/ibis/formats/numpy.py b/ibis/formats/numpy.py index 8bdc0c3c2559..dd2b96ba8917 100644 --- a/ibis/formats/numpy.py +++ b/ibis/formats/numpy.py @@ -68,8 +68,7 @@ def from_ibis(cls, dtype: dt.DataType) -> np.dtype: # return np.dtype(f"datetime64[{dtype.unit.short}]") return np.dtype("datetime64[ns]") elif dtype.is_date(): - # return np.dtype("datetime64[D]") - return np.dtype("datetime64[ns]") + return np.dtype("datetime64[D]") elif dtype.is_time(): return np.dtype("timedelta64[ns]") elif ( diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index 271b6c79da3e..e19190c8489e 100644 --- a/ibis/formats/pandas.py +++ b/ibis/formats/pandas.py @@ -1,6 +1,8 @@ from __future__ import annotations import contextlib +import datetime +import json import warnings import numpy as np @@ -133,6 +135,11 @@ def convert_column(cls, obj, dtype): assert not isinstance(result, np.ndarray), f"{convert_method} -> {type(result)}" return result + @classmethod + def convert_scalar(cls, obj, dtype): + df = PandasData.convert_table(obj, sch.Schema({obj.columns[0]: dtype})) + return df.iat[0, 0] + @classmethod def convert_GeoSpatial(cls, s, dtype, pandas_type): return s @@ -189,7 +196,19 @@ def convert_Timestamp(cls, s, dtype, pandas_type): def convert_Date(cls, s, dtype, pandas_type): if isinstance(s.dtype, pd.DatetimeTZDtype): s = s.dt.tz_convert("UTC").dt.tz_localize(None) - return s.astype(pandas_type, errors="ignore").dt.normalize() + try: + return s.astype(pandas_type).dt.date + except (TypeError, pd._libs.tslibs.OutOfBoundsDatetime): + + def try_date(v): + if isinstance(v, datetime.datetime): + return v.date() + elif isinstance(v, str): + return datetime.date.fromisoformat(v) + else: + return v + + return s.map(try_date, na_action="ignore") @classmethod def convert_Interval(cls, s, dtype, pandas_type): diff --git a/ibis/formats/tests/test_numpy.py b/ibis/formats/tests/test_numpy.py index af994b273094..be36cb22cb78 100644 --- a/ibis/formats/tests/test_numpy.py +++ b/ibis/formats/tests/test_numpy.py @@ -80,11 +80,16 @@ def test_variadic_to_numpy(ibis_type): assert NumpyType.from_ibis(ibis_type) == np.dtype("object") -@h.given(ibst.date_dtype() | ibst.timestamp_dtype()) -def test_date_to_numpy(ibis_type): +@h.given(ibst.timestamp_dtype()) +def test_timestamp_to_numpy(ibis_type): assert NumpyType.from_ibis(ibis_type) == np.dtype("datetime64[ns]") +@h.given(ibst.date_dtype()) +def test_date_to_numpy(ibis_type): + assert NumpyType.from_ibis(ibis_type) == np.dtype("datetime64[D]") + + 
@h.given(ibst.time_dtype()) def test_time_to_numpy(ibis_type): assert NumpyType.from_ibis(ibis_type) == np.dtype("timedelta64[ns]") diff --git a/ibis/formats/tests/test_pandas.py b/ibis/formats/tests/test_pandas.py index dce69fc5c66d..5bba865f0be5 100644 --- a/ibis/formats/tests/test_pandas.py +++ b/ibis/formats/tests/test_pandas.py @@ -32,7 +32,7 @@ (dt.float32, np.dtype("float32")), (dt.float64, np.dtype("float64")), (dt.boolean, np.dtype("bool")), - (dt.date, np.dtype("datetime64[ns]")), + (dt.date, np.dtype("datetime64[D]")), (dt.time, np.dtype("timedelta64[ns]")), (dt.timestamp, np.dtype("datetime64[ns]")), (dt.Interval("s"), np.dtype("timedelta64[s]")),
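
Reviewer notes (appended below the patch; all snippets are illustrative
sketches, not part of the diff):

1) ci/make_geography_db.py and the SQL literal registries replace
   strftime("%Y-%m-%d")/strptime(..., "%Y-%m-%d") with isoformat() and
   fromisoformat(). For ISO 8601 dates the two are interchangeable, which
   is what makes this a pure cleanup:

    import datetime

    s = "1990-03-21"
    assert (
        datetime.datetime.strptime(s, "%Y-%m-%d").date()
        == datetime.datetime.fromisoformat(s).date()
        == datetime.date.fromisoformat(s)
    )
    # and isoformat() is the exact inverse
    assert datetime.date.fromisoformat(s).isoformat() == s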
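
2) The ClickHouse literal change stops assembling makeDateTime(...) from
   individual date/time components and instead feeds the ISO string to
   ClickHouse's parseDateTime*BestEffort family, picking the 64-bit variant
   when sub-second precision is present. The name assembly, traced with the
   stdlib only (the sample value is made up):

    import datetime

    value = datetime.datetime(2023, 1, 2, 3, 4, 5, microsecond=123456)
    funcname = "parseDateTime"
    if value.microsecond:
        funcname += "64"  # 64-bit variant keeps fractional seconds
    funcname += "BestEffort"
    assert funcname == "parseDateTime64BestEffort"
    assert value.isoformat() == "2023-01-02T03:04:05.123456"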
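
3) The Flink timestamp literal path normalizes timezone-aware values to
   UTC, strips the tzinfo so isoformat() emits no offset, and lets
   CONVERT_TZ re-apply the zone in SQL. The same normalization with only
   the stdlib (the patch uses ibis's normalize_timezone("UTC");
   datetime.timezone.utc is assumed equivalent here, and the offset below
   is made up):

    import datetime

    est = datetime.timezone(datetime.timedelta(hours=-5))
    value = datetime.datetime(2023, 1, 2, 3, 4, 5, tzinfo=est)

    # to UTC, then drop tzinfo so no "+00:00" suffix is emitted
    utc_naive = value.astimezone(datetime.timezone.utc).replace(tzinfo=None)
    literal = utc_naive.isoformat(sep=" ")  # ISO 8601 without the T
    assert literal == "2023-01-02 08:04:05"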
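
4) The user-visible contract behind the BREAKING CHANGE note: date columns
   now come back as object-dtype Series holding datetime.date values rather
   than normalized datetime64[ns] timestamps. In plain pandas terms:

    import datetime
    import pandas as pd

    old = pd.Series(pd.to_datetime(["2023-01-02", "2023-03-04"]))
    assert old.dtype == "datetime64[ns]"  # previous result shape

    new = old.dt.date  # the shape the pandas backend now returns
    assert new.dtype == object
    assert all(isinstance(v, datetime.date) for v in new)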
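
5) convert_Date grows a slow path because datetime64[ns] only spans roughly
   1677-2262, while the new "large"/"small" test dates fall outside that
   range. A standalone sketch mirroring (not importing) the patched helper,
   using the public pd.errors alias of the out-of-bounds exception:

    import datetime
    import pandas as pd

    def to_dates(s: pd.Series) -> pd.Series:
        try:
            # fast path: vectorized cast, then take the date component
            return s.astype("datetime64[ns]").dt.date
        except (TypeError, pd.errors.OutOfBoundsDatetime):
            # element-wise fallback for values like "9999-01-02"
            def try_date(v):
                if isinstance(v, datetime.datetime):
                    return v.date()
                elif isinstance(v, str):
                    return datetime.date.fromisoformat(v)
                return v

            return s.map(try_date, na_action="ignore")

    assert to_dates(pd.Series(["2023-01-02"]))[0] == datetime.date(2023, 1, 2)
    assert to_dates(pd.Series(["9999-01-02"]))[0] == datetime.date(9999, 1, 2)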
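
6) The paired asserts in test_date_scalar are both needed:
   datetime.datetime subclasses datetime.date, so the isinstance check
   alone would not catch a backend leaking full timestamps:

    import datetime

    ts = datetime.datetime(2017, 12, 31, 12, 30)
    assert isinstance(ts, datetime.date)  # passes: subclass relationship

    d = datetime.date(2017, 12, 31)
    assert isinstance(d, datetime.date)
    assert not isinstance(d, datetime.datetime)  # the stricter guarantee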