From 76aef7038bb0f098e876cbf961f7eb45e6ab51ea Mon Sep 17 00:00:00 2001
From: Elia Zaides <elia.zaides@calabrio.com>
Date: Thu, 5 Sep 2024 13:16:17 -0700
Subject: [PATCH] Enable use of dataframe type, in athena2pyarrow type

---
 awswrangler/_arrow.py      |  2 +-
 awswrangler/_data_types.py | 17 ++++++++++++++---
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/awswrangler/_arrow.py b/awswrangler/_arrow.py
index 796b30ddf..12c853fc0 100644
--- a/awswrangler/_arrow.py
+++ b/awswrangler/_arrow.py
@@ -119,7 +119,7 @@ def _df_to_table(
         for col_name, col_type in dtype.items():
             if col_name in table.column_names:
                 col_index = table.column_names.index(col_name)
-                pyarrow_dtype = athena2pyarrow(col_type)
+                pyarrow_dtype = athena2pyarrow(col_type, df.dtypes.get(col_name))
                 field = pa.field(name=col_name, type=pyarrow_dtype)
                 table = table.set_column(col_index, field, table.column(col_name).cast(pyarrow_dtype))
                 _logger.debug("Casting column %s (%s) to %s (%s)", col_name, col_index, col_type, pyarrow_dtype)
diff --git a/awswrangler/_data_types.py b/awswrangler/_data_types.py
index 98ce476c9..6b895126a 100644
--- a/awswrangler/_data_types.py
+++ b/awswrangler/_data_types.py
@@ -306,7 +306,7 @@ def _split_map(s: str) -> list[str]:
     return parts
 
 
-def athena2pyarrow(dtype: str) -> pa.DataType:  # noqa: PLR0911,PLR0912
+def athena2pyarrow(dtype: str, df_type: str = None) -> pa.DataType:  # noqa: PLR0911,PLR0912
     """Athena to PyArrow data types conversion."""
     dtype = dtype.strip()
     if dtype.startswith(("array", "struct", "map")):
@@ -329,7 +329,18 @@ def athena2pyarrow(dtype: str) -> pa.DataType:  # noqa: PLR0911,PLR0912
     if (dtype in ("string", "uuid")) or dtype.startswith("char") or dtype.startswith("varchar"):
         return pa.string()
     if dtype == "timestamp":
-        return pa.timestamp(unit="ns")
+        if df_type:
+            match df_type:
+                case "datetime64[s]": 
+                    return pa.timestamp(unit="s")
+                case "datetime64[ms]": 
+                    return pa.timestamp(unit="ms")
+                case "datetime64[us]": 
+                    return pa.timestamp(unit="us")
+                case "datetime64[ns]": 
+                    return pa.timestamp(unit="ns")
+                case _: 
+                    return pa.timestamp(unit="ns")
     if dtype == "date":
         return pa.date32()
     if dtype in ("binary" or "varbinary"):
@@ -701,7 +712,7 @@ def pyarrow_schema_from_pandas(
     )
     for k, v in casts.items():
         if (k not in ignore) and (k in df.columns or _is_index_name(k, df.index)):
-            columns_types[k] = athena2pyarrow(dtype=v)
+            columns_types[k] = athena2pyarrow(dtype=v, df_type=df.dtypes.get(k))
     columns_types = {k: v for k, v in columns_types.items() if v is not None}
     _logger.debug("columns_types: %s", columns_types)
     return pa.schema(fields=columns_types)