diff --git a/awswrangler/_arrow.py b/awswrangler/_arrow.py index 796b30ddf..12c853fc0 100644 --- a/awswrangler/_arrow.py +++ b/awswrangler/_arrow.py @@ -119,7 +119,7 @@ def _df_to_table( for col_name, col_type in dtype.items(): if col_name in table.column_names: col_index = table.column_names.index(col_name) - pyarrow_dtype = athena2pyarrow(col_type) + pyarrow_dtype = athena2pyarrow(col_type, df.dtypes.get(col_name)) field = pa.field(name=col_name, type=pyarrow_dtype) table = table.set_column(col_index, field, table.column(col_name).cast(pyarrow_dtype)) _logger.debug("Casting column %s (%s) to %s (%s)", col_name, col_index, col_type, pyarrow_dtype) diff --git a/awswrangler/_data_types.py b/awswrangler/_data_types.py index 98ce476c9..6b895126a 100644 --- a/awswrangler/_data_types.py +++ b/awswrangler/_data_types.py @@ -306,7 +306,7 @@ def _split_map(s: str) -> list[str]: return parts -def athena2pyarrow(dtype: str) -> pa.DataType: # noqa: PLR0911,PLR0912 +def athena2pyarrow(dtype: str, df_type: str = None) -> pa.DataType: # noqa: PLR0911,PLR0912 """Athena to PyArrow data types conversion.""" dtype = dtype.strip() if dtype.startswith(("array", "struct", "map")): @@ -329,7 +329,18 @@ def athena2pyarrow(dtype: str) -> pa.DataType: # noqa: PLR0911,PLR0912 if (dtype in ("string", "uuid")) or dtype.startswith("char") or dtype.startswith("varchar"): return pa.string() if dtype == "timestamp": - return pa.timestamp(unit="ns") + if df_type: + match df_type: + case "datetime64[s]": + return pa.timestamp(unit="s") + case "datetime64[ms]": + return pa.timestamp(unit="ms") + case "datetime64[us]": + return pa.timestamp(unit="us") + case "datetime64[ns]": + return pa.timestamp(unit="ns") + case _: + return pa.timestamp(unit="ns") if dtype == "date": return pa.date32() if dtype in ("binary" or "varbinary"): @@ -701,7 +712,7 @@ def pyarrow_schema_from_pandas( ) for k, v in casts.items(): if (k not in ignore) and (k in df.columns or _is_index_name(k, df.index)): - columns_types[k] = athena2pyarrow(dtype=v) + columns_types[k] = athena2pyarrow(dtype=v, df_type=df.dtypes.get(k)) columns_types = {k: v for k, v in columns_types.items() if v is not None} _logger.debug("columns_types: %s", columns_types) return pa.schema(fields=columns_types)