feat(rust, python): support timezone in csv writer #6722

Merged (30 commits) on Feb 10, 2023
Changes from 4 commits
Commits (30)
b8ada23
support timezones in csv writer
Feb 8, 2023
1267e8e
lint
Feb 8, 2023
3ae3f29
simplify
Feb 8, 2023
55d09a9
clippy
Feb 8, 2023
2e786d2
fix(python): respect 'None' in from_dicts (#6726)
ritchie46 Feb 8, 2023
0532e03
fix(rust, python): arrow map dtype conversion (#6732)
ritchie46 Feb 8, 2023
870a818
feat(python): don't require pyarrow for utf8 -> numpy conversion (#6733)
ritchie46 Feb 8, 2023
91f765f
feat(python): scan_ds predicate pushdown for string cmp (#6734)
ritchie46 Feb 8, 2023
44a7c5b
feat(rust, python): Support an ignore_nulls param for EWM calculation…
yuntai Feb 9, 2023
262114c
fix(rust,python): Improve error message in DataFrame constructor (#6715)
stinodego Feb 9, 2023
d43500e
feat(python): Improved assert equal messages (#6737)
stinodego Feb 9, 2023
0a1c1bc
test(python): Reorganize benchmark test folder (#6695)
stinodego Feb 9, 2023
d3633fb
feat(python): Improve numpy support: conversion of numpy arrays with …
ghuls Feb 9, 2023
80cce18
feat(rust, python): add argmin/max for utf8 data (#6746)
ritchie46 Feb 9, 2023
dd1dca7
chore(rust): update arrow to 0.16 (#6748)
ritchie46 Feb 9, 2023
aeb3a03
docs(python): redirect tz_localize (#6749)
MarcoGorelli Feb 9, 2023
b160f53
test(python): integrate `ignore_nulls` into EWM parametric tests (#6751)
alexander-beedie Feb 9, 2023
e103b34
fix(rust, python): respect skip_rows in glob parsing csv (#6754)
ritchie46 Feb 9, 2023
9de9316
feat(rust, python): formally support duration division (#6758)
ritchie46 Feb 9, 2023
7fbdb6c
chore(rust): propagate error in date_range with invalid time zone (#6…
MarcoGorelli Feb 9, 2023
11e4de2
build(python): Update `mypy` to version `1.0.0` (#6744)
stinodego Feb 9, 2023
0cf7d7f
feat(python): Add option to use PyArrow backed-extension arrays when …
ghuls Feb 10, 2023
aad4aa3
feat(rust, python): parse timezone from Datetime (#6766)
MarcoGorelli Feb 10, 2023
4607eb6
fix(rust,python): handle edge-case with string-literal replacement wh…
alexander-beedie Feb 10, 2023
2d7d728
feat(python): default to 1d interval in date_range (#6771)
MarcoGorelli Feb 10, 2023
1a45830
fix(rust, python): don't set sorted flag if we reverse sort the left …
ritchie46 Feb 10, 2023
f61fa38
fix(rust, python): use explicit drop function node (#6769)
ritchie46 Feb 10, 2023
b3a7374
feat(rust): implement series abstractions for `Int128Type` (#6679)
plaflamme Feb 10, 2023
afac817
Merge branch 'autodetect-aware' of github.com:MarcoGorelli/polars int…
ritchie46 Feb 10, 2023
7dbdc00
add timezones feature to polars-io
ritchie46 Feb 10, 2023
10 changes: 9 additions & 1 deletion polars/polars-io/Cargo.toml
@@ -24,7 +24,13 @@ decompress = ["flate2/miniz_oxide"]
decompress-fast = ["flate2/zlib-ng"]
dtype-categorical = ["polars-core/dtype-categorical"]
dtype-date = ["polars-core/dtype-date", "polars-time/dtype-date"]
dtype-datetime = ["polars-core/dtype-datetime", "polars-core/temporal", "polars-time/dtype-datetime"]
dtype-datetime = [
"polars-core/dtype-datetime",
"polars-core/temporal",
"polars-time/dtype-datetime",
"chrono-tz",
"chrono",
]
dtype-time = ["polars-core/dtype-time", "polars-core/temporal", "polars-time/dtype-time"]
dtype-struct = ["polars-core/dtype-struct"]
dtype-binary = ["polars-core/dtype-binary"]
@@ -46,6 +52,8 @@ anyhow.workspace = true
arrow.workspace = true
async-trait = { version = "0.1.59", optional = true }
bytes = "1.3.0"
chrono = { version = "0.4.23", optional = true }
chrono-tz = { version = "0.8.1", optional = true }
dirs = "4.0"
flate2 = { version = "1", optional = true, default-features = false }
futures = { version = "0.3.25", optional = true }
52 changes: 36 additions & 16 deletions polars/polars-io/src/csv/write_impl.rs
@@ -1,10 +1,13 @@
use std::io::Write;

use arrow::temporal_conversions;
#[cfg(feature = "dtype-datetime")]
use chrono::TimeZone;
#[cfg(feature = "dtype-datetime")]
use chrono_tz::Tz;
use lexical_core::{FormattedSize, ToLexical};
use memchr::{memchr, memchr2};
use polars_core::error::PolarsError::ComputeError;
use polars_core::fmt::PlTzAware;
use polars_core::prelude::*;
use polars_core::series::SeriesIter;
use polars_core::POOL;
@@ -95,12 +98,17 @@ fn write_anyvalue(
TimeUnit::Microseconds => temporal_conversions::timestamp_us_to_datetime(v),
TimeUnit::Milliseconds => temporal_conversions::timestamp_ms_to_datetime(v),
};
match tz {
None => write!(f, "{}", ndt.format(datetime_format)),
Some(tz) => {
write!(f, "{}", PlTzAware::new(ndt, tz))
}
}
let formatted = match tz {
Some(tz) => match tz.parse::<Tz>() {
Ok(parsed_tz) => parsed_tz.from_utc_datetime(&ndt).format(datetime_format),
Err(_) => match temporal_conversions::parse_offset(tz) {
Ok(parsed_tz) => parsed_tz.from_utc_datetime(&ndt).format(datetime_format),
Err(_) => unreachable!(),
},
},
_ => ndt.format(datetime_format),
};
write!(f, "{formatted}")
Collaborator (Author) commented:
Hi @alexander-beedie - do you have any thoughts on this? On the one hand, parsing tz for each element slows things down for tz-aware columns, and could be done beforehand in a per-column fashion.

On the other hand, in #4724 it looks like you intentionally tried to avoid per-column inference.

alexander-beedie (Collaborator) replied on Feb 10, 2023:
Yup - though it wasn't so much about not wanting per-column inference (which would actually be great) as about ensuring that any such inference was done outside the hot loop (per-element inference would be bad).

I took a minimal approach in the end as my initial attempt to reshuffle things on a per-column basis became unnecessarily convoluted - treat my earlier commit as a mere first step towards a more flexible/better per-column future... ;)
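
Editorial aside: a minimal sketch, not part of this PR, of what hoisting the time-zone parse out of the per-element loop could look like, assuming chrono-tz's Tz FromStr parsing; the hypothetical format_datetime_column helper is illustrative only and omits the fixed-offset fallback the PR also handles.

use chrono::{NaiveDateTime, TimeZone};
use chrono_tz::Tz;

// Hypothetical helper: parse the column's time zone string once, then reuse
// the parsed value for every element instead of re-parsing in the hot loop.
fn format_datetime_column(values: &[NaiveDateTime], tz: Option<&str>, fmt: &str) -> Vec<String> {
    let parsed: Option<Tz> = tz.and_then(|s| s.parse::<Tz>().ok());
    values
        .iter()
        .map(|ndt| match &parsed {
            Some(tz) => tz.from_utc_datetime(ndt).format(fmt).to_string(),
            None => ndt.format(fmt).to_string(),
        })
        .collect()
}
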

}
#[cfg(feature = "dtype-time")]
AnyValue::Time(v) => {
@@ -186,24 +194,36 @@ pub(crate) fn write<W: Write>(
if options.datetime_format.is_none() {
for col in df.get_columns() {
match col.dtype() {
DataType::Datetime(TimeUnit::Microseconds, _)
DataType::Datetime(TimeUnit::Milliseconds, tz)
// lowest precision; only set if it's not been inferred yet
if options.datetime_format.is_none() =>
{
options.datetime_format = Some("%FT%H:%M:%S.%6f".to_string());
options.datetime_format = match tz{
Some(_) => Some("%FT%H:%M:%S.%3f%z".to_string()),
None => Some("%FT%H:%M:%S.%3f".to_string()),
};
}
DataType::Datetime(TimeUnit::Nanoseconds, _) => {
options.datetime_format = Some("%FT%H:%M:%S.%9f".to_string());
DataType::Datetime(TimeUnit::Microseconds, tz) => {
options.datetime_format = match tz{
Some(_) => Some("%FT%H:%M:%S.%6f%z".to_string()),
None => Some("%FT%H:%M:%S.%6f".to_string()),
};
}
DataType::Datetime(TimeUnit::Nanoseconds, tz) => {
options.datetime_format = match tz {
Some(_) => Some("%FT%H:%M:%S.%9f%z".to_string()),
None => Some("%FT%H:%M:%S.%9f".to_string()),
};
break; // highest precision; no need to check further
}
_ => {}
}
}
// if still not set, no cols require higher precision than "ms" (or no datetime cols)
if options.datetime_format.is_none() {
options.datetime_format = Some("%FT%H:%M:%S.%3f".to_string());
}
}
let datetime_format: &str = options.datetime_format.as_ref().unwrap();
let datetime_format: &str = match &options.datetime_format {
Some(datetime_format) => datetime_format,
None => "%FT%H:%M:%S.%9f",
};

let len = df.height();
let n_threads = POOL.current_num_threads();
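
To make the inferred formats concrete, here is a minimal standalone sketch (not from the PR) of what the tz-aware microsecond default "%FT%H:%M:%S.%6f%z" renders to via chrono-tz; the value mirrors the updated test_csv_write_tz_aware expectation near the end of this diff.

use chrono::TimeZone;
use chrono_tz::Tz;

fn main() {
    let tz: Tz = "Europe/Zurich".parse().unwrap();
    // 2021-01-01 00:00 UTC stored in a tz-aware microsecond column.
    let ndt = chrono::NaiveDate::from_ymd_opt(2021, 1, 1)
        .unwrap()
        .and_hms_opt(0, 0, 0)
        .unwrap();
    let formatted = tz.from_utc_datetime(&ndt).format("%FT%H:%M:%S.%6f%z").to_string();
    assert_eq!(formatted, "2021-01-01T01:00:00.000000+0100");
}
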
2 changes: 2 additions & 0 deletions py-polars/Cargo.lock

Some generated files are not rendered by default.

22 changes: 20 additions & 2 deletions py-polars/tests/unit/io/test_csv.py
@@ -6,7 +6,7 @@
import tempfile
import textwrap
import zlib
from datetime import date, datetime, time
from datetime import date, datetime, time, timedelta, timezone
from pathlib import Path

import pytest
@@ -860,6 +860,24 @@ def test_datetime_format(fmt: str, expected: str) -> None:
assert csv == expected


@pytest.mark.parametrize(
("fmt", "expected"),
[
(None, "dt\n2022-01-02T00:00:00.000000+0000\n"),
("%F %T%.3f%z", "dt\n2022-01-02 00:00:00.000+0000\n"),
("%Y%z", "dt\n2022+0000\n"),
("%m%z", "dt\n01+0000\n"),
("%m$%d%z", "dt\n01$02+0000\n"),
("%R%z", "dt\n00:00+0000\n"),
],
)
@pytest.mark.parametrize("tzinfo", [timezone.utc, timezone(timedelta(hours=0))])
def test_datetime_format_tz_aware(fmt: str, expected: str, tzinfo: timezone) -> None:
df = pl.DataFrame({"dt": [datetime(2022, 1, 2, tzinfo=tzinfo)]})
csv = df.write_csv(datetime_format=fmt)
assert csv == expected


@pytest.mark.parametrize(
("tu1", "tu2", "expected"),
[
@@ -1094,7 +1112,7 @@ def test_csv_write_tz_aware() -> None:
df = pl.DataFrame({"times": datetime(2021, 1, 1)}).with_columns(
pl.col("times").dt.cast_time_zone("UTC").dt.with_time_zone("Europe/Zurich")
)
assert df.write_csv() == "times\n2021-01-01 01:00:00 CET\n"
assert df.write_csv() == "times\n2021-01-01T01:00:00.000000+0100\n"


def test_csv_statistics_offset() -> None:
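
As a side note, the timezone(timedelta(hours=0)) parametrization in the new test exercises the fixed-offset fallback in write_anyvalue, since a "+00:00" tz string is not a named IANA zone. A minimal sketch of that path, assuming the offset produced by arrow2's parse_offset behaves like the chrono FixedOffset constructed here:

use chrono::{FixedOffset, TimeZone};

fn main() {
    // A "+00:00"-style zone is handled as a fixed offset rather than a named tz.
    let offset = FixedOffset::east_opt(0).unwrap();
    let ndt = chrono::NaiveDate::from_ymd_opt(2022, 1, 2)
        .unwrap()
        .and_hms_opt(0, 0, 0)
        .unwrap();
    let formatted = offset.from_utc_datetime(&ndt).format("%FT%H:%M:%S.%6f%z").to_string();
    // Matches the expected default output in test_datetime_format_tz_aware.
    assert_eq!(formatted, "2022-01-02T00:00:00.000000+0000");
}
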