Skip to content

Commit

Permalink
feat(python,rust): add many more auto inferable datetime formats (pol…
Browse files Browse the repository at this point in the history
  • Loading branch information
Julian-J-S authored and Wouittone committed Jun 22, 2024
1 parent f04d1e6 commit e5aa894
Show file tree
Hide file tree
Showing 4 changed files with 219 additions and 95 deletions.
12 changes: 6 additions & 6 deletions crates/polars-time/src/chunkedarray/string/infer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ const DATETIME_DMY_PATTERN: &str = r#"(?x)
^
['"]? # optional quotes
(?:\d{1,2}) # day
[-/] # separator
[-/\.] # separator
(?P<month>[01]?\d{1}) # month
[-/] # separator
[-/\.] # separator
(?:\d{4,}) # year
(?:
[T\ ] # separator
Expand All @@ -41,9 +41,9 @@ const DATETIME_YMD_PATTERN: &str = r#"(?x)
^
['"]? # optional quotes
(?:\d{4,}) # year
[-/] # separator
[-/\.] # separator
(?P<month>[01]?\d{1}) # month
[-/] # separator
[-/\.] # separator
(?:\d{1,2}) # day
(?:
[T\ ] # separator
Expand All @@ -66,9 +66,9 @@ const DATETIME_YMDZ_PATTERN: &str = r#"(?x)
^
['"]? # optional quotes
(?:\d{4,}) # year
[-/] # separator
[-/\.] # separator
(?P<month>[01]?\d{1}) # month
[-/] # separator
[-/\.] # separator
(?:\d{1,2}) # day
[T\ ] # separator
(?:\d{2}) # hour
Expand Down
242 changes: 157 additions & 85 deletions crates/polars-time/src/chunkedarray/string/patterns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,144 +2,216 @@
//! parsing different orders of dates in a single column.

/// Date-only formats in day-month-year order, one per supported separator.
///
/// Each format is listed exactly once; the separators appear in the same
/// order (`-`, `/`, `.`) as in the other pattern tables of this file.
pub(super) static DATE_D_M_Y: &[&str] = &[
    "%d-%m-%Y", // 31-12-2021
    "%d/%m/%Y", // 31/12/2021
    "%d.%m.%Y", // 31.12.2021
];

/// Date-only formats in year-month-day order, one per supported separator.
///
/// Each format is listed exactly once; the separators appear in the same
/// order (`-`, `/`, `.`) as in the other pattern tables of this file.
pub(super) static DATE_Y_M_D: &[&str] = &[
    "%Y-%m-%d", // 2021-12-31
    "%Y/%m/%d", // 2021/12/31
    "%Y.%m.%d", // 2021.12.31
];

/// Datetime formats in day-month-year order.
///
/// The table holds one group per date separator (`-`, `/`, `.`). Inside a
/// group the `T`-joined forms come first, then the space-joined forms, then
/// the bare date. For each joiner the fractional-second layouts precede the
/// whole-second and minute-only layouts, first with `:` between the time
/// fields and then without.
///
/// Every format is listed exactly once. The literal-midnight forms
/// (`%d/%m/%Y 00:00:00`, `%d-%m-%Y 00:00:00`) are intentionally absent: the
/// general `%H:%M:%S` entries already match those inputs.
///
/// NOTE: don't use single letter dates like %F
/// polars' parser does not support them, so it will be slower
pub(super) static DATETIME_D_M_Y: &[&str] = &[
    // ---
    // `-` separated, e.g. 31-12-2021T23:58:01.123
    // ---
    "%d-%m-%YT%H:%M:%S.%9f",
    "%d-%m-%YT%H:%M:%S.%6f",
    "%d-%m-%YT%H:%M:%S.%3f",
    "%d-%m-%YT%H:%M:%S",
    "%d-%m-%YT%H%M%S.%9f",
    "%d-%m-%YT%H%M%S.%6f",
    "%d-%m-%YT%H%M%S.%3f",
    "%d-%m-%YT%H%M%S",
    "%d-%m-%YT%H:%M",
    "%d-%m-%YT%H%M",
    "%d-%m-%Y %H:%M:%S.%9f",
    "%d-%m-%Y %H:%M:%S.%6f",
    "%d-%m-%Y %H:%M:%S.%3f",
    "%d-%m-%Y %H:%M:%S",
    "%d-%m-%Y %H%M%S.%9f",
    "%d-%m-%Y %H%M%S.%6f",
    "%d-%m-%Y %H%M%S.%3f",
    "%d-%m-%Y %H%M%S",
    "%d-%m-%Y %H:%M",
    "%d-%m-%Y %H%M",
    "%d-%m-%Y",
    // ---
    // `/` separated, e.g. 31/12/2021 23:58:01
    // ---
    "%d/%m/%YT%H:%M:%S.%9f",
    "%d/%m/%YT%H:%M:%S.%6f",
    "%d/%m/%YT%H:%M:%S.%3f",
    "%d/%m/%YT%H:%M:%S",
    "%d/%m/%YT%H%M%S.%9f",
    "%d/%m/%YT%H%M%S.%6f",
    "%d/%m/%YT%H%M%S.%3f",
    "%d/%m/%YT%H%M%S",
    "%d/%m/%YT%H:%M",
    "%d/%m/%YT%H%M",
    "%d/%m/%Y %H:%M:%S.%9f",
    "%d/%m/%Y %H:%M:%S.%6f",
    "%d/%m/%Y %H:%M:%S.%3f",
    "%d/%m/%Y %H:%M:%S",
    "%d/%m/%Y %H%M%S.%9f",
    "%d/%m/%Y %H%M%S.%6f",
    "%d/%m/%Y %H%M%S.%3f",
    "%d/%m/%Y %H%M%S",
    "%d/%m/%Y %H:%M",
    "%d/%m/%Y %H%M",
    "%d/%m/%Y",
    // ---
    // `.` separated, e.g. 31.12.2021 23:58
    // ---
    "%d.%m.%YT%H:%M:%S.%9f",
    "%d.%m.%YT%H:%M:%S.%6f",
    "%d.%m.%YT%H:%M:%S.%3f",
    "%d.%m.%YT%H:%M:%S",
    "%d.%m.%YT%H%M%S.%9f",
    "%d.%m.%YT%H%M%S.%6f",
    "%d.%m.%YT%H%M%S.%3f",
    "%d.%m.%YT%H%M%S",
    "%d.%m.%YT%H:%M",
    "%d.%m.%YT%H%M",
    "%d.%m.%Y %H:%M:%S.%9f",
    "%d.%m.%Y %H:%M:%S.%6f",
    "%d.%m.%Y %H:%M:%S.%3f",
    "%d.%m.%Y %H:%M:%S",
    "%d.%m.%Y %H%M%S.%9f",
    "%d.%m.%Y %H%M%S.%6f",
    "%d.%m.%Y %H%M%S.%3f",
    "%d.%m.%Y %H%M%S",
    "%d.%m.%Y %H:%M",
    "%d.%m.%Y %H%M",
    "%d.%m.%Y",
];

/// Datetime formats in year-month-day order (no timezone).
///
/// The table holds one group per date separator (`-`, `/`, `.`). Inside a
/// group the `T`-joined forms come first, then the space-joined forms, then
/// the bare date. For each joiner the fractional-second layouts precede the
/// whole-second and minute-only layouts, first with `:` between the time
/// fields and then without.
///
/// Every format is listed exactly once. The dynamic-precision ISO 8601 entry
/// is spelled with `%Y-%m-%d` rather than `%F`, in line with the note below.
///
/// NOTE: don't use single letter dates like %F
/// polars' parser does not support them, so it will be slower
pub(super) static DATETIME_Y_M_D: &[&str] = &[
    // ---
    // `-` separated, e.g. 2021-12-31T23:58:01.123
    // ---
    "%Y-%m-%dT%H:%M:%S.%9f",
    "%Y-%m-%dT%H:%M:%S.%6f",
    "%Y-%m-%dT%H:%M:%S.%3f",
    "%Y-%m-%dT%H:%M:%S",
    "%Y-%m-%dT%H%M%S.%9f",
    "%Y-%m-%dT%H%M%S.%6f",
    "%Y-%m-%dT%H%M%S.%3f",
    "%Y-%m-%dT%H%M%S",
    "%Y-%m-%dT%H:%M",
    "%Y-%m-%dT%H%M",
    "%Y-%m-%d %H:%M:%S.%9f",
    "%Y-%m-%d %H:%M:%S.%6f",
    "%Y-%m-%d %H:%M:%S.%3f",
    "%Y-%m-%d %H:%M:%S",
    "%Y-%m-%d %H%M%S.%9f",
    "%Y-%m-%d %H%M%S.%6f",
    "%Y-%m-%d %H%M%S.%3f",
    "%Y-%m-%d %H%M%S",
    "%Y-%m-%d %H:%M",
    "%Y-%m-%d %H%M",
    "%Y-%m-%d",
    // ---
    // `/` separated, e.g. 2021/12/31 23:58:01
    // ---
    "%Y/%m/%dT%H:%M:%S.%9f",
    "%Y/%m/%dT%H:%M:%S.%6f",
    "%Y/%m/%dT%H:%M:%S.%3f",
    "%Y/%m/%dT%H:%M:%S",
    "%Y/%m/%dT%H%M%S.%9f",
    "%Y/%m/%dT%H%M%S.%6f",
    "%Y/%m/%dT%H%M%S.%3f",
    "%Y/%m/%dT%H%M%S",
    "%Y/%m/%dT%H:%M",
    "%Y/%m/%dT%H%M",
    "%Y/%m/%d %H:%M:%S.%9f",
    "%Y/%m/%d %H:%M:%S.%6f",
    "%Y/%m/%d %H:%M:%S.%3f",
    "%Y/%m/%d %H:%M:%S",
    "%Y/%m/%d %H%M%S.%9f",
    "%Y/%m/%d %H%M%S.%6f",
    "%Y/%m/%d %H%M%S.%3f",
    "%Y/%m/%d %H%M%S",
    "%Y/%m/%d %H:%M",
    "%Y/%m/%d %H%M",
    "%Y/%m/%d",
    // ---
    // `.` separated, e.g. 2021.12.31 23:58
    // ---
    "%Y.%m.%dT%H:%M:%S.%9f",
    "%Y.%m.%dT%H:%M:%S.%6f",
    "%Y.%m.%dT%H:%M:%S.%3f",
    "%Y.%m.%dT%H:%M:%S",
    "%Y.%m.%dT%H%M%S.%9f",
    "%Y.%m.%dT%H%M%S.%6f",
    "%Y.%m.%dT%H%M%S.%3f",
    "%Y.%m.%dT%H%M%S",
    "%Y.%m.%dT%H:%M",
    "%Y.%m.%dT%H%M",
    "%Y.%m.%d %H:%M:%S.%9f",
    "%Y.%m.%d %H:%M:%S.%6f",
    "%Y.%m.%d %H:%M:%S.%3f",
    "%Y.%m.%d %H:%M:%S",
    "%Y.%m.%d %H%M%S.%9f",
    "%Y.%m.%d %H%M%S.%6f",
    "%Y.%m.%d %H%M%S.%3f",
    "%Y.%m.%d %H%M%S",
    "%Y.%m.%d %H:%M",
    "%Y.%m.%d %H%M",
    "%Y.%m.%d",
    // ---
    // other
    // ---
    "%Y-%m-%dT%H:%M:%S%.f", // ISO 8601 with dynamic precision and without timezone
];

/// Timezone-aware datetime formats in year-month-day order.
///
/// The table holds one group per date separator (`-`, `/`, `.`). Inside a
/// group the `T`-joined forms come first, then the space-joined forms. For
/// each joiner the fractional-second layouts precede the whole-second and
/// minute-only layouts, first with `:` between the time fields and then
/// without. Every entry ends in the `%#z` offset specifier, except the final
/// catch-all `%+`.
///
/// Every format is listed exactly once.
pub(super) static DATETIME_Y_M_D_Z: &[&str] = &[
    // ---
    // `-` separated
    // ---
    "%Y-%m-%dT%H:%M:%S.%9f%#z",
    "%Y-%m-%dT%H:%M:%S.%6f%#z",
    "%Y-%m-%dT%H:%M:%S.%3f%#z",
    "%Y-%m-%dT%H:%M:%S%#z",
    "%Y-%m-%dT%H%M%S.%9f%#z",
    "%Y-%m-%dT%H%M%S.%6f%#z",
    "%Y-%m-%dT%H%M%S.%3f%#z",
    "%Y-%m-%dT%H%M%S%#z",
    "%Y-%m-%dT%H:%M%#z",
    "%Y-%m-%dT%H%M%#z",
    "%Y-%m-%d %H:%M:%S.%9f%#z",
    "%Y-%m-%d %H:%M:%S.%6f%#z",
    "%Y-%m-%d %H:%M:%S.%3f%#z",
    "%Y-%m-%d %H:%M:%S%#z",
    "%Y-%m-%d %H%M%S.%9f%#z",
    "%Y-%m-%d %H%M%S.%6f%#z",
    "%Y-%m-%d %H%M%S.%3f%#z",
    "%Y-%m-%d %H%M%S%#z",
    "%Y-%m-%d %H:%M%#z",
    "%Y-%m-%d %H%M%#z",
    // ---
    // `/` separated
    // ---
    "%Y/%m/%dT%H:%M:%S.%9f%#z",
    "%Y/%m/%dT%H:%M:%S.%6f%#z",
    "%Y/%m/%dT%H:%M:%S.%3f%#z",
    "%Y/%m/%dT%H:%M:%S%#z",
    "%Y/%m/%dT%H%M%S.%9f%#z",
    "%Y/%m/%dT%H%M%S.%6f%#z",
    "%Y/%m/%dT%H%M%S.%3f%#z",
    "%Y/%m/%dT%H%M%S%#z",
    "%Y/%m/%dT%H:%M%#z",
    "%Y/%m/%dT%H%M%#z",
    "%Y/%m/%d %H:%M:%S.%9f%#z",
    "%Y/%m/%d %H:%M:%S.%6f%#z",
    "%Y/%m/%d %H:%M:%S.%3f%#z",
    "%Y/%m/%d %H:%M:%S%#z",
    "%Y/%m/%d %H%M%S.%9f%#z",
    "%Y/%m/%d %H%M%S.%6f%#z",
    "%Y/%m/%d %H%M%S.%3f%#z",
    "%Y/%m/%d %H%M%S%#z",
    "%Y/%m/%d %H:%M%#z",
    "%Y/%m/%d %H%M%#z",
    // ---
    // `.` separated
    // ---
    "%Y.%m.%dT%H:%M:%S.%9f%#z",
    "%Y.%m.%dT%H:%M:%S.%6f%#z",
    "%Y.%m.%dT%H:%M:%S.%3f%#z",
    "%Y.%m.%dT%H:%M:%S%#z",
    "%Y.%m.%dT%H%M%S.%9f%#z",
    "%Y.%m.%dT%H%M%S.%6f%#z",
    "%Y.%m.%dT%H%M%S.%3f%#z",
    "%Y.%m.%dT%H%M%S%#z",
    "%Y.%m.%dT%H:%M%#z",
    "%Y.%m.%dT%H%M%#z",
    "%Y.%m.%d %H:%M:%S.%9f%#z",
    "%Y.%m.%d %H:%M:%S.%6f%#z",
    "%Y.%m.%d %H:%M:%S.%3f%#z",
    "%Y.%m.%d %H:%M:%S%#z",
    "%Y.%m.%d %H%M%S.%9f%#z",
    "%Y.%m.%d %H%M%S.%6f%#z",
    "%Y.%m.%d %H%M%S.%3f%#z",
    "%Y.%m.%d %H%M%S%#z",
    "%Y.%m.%d %H:%M%#z",
    "%Y.%m.%d %H%M%#z",
    // ---
    // other
    // ---
    "%+", // ISO 8601; Same as %Y-%m-%dT%H:%M:%S%.f%:z; supports Z or UTC
];

#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
Expand Down
8 changes: 4 additions & 4 deletions py-polars/tests/unit/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,10 +341,10 @@ def test_datetime_parsing_default_formats() -> None:
csv = textwrap.dedent(
"""\
ts_dmy,ts_dmy_f,ts_dmy_p
01/01/21 00:00:00,31-01-2021T00:00:00.123,31-01-2021 11:00
01/01/21 00:15:00,31-01-2021T00:15:00.123,31-01-2021 01:00
01/01/21 00:30:00,31-01-2021T00:30:00.123,31-01-2021 01:15
01/01/21 00:45:00,31-01-2021T00:45:00.123,31-01-2021 01:30
01/01/2021 00:00:00,31-01-2021T00:00:00.123,31-01-2021 11:00
01/01/2021 00:15:00,31-01-2021T00:15:00.123,31-01-2021 01:00
01/01/2021 00:30:00,31-01-2021T00:30:00.123,31-01-2021 01:15
01/01/2021 00:45:00,31-01-2021T00:45:00.123,31-01-2021 01:30
"""
)

Expand Down
Loading

0 comments on commit e5aa894

Please sign in to comment.