Skip to content

Commit

Permalink
feat(rust, python): add support for am/pm notation in parse_dates rea…
Browse files Browse the repository at this point in the history
…d_csv (#5373)
  • Loading branch information
YuRiTan authored Oct 30, 2022
1 parent 8b9190a commit be02fc0
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 2 deletions.
16 changes: 14 additions & 2 deletions polars/polars-time/src/chunkedarray/utf8/patterns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub(super) static DATETIME_D_M_Y: &[&str] = &[
// --
// supported by polars' parser
// ---
// 31/12/21 12:54:98
// 31/12/21 12:54:48
"%d/%m/%y %H:%M:%S",
// 31-12-2021 23:58:01
"%d-%m-%Y %H:%M:%S",
Expand All @@ -44,12 +44,18 @@ pub(super) static DATETIME_D_M_Y: &[&str] = &[
// no times
"%d-%m-%Y",
"%d-%m-%y",
// 31/12/2021 11:54:48 PM
"%d/%m/%Y %I:%M:%S %p",
"%d-%m-%Y %I:%M:%S %p",
// 31/12/2021 11:54 PM
"%d/%m/%Y %I:%M %p",
"%d-%m-%Y %I:%M %p",
];

/// NOTE: don't use single-letter date specifiers like %F;
/// polars' parser does not support them, so parsing will be slower
pub(super) static DATETIME_Y_M_D: &[&str] = &[
// 21/12/31 12:54:98
// 21/12/31 12:54:48
"%y/%m/%d %H:%M:%S",
// 2021-12-31 23:58:01
"%Y-%m-%d %H:%M:%S",
Expand Down Expand Up @@ -82,6 +88,12 @@ pub(super) static DATETIME_Y_M_D: &[&str] = &[
// no times
"%Y-%m-%d",
"%Y/%m/%d",
// 2021/12/31 11:54:48 PM
"%Y/%m/%d %I:%M:%S %p",
"%Y-%m-%d %I:%M:%S %p",
// 2021/12/31 11:54 PM
"%Y/%m/%d %I:%M %p",
"%Y-%m-%d %I:%M %p",
// --
// not supported by polars' parser
// ---
Expand Down
25 changes: 25 additions & 0 deletions polars/tests/it/io/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,31 @@ fn test_automatic_datetime_parsing() -> PolarsResult<()> {
Ok(())
}

/// Reads an in-memory CSV with three timestamp columns (slash-separated
/// two-digit year, `T`-separated with fractional seconds, and 12-hour AM/PM)
/// with date parsing enabled, then checks that every column comes back as a
/// microsecond `Datetime` without any nulls.
#[test]
#[cfg(feature = "temporal")]
fn test_automatic_datetime_parsing_default_formats() -> PolarsResult<()> {
    let csv = r"ts_dmy,ts_dmy_f,ts_dmy_p
01/01/21 00:00:00,31-01-2021T00:00:00.123,31-01-2021 11:00 AM
01/01/21 00:15:00,31-01-2021T00:15:00.123,31-01-2021 01:00 PM
01/01/21 00:30:00,31-01-2021T00:30:00.123,31-01-2021 01:15 PM
01/01/21 00:45:00,31-01-2021T00:45:00.123,31-01-2021 01:30 PM
";

    let df = CsvReader::new(Cursor::new(csv))
        .with_parse_dates(true)
        .finish()?;

    // Every column is expected to be inferred as the same dtype.
    let expected = DataType::Datetime(TimeUnit::Microseconds, None);

    for name in df.get_column_names() {
        let column = df.column(name)?;
        assert_eq!(column.dtype(), &expected);
        // A null would mean at least one row failed to parse with the inferred pattern.
        assert_eq!(column.null_count(), 0);
    }

    Ok(())
}

#[test]
fn test_no_quotes() -> PolarsResult<()> {
let rolling_stones = r#"linenum,last_name,first_name
Expand Down
16 changes: 16 additions & 0 deletions py-polars/tests/unit/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,22 @@ def test_datetime_parsing() -> None:
assert df.dtypes == [pl.Datetime, pl.Float64, pl.Float64]


def test_datetime_parsing_default_formats() -> None:
    """Check that ``read_csv(..., parse_dates=True)`` infers ``pl.Datetime``
    for all three timestamp columns of the sample CSV (slash-separated
    two-digit year, ``T``-separated with fractional seconds, and 12-hour
    AM/PM notation).
    """
    raw_csv = textwrap.dedent(
        """\
ts_dmy,ts_dmy_f,ts_dmy_p
01/01/21 00:00:00,31-01-2021T00:00:00.123,31-01-2021 11:00 AM
01/01/21 00:15:00,31-01-2021T00:15:00.123,31-01-2021 01:00 PM
01/01/21 00:30:00,31-01-2021T00:30:00.123,31-01-2021 01:15 PM
01/01/21 00:45:00,31-01-2021T00:45:00.123,31-01-2021 01:30 PM
"""
    )

    frame = pl.read_csv(io.StringIO(raw_csv), parse_dates=True)
    expected_dtypes = [pl.Datetime, pl.Datetime, pl.Datetime]
    assert frame.dtypes == expected_dtypes


def test_partial_dtype_overwrite() -> None:
csv = textwrap.dedent(
"""\
Expand Down

0 comments on commit be02fc0

Please sign in to comment.