diff --git a/README.md b/README.md index 3eb1b15..7311908 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ df2 = DataFrame(date = ["20190330120141", "2008-04-05 16-23-07", "2010.06.07 19: @chain df2 begin @mutate(date = ymd_hms(date)) - @mutate(floor_byhr = floor_date(date, "hour")) + @mutate(floor_byhr = floor_date(date, "week")) @mutate(round_bymin = round_date(date, "minute")) @mutate(rounded_bymo = round_date(date, "month")) end @@ -100,18 +100,18 @@ end ``` 9×4 DataFrame - Row │ date floor_byhr round_bymin rounded_bymo - │ DateTime? DateTime? DateTime? Date? -─────┼───────────────────────────────────────────────────────────────────────────── - 1 │ 2019-03-30T12:01:41 2019-03-30T12:00:00 2019-03-30T12:02:00 2019-04-01 - 2 │ 2008-04-05T16:23:07 2008-04-05T16:00:00 2008-04-05T16:23:00 2008-04-01 - 3 │ 2010-06-07T19:45:00 2010-06-07T19:00:00 2010-06-07T19:45:00 2010-06-01 - 4 │ 2011-02-08T14:03:07 2011-02-08T14:00:00 2011-02-08T14:03:00 2011-02-01 - 5 │ 2012-03-09T09:02:37 2012-03-09T09:00:00 2012-03-09T09:03:00 2012-03-01 - 6 │ 2013-05-15T03:02:09 2013-05-15T03:00:00 2013-05-15T03:02:00 2013-05-01 - 7 │ 2013-02-07T13:02:07 2013-02-07T13:00:00 2013-02-07T13:02:00 2013-02-01 - 8 │ 2014-06-18T18:16:08 2014-06-18T18:00:00 2014-06-18T18:16:00 2014-07-01 - 9 │ missing missing missing missing + Row │ date floor_byhr round_bymin rounded_bymo + │ DateTime? DateTime? DateTime? DateTime? +─────┼──────────────────────────────────────────────────────────────────────────────────── + 1 │ 2019-03-30T12:01:41 2019-03-24T00:00:00 2019-03-30T12:02:00 2019-04-01T00:00:00 + 2 │ 2008-04-05T16:23:07 2008-03-30T00:00:00 2008-04-05T16:23:00 2008-04-01T00:00:00 + 3 │ 2010-06-07T19:45:00 2010-06-06T00:00:00 2010-06-07T19:45:00 2010-06-01T00:00:00 + 4 │ 2011-02-08T14:03:07 2011-02-06T00:00:00 2011-02-08T14:03:00 2011-02-01T00:00:00 + 5 │ 2012-03-09T09:02:37 2012-03-04T00:00:00 2012-03-09T09:03:00 2012-03-01T00:00:00 + 6 │ 2013-05-15T03:02:09 2013-05-12T00:00:00 2013-05-15T03:02:00 2013-05-01T00:00:00 + 7 │ 2013-02-12T07:13:02 2013-02-10T00:00:00 2013-02-12T07:13:00 2013-02-01T00:00:00 + 8 │ 2014-06-18T18:16:08 2014-06-15T00:00:00 2014-06-18T18:16:00 2014-07-01T00:00:00 + 9 │ missing missing missing missing ``` #### `difftime()` diff --git a/src/datedocstrings.jl b/src/datedocstrings.jl index 551e272..f94b2d9 100644 --- a/src/datedocstrings.jl +++ b/src/datedocstrings.jl @@ -559,13 +559,12 @@ A DateTime object constructed from the parsed month, day, year, and hour values julia> mdy_h("06-15-2023 09hr") 2023-06-15T09:00:00 -julia> mdy_h("06-15-2023 09hr p") +julia> mdy_h("06-15-2023 09hr pM") 2023-06-15T21:00:00 julia> mdy_h("jan 3 2023 09hr p") 2023-01-03T21:00:00 - julia> mdy_h(missing) missing ``` diff --git a/src/dmys.jl b/src/dmys.jl index e6ec191..0acb8cb 100644 --- a/src/dmys.jl +++ b/src/dmys.jl @@ -7,50 +7,53 @@ function dmy(date_string::Union{AbstractString, Missing}) else date_string = uppercase(date_string) date_string = replace_month_with_number(date_string) - date_string = strip(replace(date_string, r"ST|ND|RD|TH|,|OF|THE" => "")) + date_string = strip(replace(date_string, r"ST|ND|RD|TH|,|OF" => "")) date_string = replace(date_string, r"\s+" => Base.s" ") end - # Match for "ddmmyyyy" or "ddmmyy" format - m = match(r"^(\d{1,2})(\d{1,2})(\d{2,4})$", date_string) + # Add regex match for "ddmmyyyy" format + m = match(r"(\d{1,2})(\d{1,2})(\d{4})", date_string) if m !== nothing day_str, month_str, year_str = m.captures day = parse(Int, day_str) month = parse(Int, month_str) year = parse(Int, year_str) - if length(year_str) == 2 - if year > 30 - year += 1900 - else - year += 2000 - end - end return Date(year, month, day) end - # Match for "dd mm yyyy" or "dd mm yy" format - m = match(r"^(\d{1,2}) (\d{1,2}) (\d{2,4})$", date_string) + m = match(r"(\d{1,2})\s*(\d{1,2})\s*(\d{4})", date_string) + if m !== nothing + day_str, month_str, year_str = m.captures + day = parse(Int, day_str) + month = parse(Int, month_str) + year = parse(Int, year_str) + return Date(year, month, day) + end + + m = match(r"(\d{1,2})[/-](\d{1,2})[/-](\d{4})", date_string) if m !== nothing day_str, month_str, year_str = m.captures day = parse(Int, day_str) month = parse(Int, month_str) year = parse(Int, year_str) - if length(year_str) == 2 - if year > 30 - year += 1900 - else - year += 2000 - end - end return Date(year, month, day) end - # Match for "dd-mm-yyyy", "dd/mm/yyyy", "dd-mm-yy", or "dd/mm/yy" format - m = match(r"^(\d{1,2})[/-](\d{1,2})[/-](\d{2,4})$", date_string) + m = match(r"(\d{1,2})[/-](\d{1,2})[/-](\d{2})", date_string) if m !== nothing day_str, month_str, year_str = m.captures day = parse(Int, day_str) month = parse(Int, month_str) + year = parse(Int, "20" * year_str) # Assuming 21st century for two-digit years + return Date(year, month, day) + end + + m = match(r"(\d{1,2})(?:ST|ND|RD|TH)?\s+(\d{1,2}|\w+)\s+(\d{2,4})", date_string) + if m !== nothing + day_str, month_str, year_str = m.captures + day = parse(Int, day_str) + # Parse month as integer or use month names + month = parse(Int, replace_month_with_number(month_str)) # Handle month names year = parse(Int, year_str) if length(year_str) == 2 if year > 30 @@ -61,44 +64,16 @@ function dmy(date_string::Union{AbstractString, Missing}) end return Date(year, month, day) end - - return missing -end - - - -""" -$docstring_dmy_hms -""" -function dmy_hms(datetime_string::Union{AbstractString, Missing}) - - if ismissing(datetime_string) - return missing - else - datetime_string = uppercase(datetime_string) - datetime_string = replace_month_with_number(datetime_string) - end - - # Extract day, month, year, hour, minute, and second using a flexible regular expression - m = match(r"(\d{1,2}).*?(\d{1,2}).*?(\d{4}).*?(\d{1,2}).*?(\d{1,2}).*?(\d{1,2})", datetime_string) - + + m = match(r"(\d{1,2})(ST|ND|RD|TH)?\s*(OF)?\s*(\d{1,2}),?\s*(\d{4})", date_string) if m !== nothing - day_str, month_str, year_str, hour_str, minute_str, second_str = m.captures + day_str, _, _, month_str, year_str = m.captures day = parse(Int, day_str) month = parse(Int, month_str) year = parse(Int, year_str) - hour = parse(Int, hour_str) - if hour <= 12 && occursin(r"(? 30 + year += 1900 + else + year += 2000 + end end minute = parse(Int, minute_str) second = parse(Int, second_str) - # Return as DateTime + if hour <= 12 && occursin(r"(?