diff --git a/doc/csv/recipes/parsing.rdoc b/doc/csv/recipes/parsing.rdoc index e7bfc072..63673072 100644 --- a/doc/csv/recipes/parsing.rdoc +++ b/doc/csv/recipes/parsing.rdoc @@ -45,6 +45,7 @@ All code snippets on this page assume that the following has been executed: - {Recipe: Convert Fields to Numerics}[#label-Recipe-3A+Convert+Fields+to+Numerics] - {Recipe: Convert Fields to Dates}[#label-Recipe-3A+Convert+Fields+to+Dates] - {Recipe: Convert Fields to DateTimes}[#label-Recipe-3A+Convert+Fields+to+DateTimes] + - {Recipe: Convert Fields to Times}[#label-Recipe-3A+Convert+Fields+to+Times] - {Recipe: Convert Assorted Fields to Objects}[#label-Recipe-3A+Convert+Assorted+Fields+to+Objects] - {Recipe: Convert Fields to Other Objects}[#label-Recipe-3A+Convert+Fields+to+Other+Objects] - {Recipe: Filter Field Strings}[#label-Recipe-3A+Filter+Field+Strings] @@ -339,6 +340,7 @@ There are built-in field converters for converting to objects of certain classes - \Integer - \Date - \DateTime +- \Time Other built-in field converters include: - +:numeric+: converts to \Integer and \Float. 
@@ -381,6 +383,13 @@ Convert fields to \DateTime objects using built-in converter +:date_time+: parsed = CSV.parse(source, headers: true, converters: :date_time) parsed.map {|row| row['DateTime'].class} # => [DateTime, DateTime, DateTime] +===== Recipe: Convert Fields to Times + +Convert fields to \Time objects using built-in converter +:time+: + source = "Name,Time\nfoo,2001-02-03\nbar,2001-02-04\nbaz,2020-05-07T14:59:00-05:00\n" + parsed = CSV.parse(source, headers: true, converters: :time) + parsed.map {|row| row['Time'].class} # => [Time, Time, Time] + ===== Recipe: Convert Assorted Fields to Objects Convert assorted fields to objects using built-in converter +:all+: @@ -542,4 +551,4 @@ Output: #<struct CSV::FieldInfo index=0, line=2, header=nil> #<struct CSV::FieldInfo index=1, line=2, header=nil> #<struct CSV::FieldInfo index=0, line=3, header=nil> - #<struct CSV::FieldInfo index=1, line=3, header=nil> \ No newline at end of file + #<struct CSV::FieldInfo index=1, line=3, header=nil> diff --git a/lib/csv.rb b/lib/csv.rb index 41667006..b969d73c 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -91,6 +91,7 @@ require "forwardable" require "date" +require "time" require "stringio" require_relative "csv/fields_converter" @@ -521,6 +522,7 @@ # - <tt>:float</tt>: converts each \String-embedded float into a true \Float. # - <tt>:date</tt>: converts each \String-embedded date into a true \Date. # - <tt>:date_time</tt>: converts each \String-embedded date-time into a true \DateTime +# - <tt>:time</tt>: converts each \String-embedded time into a true \Time # . # This example creates a converter proc, then stores it: # strip_converter = proc {|field| field.strip } @@ -631,6 +633,7 @@ # [:numeric, [:integer, :float]] # [:date, Proc] # [:date_time, Proc] +# [:time, Proc] # [:all, [:date_time, :numeric]] # # Each of these converters transcodes values to UTF-8 before attempting conversion. 
@@ -675,6 +678,15 @@ # csv = CSV.parse_line(data, converters: :date_time) # csv # => [#<DateTime: 2020-05-07T14:59:00-05:00 ((2458977j,71940s,0n),-18000s,2299161j)>, "x"] # +# Converter +time+ converts each field that Time::parse accepts: +# data = '2020-05-07T14:59:00-05:00,x' +# # Without the converter +# csv = CSV.parse_line(data) +# csv # => ["2020-05-07T14:59:00-05:00", "x"] +# # With the converter +# csv = CSV.parse_line(data, converters: :time) +# csv # => [2020-05-07 14:59:00 -0500, "x"] +# # Converter +:all+ converts with both +:date_time+ and +:numeric+. # # As seen above, method #convert adds \converters to a \CSV instance, @@ -871,10 +883,10 @@ def initialize(encoding, line_number) # A Regexp used to find and convert some common Date formats. DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} | \d{4}-\d{2}-\d{2} )\z /x - # A Regexp used to find and convert some common DateTime formats. + # A Regexp used to find and convert some common (Date)Time formats. DateTimeMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} | - # ISO-8601 and RFC-3339 (space instead of T) recognized by DateTime.parse + # ISO-8601 and RFC-3339 (space instead of T) recognized by (Date)Time.parse \d{4}-\d{2}-\d{2} (?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)? )\z /x @@ -912,6 +924,14 @@ def initialize(encoding, line_number) f end }, + time: lambda { |f| + begin + e = f.encode(ConverterEncoding) + e.match?(DateTimeMatcher) ? 
Time.parse(e) : f + rescue # encoding conversion or parse errors + f + end + }, all: [:date_time, :numeric], } diff --git a/test/csv/test_data_converters.rb b/test/csv/test_data_converters.rb index c20a5d1f..6c46cd90 100644 --- a/test/csv/test_data_converters.rb +++ b/test/csv/test_data_converters.rb @@ -187,4 +187,146 @@ def test_builtin_date_time_converter_rfc3339_tab_utc assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end + + def test_builtin_time_converter + # does convert + assert_instance_of(Time, + CSV::Converters[:time][@win_safe_time_str]) + + # does not convert + assert_instance_of(String, CSV::Converters[:time]["junk"]) + end + + def test_builtin_time_converter_iso8601_date + iso8601_string = "2018-01-14" + time = Time.new(2018, 1, 14) + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_minute + iso8601_string = "2018-01-14T22:25" + time = Time.new(2018, 1, 14, 22, 25) + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_second + iso8601_string = "2018-01-14T22:25:19" + time = Time.new(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_under_second + iso8601_string = "2018-01-14T22:25:19.1" + time = Time.new(2018, 1, 14, 22, 25, 19.1r) + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_under_second_offset + iso8601_string = "2018-01-14T22:25:19.1+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_offset + iso8601_string = "2018-01-14T22:25:19+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_utc + iso8601_string = 
"2018-01-14T22:25:19Z" + time = Time.utc(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_rfc3339_minute + rfc3339_string = "2018-01-14 22:25" + time = Time.new(2018, 1, 14, 22, 25) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_second + rfc3339_string = "2018-01-14 22:25:19" + time = Time.new(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_under_second + rfc3339_string = "2018-01-14 22:25:19.1" + time = Time.new(2018, 1, 14, 22, 25, 19.1r) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_under_second_offset + rfc3339_string = "2018-01-14 22:25:19.1+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_offset + rfc3339_string = "2018-01-14 22:25:19+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_utc + rfc3339_string = "2018-01-14 22:25:19Z" + time = Time.utc(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_minute + rfc3339_string = "2018-01-14\t22:25" + time = Time.new(2018, 1, 14, 22, 25) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_second + rfc3339_string = "2018-01-14\t22:25:19" + time = Time.new(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_under_second + rfc3339_string = "2018-01-14\t22:25:19.1" + time = Time.new(2018, 1, 14, 22, 25, 19.1r) + assert_equal(time, + 
CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_under_second_offset + rfc3339_string = "2018-01-14\t22:25:19.1+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_offset + rfc3339_string = "2018-01-14\t22:25:19+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_utc + rfc3339_string = "2018-01-14\t22:25:19Z" + time = Time.utc(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end end