Skip to content

Commit

Permalink
Allow CSV.open with StringIO argument (#302)
Browse files Browse the repository at this point in the history
Fix #300
  • Loading branch information
MarcPer authored Aug 5, 2024
1 parent bb93c28 commit ce91198
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 18 deletions.
74 changes: 56 additions & 18 deletions lib/csv.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1508,10 +1508,8 @@ def generate_lines(rows, **options)

#
# :call-seq:
# open(file_path, mode = "rb", **options ) -> new_csv
# open(io, mode = "rb", **options ) -> new_csv
# open(file_path, mode = "rb", **options ) { |csv| ... } -> object
# open(io, mode = "rb", **options ) { |csv| ... } -> object
# open(path_or_io, mode = "rb", **options ) -> new_csv
# open(path_or_io, mode = "rb", **options ) { |csv| ... } -> object
#
# possible options elements:
# keyword form:
Expand All @@ -1520,7 +1518,7 @@ def generate_lines(rows, **options)
# :undef => :replace # replace undefined conversion
# :replace => string # replacement string ("?" or "\uFFFD" if not specified)
#
# * Argument +path+, if given, must be the path to a file.
# * Argument +path_or_io+, must be a file path or an \IO stream.
# :include: ../doc/csv/arguments/io.rdoc
# * Argument +mode+, if given, must be a \File mode.
# See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
Expand All @@ -1544,6 +1542,9 @@ def generate_lines(rows, **options)
# path = 't.csv'
# File.write(path, string)
#
# string_io = StringIO.new
# string_io << "foo,0\nbar,1\nbaz,2\n"
#
# ---
#
# With no block given, returns a new \CSV object.
Expand All @@ -1556,6 +1557,9 @@ def generate_lines(rows, **options)
# csv = CSV.open(File.open(path))
# csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
#
# Create a \CSV object using a \StringIO:
# csv = CSV.open(string_io)
# csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
# ---
#
# With a block given, calls the block with the created \CSV object;
Expand All @@ -1573,16 +1577,24 @@ def generate_lines(rows, **options)
# Output:
# #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
#
# Using a \StringIO:
# csv = CSV.open(string_io) {|csv| p csv}
# csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
# Output:
# #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
# ---
#
# Raises an exception if the argument is not a \String object or \IO object:
# # Raises TypeError (no implicit conversion of Symbol into String)
# CSV.open(:foo)
def open(filename, mode="r", **options)
def open(filename_or_io, mode="r", **options)
# wrap a File opened with the remaining +args+ with no newline
# decorator
file_opts = {}
may_enable_bom_deletection_automatically(mode, options, file_opts)
may_enable_bom_detection_automatically(filename_or_io,
mode,
options,
file_opts)
file_opts.merge!(options)
unless file_opts.key?(:newline)
file_opts[:universal_newline] ||= false
Expand All @@ -1592,14 +1604,19 @@ def open(filename, mode="r", **options)
options.delete(:replace)
options.delete_if {|k, _| /newline\z/.match?(k)}

begin
f = File.open(filename, mode, **file_opts)
rescue ArgumentError => e
raise unless /needs binmode/.match?(e.message) and mode == "r"
mode = "rb"
file_opts = {encoding: Encoding.default_external}.merge(file_opts)
retry
if filename_or_io.is_a?(StringIO)
f = create_stringio(filename_or_io.string, mode, **file_opts)
else
begin
f = File.open(filename_or_io, mode, **file_opts)
rescue ArgumentError => e
raise unless /needs binmode/.match?(e.message) and mode == "r"
mode = "rb"
file_opts = {encoding: Encoding.default_external}.merge(file_opts)
retry
end
end

begin
csv = new(f, **options)
rescue Exception
Expand Down Expand Up @@ -1886,16 +1903,37 @@ def table(path, **options)
private_constant :ON_WINDOWS

private
def may_enable_bom_deletection_automatically(mode, options, file_opts)
# "bom|utf-8" may be buggy on Windows:
# https://bugs.ruby-lang.org/issues/20526
return if ON_WINDOWS
def may_enable_bom_detection_automatically(filename_or_io,
mode,
options,
file_opts)
if filename_or_io.is_a?(StringIO)
# Support to StringIO was dropped for Ruby 2.6 and earlier without BOM support:
# https://github.com/ruby/stringio/pull/47
return if RUBY_VERSION < "2.7"
else
# "bom|utf-8" may be buggy on Windows:
# https://bugs.ruby-lang.org/issues/20526
return if ON_WINDOWS
end
return unless Encoding.default_external == Encoding::UTF_8
return if options.key?(:encoding)
return if options.key?(:external_encoding)
return if mode.include?(":")
file_opts[:encoding] = "bom|utf-8"
end

if RUBY_VERSION < "2.7"
def create_stringio(str, mode, opts)
opts.delete_if {|k, _| k == :universal_newline or DEFAULT_OPTIONS.key?(k)}
raise ArgumentError, "Unsupported options parsing StringIO: #{opts.keys}" unless opts.empty?
StringIO.new(str, mode)
end
else
def create_stringio(str, mode, opts)
StringIO.new(str, mode, **opts)
end
end
end

# :call-seq:
Expand Down
18 changes: 18 additions & 0 deletions test/csv/interface/test_read.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,24 @@ def test_foreach
assert_equal(@rows, rows)
end

def test_foreach_stringio
string_io = StringIO.new(@data)
rows = CSV.foreach(string_io, col_sep: "\t", row_sep: "\r\n").to_a
assert_equal(@rows, rows)
end

def test_foreach_stringio_with_bom
if RUBY_VERSION < "2.7"
# Support to StringIO was dropped for Ruby 2.6 and earlier without BOM support:
# https://github.com/ruby/stringio/pull/47
omit("StringIO's BOM support isn't available with Ruby < 2.7")
end

string_io = StringIO.new("\ufeff#{@data}") # U+FEFF ZERO WIDTH NO-BREAK SPACE
rows = CSV.foreach(string_io, col_sep: "\t", row_sep: "\r\n").to_a
assert_equal(@rows, rows)
end

if respond_to?(:ractor)
ractor
def test_foreach_in_ractor
Expand Down

0 comments on commit ce91198

Please sign in to comment.