Skip to content

Commit

Permalink
[GR-19220] Fix StringIO#write transcodes strings with a different enc…
Browse files Browse the repository at this point in the history
…oding (#2927)

PullRequest: truffleruby/3700
  • Loading branch information
eregon committed Mar 16, 2023
2 parents 498159d + e538efa commit 9c9f6ba
Show file tree
Hide file tree
Showing 9 changed files with 98 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Bug fixes:
* Fix constants lookup when `BasicObject#instance_eval` method is called with a String (#2810, @andrykonchin).
* Don't trigger the `method_added` event when changing a method's visibility or calling `module_function` (@paracycle, @nirvdrum).
* Fix `rb_time_timespec_new` function to not call `Time.at` method directly (@andrykonchin).
* Fix `StringIO#write` to transcode strings with encodings that don't match the `StringIO`'s `external_encoding` (#2839, @flavorjones).

Compatibility:

Expand Down
8 changes: 8 additions & 0 deletions lib/truffle/stringio.rb
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,14 @@ def write(str)
str = String(str)
return 0 if str.empty?

# difference to IO, see https://github.com/ruby/stringio/blob/009896b973/ext/stringio/stringio.c#L1498-L1506
enc = external_encoding
unless enc == Encoding::BINARY or enc == Encoding::US_ASCII
unless !str.ascii_only? && (str.encoding == Encoding::BINARY || str.encoding == Encoding::US_ASCII)
str = Truffle::IOOperations.write_transcoding(str, enc)
end
end

d = @__data__
TruffleRuby.synchronized(d) do
pos = d.pos
Expand Down
55 changes: 55 additions & 0 deletions spec/ruby/core/io/shared/write.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,58 @@
end
end
end

# Shared examples for IO writer methods that convert the written string to the
# file's external encoding. `@method` names the writer method under test.
describe :io_write_transcode, shared: true do
  before(:each) { @transcode_filename = tmp("io_write_transcode") }

  after(:each) { rm_r @transcode_filename }

  it "transcodes the given string when the external encoding is set and neither is BINARY" do
    File.open(@transcode_filename, "w", external_encoding: Encoding::UTF_16BE) do |out|
      out.external_encoding.should == Encoding::UTF_16BE
      out.send(@method, "hello")
    end

    # UTF-16BE bytes for "hello"
    utf16be_hello = [0, 104, 0, 101, 0, 108, 0, 108, 0, 111]
    File.binread(@transcode_filename).bytes.should == utf16be_hello
  end

  # The BINARY string holds non-ASCII bytes, so the write is expected to fail
  # with a conversion error rather than emit the bytes untouched.
  it "transcodes the given string when the external encoding is set and the string encoding is BINARY" do
    binary = "été".b

    File.open(@transcode_filename, "w", external_encoding: Encoding::UTF_16BE) do |out|
      out.external_encoding.should == Encoding::UTF_16BE
      -> { out.send(@method, binary) }.should raise_error(Encoding::UndefinedConversionError)
    end
  end
end

# Shared examples for IO writer methods that write the string's bytes as-is,
# ignoring the file's external encoding. `@method` names the writer method under test.
describe :io_write_no_transcode, shared: true do
  before(:each) { @transcode_filename = tmp("io_write_no_transcode") }

  after(:each) { rm_r @transcode_filename }

  it "does not transcode the given string even when the external encoding is set" do
    text = "hello"

    File.open(@transcode_filename, "w", external_encoding: Encoding::UTF_16BE) do |out|
      out.external_encoding.should == Encoding::UTF_16BE
      out.send(@method, text)
    end

    # The file must contain exactly the bytes of the original string.
    written = File.binread(@transcode_filename)
    written.bytes.should == text.bytes
  end
end
1 change: 1 addition & 0 deletions spec/ruby/core/io/syswrite_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,5 @@

# IO#syswrite runs the shared :io_write examples and, through
# :io_write_no_transcode, is expected to write the string's bytes without
# converting them to the external encoding.
describe "IO#syswrite" do
it_behaves_like :io_write, :syswrite
it_behaves_like :io_write_no_transcode, :syswrite
end
1 change: 1 addition & 0 deletions spec/ruby/core/io/write_nonblock_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@

# IO#write_nonblock runs the shared :io_write examples and, through
# :io_write_no_transcode, is expected to write the string's bytes without
# converting them to the external encoding.
describe "IO#write_nonblock" do
it_behaves_like :io_write, :write_nonblock
it_behaves_like :io_write_no_transcode, :write_nonblock
end
end

Expand Down
1 change: 1 addition & 0 deletions spec/ruby/core/io/write_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@

describe "IO#write" do
it_behaves_like :io_write, :write
it_behaves_like :io_write_transcode, :write

it "accepts multiple arguments" do
IO.pipe do |r, w|
Expand Down
21 changes: 21 additions & 0 deletions spec/ruby/library/stringio/shared/write.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,27 @@
@io.write "fghi"
@io.string.should == "12fghi"
end

# An ASCII-only string written to a UTF-16BE StringIO must be stored as UTF-16BE bytes.
it "transcodes the given string when the external encoding is set and neither is BINARY" do
  target = Encoding::UTF_16BE
  io = StringIO.new.set_encoding(target)
  io.external_encoding.should == target

  io.send(@method, "hello")

  # UTF-16BE bytes for "hello"
  io.string.bytes.should == [0, 104, 0, 101, 0, 108, 0, 108, 0, 111]
end

# A BINARY string with non-ASCII bytes must be stored byte-for-byte, even though
# the StringIO's external encoding is UTF-16BE.
it "does not transcode the given string when the external encoding is set and the string encoding is BINARY" do
  binary = "été".b
  target = Encoding::UTF_16BE
  io = StringIO.new.set_encoding(target)
  io.external_encoding.should == target

  io.send(@method, binary)

  io.string.bytes.should == binary.bytes
end
end

describe :stringio_write_not_writable, shared: true do
Expand Down
6 changes: 1 addition & 5 deletions src/main/ruby/truffleruby/core/io.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2324,11 +2324,7 @@ def write(*objects)

ensure_open_and_writable

if external_encoding && external_encoding != string.encoding && external_encoding != Encoding::BINARY
unless string.ascii_only? && external_encoding.ascii_compatible?
string = string.encode(external_encoding)
end
end
string = Truffle::IOOperations.write_transcoding(string, external_encoding)

count = Truffle::POSIX.write_string self, string, true
bytes_written += count
Expand Down
9 changes: 9 additions & 0 deletions src/main/ruby/truffleruby/core/truffle/io_operations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,15 @@ def self.puts(io, *args)
nil
end

# Returns the string to write for a stream whose external encoding is
# `external_encoding`.
#
# `string` is returned unchanged when no external encoding is set, when it is
# already in that encoding, when the external encoding is BINARY, or when the
# string is ASCII-only and the external encoding is ASCII-compatible.
# Otherwise the result of `string.encode(external_encoding)` is returned, and
# any error raised by `String#encode` propagates to the caller.
def self.write_transcoding(string, external_encoding)
  return string unless external_encoding
  return string if external_encoding == string.encoding
  return string if external_encoding == Encoding::BINARY
  # ASCII-only content has the same bytes in every ASCII-compatible encoding.
  return string if string.ascii_only? && external_encoding.ascii_compatible?

  string.encode(external_encoding)
end

def self.dup2_with_cloexec(old_fd, new_fd)
if new_fd < 3
# STDIO should not be made close-on-exec. `dup2` clears the close-on-exec bit for the destination FD.
Expand Down

0 comments on commit 9c9f6ba

Please sign in to comment.