Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: StringIO#write transcodes strings with a different encoding #2927

Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Bug fixes:
* Fix constants lookup when `BasicObject#instance_eval` method is called with a String (#2810, @andrykonchin).
* Don't trigger the `method_added` event when changing a method's visibility or calling `module_function` (@paracycle, @nirvdrum).
* Fix `rb_time_timespec_new` function to not call `Time.at` method directly (@andrykonchin).
* Fix `StringIO#write` to transcode strings with encodings that don't match the `StringIO`'s `external_encoding` (#2839, @flavorjones).

Compatibility:

Expand Down
2 changes: 2 additions & 0 deletions lib/truffle/stringio.rb
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,8 @@ def write(str)
str = String(str)
return 0 if str.empty?

str = Truffle::IOOperations.write_transcoding(str, external_encoding)

d = @__data__
TruffleRuby.synchronized(d) do
pos = d.pos
Expand Down
52 changes: 52 additions & 0 deletions spec/ruby/core/io/shared/write.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,55 @@
end
end
end

# Shared examples for IO write methods that transcode their argument to the
# file's external encoding. @method names the method under test.
describe :io_write_transcode, shared: true do
  before(:each) { @transcode_filename = tmp("io_write_transcode") }

  after(:each) { rm_r @transcode_filename }

  describe "transcoding when UTF-16 encoding is set" do
    it "accepts a UTF-8-encoded string and transcodes it" do
      File.open(@transcode_filename, "w", external_encoding: Encoding::UTF_16BE) do |io|
        io.external_encoding.should == Encoding::UTF_16BE
        io.send(@method, "hello")
      end

      # Read the raw bytes back so no decoding hides what was actually written.
      written_bytes = File.binread(@transcode_filename).bytes

      # "hello" in UTF-16BE: every character is a zero byte followed by its ASCII code.
      written_bytes.should == [0, 104, 0, 101, 0, 108, 0, 108, 0, 111]
    end
  end
end

# Shared examples for IO write methods that write the given string's bytes
# as-is, ignoring the file's external encoding. @method names the method
# under test.
describe :io_write_no_transcode, shared: true do
  before :each do
    @transcode_filename = tmp("io_write_no_transcode")
  end

  after :each do
    rm_r @transcode_filename
  end

  it "does not transcode the given string even when the external encoding is set" do
    utf8_str = "hello"

    File.open(@transcode_filename, "w", external_encoding: Encoding::UTF_16BE) do |file|
      file.external_encoding.should == Encoding::UTF_16BE
      file.send(@method, utf8_str)
    end

    # Read the raw bytes back; they must be exactly the bytes of the input
    # string, i.e. not the double-width UTF-16BE form.
    result = File.binread(@transcode_filename)
    result.bytes.should == utf8_str.bytes
  end
end
1 change: 1 addition & 0 deletions spec/ruby/core/io/syswrite_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,5 @@

describe "IO#syswrite" do
it_behaves_like :io_write, :syswrite
it_behaves_like :io_write_no_transcode, :syswrite
end
1 change: 1 addition & 0 deletions spec/ruby/core/io/write_nonblock_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@

describe "IO#write_nonblock" do
it_behaves_like :io_write, :write_nonblock
it_behaves_like :io_write_no_transcode, :write_nonblock
end
end

Expand Down
1 change: 1 addition & 0 deletions spec/ruby/core/io/write_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@

describe "IO#write" do
it_behaves_like :io_write, :write
it_behaves_like :io_write_transcode, :write

it "accepts multiple arguments" do
IO.pipe do |r, w|
Expand Down
19 changes: 19 additions & 0 deletions spec/ruby/library/stringio/shared/write.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,25 @@
@io.write "fghi"
@io.string.should == "12fghi"
end

# Examples checking that written strings are converted to the StringIO's
# external encoding. @method names the write method under test.
describe "transcoding" do
  describe "when UTF-16 encoding is set" do
    it "accepts a UTF-8-encoded string and transcodes it" do
      target = StringIO.new.set_encoding(Encoding::UTF_16BE)

      target.send(@method, "hello")
      written = target.string

      # "hello" in UTF-16BE: every character is a zero byte followed by its ASCII code.
      utf16be_hello = [0, 104, 0, 101, 0, 108, 0, 108, 0, 111]

      target.external_encoding.should == Encoding::UTF_16BE
      written.bytes.should == utf16be_hello
    end
  end
end
end

describe :stringio_write_not_writable, shared: true do
Expand Down
6 changes: 1 addition & 5 deletions src/main/ruby/truffleruby/core/io.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2324,11 +2324,7 @@ def write(*objects)

ensure_open_and_writable

if external_encoding && external_encoding != string.encoding && external_encoding != Encoding::BINARY
unless string.ascii_only? && external_encoding.ascii_compatible?
string = string.encode(external_encoding)
end
end
string = Truffle::IOOperations.write_transcoding(string, external_encoding)

count = Truffle::POSIX.write_string self, string, true
bytes_written += count
Expand Down
9 changes: 9 additions & 0 deletions src/main/ruby/truffleruby/core/truffle/io_operations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,15 @@ def self.puts(io, *args)
nil
end

# Returns the form of +string+ that should be written to a stream whose
# external encoding is +external_encoding+.
#
# The string is converted with String#encode only when ALL of these hold:
# * +external_encoding+ is set (not nil),
# * it differs from the string's current encoding,
# * it is not Encoding::BINARY, and
# * it is not the case that the string is ASCII-only and the target encoding
#   is ASCII-compatible (the bytes would be identical, so conversion is skipped).
#
# In every other case the original string object is returned untouched.
# Any error raised by String#encode propagates to the caller.
def self.write_transcoding(string, external_encoding)
  if external_encoding && external_encoding != string.encoding && external_encoding != Encoding::BINARY &&
      !(string.ascii_only? && external_encoding.ascii_compatible?)
    string.encode(external_encoding)
  else
    string
  end
end

def self.dup2_with_cloexec(old_fd, new_fd)
if new_fd < 3
# STDIO should not be made close-on-exec. `dup2` clears the close-on-exec bit for the destination FD.
Expand Down