Skip to content

Commit

Permalink
IO#set_encoding_by_bom
Browse files Browse the repository at this point in the history
* io.c (rb_io_set_encoding_by_bom): add IO#set_encoding_by_bom, which
  sets the encoding from the BOM if one exists.  [Bug #15210]
  • Loading branch information
nobu committed Jun 13, 2019
1 parent bdc8b37 commit e717d6f
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 3 deletions.
7 changes: 7 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ GC::
Details on the algorithm and caveats can be found here:
https://bugs.ruby-lang.org/issues/15626

IO::

New method::

* Added IO#set_encoding_by_bom to check the BOM and set the external
encoding. [Bug #15210]

Integer::

Modified method::
Expand Down
44 changes: 41 additions & 3 deletions io.c
Original file line number Diff line number Diff line change
Expand Up @@ -6170,20 +6170,23 @@ io_strip_bom(VALUE io)
return 0;
}

/*
 * NOTE(review): this hunk is rendered without +/- markers, so lines from
 * before and after the change appear interleaved.  The bare `static void`
 * line and the first io_encoding_set() call look like the removed lines
 * (the page reports 3 deletions in io.c) -- confirm against the raw diff.
 *
 * Calls io_strip_bom() on +io+ and, when it yields a non-zero encoding
 * index, looks up the corresponding encoding and hands it to
 * io_encoding_set() together with the IO's current internal encoding.
 * When the index is 0 (no BOM recognised), fptr->encs.enc2 is reset to
 * NULL instead.
 *
 * Returns the encoding looked up from the index, or NULL when
 * io_strip_bom() returned 0.
 */
static void
static rb_encoding *
io_set_encoding_by_bom(VALUE io)
{
/* Non-zero idx is the encoding index reported for the BOM; 0 means none. */
int idx = io_strip_bom(io);
rb_io_t *fptr;
/* Stays NULL unless a BOM was found, so callers can test the result. */
rb_encoding *extenc = NULL;

GetOpenFile(io, fptr);
if (idx) {
io_encoding_set(fptr, rb_enc_from_encoding(rb_enc_from_index(idx)),
rb_io_internal_encoding(io), Qnil);
/* Keep the looked-up encoding so it can be both applied and returned. */
extenc = rb_enc_from_index(idx);
io_encoding_set(fptr, rb_enc_from_encoding(extenc),
rb_io_internal_encoding(io), Qnil);
}
else {
/* No BOM: clear the second encoding slot. */
fptr->encs.enc2 = NULL;
}
return extenc;
}

static VALUE
Expand Down Expand Up @@ -8306,6 +8309,40 @@ rb_io_initialize(int argc, VALUE *argv, VALUE io)
return io;
}

/*
 *  call-seq:
 *     ios.set_encoding_by_bom   -> encoding or nil
 *
 *  Checks if +ios+ starts with a BOM, and if so consumes it and sets
 *  the external encoding.  Returns the detected encoding if a BOM was
 *  found, or +nil+ otherwise.
 *
 *  Raises ArgumentError if +ios+ is not in binmode, if an encoding
 *  conversion is set, or if its encoding has already been set to
 *  something other than ASCII-8BIT.
 *
 *    File.write("bom.txt", "\u{FEFF}abc")
 *    ios = File.open("bom.txt", "rb")
 *    ios.set_encoding_by_bom    #=>  #<Encoding:UTF-8>
 *
 *    File.write("nobom.txt", "abc")
 *    ios = File.open("nobom.txt", "rb")
 *    ios.set_encoding_by_bom    #=>  nil
 */

static VALUE
rb_io_set_encoding_by_bom(VALUE io)
{
    rb_io_t *fptr;

    GetOpenFile(io, fptr);
    if (!(fptr->mode & FMODE_BINMODE)) {
        rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode");
    }
    if (fptr->encs.enc2) {
        rb_raise(rb_eArgError, "encoding conversion is set");
    }
    else if (fptr->encs.enc && fptr->encs.enc != rb_ascii8bit_encoding()) {
        /*
         * The documentation promises an exception when an encoding has
         * already been chosen.  ASCII-8BIT is what plain binmode yields,
         * so it does not count as an explicit choice.
         */
        rb_raise(rb_eArgError, "encoding is set to %s already",
                 rb_enc_name(fptr->encs.enc));
    }
    /* NULL from the helper means no BOM was found; nothing to report. */
    if (!io_set_encoding_by_bom(io)) return Qnil;
    return rb_enc_from_encoding(fptr->encs.enc);
}

/*
* call-seq:
* File.new(filename, mode="r" [, opt]) -> file
Expand Down Expand Up @@ -13319,6 +13356,7 @@ Init_IO(void)
rb_define_method(rb_cIO, "external_encoding", rb_io_external_encoding, 0);
rb_define_method(rb_cIO, "internal_encoding", rb_io_internal_encoding, 0);
rb_define_method(rb_cIO, "set_encoding", rb_io_set_encoding, -1);
rb_define_method(rb_cIO, "set_encoding_by_bom", rb_io_set_encoding_by_bom, 0);

rb_define_method(rb_cIO, "autoclose?", rb_io_autoclose_p, 0);
rb_define_method(rb_cIO, "autoclose=", rb_io_set_autoclose, 1);
Expand Down
8 changes: 8 additions & 0 deletions test/ruby/test_io_m17n.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2097,6 +2097,10 @@ def test_w_xml_attr
assert_equal(Encoding::UTF_8, result.encoding, message)
assert_equal(stripped, result, message)
end

File.open(path, "rb") {|f|
assert_equal(Encoding.find(name), f.set_encoding_by_bom)
}
}
end
end
Expand Down Expand Up @@ -2139,6 +2143,10 @@ def test_strip_bom_no_bom
assert_equal(stripped, result, bug8323)
result = File.read(path, encoding: 'BOM|UTF-8:UTF-8')
assert_equal(stripped, result, bug8323)

File.open(path, "rb") {|f|
assert_nil(f.set_encoding_by_bom)
}
}
end

Expand Down

0 comments on commit e717d6f

Please sign in to comment.