crystal-lang · straight-shoota · Aug 24, 2021 · Feb 27, 2021 · Feb 27, 2021 · Feb 27, 2021
diff --git a/spec/std/base64_spec.cr b/spec/std/base64_spec.cr
@@ -87,7 +87,7 @@ describe "Base64" do
 
     it "works for most characters" do
       a = String.build(65536 * 4) do |buf|
-        65536.times { |i| buf << (i + 1).chr }
+        65536.times { |i| buf << (i + 1).unsafe_chr }
       end
       b = Base64.encode(a)
       Crystal::Digest::MD5.hexdigest(Base64.decode_string(b)).should eq(Crystal::Digest::MD5.hexdigest(a))

diff --git a/spec/std/char_spec.cr b/spec/std/char_spec.cr
@@ -286,12 +286,6 @@ describe "Char" do
     it "does for unicode" do
       '青'.bytesize.should eq(3)
     end
-
-    it "raises on codepoint bigger than 0x10ffff" do
-      expect_raises InvalidByteSequenceError do
-        (0x10ffff + 1).unsafe_chr.bytesize
-      end
-    end
   end
 
   describe "in_set?" do
@@ -338,12 +332,6 @@ describe "Char" do
     end
   end
 
-  it "raises on codepoint bigger than 0x10ffff when doing each_byte" do
-    expect_raises InvalidByteSequenceError do
-      (0x10ffff + 1).unsafe_chr.each_byte { |b| }
-    end
-  end
-
   it "does each_byte" do
     'a'.each_byte(&.should eq('a'.ord)).should be_nil
   end

diff --git a/spec/std/int_spec.cr b/spec/std/int_spec.cr
@@ -799,9 +799,17 @@ describe "Int" do
   it "#chr" do
     65.chr.should eq('A')
 
-    expect_raises(ArgumentError, "#{0x10ffff + 1} out of char range") do
+    expect_raises(ArgumentError, "0x110000 out of char range") do
       (0x10ffff + 1).chr
     end
+
+    expect_raises(ArgumentError, "0xd800 out of char range") do
+      0xd800.chr
+    end
+
+    expect_raises(ArgumentError, "0xdfff out of char range") do
+      0xdfff.chr
+    end
   end
 
   it "#unsafe_chr" do

diff --git a/src/char.cr b/src/char.cr
@@ -717,14 +717,12 @@ struct Char
       yield (0xe0 | (c >> 12)).to_u8
       yield (0x80 | ((c >> 6) & 0x3f)).to_u8
       yield (0x80 | (c & 0x3f)).to_u8
-    elsif c <= MAX_CODEPOINT
+    else
       # 11110xxx  10xxxxxx  10xxxxxx  10xxxxxx
       yield (0xf0 | (c >> 18)).to_u8
       yield (0x80 | ((c >> 12) & 0x3f)).to_u8
       yield (0x80 | ((c >> 6) & 0x3f)).to_u8
       yield (0x80 | (c & 0x3f)).to_u8
-    else
-      raise InvalidByteSequenceError.new("Invalid char value #{dump}")
     end
   end
 
@@ -747,11 +745,9 @@ struct Char
     elsif c <= 0xffff
       # 1110xxxx  10xxxxxx  10xxxxxx
       3
-    elsif c <= MAX_CODEPOINT
+    else
       # 11110xxx  10xxxxxx  10xxxxxx  10xxxxxx
       4
-    else
-      raise InvalidByteSequenceError.new("Invalid char value #{dump}")
     end
   end
 

diff --git a/src/http/common.cr b/src/http/common.cr
@@ -387,7 +387,7 @@ module HTTP
     String.build do |io|
       while quoted_pair_index
         io.write(data[0, quoted_pair_index])
-        io << data[quoted_pair_index + 1].chr
+        io << data[quoted_pair_index + 1].unsafe_chr
 
         data += quoted_pair_index + 2
         quoted_pair_index = data.index('\\'.ord)

diff --git a/src/int.cr b/src/int.cr
@@ -64,14 +64,15 @@ struct Int
 
   # Returns a `Char` that has the unicode codepoint of `self`.
   #
-  # Raises `ArgumentError` if this integer's value doesn't fit a char's range (`0..0x10ffff`).
+  # Raises `ArgumentError` if this integer's value doesn't fit a char's range
+  # (`0..0xd7ff` and `0xe000..0x10ffff`).
   #
   # ```
   # 97.chr # => 'a'
   # ```
   def chr : Char
-    unless 0 <= self <= Char::MAX_CODEPOINT
-      raise ArgumentError.new("#{self} out of char range")
+    unless 0 <= self <= 0xd7ff || 0xe000 <= self <= Char::MAX_CODEPOINT
+      raise ArgumentError.new("0x#{self.to_s(16)} out of char range")
     end
     unsafe_chr
   end

diff --git a/src/primitives.cr b/src/primitives.cr
@@ -331,7 +331,7 @@ end
     struct {{int.id}}
       # Returns a `Char` that has the unicode codepoint of `self`,
       # without checking if this integer is in the range valid for
-      # chars (`0..0x10ffff`).
+      # chars (`0..0xd7ff` and `0xe000..0x10ffff`).
       #
       # You should never use this method unless `chr` turns out to
       # be a bottleneck.

diff --git a/src/string.cr b/src/string.cr
@@ -712,18 +712,18 @@ class String
     unless v.finite?
       startptr = to_unsafe
       if whitespace
-        while startptr.value.chr.ascii_whitespace?
+        while startptr.value.unsafe_chr.ascii_whitespace?
           startptr += 1
         end
       end
-      if startptr.value.chr.in?('+', '-')
+      if startptr.value.unsafe_chr.in?('+', '-')
         startptr += 1
       end
 
       if v.nan?
-        return unless startptr.value.chr.in?('n', 'N')
+        return unless startptr.value.unsafe_chr.in?('n', 'N')
       else
-        return unless startptr.value.chr.in?('i', 'I')
+        return unless startptr.value.unsafe_chr.in?('i', 'I')
       end
     end
 
@@ -734,7 +734,7 @@ class String
 
     if strict
       if whitespace
-        while endptr < string_end && endptr.value.chr.ascii_whitespace?
+        while endptr < string_end && endptr.value.unsafe_chr.ascii_whitespace?
           endptr += 1
         end
       end
@@ -743,7 +743,7 @@ class String
     else
       ptr = to_unsafe
       if whitespace
-        while ptr < string_end && ptr.value.chr.ascii_whitespace?
+        while ptr < string_end && ptr.value.unsafe_chr.ascii_whitespace?
           ptr += 1
         end
       end