From a620d2131550a3d96dd2a848b2a34021bd513039 Mon Sep 17 00:00:00 2001 From: Quinton Miller Date: Mon, 13 Feb 2023 20:31:52 +0800 Subject: [PATCH] Formatter: escape bidirectional control characters within strings --- spec/compiler/formatter/formatter_spec.cr | 30 +++++++++++++++++++++++ src/compiler/crystal/syntax/lexer.cr | 15 ++++++++++++ src/compiler/crystal/tools/formatter.cr | 14 ++++++----- 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/spec/compiler/formatter/formatter_spec.cr b/spec/compiler/formatter/formatter_spec.cr index aa012b32a5f3..dba1d0156c82 100644 --- a/spec/compiler/formatter/formatter_spec.cr +++ b/spec/compiler/formatter/formatter_spec.cr @@ -2446,4 +2446,34 @@ describe Crystal::Formatter do CRYSTAL end end + + # CVE-2021-42574 + describe "Unicode bi-directional control characters" do + ['\u202A', '\u202B', '\u202C', '\u202D', '\u202E', '\u2066', '\u2067', '\u2068', '\u2069'].each do |char| + assert_format %("#{char}"), %("#{char.unicode_escape}") + assert_format %("\\c#{char}"), %("c#{char.unicode_escape}") + assert_format %("#{char}\#{1}"), %("#{char.unicode_escape}\#{1}") + assert_format %("\\c#{char}\#{1}"), %("c#{char.unicode_escape}\#{1}") + assert_format %(%(#{char})), %(%(#{char.unicode_escape})) + assert_format %(%Q(#{char})), %(%Q(#{char.unicode_escape})) + assert_format %(%Q(#{char}\#{1})), %(%Q(#{char.unicode_escape}\#{1})) + assert_format %(<<-EOS\n#{char}\nEOS), %(<<-EOS\n#{char.unicode_escape}\nEOS) + assert_format %(<<-EOS\n#{char}\#{1}\nEOS), %(<<-EOS\n#{char.unicode_escape}\#{1}\nEOS) + assert_format %(def foo("#{char}" x)\nend), %(def foo("#{char.unicode_escape}" x)\nend) + assert_format %(foo("#{char}": 1)), %(foo("#{char.unicode_escape}": 1)) + assert_format %(NamedTuple("#{char}": Int32)), %(NamedTuple("#{char.unicode_escape}": Int32)) + assert_format %({"#{char}": 1}), %({"#{char.unicode_escape}": 1}) + + # the following contexts do not accept escape sequences, escaping these + # control characters would alter the meaning of the source code + assert_format %(/#{char}/) + assert_format %(%r(#{char})) + assert_format %(%q(#{char})) + assert_format %(%w(#{char})) + assert_format %(%i(#{char})) + assert_format %(/#{char}\#{1}/) + assert_format %(%r(#{char}\#{1})) + assert_format %(<<-'EOS'\n#{char}\nEOS) + end + end end diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 92e4f1377f01..a87bee90805c 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -2783,6 +2783,21 @@ module Crystal end end + private UNICODE_BIDI_CONTROL_CHARACTERS = {'\u202A', '\u202B', '\u202C', '\u202D', '\u202E', '\u2066', '\u2067', '\u2068', '\u2069'} + + private FORBIDDEN_ESCAPES = UNICODE_BIDI_CONTROL_CHARACTERS.to_h { |ch| {ch, ch.unicode_escape} } + + # used by the formatter + def self.escape_forbidden_characters(string) + string.each_char do |ch| + if ch.in?(UNICODE_BIDI_CONTROL_CHARACTERS) + return string.gsub(FORBIDDEN_ESCAPES) + end + end + + string + end + def next_char_no_column_increment char = @reader.next_char if error = @reader.error diff --git a/src/compiler/crystal/tools/formatter.cr b/src/compiler/crystal/tools/formatter.cr index 16688fa4331e..dda04edb26ff 100644 --- a/src/compiler/crystal/tools/formatter.cr +++ b/src/compiler/crystal/tools/formatter.cr @@ -487,11 +487,7 @@ module Crystal while true case @token.type when .string? - if @token.invalid_escape - write @token.value - else - write @token.raw - end + write_sanitized_string_body(@token.delimiter_state.allow_escapes && !is_regex) next_string_token when .interpolation_start? # This is the case of #{__DIR__} @@ -530,6 +526,12 @@ module Crystal false end + private def write_sanitized_string_body(escape) + body = @token.invalid_escape ? @token.value.as(String) : @token.raw + body = Lexer.escape_forbidden_characters(body) if escape + write body + end + def visit(node : StringInterpolation) if @token.delimiter_state.kind.heredoc? # For heredoc, only write the start: on a newline will print it @@ -605,7 +607,7 @@ module Crystal else loop do check :STRING - write @token.invalid_escape ? @token.value : @token.raw + write_sanitized_string_body(delimiter_state.allow_escapes && !is_regex) next_string_token # On heredoc, pieces of contents are combined due to removing indentation.