Skip to content

Commit

Permalink
Merge pull request #2423 from noahgibbs/ripper_compat_equiv_testing
Browse files Browse the repository at this point in the history
Aliases for Ripper Translation
  • Loading branch information
kddnewton authored Feb 15, 2024
2 parents cf821ee + cfd4f28 commit 0e0a50c
Show file tree
Hide file tree
Showing 3 changed files with 229 additions and 43 deletions.
2 changes: 1 addition & 1 deletion bin/prism
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ module Prism
source, filepath = read_source(argv)

ripper = Ripper.sexp_raw(source)
prism = Prism::RipperCompat.sexp_raw(source)
prism = Prism::Translation::Ripper.sexp_raw(source) rescue :parse_error

puts "Ripper:"
pp ripper
Expand Down
169 changes: 144 additions & 25 deletions lib/prism/translation/ripper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,44 @@ def visit_break_node(node)
on_break(on_args_add_block(args_val, false))
end

# Visit an AliasMethodNode.
def visit_alias_method_node(node)
# For both the old and new name, if there is a colon in the symbol
# name (e.g. 'alias :foo :bar') then we do *not* emit the [:symbol] wrapper around
# the lexer token (e.g. :@ident) inside [:symbol_literal]. But if there
# is no colon (e.g. 'alias foo bar') then we *do* still emit the [:symbol] wrapper.

if node.new_name.is_a?(SymbolNode) && !node.new_name.opening
new_name_val = visit_symbol_literal_node(node.new_name, no_symbol_wrapper: true)
else
new_name_val = visit(node.new_name)
end
if node.old_name.is_a?(SymbolNode) && !node.old_name.opening
old_name_val = visit_symbol_literal_node(node.old_name, no_symbol_wrapper: true)
else
old_name_val = visit(node.old_name)
end

on_alias(new_name_val, old_name_val)
end

# Visit an AliasGlobalVariableNode.
def visit_alias_global_variable_node(node)
on_var_alias(visit(node.new_name), visit(node.old_name))
end

# Visit a GlobalVariableReadNode.
def visit_global_variable_read_node(node)
bounds(node.location)
on_gvar(node.name.to_s)
end

# Visit a BackReferenceReadNode.
def visit_back_reference_read_node(node)
bounds(node.location)
on_backref(node.name.to_s)
end

# Visit an AndNode.
def visit_and_node(node)
visit_binary_operator(node)
Expand Down Expand Up @@ -326,23 +364,7 @@ def visit_x_string_node(node)

# Visit an InterpolatedStringNode node.
def visit_interpolated_string_node(node)
parts = node.parts.map do |part|
case part
when StringNode
bounds(part.content_loc)
on_tstring_content(part.content)
when EmbeddedStatementsNode
on_string_embexpr(visit(part))
else
raise NotImplementedError, "Unexpected node type in InterpolatedStringNode"
end
end

string_list = parts.inject(on_string_content) do |items, item|
on_string_add(items, item)
end

on_string_literal(string_list)
on_string_literal(visit_enumerated_node(node))
end

# Visit an EmbeddedStatementsNode node.
Expand All @@ -352,15 +374,12 @@ def visit_embedded_statements_node(node)

# Visit a SymbolNode node.
def visit_symbol_node(node)
if (opening = node.opening) && (['"', "'"].include?(opening[-1]) || opening.start_with?("%s"))
bounds(node.value_loc)
tstring_val = on_tstring_content(node.value.to_s)
return on_dyna_symbol(on_string_add(on_string_content, tstring_val))
end
visit_symbol_literal_node(node)
end

bounds(node.value_loc)
ident_val = on_ident(node.value.to_s)
on_symbol_literal(on_symbol(ident_val))
# Visit an InterpolatedSymbolNode node.
def visit_interpolated_symbol_node(node)
on_dyna_symbol(visit_enumerated_node(node))
end

# Visit a StatementsNode node.
Expand Down Expand Up @@ -459,6 +478,25 @@ def visit_elements(elements)
end
end

# Visit an InterpolatedStringNode or an InterpolatedSymbolNode node.
def visit_enumerated_node(node)
parts = node.parts.map do |part|
case part
when StringNode
bounds(part.content_loc)
on_tstring_content(part.content)
when EmbeddedStatementsNode
on_string_embexpr(visit(part))
else
raise NotImplementedError, "Unexpected node type in visit_enumerated_node"
end
end

parts.inject(on_string_content) do |items, item|
on_string_add(items, item)
end
end

# Visit an operation-and-assign node, such as +=.
def visit_binary_op_assign(node, operator: node.operator)
bounds(node.name_loc)
Expand Down Expand Up @@ -487,6 +525,87 @@ def visit_aref_field_node(node)
on_assign(on_aref_field(visit(node.receiver), args_val), assign_val)
end

# In an alias statement Ripper will emit @kw instead of @ident if the object
# being aliased is a Ruby keyword. For instance, in the line "alias :foo :if",
# the :if is treated as a lexer keyword. So we need to know what symbols are
# also keywords.
RUBY_KEYWORDS = [
"alias",
"and",
"begin",
"BEGIN",
"break",
"case",
"class",
"def",
"defined?",
"do",
"else",
"elsif",
"end",
"END",
"ensure",
"false",
"for",
"if",
"in",
"module",
"next",
"nil",
"not",
"or",
"redo",
"rescue",
"retry",
"return",
"self",
"super",
"then",
"true",
"undef",
"unless",
"until",
"when",
"while",
"yield",
"__ENCODING__",
"__FILE__",
"__LINE__",
]

# Ripper has several methods of emitting a symbol literal. Inside an alias
# sometimes it suppresses the [:symbol] wrapper around ident. If the symbol
# is also the name of a keyword (e.g. :if) it will emit a :@kw wrapper, not
# an :@ident wrapper, with similar treatment for constants and operators.
def visit_symbol_literal_node(node, no_symbol_wrapper: false)
if (opening = node.opening) && (['"', "'"].include?(opening[-1]) || opening.start_with?("%s"))
bounds(node.value_loc)
str_val = node.value.to_s
if str_val == ""
return on_dyna_symbol(on_string_content)
else
tstring_val = on_tstring_content(str_val)
return on_dyna_symbol(on_string_add(on_string_content, tstring_val))
end
end

bounds(node.value_loc)
node_name = node.value.to_s
if RUBY_KEYWORDS.include?(node_name)
token_val = on_kw(node_name)
elsif node_name.length == 0
raise NotImplementedError
elsif /[[:upper:]]/.match(node_name[0])
token_val = on_const(node_name)
elsif /[[:punct:]]/.match(node_name[0])
token_val = on_op(node_name)
else
token_val = on_ident(node_name)
end
sym_val = no_symbol_wrapper ? token_val : on_symbol(token_val)
on_symbol_literal(sym_val)
end

# Visit a node that represents a number. We need to explicitly handle the
# unary - operator.
def visit_number(node)
Expand Down
101 changes: 84 additions & 17 deletions test/prism/ripper_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,50 @@

module Prism
class RipperTest < TestCase
def truffleruby?
RUBY_ENGINE == "truffleruby"
end

def windows?
Gem.win_platform?
end

# Ripper produces certain ambiguous structures. For instance, it often
# adds an :args_add_block with "false" as the block meaning there is
# no block call. It can be hard to tell which of multiple equivalent
# structures it will produce. This method attempts to return a normalized
# comparable structure.
def normalized_sexp(parsed)
if parsed.is_a?(Array)
# For args_add_block, if the third entry is nil or false, remove it.
# Note that CRuby Ripper uses false for no block, while older JRuby
# uses nil. We need to do this for both.
return normalized_sexp(parsed[1]) if parsed[0] == :args_add_block && !parsed[2]

parsed.each.with_index do |item, idx|
if item.is_a?(Array)
parsed[idx] = normalized_sexp(parsed[idx])
end
end
end

parsed
end

def assert_ripper_equivalent(source, path: "inline source code")
expected = Ripper.sexp_raw(source)

refute_nil expected, "Could not parse #{path} with Ripper!"
expected = normalized_sexp(expected)
actual = Prism::Translation::Ripper.sexp_raw(source)
refute_nil actual, "Could not parse #{path} with Prism!"
actual = normalized_sexp(actual)
assert_equal expected, actual, "Expected Ripper and Prism to give equivalent output for #{path}!"
end

end

class RipperShortSourceTest < RipperTest
def test_binary
assert_equivalent("1 + 2")
assert_equivalent("3 - 4 * 5")
Expand Down Expand Up @@ -36,7 +80,7 @@ def test_method_calls_with_variable_names
assert_equivalent("foo.bar")

# TruffleRuby prints emoji symbols differently in a way that breaks here.
if RUBY_ENGINE != "truffleruby"
unless truffleruby?
assert_equivalent("🗻")
assert_equivalent("🗻.location")
assert_equivalent("foo.🗻")
Expand All @@ -57,9 +101,9 @@ def test_method_calls_with_variable_names
def test_method_call_blocks
assert_equivalent("foo { |a| a }")

# assert_equivalent("foo(bar 1)")
# assert_equivalent("foo bar 1")
# assert_equivalent("foo(bar 1) { 7 }")
assert_equivalent("foo(bar 1)")
assert_equivalent("foo bar 1")
assert_equivalent("foo(bar 1) { 7 }")
end

def test_method_calls_on_immediate_values
Expand All @@ -69,7 +113,7 @@ def test_method_calls_on_immediate_values
assert_equivalent("7 and 7")
assert_equivalent("7 || 7")
assert_equivalent("7 or 7")
#assert_equivalent("'racecar'.reverse")
assert_equivalent("'racecar'.reverse")
end

def test_range
Expand Down Expand Up @@ -142,20 +186,49 @@ def test_assign
assert_equivalent("a = 1")
end

def test_alias
assert_equivalent("alias :foo :bar")
assert_equivalent("alias $a $b")
assert_equivalent("alias $a $'")
assert_equivalent("alias foo bar")
assert_equivalent("alias foo if")
assert_equivalent("alias :'def' :\"abc\#{1}\"")
assert_equivalent("alias :\"abc\#{1}\" :'def'")

unless truffleruby?
assert_equivalent("alias :foo :Ę") # Uppercase Unicode character is a constant
assert_equivalent("alias :Ę :foo")
end

assert_equivalent("alias foo +")
assert_equivalent("alias foo :+")
assert_equivalent("alias :foo :''")
assert_equivalent("alias :'' :foo")
end

# This is *exactly* the kind of thing where Ripper would have a weird
# special case we didn't handle correctly. We're still testing with
# a leading colon since putting random keywords there will often get
# parse errors. Mostly we want to know that Ripper will use :@kw
# instead of :@ident for the lexer symbol for all of these.
def test_keyword_aliases
Prism::Translation::Ripper::RUBY_KEYWORDS.each do |keyword|
assert_equivalent("alias :foo :#{keyword}")
end
end

private

def assert_equivalent(source)
expected = Ripper.sexp_raw(source)

refute_nil expected
assert_equal expected, Prism::Translation::Ripper.sexp_raw(source)
assert_ripper_equivalent(source)
end
end

class RipperFixturesTest < TestCase
class RipperFixturesTest < RipperTest
#base = File.join(__dir__, "fixtures")
#relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base]
relatives = [
"alias.txt",
"arithmetic.txt",
"booleans.txt",
"boolean_operators.txt",
Expand All @@ -172,14 +245,8 @@ class RipperFixturesTest < TestCase
# and explicitly set the external encoding to UTF-8 to override the binmode default.
source = File.read(path, binmode: true, external_encoding: Encoding::UTF_8)

expected = Ripper.sexp_raw(source)
if expected.nil?
puts "Could not parse #{path.inspect}!"
end
refute_nil expected
assert_equal expected, Translation::Ripper.sexp_raw(source)
assert_ripper_equivalent(source, path: path)
end
end

end
end

0 comments on commit 0e0a50c

Please sign in to comment.