Skip to content

Commit

Permalink
Speed up creating Ruby AST
Browse files Browse the repository at this point in the history
When creating the Ruby AST, we were previously allocating Location
objects for every node and every inner location. Instead, this
commit changes it to pack both the start offset and length into a
single u64 and pass that into the nodes. Then, when the locations
are requested via a reader method, we lazily allocate the Location
objects.

Co-Authored-By: Aaron Patterson <tenderlove@ruby-lang.org>
  • Loading branch information
kddnewton and tenderlove committed Feb 15, 2024
1 parent 0adb234 commit de203dc
Show file tree
Hide file tree
Showing 8 changed files with 311 additions and 120 deletions.
305 changes: 225 additions & 80 deletions lib/prism/desugar_compiler.rb

Large diffs are not rendered by default.

16 changes: 12 additions & 4 deletions lib/prism/parse_result.rb
Original file line number Diff line number Diff line change
Expand Up @@ -452,17 +452,19 @@ def failure?

# This represents a token from the Ruby source.
class Token
# The Source object that represents the source this token came from.
attr_reader :source
private :source

# The type of token that this token is.
attr_reader :type

# A byteslice of the source that this token represents.
attr_reader :value

# A Location object representing the location of this token in the source.
attr_reader :location

# Create a new token object with the given type, value, and location.
def initialize(type, value, location)
def initialize(source, type, value, location)
@source = source
@type = type
@value = value
@location = location
Expand All @@ -473,6 +475,12 @@ def deconstruct_keys(keys)
{ type: type, value: value, location: location }
end

# A Location object representing the location of this token in the source.
def location
return @location if @location.is_a?(Location)
@location = Location.new(source, @location >> 32, @location & 0xFFFFFFFF)
end

# Implement the pretty print interface for Token.
def pretty_print(q)
q.group do
Expand Down
2 changes: 1 addition & 1 deletion lib/prism/translation/parser/compiler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1477,7 +1477,7 @@ def visit_statements_node(node)
# ^^^^^
def visit_string_node(node)
if node.opening&.start_with?("<<")
children, closing = visit_heredoc(InterpolatedStringNode.new(node.opening_loc, [node.copy(opening_loc: nil, closing_loc: nil, location: node.content_loc)], node.closing_loc, node.location))
children, closing = visit_heredoc(InterpolatedStringNode.new(node.send(:source), node.opening_loc, [node.copy(opening_loc: nil, closing_loc: nil, location: node.content_loc)], node.closing_loc, node.location))
builder.string_compose(token(node.opening_loc), children, closing)
elsif node.opening == "?"
builder.character([node.unescaped, srange(node.location)])
Expand Down
26 changes: 15 additions & 11 deletions templates/ext/prism/api_node.c.erb
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,24 @@ static VALUE rb_cPrism<%= node.name %>;
<%- end -%>

static VALUE
pm_location_new(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source) {
VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(end - start) };
return rb_class_new_instance(3, argv, rb_cPrismLocation);
pm_location_new(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
uint64_t value = ((((uint64_t) (start - parser->start)) << 32) | ((uint32_t) (end - start)));
return ULL2NUM(value);
}

VALUE
pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source) {
ID type = rb_intern(pm_token_type_name(token->type));
VALUE location = pm_location_new(parser, token->start, token->end, source);
VALUE location = pm_location_new(parser, token->start, token->end);

VALUE argv[] = {
source,
ID2SYM(type),
rb_enc_str_new((const char *) token->start, token->end - token->start, encoding),
location
};

return rb_class_new_instance(3, argv, rb_cPrismToken);
return rb_class_new_instance(4, argv, rb_cPrismToken);
}

static VALUE
Expand Down Expand Up @@ -144,8 +145,11 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE so
<%- if node.fields.any? { |field| ![Prism::NodeField, Prism::OptionalNodeField, Prism::FlagsField].include?(field.class) } -%>
pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
<%- end -%>
VALUE argv[<%= node.fields.length + 1 %>];
<%- node.fields.each_with_index do |field, index| -%>
VALUE argv[<%= node.fields.length + 2 %>];

// source
argv[0] = source;
<%- node.fields.each.with_index(1) do |field, index| -%>

// <%= field.name %>
<%- case field -%>
Expand Down Expand Up @@ -176,10 +180,10 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE so
}
<%- when Prism::LocationField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end);
<%- when Prism::OptionalLocationField -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end);
<%- when Prism::UInt8Field -%>
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = UINT2NUM(cast-><%= field.name %>);
Expand All @@ -195,9 +199,9 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding, VALUE so
<%- end -%>

// location
argv[<%= node.fields.length %>] = pm_location_new(parser, node->location.start, node->location.end, source);
argv[<%= node.fields.length + 1 %>] = pm_location_new(parser, node->location.start, node->location.end);

rb_ary_push(value_stack, rb_class_new_instance(<%= node.fields.length + 1 %>, argv, rb_cPrism<%= node.name %>));
rb_ary_push(value_stack, rb_class_new_instance(<%= node.fields.length + 2 %>, argv, rb_cPrism<%= node.name %>));
break;
}
<%- end -%>
Expand Down
13 changes: 8 additions & 5 deletions templates/lib/prism/dsl.rb.erb
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,23 @@ module Prism
# Prism::IntegerNode.new(
# Prism::IntegerBaseFlags::DECIMAL,
# Prism::Location.new(source, 1, 1),
# source
# )
# ],
# Prism::Location.new(source, 0, 1),
# Prism::Location.new(source, 2, 1)
# Prism::Location.new(source, 2, 1),
# source
# )
#
# you could instead write:
#
# source = Prism::Source.new("[1]")
#
# ArrayNode(
# IntegerNode(Prism::IntegerBaseFlags::DECIMAL, Location(source, 1, 1))),
# IntegerNode(Prism::IntegerBaseFlags::DECIMAL, Location(source, 1, 1)), source),
# Location(source, 0, 1),
# Location(source, 2, 1)
# Location(source, 2, 1),
# source
# )
#
# This is mostly helpful in the context of writing tests, but can also be used
Expand All @@ -37,8 +40,8 @@ module Prism
<%- nodes.each do |node| -%>
# Create a new <%= node.name %> node
def <%= node.name %>(<%= (node.fields.map(&:name) + ["location = Location()"]).join(", ") %>)
<%= node.name %>.new(<%= (node.fields.map(&:name) + ["location"]).join(", ") %>)
def <%= node.name %>(<%= (node.fields.map(&:name) + ["source = nil, location = Location()"]).join(", ") %>)
<%= node.name %>.new(<%= ["source", *node.fields.map(&:name), "location"].join(", ") %>)
end
<%- end -%>
end
Expand Down
59 changes: 44 additions & 15 deletions templates/lib/prism/node.rb.erb
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,16 @@ module Prism
# This represents a node in the tree. It is the parent class of all of the
# various node types.
class Node
# A pointer to the source that this node was created from.
attr_reader :source
private :source

# A Location instance that represents the location of this node in the
# source.
attr_reader :location
def location
return @location if @location.is_a?(Location)
@location = Location.new(source, @location >> 32, @location & 0xFFFFFFFF)
end

def newline? # :nodoc:
@newline ? true : false
Expand Down Expand Up @@ -52,7 +59,7 @@ module Prism
# Returns an array of child nodes, including `nil`s in the place of optional
# nodes that were not present.
def child_nodes
raise NoMethodError, "undefined method `#{__method__}' for #{inspect}"
raise NoMethodError, "undefined method `child_nodes' for #{inspect}"
end

alias deconstruct child_nodes
Expand Down Expand Up @@ -81,24 +88,14 @@ module Prism
#<%= line %>
<%- end -%>
class <%= node.name -%> < Node
<%- node.fields.each do |field| -%>
<%- if field.comment.nil? -%>
# <%= "private " if field.is_a?(Prism::FlagsField) %>attr_reader <%= field.name %>: <%= field.rbs_class %>
<%- else -%>
<%- field.each_comment_line do |line| -%>
#<%= line %>
<%- end -%>
<%- end -%>
attr_reader :<%= field.name -%><%= "\n private :#{field.name}" if field.is_a?(Prism::FlagsField) %>

<%- end -%>
# def initialize: (<%= (node.fields.map { |field| "#{field.rbs_class} #{field.name}" } + ["Location location"]).join(", ") %>) -> void
def initialize(<%= (node.fields.map(&:name) + ["location"]).join(", ") %>)
def initialize(source, <%= (node.fields.map(&:name) + ["location"]).join(", ") %>)
@source = source
@newline = false
@location = location
<%- node.fields.each do |field| -%>
@<%= field.name %> = <%= field.name %>
<%- end -%>
@location = location
end
# def accept: (Visitor visitor) -> void
Expand Down Expand Up @@ -173,6 +170,7 @@ module Prism
# def copy: (**params) -> <%= node.name %>
def copy(**params)
<%= node.name %>.new(
source,
<%- (node.fields.map(&:name) + ["location"]).map do |name| -%>
params.fetch(:<%= name %>) { <%= name %> },
<%- end -%>
Expand All @@ -186,6 +184,37 @@ module Prism
def deconstruct_keys(keys)
{ <%= (node.fields.map { |field| "#{field.name}: #{field.name}" } + ["location: location"]).join(", ") %> }
end

<%- node.fields.each do |field| -%>
<%- if field.comment.nil? -%>
# <%= "private " if field.is_a?(Prism::FlagsField) %>attr_reader <%= field.name %>: <%= field.rbs_class %>
<%- else -%>
<%- field.each_comment_line do |line| -%>
#<%= line %>
<%- end -%>
<%- end -%>
<%- case field -%>
<%- when Prism::LocationField -%>
def <%= field.name %>
return @<%= field.name %> if @<%= field.name %>.is_a?(Location)
@<%= field.name %> = Location.new(source, @<%= field.name %> >> 32, @<%= field.name %> & 0xFFFFFFFF)
end
<%- when Prism::OptionalLocationField -%>
def <%= field.name %>
case @<%= field.name %>
when nil
nil
when Location
@<%= field.name %>
else
@<%= field.name %> = Location.new(source, @<%= field.name %> >> 32, @<%= field.name %> & 0xFFFFFFFF)
end
end
<%- else -%>
attr_reader :<%= field.name -%><%= "\n private :#{field.name}" if field.is_a?(Prism::FlagsField) %>
<%- end -%>
<%- end -%>
<%- node.fields.each do |field| -%>
<%- case field -%>
<%- when Prism::LocationField -%>
Expand Down
8 changes: 5 additions & 3 deletions templates/lib/prism/serialize.rb.erb
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ module Prism
length = load_varuint
lex_state = load_varuint
location = Location.new(@source, start, length)
tokens << [Prism::Token.new(type, location.slice, location), lex_state]
tokens << [Prism::Token.new(source, type, location.slice, location), lex_state]
end

tokens
Expand Down Expand Up @@ -274,7 +274,8 @@ module Prism
<%- if node.needs_serialized_length? -%>
load_serialized_length
<%- end -%>
<%= node.name %>.new(<%= (node.fields.map { |field|
<%= node.name %>.new(
source, <%= (node.fields.map { |field|
case field
when Prism::NodeField then "load_node"
when Prism::OptionalNodeField then "load_optional_node"
Expand Down Expand Up @@ -308,7 +309,8 @@ module Prism
<%- if node.needs_serialized_length? -%>
load_serialized_length
<%- end -%>
<%= node.name %>.new(<%= (node.fields.map { |field|
<%= node.name %>.new(
source, <%= (node.fields.map { |field|
case field
when Prism::NodeField then "load_node"
when Prism::OptionalNodeField then "load_optional_node"
Expand Down
2 changes: 1 addition & 1 deletion test/prism/newline_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
module Prism
class NewlineTest < TestCase
base = File.expand_path("../", __FILE__)
filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb parser_test.rb unescape_test.rb]
filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb errors_test.rb parser_test.rb unescape_test.rb]

filepaths.each do |relative|
define_method("test_newline_flags_#{relative}") do
Expand Down

0 comments on commit de203dc

Please sign in to comment.