Skip to content

Commit

Permalink
Avoid extra String copies in the FFI backend
Browse files Browse the repository at this point in the history
* For Prism.parse_file the file contents would be read as native, then
  converted to a Ruby String, then converted to a native String for
  pm_serialize_parse().
* Refactor the logic to always use a pm_string for the source code and
  pass that to other native functions.
  • Loading branch information
eregon committed Feb 15, 2024
1 parent 0adb234 commit 9002b3c
Showing 1 changed file with 102 additions and 76 deletions.
178 changes: 102 additions & 76 deletions lib/prism/ffi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,12 @@ def read

# Initialize a new buffer and yield it to the block. The buffer will be
# automatically freed when the block returns.
def self.with(&block)
pointer = FFI::MemoryPointer.new(SIZEOF)

begin
def self.with
FFI::MemoryPointer.new(SIZEOF) do |pointer|
raise unless LibRubyParser.pm_buffer_init(pointer)
yield new(pointer)
return yield new(pointer)
ensure
LibRubyParser.pm_buffer_free(pointer)
pointer.free
end
end
end
Expand All @@ -137,39 +134,47 @@ def self.with(&block)
class PrismString # :nodoc:
SIZEOF = LibRubyParser.pm_string_sizeof

attr_reader :pointer
attr_reader :pointer, :length

def initialize(pointer)
def initialize(pointer, length, from_string)
@pointer = pointer
end

def source
LibRubyParser.pm_string_source(pointer)
end

def length
LibRubyParser.pm_string_length(pointer)
@length = length
@from_string = from_string
end

def read
source.read_string(length)
raise "should use the original String instead" if @from_string
@pointer.read_string(@length)
end

# Yields a pm_string_t pointer to the given block.
def self.with(filepath, &block)
pointer = FFI::MemoryPointer.new(SIZEOF)

begin
raise TypeError unless filepath.is_a?(String)
def self.with_string(string)
raise TypeError unless string.is_a?(String)

length = string.bytesize
# + 1 to never get an address of 0, which pm_parser_init() asserts
FFI::MemoryPointer.new(:char, length + 1, false) do |pointer|
pointer.write_string(string)
# since we have the extra byte we might as well \0-terminate
pointer.put_char(length, 0)
return yield new(pointer, length, true)
end
end

if LibRubyParser.pm_string_mapped_init(pointer, filepath)
yield new(pointer)
# Yields a pm_string_t pointer to the given block.
def self.with_file(filepath)
raise TypeError unless filepath.is_a?(String)

FFI::MemoryPointer.new(SIZEOF) do |pm_string|
if LibRubyParser.pm_string_mapped_init(pm_string, filepath)
pointer = LibRubyParser.pm_string_source(pm_string)
length = LibRubyParser.pm_string_length(pm_string)
return yield new(pointer, length, false)
else
raise SystemCallError.new(filepath, FFI.errno)
end
ensure
LibRubyParser.pm_string_free(pointer)
pointer.free
LibRubyParser.pm_string_free(pm_string)
end
end
end
Expand All @@ -185,52 +190,100 @@ def self.with(filepath, &block)
class << self
# Mirror the Prism.dump API by using the serialization API.
def dump(code, **options)
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse(buffer.pointer, code, code.bytesize, dump_options(options))
buffer.read
end
LibRubyParser::PrismString.with_string(code) { |string| dump_common(string, options) }
end

# Mirror the Prism.dump_file API by using the serialization API.
def dump_file(filepath, **options)
LibRubyParser::PrismString.with(filepath) do |string|
dump(string.read, **options, filepath: filepath)
end
options[:filepath] = filepath
LibRubyParser::PrismString.with_file(filepath) { |string| dump_common(string, options) }
end

# Mirror the Prism.lex API by using the serialization API.
def lex(code, **options)
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_lex(buffer.pointer, code, code.bytesize, dump_options(options))
Serialize.load_tokens(Source.new(code), buffer.read)
end
LibRubyParser::PrismString.with_string(code) { |string| lex_common(string, code, options) }
end

# Mirror the Prism.lex_file API by using the serialization API.
def lex_file(filepath, **options)
LibRubyParser::PrismString.with(filepath) do |string|
lex(string.read, **options, filepath: filepath)
end
options[:filepath] = filepath
LibRubyParser::PrismString.with_file(filepath) { |string| lex_common(string, string.read, options) }
end

# Mirror the Prism.parse API by using the serialization API.
def parse(code, **options)
Prism.load(code, dump(code, **options))
LibRubyParser::PrismString.with_string(code) { |string| parse_common(string, code, options) }
end

# Mirror the Prism.parse_file API by using the serialization API. This uses
# native strings instead of Ruby strings because it allows us to use mmap when
# it is available.
def parse_file(filepath, **options)
LibRubyParser::PrismString.with(filepath) do |string|
parse(string.read, **options, filepath: filepath)
end
options[:filepath] = filepath
LibRubyParser::PrismString.with_file(filepath) { |string| parse_common(string, string.read, options) }
end

# Mirror the Prism.parse_comments API by using the serialization API.
def parse_comments(code, **options)
LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) }
end

# Mirror the Prism.parse_file_comments API by using the serialization
# API. This uses native strings instead of Ruby strings because it allows us
# to use mmap when it is available.
def parse_file_comments(filepath, **options)
options[:filepath] = filepath
LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) }
end

# Mirror the Prism.parse_lex API by using the serialization API.
def parse_lex(code, **options)
LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) }
end

# Mirror the Prism.parse_lex_file API by using the serialization API.
def parse_lex_file(filepath, **options)
options[:filepath] = filepath
LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) }
end

# Mirror the Prism.parse_success? API by using the serialization API.
def parse_success?(code, **options)
LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) }
end

# Mirror the Prism.parse_file_success? API by using the serialization API.
def parse_file_success?(filepath, **options)
options[:filepath] = filepath
LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) }
end

private

def dump_common(string, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
buffer.read
end
end

def lex_common(string, code, options) # :nodoc:
serialized = LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
buffer.read
end

Serialize.load_tokens(Source.new(code), serialized)
end

def parse_common(string, code, options) # :nodoc:
serialized = dump_common(string, options)
Prism.load(code, serialized)
end

def parse_comments_common(string, code, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse_comments(buffer.pointer, code, code.bytesize, dump_options(options))
LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options))

source = Source.new(code)
loader = Serialize::Loader.new(source, buffer.read)
Expand All @@ -242,19 +295,9 @@ def parse_comments(code, **options)
end
end

# Mirror the Prism.parse_file_comments API by using the serialization
# API. This uses native strings instead of Ruby strings because it allows us
# to use mmap when it is available.
def parse_file_comments(filepath, **options)
LibRubyParser::PrismString.with(filepath) do |string|
parse_comments(string.read, **options, filepath: filepath)
end
end

# Mirror the Prism.parse_lex API by using the serialization API.
def parse_lex(code, **options)
def parse_lex_common(string, code, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse_lex(buffer.pointer, code, code.bytesize, dump_options(options))
LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options))

source = Source.new(code)
loader = Serialize::Loader.new(source, buffer.read)
Expand All @@ -267,27 +310,10 @@ def parse_lex(code, **options)
end
end

# Mirror the Prism.parse_lex_file API by using the serialization API.
def parse_lex_file(filepath, **options)
LibRubyParser::PrismString.with(filepath) do |string|
parse_lex(string.read, **options, filepath: filepath)
end
end

# Mirror the Prism.parse_success? API by using the serialization API.
def parse_success?(code, **options)
LibRubyParser.pm_parse_success_p(code, code.bytesize, dump_options(options))
end

# Mirror the Prism.parse_file_success? API by using the serialization API.
def parse_file_success?(filepath, **options)
LibRubyParser::PrismString.with(filepath) do |string|
parse_success?(string.read, **options, filepath: filepath)
end
def parse_file_success_common(string, options) # :nodoc:
LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options))
end

private

# Convert the given options into a serialized options string.
def dump_options(options)
template = +""
Expand Down

0 comments on commit 9002b3c

Please sign in to comment.