Skip to content

Commit

Permalink
Merge pull request #364 from ydah/refactor-symbol
Browse files Browse the repository at this point in the history
Refactoring to extract Lrama::Grammar::Symbols
  • Loading branch information
yui-knk authored Feb 6, 2024
2 parents 9585696 + 3d0ca31 commit b0c29ea
Show file tree
Hide file tree
Showing 3 changed files with 292 additions and 243 deletions.
266 changes: 23 additions & 243 deletions lib/lrama/grammar.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
require "forwardable"
require "lrama/grammar/auxiliary"
require "lrama/grammar/binding"
require "lrama/grammar/code"
Expand All @@ -11,29 +12,38 @@
require "lrama/grammar/rule"
require "lrama/grammar/rule_builder"
require "lrama/grammar/symbol"
require "lrama/grammar/symbols"
require "lrama/grammar/type"
require "lrama/grammar/union"
require "lrama/lexer"

module Lrama
# Grammar is the result of parsing an input grammar file
class Grammar
extend Forwardable

attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
attr_accessor :union, :expect,
:printers, :error_tokens,
:lex_param, :parse_param, :initial_action,
:symbols, :types,
:symbols_resolver, :types,
:rules, :rule_builders,
:sym_to_rules, :no_stdlib

# Forward the symbol-related API to the object held in @symbols_resolver.
# Forwardable#def_delegators defines one forwarding instance method for
# each name listed below.
def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
:find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
:fill_printer, :fill_error_token, :sort_by_number!


def initialize(rule_counter)
@rule_counter = rule_counter

# Code defined by "%code"
@percent_codes = []
@printers = []
@error_tokens = []
@symbols = []
@symbols_resolver = Grammar::Symbols::Resolver.new
@types = []
@rule_builders = []
@rules = []
Expand Down Expand Up @@ -62,44 +72,6 @@ def add_error_token(ident_or_tags:, token_code:, lineno:)
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
end

def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
  # Registers a terminal symbol and returns it.
  #
  # Lookup order:
  #   1. When token_id is given, a symbol already using that token_id wins.
  #      With replace: true its id, alias_name and tag are overwritten.
  #   2. Otherwise a symbol with the same id is returned untouched.
  #   3. Otherwise a new term is created and appended to @symbols.
  same_token_id = token_id && @symbols.find {|s| s.token_id == token_id }

  if same_token_id
    if replace
      same_token_id.id = id
      same_token_id.alias_name = alias_name
      same_token_id.tag = tag
    end

    return same_token_id
  end

  same_id = @symbols.find {|s| s.id == id }
  return same_id if same_id

  created = Symbol.new(
    id: id, alias_name: alias_name, number: nil, tag: tag,
    term: true, token_id: token_id, nullable: false
  )
  @symbols << created
  # Drop the memoized term list so that it is rebuilt with the new symbol.
  @terms = nil

  created
end

def add_nterm(id:, alias_name: nil, tag: nil)
  # Registers a nonterminal symbol and returns it.
  # When a symbol with the same id is already registered, nothing is added
  # and nil is returned (not the existing symbol).
  return if @symbols.any? {|s| s.id == id }

  created = Symbol.new(
    id: id, alias_name: alias_name, number: nil, tag: tag,
    term: false, token_id: nil, nullable: nil,
  )
  @symbols << created
  # Drop the memoized nterm list so that it is rebuilt with the new symbol.
  @nterms = nil

  created
end

def add_type(id:, tag:)
  # Records a new Type built from id and tag; returns the @types array.
  type = Type.new(id: id, tag: tag)
  @types << type
end
Expand Down Expand Up @@ -165,13 +137,9 @@ def prepare
normalize_rules
collect_symbols
set_lhs_and_rhs
fill_symbol_number
fill_default_precedence
fill_symbols
fill_sym_to_rules
fill_nterm_type
fill_symbol_printer
fill_symbol_error_token
@symbols.sort_by!(&:number)
compute_nullable
compute_first_set
end
Expand All @@ -180,40 +148,10 @@ def prepare
#
# * Validation for no_declared_type_reference
def validate!
# Invokes each validation step in order: the two symbol uniqueness checks
# (number, alias name), the validate! of the object held in
# @symbols_resolver, and finally the rule LHS check.
validate_symbol_number_uniqueness!
validate_symbol_alias_name_uniqueness!
@symbols_resolver.validate!
validate_rule_lhs_is_nterm!
end

def find_symbol_by_s_value(s_value)
  # First symbol whose id has the given s_value, or nil when none matches.
  @symbols.find {|candidate| candidate.id.s_value == s_value }
end

def find_symbol_by_s_value!(s_value)
  # Same lookup as find_symbol_by_s_value, but a miss raises instead of
  # returning nil.
  found = find_symbol_by_s_value(s_value)
  raise "Symbol not found: #{s_value}" unless found

  found
end

def find_symbol_by_id(id)
  # A symbol matches when its id equals the given id, or when its
  # alias_name equals the given id's s_value. Returns nil on a miss.
  @symbols.find {|candidate| candidate.id == id || candidate.alias_name == id.s_value }
end

def find_symbol_by_id!(id)
  # Same lookup as find_symbol_by_id, but a miss raises instead of
  # returning nil.
  found = find_symbol_by_id(id)
  raise "Symbol not found: #{id}" unless found

  found
end

def find_symbol_by_number!(number)
  # Looks the symbol up by position in @symbols and checks that the symbol
  # stored there really carries the requested number.
  @symbols[number].tap do |sym|
    raise "Symbol not found: #{number}" unless sym
    raise "[BUG] Symbol number mismatch. #{number}, #{sym}" unless sym.number == number
  end
end

def find_rules_by_symbol!(sym)
  # Same lookup as find_rules_by_symbol, but a miss raises instead of
  # returning a falsy value.
  rules = find_rules_by_symbol(sym)
  raise "Rules for #{sym} not found" unless rules

  rules
end
Expand All @@ -222,22 +160,6 @@ def find_rules_by_symbol(sym)
@sym_to_rules[sym.number]
end

# Number of entries in #terms.
def terms_count
terms.count
end

def terms
  # Memoized list of the symbols in @symbols that answer true to term?.
  @terms ||= @symbols.select {|sym| sym.term? }
end

# Number of entries in #nterms.
def nterms_count
nterms.count
end

def nterms
  # Memoized list of the symbols in @symbols that answer true to nterm?.
  @nterms ||= @symbols.select {|sym| sym.nterm? }
end

private

def compute_nullable
Expand Down Expand Up @@ -284,7 +206,7 @@ def compute_nullable
rule.nullable = false
end

nterms.select {|r| r.nullable.nil? }.each do |nterm|
nterms.select {|e| e.nullable.nil? }.each do |nterm|
nterm.nullable = false
end
end
Expand Down Expand Up @@ -330,12 +252,6 @@ def setup_rules
end
end

def find_nterm_by_id!(id)
  # Searches only the nterms; raises when no nterm has the given id.
  found = nterms.find {|nterm| nterm.id == id }
  raise "Nterm not found: #{id}" unless found

  found
end

def append_special_symbols
# YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
# term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
Expand Down Expand Up @@ -397,79 +313,6 @@ def collect_symbols
end
end

# Fill #number and #token_id
def fill_symbol_number
  # Character literals in a grammar file take their ASCII code as token id
  # by default, so generated token ids start from 256.
  next_token_id = 256

  # Numbers below 3 are already spoken for:
  #   YYEMPTY = -2
  #   YYEOF   = 0
  #   YYerror = 1
  #   YYUNDEF = 2
  next_number = 3

  next_nterm_token_id = 0

  # Numbers present on symbols before this pass; they are never handed out again.
  taken = {}
  @symbols.each {|sym| taken[sym.number] = true }

  # Terms are processed before nterms.
  ordered = @symbols.select(&:term?) + @symbols.select(&:nterm?)

  ordered.each do |sym|
    next_number += 1 while taken[next_number]

    if sym.number.nil?
      sym.number = next_number
      next_number += 1
    end

    if sym.term? && sym.token_id.nil?
      if sym.id.is_a?(Lrama::Lexer::Token::Char)
        # The s_value carries the quote on both sides; look at what is between them.
        sym.token_id =
          case sym.id.s_value[1..-2]
          when "\\b" then 8
          when "\\f" then 12
          when "\\n" then 10
          when "\\r" then 13
          when "\\t" then 9
          when "\\v" then 11
          when "\"" then 34
          when "'" then 39
          when "\\\\" then 92
          when /\A\\(\d+)\z/ then Integer($1, 8) # backslash + digits, read as base 8
          when /\A(.)\z/ then $1.bytes.first     # any other single character
          else raise "Unknown Char s_value #{sym}"
          end
      else
        sym.token_id = next_token_id
        next_token_id += 1
      end
    end

    if sym.nterm? && sym.token_id.nil?
      sym.token_id = next_nterm_token_id
      next_nterm_token_id += 1
    end
  end
end

def set_lhs_and_rhs
@rules.each do |rule|
rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
Expand All @@ -480,15 +323,6 @@ def set_lhs_and_rhs
end
end

def token_to_symbol(token)
  # Only lexer tokens can be resolved to a symbol; anything else is rejected.
  raise "Unknown class: #{token}" unless token.is_a?(Lrama::Lexer::Token)

  find_symbol_by_id!(token)
end

# Rule inherits precedence from the last term in RHS.
#
# https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
Expand All @@ -506,6 +340,14 @@ def fill_default_precedence
end
end

def fill_symbols
# Runs the symbol-filling steps in sequence, passing this grammar's
# @types, @printers and @error_tokens to the steps that take an argument,
# and finishes with sort_by_number!.
# NOTE(review): these calls look like the methods delegated to
# @symbols_resolver via def_delegators — confirm.
fill_symbol_number
fill_nterm_type(@types)
fill_printer(@printers)
fill_error_token(@error_tokens)
sort_by_number!
end

def fill_sym_to_rules
@rules.each do |rule|
key = rule.lhs.number
Expand All @@ -514,68 +356,6 @@ def fill_sym_to_rules
end
end

# Fill nterm's tag defined by %type decl
def fill_nterm_type
  # Each entry of @types names an nterm by id; copy the entry's tag onto it.
  # A type whose id matches no nterm makes find_nterm_by_id! raise.
  @types.each {|type| find_nterm_by_id!(type.id).tag = type.tag }
end

def fill_symbol_printer
  # Attaches printers to symbols. A printer applies to a symbol when one of
  # its ident_or_tags is an Ident equal to the symbol's id or a Tag equal to
  # the symbol's tag. When several printers apply, the one visited last wins.
  @symbols.each do |sym|
    @printers.each do |printer|
      printer.ident_or_tags.each do |target|
        applies =
          case target
          when Lrama::Lexer::Token::Ident then sym.id == target
          when Lrama::Lexer::Token::Tag then sym.tag == target
          else raise "Unknown token type. #{printer}"
          end

        sym.printer = printer if applies
      end
    end
  end
end

def fill_symbol_error_token
  # Attaches error tokens to symbols. An error token applies to a symbol
  # when one of its ident_or_tags is an Ident equal to the symbol's id or a
  # Tag equal to the symbol's tag. When several apply, the one visited last wins.
  @symbols.each do |sym|
    @error_tokens.each do |error_token|
      error_token.ident_or_tags.each do |target|
        applies =
          case target
          when Lrama::Lexer::Token::Ident then sym.id == target
          when Lrama::Lexer::Token::Tag then sym.tag == target
          else raise "Unknown token type. #{error_token}"
          end

        sym.error_token = error_token if applies
      end
    end
  end
end

def validate_symbol_number_uniqueness!
  # Raises when two or more symbols share the same number; otherwise returns nil.
  duplicated = @symbols.group_by(&:number).select {|_number, syms| syms.size > 1 }

  raise "Symbol number is duplicated. #{duplicated}" unless duplicated.empty?
end

def validate_symbol_alias_name_uniqueness!
  # Symbols without an alias name are ignored; among the rest, raises when
  # two or more share the same alias name. Otherwise returns nil.
  aliased = @symbols.select(&:alias_name)
  duplicated = aliased.group_by(&:alias_name).select {|_alias_name, syms| syms.size > 1 }

  raise "Symbol alias name is duplicated. #{duplicated}" unless duplicated.empty?
end

def validate_rule_lhs_is_nterm!
errors = []

Expand Down
1 change: 1 addition & 0 deletions lib/lrama/grammar/symbols.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
require_relative "symbols/resolver"
Loading

0 comments on commit b0c29ea

Please sign in to comment.