Skip to content

Commit

Permalink
Add referenced_expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
jaynetics committed Dec 25, 2024
1 parent 79a351d commit e730409
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 40 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- `#referenced_expressions`
  - like `#referenced_expression`, but for multiplexing backrefs
  - returns the `Group` expressions that are being referenced

### Fixed

- fixed `#char` & `#codepoint` errors for single-digit hex escapes
Expand Down
1 change: 1 addition & 0 deletions lib/regexp_parser/expression.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
require_relative 'expression/methods/options'
require_relative 'expression/methods/parts'
require_relative 'expression/methods/printing'
require_relative 'expression/methods/referenced_expressions'
require_relative 'expression/methods/strfregexp'
require_relative 'expression/methods/tests'
require_relative 'expression/methods/traverse'
21 changes: 1 addition & 20 deletions lib/regexp_parser/expression/classes/backreference.rb
Original file line number Diff line number Diff line change
@@ -1,25 +1,6 @@
module Regexp::Expression
module Backreference
class Base < Regexp::Expression::Base
attr_accessor :referenced_expression

def initialize_copy(orig)
exp_id = [self.class, self.starts_at]

# prevent infinite recursion for recursive subexp calls
copied = @@copied ||= {}
self.referenced_expression =
if copied[exp_id]
orig.referenced_expression
else
copied[exp_id] = true
orig.referenced_expression.dup
end
copied.clear

super
end
end
class Base < Regexp::Expression::Base; end

class Number < Backreference::Base
attr_reader :number
Expand Down
14 changes: 0 additions & 14 deletions lib/regexp_parser/expression/classes/conditional.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,17 @@ def initialize
end

class Condition < Regexp::Expression::Base
attr_accessor :referenced_expression

# Identifies the capturing group whose state this condition tests.
# Quotes, angle brackets and parentheses are stripped from the token text;
# a remainder made up only of digits is returned as an Integer, anything
# else is returned as a String.
def reference
  identifier = text.delete("'<>()")
  return identifier if identifier =~ /\D/

  Integer(identifier)
end

def initialize_copy(orig)
self.referenced_expression = orig.referenced_expression.dup
super
end
end

class Branch < Regexp::Expression::Sequence; end

class Expression < Regexp::Expression::Subexpression
attr_accessor :referenced_expression

# Appends +exp+ to the last element of +expressions+ instead of adding it
# to this expression's own list of children.
def <<(exp)
expressions.last << exp
end
Expand Down Expand Up @@ -54,11 +45,6 @@ def branches
# Returns whatever +condition.reference+ returns, i.e. the reference of this
# conditional's condition.
def reference
condition.reference
end

def initialize_copy(orig)
self.referenced_expression = orig.referenced_expression.dup
super
end
end
end
end
28 changes: 28 additions & 0 deletions lib/regexp_parser/expression/methods/referenced_expressions.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
module Regexp::Expression
  # Mixin that stores the expressions a referring expression points to and
  # keeps that list intact when the referring expression is copied.
  module ReferencedExpressions
    # Array of referenced expressions, or nil when nothing has been assigned.
    attr_accessor :referenced_expressions

    # Returns the first referenced expression, or nil when
    # #referenced_expressions is nil or empty.
    def referenced_expression
      referenced_expressions && referenced_expressions.first
    end

    # Copy hook (runs on #dup / #clone): gives the copy its own duplicates of
    # the referenced expressions instead of sharing them with +orig+.
    #
    # A referenced expression may itself contain the referring expression
    # (recursive subexpression calls), so a per-class registry of the
    # expressions whose copy is in progress is kept. When an expression is
    # reached again while it is registered, its references are shared with
    # +orig+ rather than duplicated a second time.
    #
    # The registry is emptied only by the call that found it empty, i.e. the
    # outermost copy. Clearing it in every call would drop the markers of
    # copies that are still in progress, so a later sibling could re-enter
    # them and recurse without end (e.g. two recursive calls inside the same
    # referenced group). The +ensure+ keeps a failed copy from leaving stale
    # markers behind.
    #
    # NOTE(review): the registry is plain class-level state with no locking.
    def initialize_copy(orig)
      exp_id = [self.class, self.starts_at]

      # prevent infinite recursion for recursive subexp calls
      copied = self.class.instance_eval { @copied_ref_exps ||= {} }
      outermost = copied.empty?

      begin
        self.referenced_expressions =
          if copied[exp_id]
            orig.referenced_expressions
          else
            copied[exp_id] = true
            orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
          end
      ensure
        copied.clear if outermost
      end

      super
    end
  end

  Base.include ReferencedExpressions
end
13 changes: 8 additions & 5 deletions lib/regexp_parser/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -580,16 +580,19 @@ def active_opts
# the instance of Group::Capture that it refers to via its number.
def assign_referenced_expressions
# find all referenceable and referring expressions
targets = { 0 => root }
targets = { 0 => [root] }
referrers = []
root.each_expression do |exp|
exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
referrers << exp if exp.referential?
if exp.referential?
referrers << exp
elsif exp.is_a?(Group::Capture)
(targets[exp.identifier] ||= []) << exp
end
end
# assign reference expression to referring expressions
# assign referenced expressions to referring expressions
# (in a second iteration because there might be forward references)
referrers.each do |exp|
exp.referenced_expression = targets[exp.reference] ||
exp.referenced_expressions = targets[exp.reference] ||
raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
end
end
Expand Down
2 changes: 1 addition & 1 deletion spec/expression/methods/match_length_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

specify('raises for missing references') do
exp = RP.parse(/(a)\1/).last
exp.referenced_expression = nil
exp.referenced_expressions = nil
expect { exp.match_length }.to raise_error(ArgumentError)
end

Expand Down
8 changes: 8 additions & 0 deletions spec/parser/refcalls_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@
expect(exp3.referenced_expression.to_s).to eq '(ghi)'
end

# Two groups share the name `a`, so the `\k<a>` backref is expected to list
# both of them, in pattern order, as its referenced expressions.
specify('parse backref referenced_expressions (multiplex)') do
root = RP.parse('(?<a>A)(?<a>B)\\k<a>')
exp = root.last

expect(exp.referenced_expressions).to eq [root[0], root[1]]
expect(exp.referenced_expressions.map(&:to_s)).to eq ['(?<a>A)', '(?<a>B)']
end

specify('parse backref call referenced_expression') do
root = RP.parse('\\g<+1>(abc)\\g<+2>(def)(ghi)\\g<-2>')
exp1 = root[0]
Expand Down

0 comments on commit e730409

Please sign in to comment.