Skip to content

Commit

Permalink
Add simple codegen support for balancing groups
Browse files Browse the repository at this point in the history
  • Loading branch information
stephentoub committed Nov 23, 2021
1 parent 0e5abac commit 0df81af
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1172,12 +1172,18 @@ void EmitExpressionConditional(RegexNode node)
// Emits the code for a Capture node.
void EmitCapture(RegexNode node, RegexNode? subsequent = null)
{
Debug.Assert(node.N == -1);

// Get the capture number. This needs to be kept in sync with MapCapNum in RegexWriter.
Debug.Assert(node.Type == RegexNode.Capture);
Debug.Assert(node.N == -1, "Currently only support capnum, not uncapnum");
int capnum = RegexParser.MapCaptureNumber(node.M, rm.Code.Caps);
int uncapnum = RegexParser.MapCaptureNumber(node.N, rm.Code.Caps);

if (uncapnum != -1)
{
using (EmitBlock(writer, $"if (!base.IsMatched({uncapnum}))"))
{
writer.WriteLine($"goto {doneLabel};");
}
writer.WriteLine();
}

TransferTextSpanPosToRunTextPos();
string startingRunTextPosName = ReserveName("startingRunTextPos");
Expand All @@ -1188,7 +1194,14 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null)
EmitNode(node.Child(0), subsequent);

TransferTextSpanPosToRunTextPos();
writer.WriteLine($"base.Capture({capnum}, {startingRunTextPosName}, runtextpos);");
if (uncapnum == -1)
{
writer.WriteLine($"base.Capture({capnum}, {startingRunTextPosName}, runtextpos);");
}
else
{
writer.WriteLine($"base.TransferCapture({capnum}, {uncapnum}, {startingRunTextPosName}, runtextpos);");
}
}

// Emits code to unwind the capture stack until the crawl position specified in the provided local.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2143,14 +2143,20 @@ void EmitExpressionConditional(RegexNode node)
// Emits the code for a Capture node.
void EmitCapture(RegexNode node, RegexNode? subsequent = null)
{
Debug.Assert(node.N == -1);
LocalBuilder startingRunTextPos = DeclareInt32();

// Get the capture number. This needs to be kept
// in sync with MapCapNum in RegexWriter.
Debug.Assert(node.Type == RegexNode.Capture);
Debug.Assert(node.N == -1, "Currently only support capnum, not uncapnum");
int capnum = RegexParser.MapCaptureNumber(node.M, _code!.Caps);
int uncapnum = RegexParser.MapCaptureNumber(node.N, _code.Caps);

if (uncapnum != -1)
{
// if (!IsMatched(uncapnum)) goto doneLabel;
Ldthis();
Ldc(uncapnum);
Call(s_isMatchedMethod);
BrfalseFar(doneLabel);
}

// runtextpos += textSpanPos;
// textSpan = textSpan.Slice(textSpanPos);
Expand All @@ -2164,13 +2170,27 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null)

// runtextpos += textSpanPos;
// textSpan = textSpan.Slice(textSpanPos);
// Capture(capnum, startingRunTextPos, runtextpos);
TransferTextSpanPosToRunTextPos();
Ldthis();
Ldc(capnum);
Ldloc(startingRunTextPos);
Ldloc(runtextposLocal);
Call(s_captureMethod);

if (uncapnum == -1)
{
// Capture(capnum, startingRunTextPos, runtextpos);
Ldthis();
Ldc(capnum);
Ldloc(startingRunTextPos);
Ldloc(runtextposLocal);
Call(s_captureMethod);
}
else
{
// TransferCapture(capnum, uncapnum, startingRunTextPos, runtextpos);
Ldthis();
Ldc(capnum);
Ldc(uncapnum);
Ldloc(startingRunTextPos);
Ldloc(runtextposLocal);
Call(s_transferCaptureMethod);
}
}

// Emits code to unwind the capture stack until the crawl position specified in the provided local.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2304,11 +2304,10 @@ internal bool SupportsSimplifiedCodeGenerationImplementation()
break;

case Capture:
// Currently we only support capnums without uncapnums (for balancing groups)
supported = N == -1;
supported = Child(0).SupportsSimplifiedCodeGenerationImplementation();
if (supported)
{
// And we only support them in certain places in the tree.
// Captures are currently only supported in certain places in the tree.
RegexNode? parent = Next;
while (parent != null)
{
Expand All @@ -2329,21 +2328,15 @@ internal bool SupportsSimplifiedCodeGenerationImplementation()
}
}

// If we've found a supported capture, mark all of the nodes in its parent
// hierarchy as containing a capture.
if (supported)
{
// And we only support them if their children are supported.
supported = Child(0).SupportsSimplifiedCodeGenerationImplementation();

// If we've found a supported capture, mark all of the nodes in its parent
// hierarchy as containing a capture.
if (supported)
parent = this;
while (parent != null && ((parent.Options & HasCapturesFlag) == 0))
{
parent = this;
while (parent != null && ((parent.Options & HasCapturesFlag) == 0))
{
parent.Options |= HasCapturesFlag;
parent = parent.Next;
}
parent.Options |= HasCapturesFlag;
parent = parent.Next;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1115,6 +1115,95 @@ public async Task Docs_Anchors_ContiguousMatches(RegexEngine engine)
Regex.Replace(Input, Pattern, m => string.Concat(m.Value.Reverse())));
}

//
// Based on examples from https://blog.stevenlevithan.com/archives/balancing-groups
//

[Theory]
[MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))]
public async Task Blog_Levithan_BalancingGroups_Palindromes(RegexEngine engine)
{
    // Balancing groups are not supported by the non-backtracking engine, so there is nothing to check.
    if (RegexHelpers.IsNonBacktracking(engine))
    {
        return;
    }

    // (?<N>.)+ records characters in group N, (?<-N>\k<N>)+ removes one capture from N each
    // time the most recent capture is seen again, and (?(N)(?!)) fails if any capture remains.
    const string Pattern = @"(?<N>.)+.?(?<-N>\k<N>)+(?(N)(?!))";
    Regex regex = await RegexHelpers.GetRegexAsync(engine, Pattern);

    string[] palindromes = { "kayak", "racecar", "never odd or even", "madam im adam" };
    string[] nonPalindromes = { "canoe", "raceboat" };

    // Every palindrome must produce a match...
    Assert.All(palindromes, text => Assert.True(regex.IsMatch(text)));

    // ...and every non-palindrome must not.
    Assert.All(nonPalindromes, text => Assert.False(regex.IsMatch(text)));
}

[Theory]
[MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))]
public async Task Blog_Levithan_BalancingGroups_MatchingParentheses(RegexEngine engine)
{
    // Balancing groups are not supported by the non-backtracking engine, so there is nothing to check.
    if (RegexHelpers.IsNonBacktracking(engine))
    {
        return;
    }

    // The Depth group gains a capture for each inner '(' and loses one for each inner ')';
    // (?(Depth)(?!)) then rejects the input if any capture is left over.
    const string Pattern = @"^\(
(?>
[^()]+
|
\( (?<Depth>)
|
\) (?<-Depth>)
)*
(?(Depth)(?!))
\)$";
    Regex regex = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnorePatternWhitespace);

    // Inputs expected to match.
    Assert.True(regex.IsMatch("()"));
    Assert.True(regex.IsMatch("(a(b c(de(f(g)hijkl))mn))"));

    // Inputs expected not to match.
    Assert.False(regex.IsMatch("("));
    Assert.False(regex.IsMatch(")"));
    Assert.False(regex.IsMatch("())"));
    Assert.False(regex.IsMatch("(()"));
    Assert.False(regex.IsMatch("(ab(cd)ef"));
}

[Theory]
[MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))]
public async Task Blog_Levithan_BalancingGroups_WordLengthIncreases(RegexEngine engine)
{
    // Balancing groups are not supported by the non-backtracking engine, so there is nothing to check.
    if (RegexHelpers.IsNonBacktracking(engine))
    {
        return;
    }

    // Captures are transferred back and forth between groups B and C while each word is consumed;
    // the (?(B)(?!)) / (?(C)(?!)) checks fail the match when captures remain in the source group.
    const string Pattern = @"^(?:
(?(A)\s|)
(?<B>)
(?<C-B>\w)+ (?(B)(?!))
(?:
\s
(?<C>)
(?<B-C>\w)+ (?(C)(?!))
(?<A>)
)?
)+ \b$";
    Regex regex = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnorePatternWhitespace);

    // Word lengths 1, 2, 3, 4, 5: expected to match.
    Assert.True(regex.IsMatch("a bc def ghij klmni"));

    // Word lengths 1, 2, 3, 3, 4: expected not to match.
    Assert.False(regex.IsMatch("a bc def ghi klmn"));
}

//
// These patterns come from real-world customer usages
Expand Down

0 comments on commit 0df81af

Please sign in to comment.