Skip to content

Commit

Permalink
Extend SearchValues<string> use in Regex to NonBacktracking (#103496)
Browse files Browse the repository at this point in the history
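Previously, the SearchValues<string>-based search for multiple leading prefixes was only enabled for the compiled engine (RegexOptions.Compiled). Support it with RegexOptions.NonBacktracking, too.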
  • Loading branch information
stephentoub authored Jun 17, 2024
1 parent cf854b3 commit 44c519e
Showing 1 changed file with 42 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;

Expand Down Expand Up @@ -83,6 +84,7 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
bool dfa = (options & RegexOptions.NonBacktracking) != 0;
bool compiled = (options & RegexOptions.Compiled) != 0 && !dfa; // for now, we never generate code for NonBacktracking, so treat it as non-compiled
bool interpreter = !compiled && !dfa;
bool usesRfoTryFind = !compiled;

// For interpreter, we want to employ optimizations, but we don't want to make construction significantly
// more expensive; someone who wants to pay to do more work can specify Compiled. So for the interpreter
Expand Down Expand Up @@ -140,12 +142,18 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
// We're now left-to-right only and looking for multiple prefixes and/or sets.

// If there are multiple leading strings, we can search for any of them.
if (compiled)
if (!interpreter) // this works in the interpreter, but we avoid it due to additional cost during construction
{
if (RegexPrefixAnalyzer.FindPrefixes(root, ignoreCase: true) is { Length: > 1 } caseInsensitivePrefixes)
{
LeadingPrefixes = caseInsensitivePrefixes;
FindMode = FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight;
#if SYSTEM_TEXT_REGULAREXPRESSIONS
if (usesRfoTryFind)
{
LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.OrdinalIgnoreCase);
}
#endif
return;
}

Expand All @@ -156,6 +164,12 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
//{
// LeadingPrefixes = caseSensitivePrefixes;
// FindMode = FindNextStartingPositionMode.LeadingStrings_LeftToRight;
#if SYSTEM_TEXT_REGULAREXPRESSIONS
// if (usesRfoTryFind)
// {
// LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.Ordinal);
// }
#endif
// return;
//}
}
Expand Down Expand Up @@ -275,6 +289,11 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
/// <remarks>The case-insensitivity of the 0th entry will always match the mode selected, but subsequent entries may not.</remarks>
public List<FixedDistanceSet>? FixedDistanceSets { get; }

#if SYSTEM_TEXT_REGULAREXPRESSIONS
/// <summary>When in leading strings mode, gets the search values to use for searching the input.</summary>
/// <remarks>
/// Assigned by the constructor only when the regex is not being compiled, so it may be
/// <see langword="null"/> even when a leading-strings find mode was selected; consumers must handle that case.
/// </remarks>
public SearchValues<string>? LeadingStrings { get; }
#endif

/// <summary>Data about a character class at a fixed offset from the start of any match to a pattern.</summary>
public struct FixedDistanceSet(char[]? chars, string set, int distance)
{
Expand Down Expand Up @@ -676,6 +695,28 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan<char> textSpan,
return false;
}

// Any of several strings may begin a match. Jump to the earliest occurrence of one of them.
case FindNextStartingPositionMode.LeadingStrings_LeftToRight:
case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight:
    {
        SearchValues<string>? prefixes = LeadingStrings;
        if (prefixes is null)
        {
            // No search values were built. This is expected to be very uncommon: it arises when a
            // Compiled regex picked this mode but compilation then failed (e.g. stacks too deep) and
            // execution fell back to the interpreter. Report the current position as a candidate.
            return true;
        }

        int offset = textSpan.Slice(pos).IndexOfAny(prefixes);
        if (offset < 0)
        {
            // None of the prefixes occur in the remaining input, so no match can start anywhere.
            pos = textSpan.Length;
            return false;
        }

        // Advance to where the earliest prefix begins.
        pos += offset;
        return true;
    }

// There are one or more sets at fixed offsets from the start of the pattern.

case FindNextStartingPositionMode.FixedDistanceSets_LeftToRight:
Expand Down Expand Up @@ -800,12 +841,6 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan<char> textSpan,
return false;
}

// Not supported in the interpreter, but we could end up here for patterns so complex the compiler gave up on them.

case FindNextStartingPositionMode.LeadingStrings_LeftToRight:
case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight:
return true;

// Nothing special to look for. Just return true indicating this is a valid position to try to match.

default:
Expand Down

0 comments on commit 44c519e

Please sign in to comment.