diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs
index fa90486e7407a..1318769136235 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
+using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
@@ -83,6 +84,7 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
bool dfa = (options & RegexOptions.NonBacktracking) != 0;
bool compiled = (options & RegexOptions.Compiled) != 0 && !dfa; // for now, we never generate code for NonBacktracking, so treat it as non-compiled
bool interpreter = !compiled && !dfa;
+ bool usesRfoTryFind = !compiled;
// For interpreter, we want to employ optimizations, but we don't want to make construction significantly
// more expensive; someone who wants to pay to do more work can specify Compiled. So for the interpreter
@@ -140,12 +142,18 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
// We're now left-to-right only and looking for multiple prefixes and/or sets.
// If there are multiple leading strings, we can search for any of them.
- if (compiled)
+ if (!interpreter) // this works in the interpreter, but we avoid it due to additional cost during construction
{
if (RegexPrefixAnalyzer.FindPrefixes(root, ignoreCase: true) is { Length: > 1 } caseInsensitivePrefixes)
{
LeadingPrefixes = caseInsensitivePrefixes;
FindMode = FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight;
+#if SYSTEM_TEXT_REGULAREXPRESSIONS
+ if (usesRfoTryFind)
+ {
+ LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.OrdinalIgnoreCase);
+ }
+#endif
return;
}
@@ -156,6 +164,12 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
//{
// LeadingPrefixes = caseSensitivePrefixes;
// FindMode = FindNextStartingPositionMode.LeadingStrings_LeftToRight;
+#if SYSTEM_TEXT_REGULAREXPRESSIONS
+ // if (usesRfoTryFind)
+ // {
+ // LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.Ordinal);
+ // }
+#endif
// return;
//}
}
@@ -275,6 +289,11 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
/// The case-insensitivity of the 0th entry will always match the mode selected, but subsequent entries may not.
public List? FixedDistanceSets { get; }
+#if SYSTEM_TEXT_REGULAREXPRESSIONS
+ /// When in leading strings mode, gets the search values to use for searching the input; null if none were created (e.g. the constructor skips creating them for Compiled regexes).
+ public SearchValues? LeadingStrings { get; }
+#endif
+
/// Data about a character class at a fixed offset from the start of any match to a pattern.
public struct FixedDistanceSet(char[]? chars, string set, int distance)
{
@@ -676,6 +695,28 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan textSpan,
return false;
}
+ // There are multiple possible strings at the beginning. Search for one.
+ case FindNextStartingPositionMode.LeadingStrings_LeftToRight:
+ case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight:
+ {
+ if (LeadingStrings is not SearchValues searchValues)
+ {
+ // This should be exceedingly rare and only happen if a Compiled regex selected this
+ // option but then failed to compile (e.g. due to too deep stacks) and fell back to the interpreter.
+ return true;
+ }
+
+ int i = textSpan.Slice(pos).IndexOfAny(searchValues);
+ if (i >= 0)
+ {
+ pos += i;
+ return true;
+ }
+
+ pos = textSpan.Length;
+ return false;
+ }
+
// There are one or more sets at fixed offsets from the start of the pattern.
case FindNextStartingPositionMode.FixedDistanceSets_LeftToRight:
@@ -800,12 +841,6 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan textSpan,
return false;
}
- // Not supported in the interpreter, but we could end up here for patterns so complex the compiler gave up on them.
-
- case FindNextStartingPositionMode.LeadingStrings_LeftToRight:
- case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight:
- return true;
-
// Nothing special to look for. Just return true indicating this is a valid position to try to match.
default: