From 2974608d47618eedb220a0e53257be525c66db0e Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Tue, 23 Apr 2024 12:06:53 -0400 Subject: [PATCH] Fix incorrect suffix search for alternation with RegexOptions.RightToLeft (#101408) * Fix incorrect suffix search for alternation with RegexOptions.RightToLeft Our limited RegexOptions.RightToLeft support for prefix optimizations is mishandling alternations. For RTL, this actually needs to create a suffix, but for alternations, we were actually creating a prefix. As this option is rarely used and it's not worth significant investment in optimizing, the fix is just to disable the handling of alternations for RTL, as the support has been broken since it was introduced in .NET 7. * Fix new tests * Avoid annoying skip message --- .../RegularExpressions/RegexPrefixAnalyzer.cs | 2 +- .../Regex.CompileToAssembly.Tests.cs | 93 +++++++++---------- .../FunctionalTests/Regex.Match.Tests.cs | 5 + .../UnitTests/RegexFindOptimizationsTests.cs | 2 +- 4 files changed, 53 insertions(+), 49 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs index 926a28339162f6..b46af7b1b9d066 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs @@ -359,7 +359,7 @@ static bool Process(RegexNode node, ref ValueStringBuilder vsb) } // Alternation: find a string that's a shared prefix of all branches - case RegexNodeKind.Alternate: + case RegexNodeKind.Alternate when !rtl: // for RTL we'd need to be matching the suffixes of the alternation cases { int childCount = node.ChildCount(); diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.CompileToAssembly.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.CompileToAssembly.Tests.cs index a669f65bd023ed..c1d17ffa6a3ae5 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.CompileToAssembly.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.CompileToAssembly.Tests.cs @@ -14,63 +14,62 @@ namespace System.Text.RegularExpressions.Tests [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)] public class RegexCompileToAssemblyTests : FileCleanupTestBase { - public static bool IsDebug => typeof(Regex).Assembly.GetCustomAttributes(false).OfType().Any(da => da.IsJITTrackingEnabled); - public static bool IsRelease => !IsDebug; - public static bool IsDebugAndRemoteExecutorSupported => IsDebug && RemoteExecutor.IsSupported; - - [ConditionalFact(nameof(IsRelease))] - public void CompileToAssembly_PNSE() + [Fact] + public void CompileToAssembly_SimpleTest() { - Assert.Throws(() => Regex.CompileToAssembly(null, null)); - Assert.Throws(() => Regex.CompileToAssembly(null, null, null)); - Assert.Throws(() => Regex.CompileToAssembly(null, null, null, null)); + bool isDebug = typeof(Regex).Assembly.GetCustomAttributes(false).OfType().Any(da => da.IsJITTrackingEnabled); - Assert.Throws(() => Regex.CompileToAssembly( - [new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "SomeNamespace", true)], - new AssemblyName("abcd"))); + if (!isDebug) + { + Assert.Throws(() => Regex.CompileToAssembly(null, null)); + Assert.Throws(() => Regex.CompileToAssembly(null, null, null)); + Assert.Throws(() => Regex.CompileToAssembly(null, null, null, null)); - Assert.Throws(() => Regex.CompileToAssembly( - [new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "SomeNamespace", true)], - new AssemblyName("abcd"), - [new CustomAttributeBuilder(typeof(AssemblyCompanyAttribute).GetConstructor([typeof(string)]), new[] { "TestCompany" })])); + Assert.Throws(() => Regex.CompileToAssembly( + [new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "SomeNamespace", true)], + new AssemblyName("abcd"))); - Assert.Throws(() => Regex.CompileToAssembly( - [new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "SomeNamespace", true)], - new AssemblyName("abcd"), - [new CustomAttributeBuilder(typeof(AssemblyCompanyAttribute).GetConstructor([typeof(string)]), new[] { "TestCompany" })], - "resourceFile")); - } + Assert.Throws(() => Regex.CompileToAssembly( + [new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "SomeNamespace", true)], + new AssemblyName("abcd"), + [new CustomAttributeBuilder(typeof(AssemblyCompanyAttribute).GetConstructor([typeof(string)]), new[] { "TestCompany" })])); - [ConditionalFact(nameof(IsDebugAndRemoteExecutorSupported))] - public void CompileToAssembly_SimpleUseInDebug() - { - RemoteExecutor.Invoke(() => + Assert.Throws(() => Regex.CompileToAssembly( + [new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "SomeNamespace", true)], + new AssemblyName("abcd"), + [new CustomAttributeBuilder(typeof(AssemblyCompanyAttribute).GetConstructor([typeof(string)]), new[] { "TestCompany" })], + "resourceFile")); + } + else if (RemoteExecutor.IsSupported) { - (RegexCompilationInfo rci, string validInput, string invalidInput)[] regexes = - [ - (new RegexCompilationInfo("abcd", RegexOptions.None, "Type1", "Namespace1", ispublic: true), "123abcd123", "123abed123"), - (new RegexCompilationInfo("(a|b|cde)+", RegexOptions.None, "Type2", "Namespace2.Sub", ispublic: true), "abcde", "cd"), - ]; + RemoteExecutor.Invoke(() => + { + (RegexCompilationInfo rci, string validInput, string invalidInput)[] regexes = + [ + (new RegexCompilationInfo("abcd", RegexOptions.None, "Type1", "Namespace1", ispublic: true), "123abcd123", "123abed123"), + (new RegexCompilationInfo("(a|b|cde)+", RegexOptions.None, "Type2", "Namespace2.Sub", ispublic: true), "abcde", "cd"), + ]; - string assemblyName = Path.GetRandomFileName(); + string assemblyName = Path.GetRandomFileName(); - string cwd = Environment.CurrentDirectory; - Environment.CurrentDirectory = TestDirectory; - try - { - Regex.CompileToAssembly(regexes.Select(r => r.rci).ToArray(), new AssemblyName(assemblyName)); - } - finally - { - Environment.CurrentDirectory = cwd; - } + string cwd = Environment.CurrentDirectory; + Environment.CurrentDirectory = TestDirectory; + try + { + Regex.CompileToAssembly(regexes.Select(r => r.rci).ToArray(), new AssemblyName(assemblyName)); + } + finally + { + Environment.CurrentDirectory = cwd; + } - string assemblyPath = Path.Combine(TestDirectory, assemblyName + ".dll"); - Assert.True(File.Exists(assemblyPath)); + string assemblyPath = Path.Combine(TestDirectory, assemblyName + ".dll"); + Assert.True(File.Exists(assemblyPath)); - // Uncomment to save the assembly to the desktop for inspection: - // File.Copy(assemblyPath, Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), Path.GetFileName(assemblyPath))); - }).Dispose(); + // Uncomment to save the assembly to the desktop for inspection: + // File.Copy(assemblyPath, Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), Path.GetFileName(assemblyPath))); + }).Dispose(); + } } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 1b06272fba64ab..d58290174c222f 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -698,6 +698,11 @@ public static IEnumerable Match_MemberData() yield return (@"(...)(?(1)\w*|\s*)[a1 ]", "zabcaaaaaaa", RegexOptions.RightToLeft, 0, 11, true, "aaaa"); yield return (@"(...)(?(1)\w*|\s*)[a1 ]", "---- ", RegexOptions.RightToLeft, 0, 11, true, "--- "); yield return (@"(aaa)(?(1)aaa|b?)*", "aaaaaa", RegexOptions.None, 0, 6, true, "aaaaaa"); + + yield return (@"AAB|AAC", "AABAACD", RegexOptions.RightToLeft, 0, 6, true, "AAC"); + yield return (@"AAB|AA\d", "AABAACD", RegexOptions.RightToLeft, 0, 6, true, "AAB"); + yield return (@"(AB){3,}", "1234ABABABAB5678", RegexOptions.RightToLeft, 0, 16, true, "ABABABAB"); + yield return (@"(AB){1,3}", "1234ABABABAB5678", RegexOptions.RightToLeft, 0, 16, true, "ABABAB"); } // Character Class Subtraction diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs index c962bc072b6fd7..4c0b02b266142e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs @@ -88,7 +88,6 @@ public void TrailingAnchor(string pattern, int options, int expectedMode, int ex [InlineData(@"(ab){2,4}(de){4,}", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "abab")] [InlineData(@"(ab){2,4}(de){4,}", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingString_RightToLeft, "de")] [InlineData(@"ab|(abc)|(abcd)", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "ab")] - [InlineData(@"ab|(abc)|(abcd)", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingString_RightToLeft, "ab")] [InlineData(@"ab(?=cd)", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "ab")] [InlineData(@"ab(?=cd)", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingString_RightToLeft, "ab")] [InlineData(@"\bab(?=\w)(?!=\d)c\b", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "abc")] @@ -110,6 +109,7 @@ public void LeadingPrefix(string pattern, int options, int expectedMode, string [InlineData(@"a", (int)RegexOptions.IgnoreCase | (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingSet_RightToLeft, "Aa")] [InlineData(@"ab|cd|ef|gh", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingSet_RightToLeft, "bdfh")] [InlineData(@"\bab(?=\w)(?!=\d)c\b", (int)(RegexOptions.IgnoreCase | RegexOptions.RightToLeft), (int)FindNextStartingPositionMode.LeadingSet_RightToLeft, "Cc")] + [InlineData(@"ab|(abc)|(abcd)", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingSet_RightToLeft, "bcd")] public void LeadingSet(string pattern, int options, int expectedMode, string expectedChars) { RegexFindOptimizations opts = ComputeOptimizations(pattern, (RegexOptions)options);