diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj
index 4606e127d4456..4fc98880dc259 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj
+++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj
@@ -63,6 +63,7 @@
+
@@ -82,10 +83,6 @@
-
-
-
-
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Debug.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Debug.cs
index ee124fdf205c1..572a47b4d4ed1 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Debug.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Debug.cs
@@ -26,23 +26,21 @@ internal static bool EnableDebugTracing
}
/// Unwind the regex and save the resulting state graph in DGML
- /// roughly the maximum number of states, 0 means no bound
- /// if true then hide state info
- /// if true then pretend that there is a .* at the beginning
- /// if true then unwind the regex backwards (addDotStar is then ignored)
- /// if true then compute and save only general DFA info
- /// dgml output is written here
+ /// Writer to which the DGML is written.
+ /// True to create an NFA instead of a DFA.
+ /// True to prepend .*? onto the pattern (outside of the implicit root capture).
+ /// If true, then unwind the regex backwards (and addDotStar is ignored).
+ /// The approximate maximum number of states to include; less than or equal to 0 for no maximum.
/// maximum length of labels in nodes anything over that length is indicated with ..
- /// if true creates NFA instead of DFA
[ExcludeFromCodeCoverage(Justification = "Debug only")]
- internal void SaveDGML(TextWriter writer, int bound, bool hideStateInfo, bool addDotStar, bool inReverse, bool onlyDFAinfo, int maxLabelLength, bool asNFA)
+ internal void SaveDGML(TextWriter writer, bool nfa, bool addDotStar, bool reverse, int maxStates, int maxLabelLength)
{
if (factory is not SymbolicRegexRunnerFactory srmFactory)
{
throw new NotSupportedException();
}
- srmFactory._matcher.SaveDGML(writer, bound, hideStateInfo, addDotStar, inReverse, onlyDFAinfo, maxLabelLength, asNFA);
+ srmFactory._matcher.SaveDGML(writer, nfa, addDotStar, reverse, maxStates, maxLabelLength);
}
///
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/DgmlWriter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/DgmlWriter.cs
deleted file mode 100644
index cbd134be45363..0000000000000
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/DgmlWriter.cs
+++ /dev/null
@@ -1,241 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#if DEBUG
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.IO;
-
-namespace System.Text.RegularExpressions.Symbolic.DGML
-{
- internal sealed class DgmlWriter
- {
- private readonly int _maxDgmlTransitionLabelLength;
- private readonly TextWriter _tw;
- private readonly bool _hideStateInfo;
- private readonly bool _onlyDFAinfo;
-
- internal DgmlWriter(TextWriter tw, bool hideStateInfo, int maxDgmlTransitionLabelLength = -1, bool onlyDFAinfo = false)
- {
- _maxDgmlTransitionLabelLength = maxDgmlTransitionLabelLength;
- _tw = tw;
- _hideStateInfo = hideStateInfo;
- _onlyDFAinfo = onlyDFAinfo;
- }
-
- ///
- /// Write the automaton in dgml format into the textwriter.
- ///
- public void Write(IAutomaton fa)
- {
- var nonEpsilonMoves = new Dictionary<(int, int), List>();
- var epsilonmoves = new List>();
-
- var nonEpsilonStates = new HashSet();
-
- foreach (Move move in fa.GetMoves())
- {
- if (move.IsEpsilon)
- {
- epsilonmoves.Add(move);
- }
- else
- {
- nonEpsilonStates.Add(move.SourceState);
- var p = (move.SourceState, move.TargetState);
- if (!nonEpsilonMoves.TryGetValue(p, out List? rules))
- {
- rules = new List();
- nonEpsilonMoves[p] = rules;
- }
-
- Debug.Assert(move.Label is not null);
- rules.Add(move.Label);
- }
- }
-
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("", GetDFAInfo(fa));
- _tw.WriteLine("", GetDFAInfo(fa));
- if (_onlyDFAinfo)
- {
- _tw.WriteLine("");
- }
- else
- {
- foreach (int state in fa.GetStates())
- {
- _tw.WriteLine("", state, _hideStateInfo ? "Collapsed" : "Expanded", GetStateInfo(fa, state));
- if (state == fa.InitialState)
- {
- _tw.WriteLine("");
- }
- if (fa.IsFinalState(state))
- {
- _tw.WriteLine("");
- }
- _tw.WriteLine("");
- _tw.WriteLine("", state, GetStateInfo(fa, state));
- }
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("", fa.InitialState, fa.DescribeStartLabel());
- _tw.WriteLine("");
-
- foreach (Move move in epsilonmoves)
- {
- _tw.WriteLine("", move.SourceState, move.TargetState);
- }
-
- foreach (KeyValuePair<(int, int), List> move in nonEpsilonMoves)
- {
- _tw.WriteLine(GetNonFinalRuleInfo(fa, move.Key.Item1, move.Key.Item2, move.Value));
- }
-
- foreach (int state in fa.GetStates())
- {
- _tw.WriteLine("", state);
- }
-
- _tw.WriteLine("");
- WriteCategoriesAndStyles();
- }
- _tw.WriteLine("");
- }
-
- private string GetDFAInfo(IAutomaton fa)
- {
- StringBuilder sb = new();
- sb.Append("|Q|=");
- sb.Append(fa.StateCount);
- sb.Append("
");
- sb.Append('|');
- sb.Append(DeltaCapital);
- sb.Append("|=");
- sb.Append(fa.TransitionCount);
- sb.Append("
");
- sb.Append('|');
- sb.Append(SigmalCapital);
- sb.Append("|=");
- sb.Append(fa.Alphabet.Length);
- sb.Append("
");
- sb.Append(SigmalCapital);
- sb.Append('=');
- for (int i = 0; i < fa.Alphabet.Length; i++)
- {
- if (i > 0)
- sb.Append(',');
- sb.Append(fa.DescribeLabel(fa.Alphabet[i]));
- }
- return sb.ToString();
- }
-
- private const string DeltaCapital = "Δ";
- private const string SigmalCapital = "Σ";
-
- private static string GetStateInfo(IAutomaton fa, int state)
- {
- StringBuilder sb = new();
- sb.Append(fa.DescribeState(state));
- return sb.ToString();
- }
-
- private string GetNonFinalRuleInfo(IAutomaton aut, int source, int target, List rules)
- {
- string lab = "";
- string info = "";
- for (int i = 0; i < rules.Count; i++)
- {
- lab += (lab == "" ? "" : ",\n ") + aut.DescribeLabel(rules[i]);
- }
-
- int lab_length = lab.Length;
- if (_maxDgmlTransitionLabelLength >= 0 && lab_length > _maxDgmlTransitionLabelLength)
- {
- info += $" FullLabel = \"{lab}\"";
- lab = string.Concat(lab.AsSpan(0, _maxDgmlTransitionLabelLength), "..");
- }
-
- return $"";
- }
-
- private void WriteCategoriesAndStyles()
- {
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- //_tw.WriteLine("");
- //_tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- _tw.WriteLine("");
- }
- }
-}
-#endif
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/IAutomaton.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/IAutomaton.cs
deleted file mode 100644
index f6237967905f4..0000000000000
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/IAutomaton.cs
+++ /dev/null
@@ -1,66 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#if DEBUG
-using System.Collections.Generic;
-
-namespace System.Text.RegularExpressions.Symbolic.DGML
-{
- ///
- /// For accessing the key components of an automaton.
- ///
- /// type of labels in moves
- internal interface IAutomaton
- {
- ///
- /// Enumerates all moves of the automaton.
- ///
- IEnumerable> GetMoves();
-
- ///
- /// Enumerates all states of the automaton.
- ///
- IEnumerable GetStates();
-
- ///
- /// Returns the minterm partition of the alphabet.
- ///
- TLabel[] Alphabet { get; }
-
- ///
- /// Provides a description of the state for visualization purposes.
- ///
- string DescribeState(int state);
-
- ///
- /// Provides a description of the label for visualization purposes.
- ///
- string DescribeLabel(TLabel lab);
-
- ///
- /// Provides a description of the start label for visualization purposes.
- ///
- string DescribeStartLabel();
-
- ///
- /// The initial state of the automaton.
- ///
- int InitialState { get; }
-
- ///
- /// The number of states of the automaton.
- ///
- int StateCount { get; }
-
- ///
- /// The number of transitions of the automaton.
- ///
- int TransitionCount { get; }
-
- ///
- /// Returns true iff the state is a final state.
- ///
- bool IsFinalState(int state);
- }
-}
-#endif
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/Move.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/Move.cs
deleted file mode 100644
index e25e3b720a901..0000000000000
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/Move.cs
+++ /dev/null
@@ -1,77 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#if DEBUG
-using System.Diagnostics.CodeAnalysis;
-
-namespace System.Text.RegularExpressions.Symbolic.DGML
-{
- ///
- /// Represents a move of a symbolic finite automaton.
- /// The value default(L) is reserved to represent the label of an epsilon move.
- /// Thus if S is a reference type the label of an epsilon move is null.
- ///
- /// the type of the labels on moves
- internal sealed class Move
- {
- ///
- /// Source state of the move
- ///
- public readonly int SourceState;
- ///
- /// Target state of the move
- ///
- public readonly int TargetState;
- ///
- /// Label of the move
- ///
- public readonly TLabel? Label;
-
- ///
- /// Transition of an automaton.
- ///
- /// source state of the transition
- /// target state of the transition
- /// label of the transition
- public Move(int sourceState, int targetState, TLabel? lab)
- {
- SourceState = sourceState;
- TargetState = targetState;
- Label = lab;
- }
-
- ///
- /// Creates a move. Creates an epsilon move if label is default(L).
- ///
- public static Move Create(int sourceState, int targetState, TLabel condition) => new Move(sourceState, targetState, condition);
-
- ///
- /// Creates an epsilon move. Same as Create(sourceState, targetState, default(L)).
- ///
- public static Move Epsilon(int sourceState, int targetState) => new Move(sourceState, targetState, default);
-
- ///
- /// Returns true if label equals default(S).
- ///
- public bool IsEpsilon => Equals(Label, default(TLabel));
-
- ///
- /// Returns true if the source state and the target state are identical
- ///
- public bool IsSelfLoop => SourceState == TargetState;
-
- ///
- /// Returns true if obj is a move with the same source state, target state, and label.
- ///
- public override bool Equals([NotNullWhen(false)] object? obj) =>
- obj is Move t &&
- t.SourceState == SourceState &&
- t.TargetState == TargetState &&
- (t.Label is null ? Label is null : t.Label.Equals(Label));
-
- public override int GetHashCode() => (SourceState, Label, TargetState).GetHashCode();
-
- public override string ToString() => $"({SourceState},{(Equals(Label, default(TLabel)) ? "" : Label + ",")}{TargetState})";
- }
-}
-#endif
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/RegexAutomaton.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/RegexAutomaton.cs
deleted file mode 100644
index ddcd9f8bf5bcb..0000000000000
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Dgml/RegexAutomaton.cs
+++ /dev/null
@@ -1,140 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#if DEBUG
-using System.Collections.Generic;
-using System.Diagnostics;
-
-namespace System.Text.RegularExpressions.Symbolic.DGML
-{
- ///
- /// Used by DgmlWriter to unwind a regex into a DFA up to a bound that limits the number of states
- ///
- internal sealed class RegexAutomaton : IAutomaton<(SymbolicRegexNode?, T)> where T : notnull
- {
- private readonly DfaMatchingState _q0;
- private readonly List _states = new();
- private readonly HashSet _stateSet = new();
- private readonly List?, T)>> _moves = new();
- private readonly SymbolicRegexBuilder _builder;
- private readonly SymbolicNFA? _nfa;
-
- internal RegexAutomaton(SymbolicRegexMatcher srm, int bound, bool addDotStar, bool inReverse, bool asNFA)
- {
- _builder = srm._builder;
- uint startId = inReverse ?
- (srm._reversePattern._info.StartsWithLineAnchor ? CharKind.BeginningEnd : 0) :
- (srm._pattern._info.StartsWithLineAnchor ? CharKind.BeginningEnd : 0);
-
- //inReverse only matters if Ar contains some line anchor
- _q0 = _builder.CreateState(inReverse ? srm._reversePattern : (addDotStar ? srm._dotStarredPattern : srm._pattern), startId);
-
- if (asNFA)
- {
- _nfa = _q0.Node.Explore(bound);
- for (int q = 0; q < _nfa.StateCount; q++)
- {
- _states.Add(q);
- foreach ((T, SymbolicRegexNode?, int) branch in _nfa.EnumeratePaths(q))
- _moves.Add(Move<(SymbolicRegexNode?, T)>.Create(q, branch.Item3, (branch.Item2, branch.Item1)));
- }
- }
- else
- {
- Dictionary<(int, int), T> normalizedmoves = new();
- Stack> stack = new();
- stack.Push(_q0);
- _states.Add(_q0.Id);
- _stateSet.Add(_q0.Id);
-
- T[]? partition = _builder._solver.GetMinterms();
- Debug.Assert(partition is not null);
- //unwind until the stack is empty or the bound has been reached
- while (stack.Count > 0 && (bound <= 0 || _states.Count < bound))
- {
- DfaMatchingState q = stack.Pop();
- foreach (T c in partition)
- {
- DfaMatchingState p = q.Next(c);
-
- // check that p is not a dead-end
- if (!p.IsNothing)
- {
- if (_stateSet.Add(p.Id))
- {
- stack.Push(p);
- _states.Add(p.Id);
- }
-
- (int, int) qp = (q.Id, p.Id);
- normalizedmoves[qp] = normalizedmoves.ContainsKey(qp) ?
- _builder._solver.Or(normalizedmoves[qp], c) :
- c;
- }
- }
- }
-
- foreach (KeyValuePair<(int, int), T> entry in normalizedmoves)
- _moves.Add(Move<(SymbolicRegexNode?, T)>.Create(entry.Key.Item1, entry.Key.Item2, (null, entry.Value)));
- }
- }
-
- public (SymbolicRegexNode?, T)[] Alphabet
- {
- get
- {
- T[]? alphabet = _builder._solver.GetMinterms();
- Debug.Assert(alphabet is not null);
- var results = new (SymbolicRegexNode?, T)[alphabet.Length];
- for (int i = 0; i < alphabet.Length; i++)
- {
- results[i] = (null, alphabet[i]);
- }
- return results;
- }
- }
-
- public int InitialState => _nfa is not null ? 0 : _q0.Id;
-
- public int StateCount => _states.Count;
-
- public int TransitionCount => _moves.Count;
-
- public string DescribeLabel((SymbolicRegexNode?, T) lab) =>
- lab.Item1 is null ? Net.WebUtility.HtmlEncode(_builder._solver.PrettyPrint(lab.Item2)) :
- // Conditional nullability based on anchors
- Net.WebUtility.HtmlEncode($"{lab.Item1}/{_builder._solver.PrettyPrint(lab.Item2)}");
-
- public string DescribeStartLabel() => "";
-
- public string DescribeState(int state)
- {
- if (_nfa is not null)
- {
- Debug.Assert(state < _nfa.StateCount);
- string? str = Net.WebUtility.HtmlEncode(_nfa.GetNode(state).ToString());
- return _nfa.IsUnexplored(state) ? $"Unexplored:{str}" : str;
- }
-
- Debug.Assert(_builder._stateArray is not null);
- return _builder._stateArray[state].DgmlView;
- }
-
- public IEnumerable GetStates() => _states;
-
- public bool IsFinalState(int state)
- {
- if (_nfa is not null)
- {
- Debug.Assert(state < _nfa.StateCount);
- return _nfa.CanBeNullable(state);
- }
-
- Debug.Assert(_builder._stateArray is not null && state < _builder._stateArray.Length);
- return _builder._stateArray[state].Node.CanBeNullable;
- }
-
- public IEnumerable?, T)>> GetMoves() => _moves;
- }
-}
-#endif
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/DgmlWriter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/DgmlWriter.cs
new file mode 100644
index 0000000000000..540a834b60a1f
--- /dev/null
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/DgmlWriter.cs
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#if DEBUG
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.IO;
+using System.Net;
+
+namespace System.Text.RegularExpressions.Symbolic
+{
+ [ExcludeFromCodeCoverage(Justification = "Currently only used for testing")]
+ internal static class DgmlWriter where T : notnull
+ {
+ /// Write the DFA or NFA in DGML format into the TextWriter.
+ /// The matcher for the regular expression.
+ /// Writer to which the DGML is written.
+ /// True to create an NFA instead of a DFA.
+ /// True to prepend .*? onto the pattern (outside of the implicit root capture).
+ /// If true, then unwind the regex backwards (and addDotStar is ignored).
+ /// The approximate maximum number of states to include; less than or equal to 0 for no maximum.
+ /// Maximum length of transition labels; anything over that length is cut and indicated with "..".
+ public static void Write(
+ TextWriter writer, SymbolicRegexMatcher matcher,
+ bool nfa = false, bool addDotStar = true, bool reverse = false, int maxStates = -1, int maxLabelLength = -1)
+ {
+ var explorer = new DfaExplorer(matcher, nfa, addDotStar, reverse, maxStates); // the constructor performs the whole exploration; the rest of this method only formats it
+ var nonEpsilonTransitions = new Dictionary<(int SourceState, int TargetState), List<(SymbolicRegexNode?, T)>>(); // labels grouped per (source, target) state pair
+ var epsilonTransitions = new List();
+
+ foreach (Transition transition in explorer.GetTransitions()) // split transitions into epsilon ones and labelled ones
+ {
+ if (transition.IsEpsilon)
+ {
+ epsilonTransitions.Add(transition);
+ }
+ else
+ {
+ (int SourceState, int TargetState) p = (transition.SourceState, transition.TargetState);
+ if (!nonEpsilonTransitions.TryGetValue(p, out List<(SymbolicRegexNode?, T)>? rules))
+ {
+ nonEpsilonTransitions[p] = rules = new List<(SymbolicRegexNode?, T)>(); // first label seen for this state pair
+ }
+
+ rules.Add(transition.Label);
+ }
+ }
+
+ writer.WriteLine("");
+ writer.WriteLine("");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ", GetDFAInfo(explorer));
+ writer.WriteLine(" ", GetDFAInfo(explorer));
+ foreach (int state in explorer.GetStates()) // one group of output lines per explored state
+ {
+ writer.WriteLine(" ", state, explorer.DescribeState(state));
+ if (state == explorer.InitialState) // extra line for the initial state
+ {
+ writer.WriteLine(" ");
+ }
+ if (explorer.IsFinalState(state)) // extra line for final states
+ {
+ writer.WriteLine(" ");
+ }
+ writer.WriteLine(" ");
+ writer.WriteLine(" ", state, explorer.DescribeState(state));
+ }
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ", explorer.InitialState);
+ writer.WriteLine(" ");
+
+ foreach (Transition transition in epsilonTransitions) // epsilon transitions are written without a label
+ {
+ writer.WriteLine(" ", transition.SourceState, transition.TargetState);
+ }
+
+ foreach (KeyValuePair<(int, int), List<(SymbolicRegexNode?, T)>> transition in nonEpsilonTransitions)
+ {
+ string label = string.Join($",{Environment.NewLine} ", DescribeLabels(explorer, transition.Value)); // all labels of one state pair, comma-separated, one per line
+ string info = "";
+ if (label.Length > (uint)maxLabelLength) // a negative maxLabelLength becomes a huge value after the (uint) cast, so no truncation happens
+ {
+ info = $"FullLabel = \"{label}\" "; // keep the untruncated text alongside the shortened label
+ label = string.Concat(label.AsSpan(0, maxLabelLength), ".."); // first maxLabelLength characters followed by ".."
+ }
+
+ writer.WriteLine($" "); // NOTE(review): the template text is not visible here; presumably it embeds the state pair, label and info - confirm
+ }
+
+ foreach (int state in explorer.GetStates())
+ {
+ writer.WriteLine(" ", state);
+ }
+
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine(" ");
+ writer.WriteLine("");
+ }
+
+ // Builds the summary text for the explored automaton: the state count, the transition
+ // count, and the comma-separated list of minterm descriptions.
+ // Line breaks are written as the character reference "&#13;" instead of a raw line break:
+ // a raw line break is not allowed inside a regular interpolated string literal, and
+ // (presumably, since the result is handed to the DGML output) a character reference is
+ // what survives inside an XML attribute value.
+ private static string GetDFAInfo(DfaExplorer explorer)
+ {
+     // Alphabet holds exactly one entry per minterm, so its length is the minterm count.
+     var alphabet = explorer.Alphabet;
+
+     StringBuilder sb = new();
+     sb.Append($"States = {explorer.StateCount}&#13;");
+     sb.Append($"Transitions = {explorer.TransitionCount}&#13;");
+     sb.Append($"Min Terms ({alphabet.Length}) = ").AppendJoin(',', DescribeLabels(explorer, alphabet));
+     return sb.ToString();
+ }
+
+ // Lazily yields the display text of every label, in list order, as produced by the
+ // explorer's DescribeLabel.
+ private static IEnumerable DescribeLabels(DfaExplorer explorer, IList<(SymbolicRegexNode?, T)> items)
+ {
+     foreach (var item in items)
+     {
+         yield return explorer.DescribeLabel(item);
+     }
+ }
+
+ /// Used to unwind a regex into a DFA up to a bound that limits the number of states
+ private sealed class DfaExplorer
+ {
+ private readonly DfaMatchingState _initialState;
+ private readonly List _states = new();
+ private readonly List _transitions = new();
+ private readonly SymbolicNFA? _nfa;
+ internal readonly SymbolicRegexBuilder _builder;
+
+ // Explores the automaton for the matcher's pattern and fills _states and _transitions.
+ //   srm        - matcher supplying the builder and the forward, reverse and dot-starred patterns.
+ //   nfa        - true to record the NFA returned by Explore; false to unwind a DFA.
+ //   addDotStar - true to start from the dot-starred pattern; ignored when reverse is true.
+ //   reverse    - true to start from the reverse pattern.
+ //   maxStates  - approximate bound on the number of states; in the DFA case, values <= 0
+ //                disable the bound.
+ internal DfaExplorer(SymbolicRegexMatcher srm, bool nfa, bool addDotStar, bool reverse, int maxStates)
+ {
+     _builder = srm._builder;
+
+     // Second argument of CreateState: CharKind.BeginningEnd when the pattern starts with a line
+     // anchor, 0 otherwise. The forward case consults the plain pattern even when the
+     // dot-starred pattern is the one being explored.
+     uint startId = reverse ?
+         (srm._reversePattern._info.StartsWithLineAnchor ? CharKind.BeginningEnd : 0) :
+         (srm._pattern._info.StartsWithLineAnchor ? CharKind.BeginningEnd : 0);
+
+     // Create the initial state; reverse takes precedence over addDotStar.
+     _initialState = _builder.CreateState(
+         reverse ? srm._reversePattern :
+         addDotStar ? srm._dotStarredPattern :
+         srm._pattern, startId);
+
+     if (nfa)
+     {
+         // NFA states are identified by their index, 0 .. StateCount - 1.
+         _nfa = _initialState.Node.Explore(maxStates);
+         for (int q = 0; q < _nfa.StateCount; q++)
+         {
+             _states.Add(q);
+             foreach (var branch in _nfa.EnumeratePaths(q))
+             {
+                 // branch is (minterm, node, target); the transition label is (node, minterm).
+                 _transitions.Add(new Transition(q, branch.Item3, (branch.Item2, branch.Item1)));
+             }
+         }
+     }
+     else
+     {
+         // One merged predicate per (source, target) pair of state ids.
+         Dictionary<(int, int), T> normalizedMoves = new();
+
+         var stack = new Stack<DfaMatchingState<T>>();
+         stack.Push(_initialState);
+         _states.Add(_initialState.Id);
+
+         var stateSet = new HashSet<int> { _initialState.Id };
+
+         T[]? minterms = _builder._solver.GetMinterms();
+         Debug.Assert(minterms is not null);
+
+         // Unwind until the stack is empty or the bound has been reached. States that were
+         // discovered but not yet expanded when the bound is hit stay in _states without
+         // outgoing transitions, which is why the bound is only approximate.
+         while (stack.Count > 0 && (maxStates <= 0 || _states.Count < maxStates))
+         {
+             var q = stack.Pop();
+             foreach (T c in minterms)
+             {
+                 var p = q.Next(c);
+
+                 // A successor that is a dead end is left out of the graph.
+                 if (p.IsNothing)
+                 {
+                     continue;
+                 }
+
+                 if (stateSet.Add(p.Id))
+                 {
+                     stack.Push(p);
+                     _states.Add(p.Id);
+                 }
+
+                 // Single lookup: extend the predicate already recorded for this pair, or start one.
+                 (int, int) qp = (q.Id, p.Id);
+                 normalizedMoves[qp] = normalizedMoves.TryGetValue(qp, out T? existing) ?
+                     _builder._solver.Or(existing, c) :
+                     c;
+             }
+         }
+
+         foreach (KeyValuePair<(int, int), T> entry in normalizedMoves)
+         {
+             _transitions.Add(new Transition(entry.Key.Item1, entry.Key.Item2, (null, entry.Value)));
+         }
+     }
+ }
+
+ public (SymbolicRegexNode?, T)[] Alphabet // the solver's minterms, each wrapped as a label whose node component is null
+ {
+ get
+ {
+ T[]? alphabet = _builder._solver.GetMinterms();
+ Debug.Assert(alphabet is not null);
+ var results = new (SymbolicRegexNode?, T)[alphabet.Length]; // a fresh array is built on every access
+ for (int i = 0; i < alphabet.Length; i++)
+ {
+ results[i] = (null, alphabet[i]);
+ }
+ return results;
+ }
+ }
+
+ public int InitialState => _nfa is not null ? 0 : _initialState.Id; // 0 when an NFA was explored, otherwise the Id of the initial DFA state
+
+ public int StateCount => _states.Count;
+
+ public int TransitionCount => _transitions.Count;
+
+ // Returns the HTML-encoded display text of a transition label: the pretty-printed predicate
+ // alone when the label carries no node, otherwise "node/predicate".
+ public string DescribeLabel((SymbolicRegexNode?, T) lab)
+ {
+     string text;
+     if (lab.Item1 is null)
+     {
+         text = _builder._solver.PrettyPrint(lab.Item2);
+     }
+     else
+     {
+         // Conditional nullability based on anchors
+         text = $"{lab.Item1}/{_builder._solver.PrettyPrint(lab.Item2)}";
+     }
+
+     return WebUtility.HtmlEncode(text);
+ }
+
+ // Returns the display text of a state. Without an NFA this is the DgmlView of the builder's
+ // state array entry; with an NFA it is the HTML-encoded node text, prefixed with
+ // "Unexplored:" when the NFA reports the state as unexplored.
+ public string DescribeState(int state)
+ {
+     if (_nfa is null)
+     {
+         Debug.Assert(_builder._stateArray is not null);
+         return _builder._stateArray[state].DgmlView;
+     }
+
+     Debug.Assert(state < _nfa.StateCount);
+     string? encodedNode = WebUtility.HtmlEncode(_nfa.GetNode(state).ToString());
+     if (_nfa.IsUnexplored(state))
+     {
+         return $"Unexplored:{encodedNode}";
+     }
+
+     return encodedNode;
+ }
+
+ public IEnumerable GetStates() => _states;
+
+ // Reports whether a state counts as final, i.e. whether it can be nullable: the NFA is asked
+ // when one was explored, otherwise the node of the builder's state array entry.
+ public bool IsFinalState(int state)
+ {
+     if (_nfa is null)
+     {
+         Debug.Assert(_builder._stateArray is not null && state < _builder._stateArray.Length);
+         return _builder._stateArray[state].Node.CanBeNullable;
+     }
+
+     Debug.Assert(state < _nfa.StateCount);
+     return _nfa.CanBeNullable(state);
+ }
+ public List GetTransitions() => _transitions;
+ }
+
+ private record Transition(int SourceState, int TargetState, (SymbolicRegexNode?, T) Label) // one edge of the explored graph: source state, target state and label
+ {
+ public bool IsEpsilon => Label.Equals(default); // an edge is treated as an epsilon move when its label equals `default`
+ }
+ }
+}
+#endif
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
index e96525cf07a51..37010427734ac 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
@@ -16,15 +16,13 @@ internal abstract class SymbolicRegexMatcher
{
#if DEBUG
/// Unwind the regex of the matcher and save the resulting state graph in DGML
- /// roughly the maximum number of states, 0 means no bound
- /// if true then hide state info
- /// if true then pretend that there is a .* at the beginning
- /// if true then unwind the regex backwards (addDotStar is then ignored)
- /// if true then compute and save only genral DFA info
- /// dgml output is written here
+ /// Writer to which the DGML is written.
+ /// True to create an NFA instead of a DFA.
+ /// True to prepend .*? onto the pattern (outside of the implicit root capture).
+ /// If true, then unwind the regex backwards.
+ /// The approximate maximum number of states to include; less than or equal to 0 for no maximum.
/// maximum length of labels in nodes anything over that length is indicated with ..
- /// if true creates NFA instead of DFA
- public abstract void SaveDGML(TextWriter writer, int bound, bool hideStateInfo, bool addDotStar, bool inReverse, bool onlyDFAinfo, int maxLabelLength, bool asNFA);
+ public abstract void SaveDGML(TextWriter writer, bool nfa, bool addDotStar, bool reverse, int maxStates, int maxLabelLength);
///
/// Generates up to k random strings matched by the regex
@@ -1276,12 +1274,8 @@ static int[] GetNextStates(int sourceState, int mintermId, SymbolicRegexBuilder<
}
#if DEBUG
- public override void SaveDGML(TextWriter writer, int bound, bool hideStateInfo, bool addDotStar, bool inReverse, bool onlyDFAinfo, int maxLabelLength, bool asNFA)
- {
- var graph = new DGML.RegexAutomaton(this, bound, addDotStar, inReverse, asNFA);
- var dgml = new DGML.DgmlWriter(writer, hideStateInfo, maxLabelLength, onlyDFAinfo);
- dgml.Write(graph);
- }
+ public override void SaveDGML(TextWriter writer, bool nfa, bool addDotStar, bool reverse, int maxStates, int maxLabelLength) =>
+ DgmlWriter.Write(writer, this, nfa, addDotStar, reverse, maxStates, maxLabelLength);
public override IEnumerable GenerateRandomMembers(int k, int randomseed, bool negative) =>
new SymbolicRegexSampler(_pattern, randomseed, negative).GenerateRandomMembers(k);
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs
index bf4c9eb20671c..59522b71116bd 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs
@@ -1582,9 +1582,11 @@ internal void ToString(StringBuilder sb)
case SymbolicRegexNodeKind.OrderedOr:
Debug.Assert(_left is not null && _right is not null);
+ sb.Append('(');
_left.ToString(sb);
sb.Append('|');
_right.ToString(sb);
+ sb.Append(')');
return;
case SymbolicRegexNodeKind.Concat:
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/CustomDerivedRegexScenarioTest.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/CustomDerivedRegexScenarioTest.cs
index 5f40a3c2e56fc..cf691b3014b87 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/CustomDerivedRegexScenarioTest.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/CustomDerivedRegexScenarioTest.cs
@@ -35,7 +35,7 @@ internal class CustomDerivedRegex : Regex
public CustomDerivedRegex()
{
- pattern = /*lang=regex*/@"\G(\d{1,3})(?=(?:\d{3})+\b)";
+ pattern = @"\G(\d{1,3})(?=(?:\d{3})+\b)";
roptions = RegexOptions.Compiled;
internalMatchTimeout = Timeout.InfiniteTimeSpan;
factory = new CustomRegexRunnerFactory();
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Tests.Common.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Tests.Common.cs
index e1792c623be21..e94e98140734a 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Tests.Common.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Tests.Common.cs
@@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Threading.Tasks;
using Xunit;
@@ -48,7 +49,7 @@ public static bool IsDefaultStart(string input, RegexOptions options, int start)
return start == 0;
}
- public static async Task GetRegexAsync(RegexEngine engine, string pattern, RegexOptions options, Globalization.CultureInfo culture)
+ public static async Task GetRegexAsync(RegexEngine engine, [StringSyntax(StringSyntaxAttribute.Regex)] string pattern, RegexOptions options, Globalization.CultureInfo culture)
{
using (new System.Tests.ThreadCultureChange(culture))
{
@@ -100,7 +101,7 @@ public static IEnumerable AvailableEngines
public static bool IsNonBacktracking(RegexEngine engine) =>
engine is RegexEngine.NonBacktracking or RegexEngine.NonBacktrackingSourceGenerated;
- public static async Task GetRegexAsync(RegexEngine engine, string pattern, RegexOptions? options = null, TimeSpan? matchTimeout = null)
+ public static async Task GetRegexAsync(RegexEngine engine, [StringSyntax(StringSyntaxAttribute.Regex)] string pattern, RegexOptions? options = null, TimeSpan? matchTimeout = null)
{
if (options is null)
{
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexExperiment.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexExperiment.cs
index dd7b552e49a90..aa5f1932118b8 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexExperiment.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexExperiment.cs
@@ -8,6 +8,7 @@
using Xunit;
using Xunit.Abstractions;
using System.Threading.Tasks;
+using System.Diagnostics.CodeAnalysis;
namespace System.Text.RegularExpressions.Tests
{
@@ -49,42 +50,6 @@ public void RegenerateUnicodeTables()
}
}
- /// Save the regex as a DFA in DGML format in the textwriter.
- private static bool TrySaveDGML(Regex regex, TextWriter writer, int bound = -1, bool hideStateInfo = false, bool addDotStar = false, bool inReverse = false, bool onlyDFAinfo = false, int maxLabelLength = -1, bool asNFA = false)
- {
- MethodInfo? saveDgml = regex.GetType().GetMethod("SaveDGML", BindingFlags.NonPublic | BindingFlags.Instance);
- if (saveDgml is null)
- {
- return false;
- }
- else
- {
- saveDgml.Invoke(regex, new object[] { writer, bound, hideStateInfo, addDotStar, inReverse, onlyDFAinfo, maxLabelLength, asNFA });
- return true;
- }
- }
-
- /// View the regex as a DFA in DGML format in VS.
- internal static void ViewDGML(Regex regex, int bound = -1, bool hideStateInfo = true, bool addDotStar = false, bool inReverse = false, bool onlyDFAinfo = false, string name = "DFA", int maxLabelLength = 20, bool asNFA = false)
- {
- if (!Directory.Exists(DgmlOutputDirectoryPath))
- {
- Directory.CreateDirectory(DgmlOutputDirectoryPath);
- }
-
- var sw = new StringWriter();
- // If TrySaveDGML returns false then Regex.SaveDGML is not supported (in Release build)
- if (TrySaveDGML(regex, sw, bound, hideStateInfo, addDotStar, inReverse, onlyDFAinfo, maxLabelLength, asNFA))
- {
- if (asNFA)
- {
- name = "NFA";
- }
-
- File.WriteAllText(Path.Combine(DgmlOutputDirectoryPath, $"{(inReverse ? name + "r" : (addDotStar ? name + "1" : name))}.dgml"), sw.ToString());
- }
- }
-
private static long MeasureMatchTime(Regex re, string input, out Match match)
{
try
@@ -124,6 +89,10 @@ private static string And(params string[] regexes)
///
private static string Not(string regex) => $"(?({regex})[0-[0]]|.*)";
+ /// <summary>
+ /// When the guard checked at the start of this test is set to return true, outputs DGML diagrams for the specified pattern.
+ /// This is useful for understanding what graphs the NonBacktracking engine creates for the specified pattern.
+ /// </summary>
[Fact]
public void ViewSampleRegexInDGML()
{
@@ -132,22 +101,37 @@ public void ViewSampleRegexInDGML()
return;
}
+ if (!Directory.Exists(DgmlOutputDirectoryPath))
+ {
+ Directory.CreateDirectory(DgmlOutputDirectoryPath);
+ }
+
try
{
- //string rawregex = @"\bis\w*\b";
- string rawregex = And(".*[0-9].*[0-9].*", ".*[A-Z].*[A-Z].*", Not(".*(01|12).*"));
- //string rawregex = "a.{4}$";
- Regex re = new Regex($@"{rawregex}", RegexHelpers.RegexOptionNonBacktracking | RegexOptions.Singleline);
- ViewDGML(re);
- ViewDGML(re, inReverse: true);
- ViewDGML(re, addDotStar: true);
- ViewDGML(re, asNFA: true, bound: 12);
- ViewDGML(re, inReverse: true, asNFA: true, bound: 12);
- ViewDGML(re, addDotStar: true, asNFA: true, bound: 12);
+ /*lang=regex*/
+ string pattern = @"abc|cd";
+
+ ViewDGML(pattern, "DFA");
+ ViewDGML(pattern, "DFA_DotStar", addDotStar: true);
+
+ ViewDGML(pattern, "NFA", nfa: true, maxStates: 12);
+ ViewDGML(pattern, "NFA_DotStar", nfa: true, addDotStar: true, maxStates: 12);
+
+ static void ViewDGML(string pattern, string name, bool nfa = false, bool addDotStar = false, bool reverse = false, int maxStates = -1, int maxLabelLength = 20) // Local helper: builds a regex from pattern and writes its DGML to "{name}.dgml" in DgmlOutputDirectoryPath.
+ {
+ var regex = new Regex(pattern, RegexHelpers.RegexOptionNonBacktracking | RegexOptions.Singleline);
+ if (regex.GetType().GetMethod("SaveDGML", BindingFlags.NonPublic | BindingFlags.Instance) is MethodInfo saveDgml) // Non-public method looked up via reflection; if it is not found, the helper does nothing.
+ {
+ var sw = new StringWriter();
+ saveDgml.Invoke(regex, new object[] { sw, nfa, addDotStar, reverse, maxStates, maxLabelLength }); // NOTE(review): positional args presumably mirror SaveDGML's parameter order - confirm against Regex.Debug.cs.
+ string path = Path.Combine(DgmlOutputDirectoryPath, $"{name}.dgml");
+ File.WriteAllText(path, sw.ToString());
+ Console.WriteLine(path); // Print where the file was written so it can be located after the run.
+ }
+ }
}
- catch (NotSupportedException e)
+ catch (NotSupportedException e) when (e.Message.Contains("conditional"))
{
- Assert.Contains("conditional", e.Message);
}
}
@@ -160,7 +144,7 @@ public void TestDGMLGeneration(string pattern, int explorationbound, string[] ex
{
StringWriter sw = new StringWriter();
var re = new Regex(pattern, RegexHelpers.RegexOptionNonBacktracking | RegexOptions.Singleline);
- if (TrySaveDGML(re, writer: sw, bound: explorationbound, inReverse: exploreInReverse, asNFA: exploreAsNFA))
+ if (TrySaveDGML(re, sw, exploreAsNFA, addDotStar: false, exploreInReverse, explorationbound, maxLabelLength: -1))
{
string str = sw.ToString();
Assert.StartsWith("", str);
@@ -171,12 +155,12 @@ public void TestDGMLGeneration(string pattern, int explorationbound, string[] ex
}
}
- static bool TrySaveDGML(Regex regex, TextWriter writer, int bound = -1, bool hideStateInfo = false, bool addDotStar = false, bool inReverse = false, bool onlyDFAinfo = false, int maxLabelLength = -1, bool asNFA = false)
+ static bool TrySaveDGML(Regex regex, TextWriter writer, bool nfa, bool addDotStar, bool reverse, int maxStates, int maxLabelLength)
{
MethodInfo saveDgml = regex.GetType().GetMethod("SaveDGML", BindingFlags.NonPublic | BindingFlags.Instance);
if (saveDgml is not null)
{
- saveDgml.Invoke(regex, new object[] { writer, bound, hideStateInfo, addDotStar, inReverse, onlyDFAinfo, maxLabelLength, asNFA });
+ saveDgml.Invoke(regex, new object[] { writer, nfa, addDotStar, reverse, maxStates, maxLabelLength });
return true;
}
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexRunnerTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexRunnerTests.cs
index 82f2ae5a0336b..b2cd20814c0fa 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexRunnerTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexRunnerTests.cs
@@ -13,7 +13,7 @@ public class RegexRunnerTests
[MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))]
public async Task EnginesThrowNotImplementedForGoAndFFC(RegexEngine engine)
{
- Regex re = await RegexHelpers.GetRegexAsync(engine, /*lang=regex*/@"abc");
+ Regex re = await RegexHelpers.GetRegexAsync(engine, @"abc");
// Use reflection to ensure the runner is created so it can be fetched.
MethodInfo createRunnerMethod = typeof(Regex).GetMethod("CreateRunner", BindingFlags.Instance | BindingFlags.NonPublic);
@@ -34,7 +34,7 @@ public async Task EnginesThrowNotImplementedForGoAndFFC(RegexEngine engine)
[MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))]
public async Task EnsureRunmatchValueIsNulledAfterIsMatch(RegexEngine engine)
{
- Regex re = await RegexHelpers.GetRegexAsync(engine, /*lang=regex*/@"abc");
+ Regex re = await RegexHelpers.GetRegexAsync(engine, @"abc");
// First call IsMatch which should initialize runmatch on the runner.
Assert.True(re.IsMatch("abcabcabc"));
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj
index 35707651e3b74..4162d7b75fd26 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/System.Text.RegularExpressions.Tests.csproj
@@ -38,6 +38,7 @@
+