From 808424c8aef5962757e26332e49d7f48e61bdfd8 Mon Sep 17 00:00:00 2001 From: Ryan Molden Date: Tue, 25 Jul 2023 19:26:37 -0700 Subject: [PATCH 1/7] Fix bugs in DAC names that either contain non-closed generics or non-assembly qualified names (see bug #897) Add lots of comments to parser as its complex due to having to deal with tragedies like the type name in ParseGenericWithComplexGenericArgs2 Replace uses of char literals in numerous places with their named constants I was using elsewhere Correct numerous comment spelling errors (thanks VS spell checker) Add numerous tests both for the case in 897, some variants of it, and another case I encountered internally --- .../src/DACNameParserTests.cs | 70 +++ .../Implementation/DACNameParser.cs | 423 ++++++++++++++---- 2 files changed, 406 insertions(+), 87 deletions(-) diff --git a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs index 5c0875ed8..5769e5d41 100644 --- a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs +++ b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs @@ -188,5 +188,75 @@ public void ParseArrayOfNestedGenericType() Assert.Equal(expectedResult, DACNameParser.Parse(input)); } + + // Parse a generic arg list with some assembly-qualified names, and some non-qualified names, moving the location of the assembly-qualified arg throughout the list + // in the various tests (i.e. ParseValueTupleWithoutAllFullyQualifiedNames1, ParseValueTupleWithoutAllFullyQualifiedNames2, ...) + [Fact] + public void ParseValueTupleWithoutAllFullyQualifiedNames1() + { + string input = "System.ValueTuple`3[[System.String, mscorlib],TCheapVersion,TExpensiveVersion]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseValueTupleWithoutAllFullyQualifiedNames2() + { + string input = "System.ValueTuple`3[TCheapVersion,[System.String, mscorlib],TExpensiveVersion]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseValueTupleWithoutAllFullyQualifiedNames3() + { + string input = "System.ValueTuple`3[TCheapVersion,TExpensiveVersion,[System.String, mscorlib]]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + // Test with NO qualified names + [Fact] + public void ParseValueTupleWithNoFullyQualifiedNames() + { + string input = "System.ValueTuple`3[TType1,TType2,TType3]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void Bug897() + { + // https://github.com/microsoft/clrmd/issues/897 + + // This input is unusual both in that T and TPluginType are not assembly qualified, but also LambdaInstance is a non-closed generic type. + string input = "StructureMap.Pipeline.ExpressedInstance`3[[StructureMap.Pipeline.LambdaInstance`2, EnforceHttpsModule],T,TPluginType]"; + string expectedResult = "StructureMap.Pipeline.ExpressedInstance, T, TPluginType>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + // Some simple variants on Bug897 + [Fact] + public void Bug897_Variant1() + { + string input = "StructureMap.Pipeline.ExpressedInstance`3[[StructureMap.Pipeline.LambdaInstance`2[[System.String, mscorlib],SomeFakeType], SomeFakeAssembly],T,TPluginType]"; + string expectedResult = "StructureMap.Pipeline.ExpressedInstance, T, TPluginType>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void Bug897_Variant2() + { + string input = "StructureMap.Pipeline.ExpressedInstance`3[[StructureMap.Pipeline.LambdaInstance`2[[List`1[[System.ValueType`2[[System.Boolean, mscorlib],SomeOtherFakeType], mscorlib]], mscorlib],SomeFakeType], SomeFakeAssembly],T,TPluginType]"; + string expectedResult = "StructureMap.Pipeline.ExpressedInstance>, SomeFakeType>, T, TPluginType>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } } } \ No newline at end of file diff --git a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs index 119964016..3b2ceeb4e 100644 --- a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs +++ b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs @@ -23,14 +23,14 @@ namespace Microsoft.Diagnostics.Runtime.Implementation // 1) Minimal heap allocation. // 2) Fast, non-recursive parsing. // 3) Accuracy in the face of complex nesting of types, nesting of generic args, etc... - // 4) Resiliancy to failure (i.e. returns the original DAC name if errors are encountered instead of throwing). + // 4) Resiliency to failure (i.e. returns the original DAC name if errors are encountered instead of throwing). internal static class DACNameParser { private const char GenericAritySpecifier = '`'; - private const char ArgSeperator = ','; + private const char ArgSeparator = ','; private const char NestedClassSpecifier = '+'; - private const char GenericArgListOrArrayStartSpecifier = '['; - private const char GenericArgListOrArrayEndSpecifier = ']'; + private const char GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier = '['; + private const char GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier = ']'; [return: NotNullIfNotNull(nameof(name))] public static string? Parse(string? name) @@ -43,7 +43,6 @@ internal static class DACNameParser try { - // This is the primary method of the parser. It operates as a simple state machine storing information about types encountered as // they are encountered and back-propagating information to types as it is discovered. // @@ -76,6 +75,54 @@ void EnsureGenericArgList() int curPos = 0; int parsingNestedClassDepth = 0; int parsingGenericArgListDepth = 0; + Stack isCurrentArgAssemblyQualified = new Stack(); + + // local type name parsing method that doesn't modify the currentState variable that drive the state machine. Used by a few of the typename parsing paths that need to + // have knowledge of their context to operate properly, but don't want to duplicate this code. Returns a non-empty string in the special case where the input string + // should be the output string, returns string.Empty in all other cases and expects callers to continue to parsing progress by setting currentState/curPos directly + // or calling DetermineNextStateAndPos. + string ParseTypeNameNoStateAdvance() + { + // We are parsing a type name, this is the initial state as well as one entered into every time we are extracting a single type name through the course of parsing. + // This handles parsing nested types as well as generic param types. + // + // Parsing of type names is non-extractive, i.e. we don't substring the input string or create any new heap allocations to track them (apart from the two local + // Lists), we simply remember the extents of the name (start/end). + + int start; + (start, curPos) = GetTypeNameExtent(name, curPos, parsingGenericArgList: parsingGenericArgListDepth != 0); + + // Special case: Check if after parsing the type name we have exhausted the string, if so it means the input string is the output string, so just return it + // without allocating a copy. + if (ReturnOriginalDACString(curPos, name.Length, nameSegments)) + return name; + + bool typeIsNestedClass = (parsingNestedClassDepth != 0); + if (parsingGenericArgListDepth == 0) + { + // We are parsing a top-level type name/sequence + EnsureNameSegmentList(); + +#pragma warning disable CS8602 // EnsureNameSegmentList call above ensures that nameSegments is never null here + nameSegments.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingArgDepth: 0)); +#pragma warning restore CS8602 + } + else + { + // We are parsing a generic list (potentially nested lists in the case where a generic param is itself generic) + EnsureGenericArgList(); + +#pragma warning disable CS8602 // EnsureGenericArgList call above ensures that genericArgs is never null here + genericArgs.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingGenericArgListDepth)); +#pragma warning restore CS8602 + + } + + if (parsingNestedClassDepth != 0) + parsingNestedClassDepth--; + + return string.Empty; + } while (ShouldContinueParsing(currentState)) { @@ -83,45 +130,104 @@ void EnsureGenericArgList() { case ParsingState.ParsingTypeName: { - // We are parsing a type name, this is the initial state as well as one entered into every time we are extracting a single type name through the course of parsing. - // This handles parsing nested types as well as generic param types. - // - // Parsing of type names is non-extractive, i.e. we don't substring the input string or create any new heap allocations to track them (apart from the two local - // Lists), we simply remember the extents of the name (start/end). + string parsedResult = ParseTypeNameNoStateAdvance(); + if (!string.IsNullOrEmpty(parsedResult)) + return parsedResult; + + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + break; + } + case ParsingState.ParsingAssemblyQualifiedGenericArgName: + case ParsingState.ParsingNonAssemblyQualifiedGenericArgName: + { + // Generic params come in two flavors, one is assembly qualified, like this List`1[[System.Boolean, mscorlib]], the other is non-assembly qualified which will lack the + // [ and ] and just be a type name, like this List`1[TFoo]. Of course in multi argument generics these styles can be mixed and of course we can nest generics whose + // args are generic whose args are generic, etc... arbitrarily deeply. The primary tricky bit is when consuming a param from the input we need to consume the type name + // (in both cases) and potentially the trailing assembly name. We use isCurrentArgAssemblyQualified as a way to track whether the arg we are currently processing + // will have a trailing assembly name after it. We need a stack since we can, and do, frequently, generics whose args are generic (often numerous levels deep). Each + // arg on the descent of processing such types has its own unique 'is assembly qualified name' state. - int start; - (start, curPos) = GetTypeNameExtent(name, curPos, parsingGenericArgList: parsingGenericArgListDepth != 0); + // NOTE: We ignore the return type, the special case it handles isn't possible here in a well-formed name and we will only get well formed names from the DAC + ParseTypeNameNoStateAdvance(); - // Special case: Check if after parsing the type name we have exhausted the string, if so it means the input string is the output string, so just return it - // without allocating a copy. - if (ReturnOriginalDACString(curPos, name.Length, nameSegments)) - return name; + isCurrentArgAssemblyQualified.Push(currentState == ParsingState.ParsingAssemblyQualifiedGenericArgName); - bool typeIsNestedClass = (parsingNestedClassDepth != 0); - if (parsingGenericArgListDepth == 0) + // After a generic arg type name the only legit input states are: + // + // 1) We are entirely done with the string (curPos == name.Length). + // 2) The next token is an argument separator (,) + // 3) The next token is the end of the argument list (]) + // 4) The next token is a generic arity specifier for this arg (`) + // + // Anything else is an error + if (curPos == name.Length || + (name[curPos] != ArgSeparator && + name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier && + name[curPos] != GenericAritySpecifier)) { - // We are parsing a top-level type name/sequence - EnsureNameSegmentList(); + currentState = ParsingState.Error; + break; + } -#pragma warning disable CS8602 // EnsureNameSegmentList call above ensures that nameSegments is never null here - nameSegments.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingArgDepth: 0)); -#pragma warning restore CS8602 + // an argument can itself be generic, so we may encounter an arity specifier while parsing the arg name, make sure we don't get tripped up by that + if (name[curPos] == GenericAritySpecifier) + { + // NOTE: NOT done with this arg so leave our entry on the isCurrentArgAssemblyQualified in place + currentState = ParsingState.ParsingGenericArgCount; + curPos += 1; + break; } - else + + // Skip the assembly name portion if one exists + if (currentState == ParsingState.ParsingAssemblyQualifiedGenericArgName) { - // We are parsing a generic list (potentialy nested lists in the case where a generic param is itself generic) - EnsureGenericArgList(); + while (curPos != name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier) + { + curPos++; + } -#pragma warning disable CS8602 // EnsureGenericArgList call above ensures that genericArgs is never null here - genericArgs.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingGenericArgListDepth)); -#pragma warning restore CS8602 + // Since we are now pointing at the end of the assembly qualified name (the ]), advance past it so the checks below will be correct for both + // assembly qualified and non-assembly qualified names + if (curPos != name.Length) + { + curPos++; + } + } + + // We shouldn't have exhausted our input, if we have, fail + if (curPos == name.Length) + { + // We are done with this arg, so pop our entry + isCurrentArgAssemblyQualified.Pop(); + currentState = ParsingState.Error; + break; } - if (parsingNestedClassDepth != 0) - parsingNestedClassDepth--; + // We're done with the arg list, so just call into DetermineNextStateAndPos which will do the right thing + if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier) + { + // NOTE: We don't pop our entry because DetermineNextStateAndPos will put us into ResolveParsedGenericList, and we need to know + // that info in there. That code will clean up our entry when it finishes processing the arg. + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + break; + } - (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + // We have encountered another argument, so figure out our new state and yield back to the main loop + if (name[curPos] == ArgSeparator) + { + // We are done with this arg, so pop our entry + isCurrentArgAssemblyQualified.Pop(); + + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + break; + } + + // Should never get here + // + // We are done with this arg, so pop our entry + isCurrentArgAssemblyQualified.Pop(); + currentState = ParsingState.Error; break; } case ParsingState.ParsingNestedClass: @@ -158,37 +264,76 @@ void EnsureGenericArgList() break; } + // Double check that we aren't looking at non-closed generic type (see Github bug #897). + if (curPos != name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier && name[curPos] != NestedClassSpecifier) + { + int targetIndex = targetList.Count - 1; + TypeNameSegment seg = targetList[targetIndex]; + seg.MarkAsNonClosedGenericType(); + targetList[targetIndex] = seg; + + if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek()) + { + Debug.Assert(name[curPos] == ArgSeparator, $"{nameof(isCurrentArgAssemblyQualified)} is true but the non-closed generic we are processing was not followed by a '{ArgSeparator}'."); + if (name[curPos] != ArgSeparator) + { + currentState = ParsingState.Error; + break; + } + + currentState = ParsingState.ParsingGenericArgAssemblySpecifier; + break; + } + } + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } case ParsingState.ParsingGenericArgAssemblySpecifier: { // Nothing to do here, really, just skip the assembly name specified in the generic arg type - while (curPos < name.Length && name[curPos] != ']') + while (curPos < name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier) curPos++; + if (curPos != name.Length) + curPos++; // we hit the ] which closes the fully qualified name, so advance to the next char + + // Done with this arg, so clean up our stack + isCurrentArgAssemblyQualified.Pop(); + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } case ParsingState.ParsingGenericArgs: { - // Start parsing the list of generic types, this just entails marking that we are parsing a generic arg list. NOTE: to support nested generic arg lsits we + // Start parsing the list of generic types, this just entails marking that we are parsing a generic arg list. NOTE: to support nested generic arg lists we // have to keep track of list count, not just a simple bool are/aren't parsing. parsingGenericArgListDepth++; - currentState = ParsingState.ParsingTypeName; + + if (name[curPos + 1] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + currentState = ParsingState.ParsingAssemblyQualifiedGenericArgName; + curPos += 2; + } + else + { + currentState = ParsingState.ParsingNonAssemblyQualifiedGenericArgName; + curPos += 1; + } + break; } case ParsingState.ParsingArraySpecifier: { // Parse the array specifier, this mainly is to catch multi-dimensional arrays - // There is always at least a single dimenision in arrays, every comma counts as one more + // There is always at least a single dimension in arrays, every comma counts as one more int arrayDimensions = 1; // Calculate the array dimensions while ((curPos < name.Length) && (name[curPos] != ']')) { - if (name[curPos] == ',') + if (name[curPos] == ArgSeparator) arrayDimensions++; curPos++; @@ -242,15 +387,97 @@ void EnsureGenericArgList() // top-level). parsingGenericArgListDepth--; + bool isArgAssemblyQualified = isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek(); + if (genericArgs == null || genericArgs.Count == 0) { + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + // For top-level types with multiple-level generic arg lists (so a type with a generic arg which itself is a generic type) as we unwind the nested generic args // lists we can end up wth no more work to do upon exiting a level (because we already propagated the info backwards before getting here), in which case, do nothing. (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } - (currentState, curPos) = ResolveParsedGenericList(name, curPos, parsingGenericArgListDepth, nameSegments, genericArgs); + bool succeeded = ResolveParsedGenericList(parsingGenericArgListDepth, nameSegments, genericArgs); + if (!succeeded) + { + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + currentState = ParsingState.Error; + break; + } + + if (curPos == name.Length) + { + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + currentState = ParsingState.Done; + break; + } + + if (name[curPos] == ArgSeparator) + { + // If we are in the middle of parsing some number of possibly nested assembly qualified type names, and we close out a generic arg list and land on a , it either means + // it will be followed by the assembly name OR by another type name that itself is or is not qualified. So, account for both cases. + if (isArgAssemblyQualified) + { + // NOTE: We will pop isCurrentArgAssemblyQualified in the handler for ParsingGenericArgAssemblySpecifier + // + // Should be an assembly name, so skip it + currentState = ParsingState.ParsingGenericArgAssemblySpecifier; + break; + } + + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + // move past the comma + curPos++; + + // Advance past any whitespace in the param list + int potentiallyNewPos = MoveCurPosPastWhitespaceOrFail(name, curPos); + if (potentiallyNewPos < 0) + { + currentState = ParsingState.Error; + break; + } + + curPos = potentiallyNewPos; + + // We are moving on from the resolved generic and we have encountered a comma. We popped the state of the generic argument already, but it's possible + // that beyond this comma lies the assembly qualified name of the previous param (which could be a generic type that the just resolved generic was a param for). + // So if we see there is another param still (isCurrentArgAssemblyQualified.Count != 0) and that param is assembly qualified (isCurrentArgAssemblyQualified.Peek()) + // then we need to enter a state to consume that assembly name + if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek()) + { + // NOTE: The handler for ParsingGenericArgAssemblySpecifier will handle popping the isCurrentArgAssemblyQualified state for the arg + // it will be working on. + currentState = ParsingState.ParsingGenericArgAssemblySpecifier; + break; + } + + // we either have another qualified name, or a non-qualified one + if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + currentState = ParsingState.ParsingAssemblyQualifiedGenericArgName; + curPos += 1; + break; + } + else + { + currentState = ParsingState.ParsingNonAssemblyQualifiedGenericArgName; + break; + } + } + + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } } @@ -277,16 +504,24 @@ void EnsureGenericArgList() } } - private static (ParsingState State, int CurrentPosition) ResolveParsedGenericList(string name, int currentPosition, int parsingGenericArgListDepth, List? nameSegments, List? genericArgs) + private static int MoveCurPosPastWhitespaceOrFail(string name, int curPos) + { + while (curPos < name.Length && name[curPos] == ' ') + curPos++; + + return (curPos != name.Length ? curPos : -1); + } + + private static bool ResolveParsedGenericList(int parsingGenericArgListDepth, List? nameSegments, List? genericArgs) { if (genericArgs == null) { - return (ParsingState.Error, currentPosition); + return false; } // This is the most complicated part of the state machine, it involves back-propagating completed generic argument types into previous types they belong to. // It has to take care to propagate both amongst the genericArgs list as well as into the nameSegments list, it also has to ensure it unifies nested classes that - // exist seperate from their parent type, before back-propagating that parent type. + // exist separate from their parent type, before back-propagating that parent type. // // NOTE: This is called one time per generic list, so in the case of nested generics (where a param to a generic is itself another generic) this will be called // twice (and so on and so forth for arbitrary levels of nesting). What that means is that each call we only want to clear as many generics off our queue @@ -306,7 +541,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis // (also waiting for its params) and one for ActionResponse, which is the arg to pair with the IEnumerable. So we have handle this arg rollup before we can // propagate the args to the nameSegments list // - // NOTE: It is important to do this walk backwards since our list is being used like a queue and later entries bind with entries before them during genric arg + // NOTE: It is important to do this walk backwards since our list is being used like a queue and later entries bind with entries before them during generic arg // back-propagation // // NOTE: Purposely not using FindLastIndexOf because want to avoid allocation cost of lambda + indirection cost of callback during search @@ -328,7 +563,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis propagatedTypesToGenericArgs = true; if (!TryPatchUnfulfilledGenericArgs(genericTargetIndex, genericArgs)) - return (ParsingState.Error, currentPosition); + return false; int previousTarget = genericTargetIndex; genericTargetIndex = -1; @@ -354,7 +589,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis if (nameSegments == null) { Debug.Fail("Ended resolving generic arg list but no top-level types to propagate them to."); - return (ParsingState.Error, currentPosition); + return false; } // Fill the nameSegment generics with args, in order, from the genericArgs list. This works correctly whether the nameSegments list is a single generic or a @@ -388,7 +623,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis targetSegmentIndex = nameSegments.FindIndex(targetSegmentIndex, (tns) => tns.HasUnfulfilledGenericArgs); if (targetSegmentIndex == -1) { - return (ParsingState.Error, currentPosition); + return false; } targetSegment = nameSegments[targetSegmentIndex]; @@ -400,18 +635,14 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis nameSegments[targetSegmentIndex] = targetSegment; DebugOnly.Assert(genericArgs.Count == 0, "Back-propagation to top-level generic types ended with generic args still in the genericArgs list."); - - return DetermineNextStateAndPos(name, currentPosition); } else { - return (ParsingState.Error, currentPosition); + return false; } } - else - { - return DetermineNextStateAndPos(name, currentPosition); - } + + return true; } private static bool TryPatchUnfulfilledGenericArgs(int genericTargetIndex, List genericArgs) @@ -420,7 +651,7 @@ private static bool TryPatchUnfulfilledGenericArgs(int genericTargetIndex, List< DebugOnly.Assert(targetTypeToPatch.HasUnfulfilledGenericArgs, "Called TryPatchUnfulfilledGenericArgs with an index pointing at a TypeNameSegment that does not have unfulfilled generic params"); // Patch ALL missing args from the target type, but no more. Any types before the target that need filling in will trigger another ResolveParsedGenericList state - // to be entered as we unwind the parse, and at that point we will progate types back another level. This is very important to correctly parse horrific types like: + // to be entered as we unwind the parse, and at that point we will propagate types back another level. This is very important to correctly parse horrific types like: // // Microsoft.CodeAnalysis.PooledObjects.ObjectPool+TagNode>+Node, System.Boolean>>>+Factory int nextTargetFulfillmentIndex = -1; @@ -491,7 +722,7 @@ private static bool TryPatchUnfulfilledGenericArgs(int genericTargetIndex, List< // Add the located argument as a generic arg to our previous generic type targetTypeToPatch.AddGenericArg(genericArgs[nextTargetFulfillmentIndex]); - // Remove the back-propagaeted argument from our argument list since it is now contained within targetTypeToPatch + // Remove the back-propagated argument from our argument list since it is now contained within targetTypeToPatch genericArgs.RemoveAt(nextTargetFulfillmentIndex); } @@ -519,7 +750,7 @@ private static void UnifyNestedClasses(int parsingGenericArgListDepth, List // while (cur < name.Length && - name[cur] != '[' && - (name[cur] != ',' || !parsingGenericArgList) && - (name[cur] != '+' || parsingGenericArgList) && // allow + when parsing generic arg type names because generic arg names can be nested types - (name[cur] != '`' || ((cur + 1 < name.Length) && !char.IsDigit(name[cur + 1])))) + name[cur] != GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier && + name[cur] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier && + (name[cur] != ArgSeparator || !parsingGenericArgList) && + (name[cur] != NestedClassSpecifier || parsingGenericArgList) && // allow + when parsing generic arg type names because generic arg names can be nested types + (name[cur] != GenericAritySpecifier || ((cur + 1 < name.Length) && !char.IsDigit(name[cur + 1])))) { cur++; } @@ -610,9 +842,26 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam switch (name[curPos]) { - case ArgSeperator: + case ArgSeparator: { - return (ParsingState.ParsingGenericArgAssemblySpecifier, curPos + 1); + curPos++; // move past the comma + + int potentiallyNewPos = MoveCurPosPastWhitespaceOrFail(name, curPos); + if (potentiallyNewPos < 0) + { + return (ParsingState.Error, curPos); + } + + curPos = potentiallyNewPos; + + if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + return (ParsingState.ParsingAssemblyQualifiedGenericArgName, curPos + 1); + } + else + { + return (ParsingState.ParsingNonAssemblyQualifiedGenericArgName, curPos); + } } case GenericAritySpecifier: { @@ -622,33 +871,23 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam { return (ParsingState.ParsingNestedClass, curPos + 1); } - case GenericArgListOrArrayStartSpecifier: + case GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier: { if (curPos + 1 != name.Length) { - if (name[curPos + 1] is GenericArgListOrArrayEndSpecifier or ArgSeperator) + if (name[curPos + 1] is GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier or ArgSeparator) return (ParsingState.ParsingArraySpecifier, curPos); - else if (name[curPos + 1] == GenericArgListOrArrayStartSpecifier) - return (ParsingState.ParsingGenericArgs, curPos + 2); + else + return (ParsingState.ParsingGenericArgs, curPos); } return (ParsingState.Error, curPos); } - case GenericArgListOrArrayEndSpecifier: + case GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier: { if (curPos != name.Length) { - if (name[curPos + 1] == GenericArgListOrArrayEndSpecifier) - return (ParsingState.ResolveParsedGenericList, curPos + 2); - else if (name[curPos + 1] == ArgSeperator) - { - // +3 because cur pos == ']' and next position is a ',', which means we must have a list like this: - // - // [[Microsoft.VisualStudio.Threading.IJoinableTaskDependent, Microsoft.VisualStudio.Threading],[System.Int32, mscorlib],[Microsoft.VisualStudio.Threading.IJoinableTaskDependent, Microsoft.VisualStudio.Threading]] - // - // and thus curPos + 3 == '[', which we want to skip as well. - return (ParsingState.ParsingTypeName, curPos + 3); - } + return (ParsingState.ResolveParsedGenericList, curPos + 1); } return (ParsingState.Error, curPos); @@ -657,9 +896,12 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam return (ParsingState.Error, curPos); } } + private enum ParsingState { ParsingTypeName, + ParsingAssemblyQualifiedGenericArgName, + ParsingNonAssemblyQualifiedGenericArgName, ParsingArraySpecifier, ParsingNestedClass, ParsingGenericArgCount, @@ -679,6 +921,7 @@ private struct TypeNameSegment private int _nextUnfulfilledGenericArgSlot; private int _arrayDimensions; private int _arrayOfArraysCount; + private bool _isNonClosedGenericType; // NOTE: This is only an array to defeat the cycle detection stuff in the compiler. Since this is a struct if I have a field that is TypeNameSegment or // TypeNameSegment? it complains there is a cycle in the layout since to determine the size of the struct it must determine the size of the struct. To @@ -696,6 +939,7 @@ public TypeNameSegment(string input, (int Start, int End) extent, bool isNestedC _arrayDimensions = 0; _arrayOfArraysCount = 0; ParsingArgDepth = parsingArgDepth; + _isNonClosedGenericType = false; _nestedClass = null; } @@ -711,17 +955,17 @@ public TypeNameSegment(string input, (int Start, int End) extent, bool isNestedC public bool IsGenericClass => _expectedGenericArgCount != 0; - public bool HasUnfulfilledGenericArgs => IsGenericClass && (_typeArgSegments == null || _nextUnfulfilledGenericArgSlot < _typeArgSegments.Length); + public bool HasUnfulfilledGenericArgs => IsGenericClass && !_isNonClosedGenericType && (_typeArgSegments == null || _nextUnfulfilledGenericArgSlot < _typeArgSegments.Length); public int ExpectedGenericArgsCount => _expectedGenericArgCount; - public int RemainingUnfulfilledGenericArgCount => _expectedGenericArgCount - _nextUnfulfilledGenericArgSlot; + public int RemainingUnfulfilledGenericArgCount => _isNonClosedGenericType ? 0 : _expectedGenericArgCount - _nextUnfulfilledGenericArgSlot; public void SetArrayDimensions(int dimensions) { // There are two distinct versions of multi-dimensional arrays we represent in a single TypeNameSegment. There // is the classic multidimensional array (ala int[,]) and then there is the array of arrays specifier (int[][]). - // Instead of creating a seperate TypeNameSegment for the latter that is just indicating 'an array of the previous + // Instead of creating a separate TypeNameSegment for the latter that is just indicating 'an array of the previous // name segment', we just pile them all into the original TypeNameSegment and differentiate when we are printing // out the names at the end (to either print [,] or [][]). @@ -731,6 +975,11 @@ public void SetArrayDimensions(int dimensions) _arrayOfArraysCount++; } + public void MarkAsNonClosedGenericType() + { + _isNonClosedGenericType = true; + } + public void SetExpectedGenericArgCount(int expectedCount) { _expectedGenericArgCount = expectedCount; @@ -771,7 +1020,7 @@ public void ToString(StringBuilder destination) if (IsGenericClass) { - OutputTypeArguments(destination, _nextUnfulfilledGenericArgSlot, _expectedGenericArgCount, _typeArgSegments); + OutputTypeArguments(destination, _isNonClosedGenericType, _nextUnfulfilledGenericArgSlot, _expectedGenericArgCount, _typeArgSegments); } _nestedClass?[0].ToString(destination); @@ -785,18 +1034,18 @@ public void ToString(StringBuilder destination) } else if (_arrayDimensions != 0) { - destination.Append('['); + destination.Append(GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier); if (_arrayDimensions > 1) - destination.Append(',', _arrayDimensions - 1); - destination.Append(']'); + destination.Append(ArgSeparator, _arrayDimensions - 1); + destination.Append(GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier); } } private bool HasNestedClassSegment => _nestedClass != null; - private static void OutputTypeArguments(StringBuilder destination, int firstMissingGenericSlot, int expectedArgCount, TypeNameSegment[]? typeArgs) + private static void OutputTypeArguments(StringBuilder destination, bool isNonClosedGenericType, int firstMissingGenericSlot, int expectedArgCount, TypeNameSegment[]? typeArgs) { - if (typeArgs == null) + if (typeArgs == null || isNonClosedGenericType) { destination.Append('<'); AddMissingArgumentInfo(destination, firstMissingGenericSlot, expectedArgCount); @@ -822,7 +1071,7 @@ private static void OutputTypeArguments(StringBuilder destination, int firstMiss private static void AddMissingArgumentInfo(StringBuilder result, int firstMissingGenericSlot, int expectedArgCount) { - // This is an error case where the DAC gave no generic args or incomplete generic args for a type + // This is either an error case where the DAC gave no generic args or incomplete generic args for a type or a use of a non-closed generic type for (int i = firstMissingGenericSlot; i < expectedArgCount; i++) { if (i != 0) From 9bf37657e4f7285acf5a912896d65193ea3dd266 Mon Sep 17 00:00:00 2001 From: Ryan Molden Date: Mon, 31 Jul 2023 15:50:55 -0700 Subject: [PATCH 2/7] More DAC name corner cases --- .../src/DACNameParserTests.cs | 148 +++++++++++++++++ .../Implementation/DACNameParser.cs | 149 +++++++++++++++++- 2 files changed, 293 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs index 5769e5d41..ef91250b3 100644 --- a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs +++ b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs @@ -162,6 +162,15 @@ public void ParseGenericClassWithNestedGenericClassWithNameContainingAngleBracke Assert.Equal(expectedResult, DACNameParser.Parse(input)); } + [Fact] + public void ParseNestedGenericWithNestedGenericArg() + { + string input = "Microsoft.SqlServer.Management.SqlParser.Binder.ExecutionSimulator`1+NodeBase`1[T,[Microsoft.SqlServer.Management.SqlParser.Binder.ExecutionSimulator`1+ExecutionNode, Microsoft.SqlServer.Management.SqlParser]]"; + string expectedResult = "Microsoft.SqlServer.Management.SqlParser.Binder.ExecutionSimulator+NodeBase+ExecutionNode>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + [Fact] public void ParseGenericWithComplexGenericArgs() { @@ -258,5 +267,144 @@ public void Bug897_Variant2() Assert.Equal(expectedResult, DACNameParser.Parse(input)); } + + [Fact] + public void ParseOuterMostGenericWithFinalArgumentBeingAssemblyQualifiedNestedGeneric() + { + string input = "System.Linq.Parallel.QueryOperatorEnumerator`2[TSource,[System.Linq.Parallel.ConcatKey`2[TLeftKey,TRightKey], System.Core]]"; + string expectedResult = "System.Linq.Parallel.QueryOperatorEnumerator>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseGenericWithAssemblyQualifiedArrayTypeParam() + { + string input = "System.Collections.Generic.Dictionary`2[[System.String, mscorlib],[System.Object[], mscorlib]]"; + string expectedResult = "System.Collections.Generic.Dictionary"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseGenericWithNonAssemblyQualifiedArrayTypeParam() + { + string input = "System.Collections.Generic.Dictionary`2[[System.String, mscorlib],TFoo[]]"; + string expectedResult = "System.Collections.Generic.Dictionary"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseGenericWithAssemblyQualifiedArrayOfArrayTypeParam() + { + string input = "System.Collections.Generic.Dictionary`2[[System.String, mscorlib],[System.Object[][], mscorlib]]"; + string expectedResult = "System.Collections.Generic.Dictionary"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseGenericWithNonAssemblyQualifiedArrayOfArrayTypeParam() + { + string input = "System.Collections.Generic.Dictionary`2[[System.String, mscorlib],TFoo[][]]"; + string expectedResult = "System.Collections.Generic.Dictionary"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseGenericWithAssemblyQualifiedMultiDimensionalArrayTypeParam() + { + string input = "System.Collections.Generic.Dictionary`2[[System.String, mscorlib],[System.Object[,,], mscorlib]]"; + string expectedResult = "System.Collections.Generic.Dictionary"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseGenericWithNonAssemblyQualifiedMultiDimensionalArrayTypeParam() + { + string input = "System.Collections.Generic.Dictionary`2[[System.String, mscorlib],TFoo[,,]]"; + string expectedResult = "System.Collections.Generic.Dictionary"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseArrayOfNestedGenericTypeWithNonAssemblyQualifiedArrayTypeParam() + { + string input = "System.Collections.Generic.Dictionary`2+Entry[[System.Int32, mscorlib],[System.Collections.Generic.Stack`1[[System.Type[], mscorlib]], System]][]"; + string expectedResult = "System.Collections.Generic.Dictionary>+Entry[]"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseNestedGenericAnonymousTypes() + { + string input = "System.Func`2[[<>f__AnonymousType12`2[[<>f__AnonymousType10`2[[System.Reflection.ConstructorInfo, mscorlib],[System.Reflection.ParameterInfo[], mscorlib]], Microsoft.VisualStudio.Composition],[System.Reflection.TypeInfo, mscorlib]], Microsoft.VisualStudio.Composition],[System.Reflection.ConstructorInfo, mscorlib]]"; + string expectedResult = "System.Func<<>f__AnonymousType12<<>f__AnonymousType10, System.Reflection.TypeInfo>, System.Reflection.ConstructorInfo>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void HandlesNonTraditionalGenericTypeMissingAritySpecifier() + { + // NOTE: jaredpar in the Roslyn compiler team verified this is in fact a generic type in F#, but it has no arity specifier. We should be able to handle + // these by recognizing when we encounter the [ and decide we are parsing an array specifier (which we should since we never saw a generic arity specifier) + // if we anything other than a ] or a , after it (i.e. say a letter), then we can assume this is one of these non-traditional generic cases and manually force + // ourself down that path. It means we will need to figure out the arity ourselves, but that shouldn't be terribly hard, we just start at 1 and count any commas + // we encounter before the closing , (taking care for nested generics inside the outer generic in our counting). + string input = "FSharp.Compiler.TypedTreeBasics+loop@444-39T[a]"; + string expectedResult = "FSharp.Compiler.TypedTreeBasics+loop@444-39T"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void HandlesNonTraditionalGenericTypeWithMultipleParamsMissingAritySpecifier() + { + string input = "Microsoft.FSharp.Collections.ArrayModule+Parallel+sortingFunc@2439-1[TKey,TValue]"; + string expectedResult = "Microsoft.FSharp.Collections.ArrayModule+Parallel+sortingFunc@2439-1"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + + [Fact] + public void HandlesNonTraditionalGenericTypeWithNestedGenericTypeMissingAritySpecifier() + { + string input = "Microsoft.FSharp.Collections.ArrayModule+Parallel+sortingFunc@2439-1[TKey,[TSomeFoo, TFakeAssembly]]"; + string expectedResult = "Microsoft.FSharp.Collections.ArrayModule+Parallel+sortingFunc@2439-1"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void HandlesNonTraditionalGenericTypeWithArrayParamMissingAritySpecifier() + { + string input = "Microsoft.FSharp.Collections.ArrayModule+Parallel+sortingFunc@2439-1[TKey[]]"; + string expectedResult = "Microsoft.FSharp.Collections.ArrayModule+Parallel+sortingFunc@2439-1"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + public void HandleNonTraditionObfuscatedGenericWithAssemblyQualifiedArg() + { + string input = "a37[[akx, yWorks.yFilesWPF.Viewer]]"; + string expectedResult = "a37"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + public void HandleNonTraditionObfuscatedGenericWithNonAssemblyQualifiedArg() + { + string input = "a37[akx]"; + string expectedResult = "a37"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } } } \ No newline at end of file diff --git a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs index 3b2ceeb4e..7b21925c7 100644 --- a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs +++ b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs @@ -134,6 +134,51 @@ string ParseTypeNameNoStateAdvance() if (!string.IsNullOrEmpty(parsedResult)) return parsedResult; + // Two cases we need to look for, we just finished parsing the type name of an assembly qualified generic arg, in which case we need to transition into the ParsingGenericArgAssemblySpecifier state. + // The other is the case of non-traditional/obfuscated generic names. FSharp (and seemingly some obfuscators) will name generic types in ways that do not have arity specifiers. This can easily lead + // us to believe, when we encounter a '[' that we must be dealing with an array decl because we haven't yet seen an arity specifier. So we try to detect this below. First, if ANY args/typenames above + // us have unfulfilled generics it means we HAVE seen an arity specifier, so we aren't in this case special case, so don't look further. If they pass that test then we need to make sure the next + // symbol after the [ isn't an ] or an , (as both of those represent sequences in array specifiers). If all those checks pass assume this is a non-traditional generic type and calculate the arity + // manually and parse the args. + if (name[curPos] == ArgSeparator) + { + if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek()) + { + currentState = ParsingState.ParsingGenericArgAssemblySpecifier; + break; + } + } + else if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + if(!DoAnyArgsOrTypeNamesHaveUnfulfilledGenericArguments(nameSegments, genericArgs)) + { + // It's possible this is an FSharp or obfuscated generic name, lacking an arity specifier (e.g. something like Microsoft.FSharp.Collections.ArrayModule+Parallel+sortingFunc@2439-1[TKey,TValue]). + // Or it could be an array decl, we will want to manually check for the former here (the latter will be correctly detected inside DetermineNextStateAndPos). + if ((curPos != name.Length - 1) && (name[curPos + 1] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier && name[curPos + 1] != ArgSeparator)) + { + // This looks like a generic specified without an arity specifier. So let's manually calculate the arity ourselves. It is a little tricky in that the argument list can itself have generics + // in it, but essentially we just need to count the outermost commas, ignoring any commas in nested generic type decls, and apply that to the most recent argument/type name as its expected + // generic param count then force ourselves into the ParsingGenericArgs state to parse the arg list. + int genericArity = ManuallyCalculateArity(name, curPos + 1); + + List? targetList = ((genericArgs != null) && (genericArgs.Count != 0)) ? genericArgs : nameSegments; + + if (targetList != null) + { + // NOTE: TypeNameSegment is a struct to avoid heap allocations, that means we have to extract / modify / re-store to ensure the updated state gets back into whatever + // list this came from. + int targetIndex = targetList.Count - 1; + TypeNameSegment seg = targetList[targetIndex]; + seg.SetExpectedGenericArgCount(genericArity); + targetList[targetIndex] = seg; + } + + currentState = ParsingState.ParsingGenericArgs; + break; + } + } + } + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } @@ -158,10 +203,12 @@ string ParseTypeNameNoStateAdvance() // 2) The next token is an argument separator (,) // 3) The next token is the end of the argument list (]) // 4) The next token is a generic arity specifier for this arg (`) + // 5) The next token is an array specifier ([) // // Anything else is an error if (curPos == name.Length || (name[curPos] != ArgSeparator && + name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier && name[curPos] != GenericAritySpecifier)) { @@ -178,6 +225,13 @@ string ParseTypeNameNoStateAdvance() break; } + if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + // This has to be an array, if it were an assembly qualified name we would have first seen an ArgSeperator, if it were a generic arg list we would have first seen a GenericAritySpecifier + currentState = ParsingState.ParsingArraySpecifier; + break; + } + // Skip the assembly name portion if one exists if (currentState == ParsingState.ParsingAssemblyQualifiedGenericArgName) { @@ -363,7 +417,45 @@ string ParseTypeNameNoStateAdvance() break; } - (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + // NOTE: We have to speculatively determine our next state. This is because in the case that we have an array of array situation + // we need to leave our isCurrentArgAssemblyQualified entry on the stack, however if we do NOT have that situation then + // we may need to rerun DetermineNextStateAndPos after cleaning up a trailing assembly name if the current isCurrentArgAssemblyQualified + // is true. + ParsingState potentialNewState; + int potentialNewPos; + (potentialNewState, potentialNewPos) = DetermineNextStateAndPos(name, curPos); + + if(potentialNewState != ParsingState.ParsingArraySpecifier && potentialNewState != ParsingState.Done) + { + if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek()) + { + // If we aren't still parsing an array specifier then we will need to clean up a trailing assembly name (if there is one), and redo + // DetermineNextStateAndPos, since the presence of ', ]' will fool it into thinking we should be in a + // ParsingNonAssemblyQualifiedGenericArgName state. + while (curPos < name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier) + curPos++; + + if (curPos != name.Length) + curPos++; // we hit the ] which closes the fully qualified name, so advance to the next char + + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + } + else + { + currentState = potentialNewState; + curPos = potentialNewPos; + } + + // Done with this arg + if (isCurrentArgAssemblyQualified.Count != 0) + isCurrentArgAssemblyQualified.Pop(); + } + else + { + currentState = potentialNewState; + curPos = potentialNewPos; + } + if (genericArgs == null || genericArgs.Count == 0 && currentState == ParsingState.Done) { // Special case: Return original string in cases like this: @@ -412,8 +504,9 @@ string ParseTypeNameNoStateAdvance() if (curPos == name.Length) { - // Done with this arg so clean up its entry - isCurrentArgAssemblyQualified.Pop(); + // Its possible we are resolving the outermost generic, in which case the isCurrentArgAssemblyQualified stack will be empty, so guard against that + if (isCurrentArgAssemblyQualified.Count != 0) + isCurrentArgAssemblyQualified.Pop(); currentState = ParsingState.Done; break; @@ -439,6 +532,7 @@ string ParseTypeNameNoStateAdvance() curPos++; // Advance past any whitespace in the param list + int potentiallyNewPos = MoveCurPosPastWhitespaceOrFail(name, curPos); if (potentiallyNewPos < 0) { @@ -475,7 +569,8 @@ string ParseTypeNameNoStateAdvance() } // Done with this arg so clean up its entry - isCurrentArgAssemblyQualified.Pop(); + if(isCurrentArgAssemblyQualified.Count != 0) + isCurrentArgAssemblyQualified.Pop(); (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; @@ -504,6 +599,33 @@ string ParseTypeNameNoStateAdvance() } } + private static int ManuallyCalculateArity(string name, int startPos) + { + int genericArity = 1; + int commaIgnoreLevel = 0; + for (int i = startPos; i < name.Length; i++) + { + if (name[i] == ArgSeparator) + { + if (commaIgnoreLevel == 0) + genericArity++; + } + else if (name[i] == GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier) + { + if (commaIgnoreLevel != 0) + commaIgnoreLevel--; + else + break; + } + else if (name[i] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + commaIgnoreLevel++; + } + } + + return genericArity; + } + private static int MoveCurPosPastWhitespaceOrFail(string name, int curPos) { while (curPos < name.Length && name[curPos] == ' ') @@ -897,6 +1019,25 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam } } + private static bool DoAnyArgsOrTypeNamesHaveUnfulfilledGenericArguments(List? nameSegments, List? genericArgs) + { + foreach (List current in new[] { genericArgs, nameSegments }) + { + if (current != null) + { + for (int i = current.Count - 1; i >= 0; i--) + { + if (current[i].IsGenericClass && current[i].HasUnfulfilledGenericArgs) + { + return true; + } + } + } + } + + return false; + } + private enum ParsingState { ParsingTypeName, From 545c251dab39a5a4b9ee62618cca0a372edffc6e Mon Sep 17 00:00:00 2001 From: Ryan Molden Date: Mon, 31 Jul 2023 16:18:23 -0700 Subject: [PATCH 3/7] Add accidental omission of nullability specifier --- .../Implementation/DACNameParser.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs index 7b21925c7..b39645f37 100644 --- a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs +++ b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs @@ -1021,7 +1021,7 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam private static bool DoAnyArgsOrTypeNamesHaveUnfulfilledGenericArguments(List? nameSegments, List? genericArgs) { - foreach (List current in new[] { genericArgs, nameSegments }) + foreach (List? current in new[] { genericArgs, nameSegments }) { if (current != null) { From 949b0beb1407dca204ffbfb4e306730c69b294de Mon Sep 17 00:00:00 2001 From: Ryan Molden Date: Tue, 1 Aug 2023 12:57:48 -0700 Subject: [PATCH 4/7] Fix spacing complaints in my if statements --- .../Implementation/DACNameParser.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs index b39645f37..9089cfabc 100644 --- a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs +++ b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs @@ -150,7 +150,7 @@ string ParseTypeNameNoStateAdvance() } else if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) { - if(!DoAnyArgsOrTypeNamesHaveUnfulfilledGenericArguments(nameSegments, genericArgs)) + if (!DoAnyArgsOrTypeNamesHaveUnfulfilledGenericArguments(nameSegments, genericArgs)) { // It's possible this is an FSharp or obfuscated generic name, lacking an arity specifier (e.g. something like Microsoft.FSharp.Collections.ArrayModule+Parallel+sortingFunc@2439-1[TKey,TValue]). // Or it could be an array decl, we will want to manually check for the former here (the latter will be correctly detected inside DetermineNextStateAndPos). @@ -425,7 +425,7 @@ string ParseTypeNameNoStateAdvance() int potentialNewPos; (potentialNewState, potentialNewPos) = DetermineNextStateAndPos(name, curPos); - if(potentialNewState != ParsingState.ParsingArraySpecifier && potentialNewState != ParsingState.Done) + if (potentialNewState != ParsingState.ParsingArraySpecifier && potentialNewState != ParsingState.Done) { if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek()) { @@ -569,7 +569,7 @@ string ParseTypeNameNoStateAdvance() } // Done with this arg so clean up its entry - if(isCurrentArgAssemblyQualified.Count != 0) + if (isCurrentArgAssemblyQualified.Count != 0) isCurrentArgAssemblyQualified.Pop(); (currentState, curPos) = DetermineNextStateAndPos(name, curPos); From a00f6812dd7e0bd289858d219f4789698a341f76 Mon Sep 17 00:00:00 2001 From: Ryan Molden Date: Tue, 1 Aug 2023 13:00:45 -0700 Subject: [PATCH 5/7] Fix typos/omitted words in comment in test --- .../src/DACNameParserTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs index ef91250b3..2aed3b2a9 100644 --- a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs +++ b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs @@ -354,9 +354,9 @@ public void HandlesNonTraditionalGenericTypeMissingAritySpecifier() { // NOTE: jaredpar in the Roslyn compiler team verified this is in fact a generic type in F#, but it has no arity specifier. We should be able to handle // these by recognizing when we encounter the [ and decide we are parsing an array specifier (which we should since we never saw a generic arity specifier) - // if we anything other than a ] or a , after it (i.e. say a letter), then we can assume this is one of these non-traditional generic cases and manually force + // if we see anything other than a ] or a , after it (i.e. say a letter), then we can assume this is one of these non-traditional generic cases and manually force // ourself down that path. It means we will need to figure out the arity ourselves, but that shouldn't be terribly hard, we just start at 1 and count any commas - // we encounter before the closing , (taking care for nested generics inside the outer generic in our counting). + // we encounter before the closing ] (taking care for nested generics inside the outer generic in our counting). string input = "FSharp.Compiler.TypedTreeBasics+loop@444-39T[a]"; string expectedResult = "FSharp.Compiler.TypedTreeBasics+loop@444-39T"; From adf17932857552bd5ef2477b4181bd944f5def73 Mon Sep 17 00:00:00 2001 From: Ryan Molden Date: Wed, 2 Aug 2023 11:07:38 -0700 Subject: [PATCH 6/7] Fix copy paste error where I left Fact off of couple of test methods --- .../src/DACNameParserTests.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs index 2aed3b2a9..f48604781 100644 --- a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs +++ b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs @@ -391,6 +391,7 @@ public void HandlesNonTraditionalGenericTypeWithArrayParamMissingAritySpecifier( Assert.Equal(expectedResult, DACNameParser.Parse(input)); } + [Fact] public void HandleNonTraditionObfuscatedGenericWithAssemblyQualifiedArg() { string input = "a37[[akx, yWorks.yFilesWPF.Viewer]]"; @@ -399,6 +400,7 @@ public void HandleNonTraditionObfuscatedGenericWithAssemblyQualifiedArg() Assert.Equal(expectedResult, DACNameParser.Parse(input)); } + [Fact] public void HandleNonTraditionObfuscatedGenericWithNonAssemblyQualifiedArg() { string input = "a37[akx]"; From 712c196e6c64fd32377894549ff8ab48b004e0e5 Mon Sep 17 00:00:00 2001 From: Lee Culver Date: Tue, 22 Aug 2023 08:53:45 -0700 Subject: [PATCH 7/7] Reduce allocations --- .../Implementation/DACNameParser.cs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs index 9089cfabc..a4a25ab69 100644 --- a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs +++ b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs @@ -1021,7 +1021,9 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam private static bool DoAnyArgsOrTypeNamesHaveUnfulfilledGenericArguments(List? nameSegments, List? genericArgs) { - foreach (List? current in new[] { genericArgs, nameSegments }) + return DoAnyArgsOrTypeNamesHaveUnfulfilledGenericArgumentsWorker(nameSegments) || DoAnyArgsOrTypeNamesHaveUnfulfilledGenericArgumentsWorker(genericArgs); + + static bool DoAnyArgsOrTypeNamesHaveUnfulfilledGenericArgumentsWorker(List? current) { if (current != null) { @@ -1033,9 +1035,9 @@ private static bool DoAnyArgsOrTypeNamesHaveUnfulfilledGenericArguments(List