From c0ba6f4d3a61858270711b8ba3b5f01514ee6612 Mon Sep 17 00:00:00 2001 From: Ryan Molden Date: Tue, 25 Jul 2023 19:26:37 -0700 Subject: [PATCH] Fix bugs in DAC names that either contain non-closed generics or non-assembly qualified names (see bug #897) Add lots of comments to parser as its complex due to having to deal with tragedies like the type name in ParseGenericWithComplexGenericArgs2 Replace uses of char literals in numerous places with their named constants I was using elsewhere Correct numerous comment spelling errors (thanks VS spell checker) Add numerous tests both for the case in 897, some variants of it, and another case I encountered internally --- .../src/DACNameParserTests.cs | 70 +++ .../Implementation/DACNameParser.cs | 423 ++++++++++++++---- 2 files changed, 406 insertions(+), 87 deletions(-) diff --git a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs index 5c0875ed8..5769e5d41 100644 --- a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs +++ b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs @@ -188,5 +188,75 @@ public void ParseArrayOfNestedGenericType() Assert.Equal(expectedResult, DACNameParser.Parse(input)); } + + // Parse a generic arg list with some assembly-qualified names, and some non-qualified names, moving the location of the assembly-qualified arg throughout the list + // in the various tests (i.e. ParseValueTupleWithoutAllFullyQualifiedNames1, ParseValueTupleWithoutAllFullyQualifiedNames2, ...) + [Fact] + public void ParseValueTupleWithoutAllFullyQualifiedNames1() + { + string input = "System.ValueTuple`3[[System.String, mscorlib],TCheapVersion,TExpensiveVersion]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseValueTupleWithoutAllFullyQualifiedNames2() + { + string input = "System.ValueTuple`3[TCheapVersion,[System.String, mscorlib],TExpensiveVersion]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseValueTupleWithoutAllFullyQualifiedNames3() + { + string input = "System.ValueTuple`3[TCheapVersion,TExpensiveVersion,[System.String, mscorlib]]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + // Test with NO qualified names + [Fact] + public void ParseValueTupleWithNoFullyQualifiedNames() + { + string input = "System.ValueTuple`3[TType1,TType2,TType3]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void Bug897() + { + // https://github.com/microsoft/clrmd/issues/897 + + // This input is unusual both in that T and TPluginType are not assembly qualified, but also LambdaInstance is a non-closed generic type. + string input = "StructureMap.Pipeline.ExpressedInstance`3[[StructureMap.Pipeline.LambdaInstance`2, EnforceHttpsModule],T,TPluginType]"; + string expectedResult = "StructureMap.Pipeline.ExpressedInstance, T, TPluginType>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + // Some simple variants on Bug897 + [Fact] + public void Bug897_Variant1() + { + string input = "StructureMap.Pipeline.ExpressedInstance`3[[StructureMap.Pipeline.LambdaInstance`2[[System.String, mscorlib],SomeFakeType], SomeFakeAssembly],T,TPluginType]"; + string expectedResult = "StructureMap.Pipeline.ExpressedInstance, T, TPluginType>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void Bug897_Variant2() + { + string input = "StructureMap.Pipeline.ExpressedInstance`3[[StructureMap.Pipeline.LambdaInstance`2[[List`1[[System.ValueType`2[[System.Boolean, mscorlib],SomeOtherFakeType], mscorlib]], mscorlib],SomeFakeType], SomeFakeAssembly],T,TPluginType]"; + string expectedResult = "StructureMap.Pipeline.ExpressedInstance>, SomeFakeType>, T, TPluginType>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } } } \ No newline at end of file diff --git a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs index 119964016..3b2ceeb4e 100644 --- a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs +++ b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs @@ -23,14 +23,14 @@ namespace Microsoft.Diagnostics.Runtime.Implementation // 1) Minimal heap allocation. // 2) Fast, non-recursive parsing. // 3) Accuracy in the face of complex nesting of types, nesting of generic args, etc... - // 4) Resiliancy to failure (i.e. returns the original DAC name if errors are encountered instead of throwing). + // 4) Resiliency to failure (i.e. returns the original DAC name if errors are encountered instead of throwing). internal static class DACNameParser { private const char GenericAritySpecifier = '`'; - private const char ArgSeperator = ','; + private const char ArgSeparator = ','; private const char NestedClassSpecifier = '+'; - private const char GenericArgListOrArrayStartSpecifier = '['; - private const char GenericArgListOrArrayEndSpecifier = ']'; + private const char GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier = '['; + private const char GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier = ']'; [return: NotNullIfNotNull(nameof(name))] public static string? Parse(string? name) @@ -43,7 +43,6 @@ internal static class DACNameParser try { - // This is the primary method of the parser. It operates as a simple state machine storing information about types encountered as // they are encountered and back-propagating information to types as it is discovered. // @@ -76,6 +75,54 @@ void EnsureGenericArgList() int curPos = 0; int parsingNestedClassDepth = 0; int parsingGenericArgListDepth = 0; + Stack isCurrentArgAssemblyQualified = new Stack(); + + // local type name parsing method that doesn't modify the currentState variable that drive the state machine. Used by a few of the typename parsing paths that need to + // have knowledge of their context to operate properly, but don't want to duplicate this code. Returns a non-empty string in the special case where the input string + // should be the output string, returns string.Empty in all other cases and expects callers to continue to parsing progress by setting currentState/curPos directly + // or calling DetermineNextStateAndPos. + string ParseTypeNameNoStateAdvance() + { + // We are parsing a type name, this is the initial state as well as one entered into every time we are extracting a single type name through the course of parsing. + // This handles parsing nested types as well as generic param types. + // + // Parsing of type names is non-extractive, i.e. we don't substring the input string or create any new heap allocations to track them (apart from the two local + // Lists), we simply remember the extents of the name (start/end). + + int start; + (start, curPos) = GetTypeNameExtent(name, curPos, parsingGenericArgList: parsingGenericArgListDepth != 0); + + // Special case: Check if after parsing the type name we have exhausted the string, if so it means the input string is the output string, so just return it + // without allocating a copy. + if (ReturnOriginalDACString(curPos, name.Length, nameSegments)) + return name; + + bool typeIsNestedClass = (parsingNestedClassDepth != 0); + if (parsingGenericArgListDepth == 0) + { + // We are parsing a top-level type name/sequence + EnsureNameSegmentList(); + +#pragma warning disable CS8602 // EnsureNameSegmentList call above ensures that nameSegments is never null here + nameSegments.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingArgDepth: 0)); +#pragma warning restore CS8602 + } + else + { + // We are parsing a generic list (potentially nested lists in the case where a generic param is itself generic) + EnsureGenericArgList(); + +#pragma warning disable CS8602 // EnsureGenericArgList call above ensures that genericArgs is never null here + genericArgs.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingGenericArgListDepth)); +#pragma warning restore CS8602 + + } + + if (parsingNestedClassDepth != 0) + parsingNestedClassDepth--; + + return string.Empty; + } while (ShouldContinueParsing(currentState)) { @@ -83,45 +130,104 @@ void EnsureGenericArgList() { case ParsingState.ParsingTypeName: { - // We are parsing a type name, this is the initial state as well as one entered into every time we are extracting a single type name through the course of parsing. - // This handles parsing nested types as well as generic param types. - // - // Parsing of type names is non-extractive, i.e. we don't substring the input string or create any new heap allocations to track them (apart from the two local - // Lists), we simply remember the extents of the name (start/end). + string parsedResult = ParseTypeNameNoStateAdvance(); + if (!string.IsNullOrEmpty(parsedResult)) + return parsedResult; + + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + break; + } + case ParsingState.ParsingAssemblyQualifiedGenericArgName: + case ParsingState.ParsingNonAssemblyQualifiedGenericArgName: + { + // Generic params come in two flavors, one is assembly qualified, like this List`1[[System.Boolean, mscorlib]], the other is non-assembly qualified which will lack the + // [ and ] and just be a type name, like this List`1[TFoo]. Of course in multi argument generics these styles can be mixed and of course we can nest generics whose + // args are generic whose args are generic, etc... arbitrarily deeply. The primary tricky bit is when consuming a param from the input we need to consume the type name + // (in both cases) and potentially the trailing assembly name. We use isCurrentArgAssemblyQualified as a way to track whether the arg we are currently processing + // will have a trailing assembly name after it. We need a stack since we can, and do, frequently, generics whose args are generic (often numerous levels deep). Each + // arg on the descent of processing such types has its own unique 'is assembly qualified name' state. - int start; - (start, curPos) = GetTypeNameExtent(name, curPos, parsingGenericArgList: parsingGenericArgListDepth != 0); + // NOTE: We ignore the return type, the special case it handles isn't possible here in a well-formed name and we will only get well formed names from the DAC + ParseTypeNameNoStateAdvance(); - // Special case: Check if after parsing the type name we have exhausted the string, if so it means the input string is the output string, so just return it - // without allocating a copy. - if (ReturnOriginalDACString(curPos, name.Length, nameSegments)) - return name; + isCurrentArgAssemblyQualified.Push(currentState == ParsingState.ParsingAssemblyQualifiedGenericArgName); - bool typeIsNestedClass = (parsingNestedClassDepth != 0); - if (parsingGenericArgListDepth == 0) + // After a generic arg type name the only legit input states are: + // + // 1) We are entirely done with the string (curPos == name.Length). + // 2) The next token is an argument separator (,) + // 3) The next token is the end of the argument list (]) + // 4) The next token is a generic arity specifier for this arg (`) + // + // Anything else is an error + if (curPos == name.Length || + (name[curPos] != ArgSeparator && + name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier && + name[curPos] != GenericAritySpecifier)) { - // We are parsing a top-level type name/sequence - EnsureNameSegmentList(); + currentState = ParsingState.Error; + break; + } -#pragma warning disable CS8602 // EnsureNameSegmentList call above ensures that nameSegments is never null here - nameSegments.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingArgDepth: 0)); -#pragma warning restore CS8602 + // an argument can itself be generic, so we may encounter an arity specifier while parsing the arg name, make sure we don't get tripped up by that + if (name[curPos] == GenericAritySpecifier) + { + // NOTE: NOT done with this arg so leave our entry on the isCurrentArgAssemblyQualified in place + currentState = ParsingState.ParsingGenericArgCount; + curPos += 1; + break; } - else + + // Skip the assembly name portion if one exists + if (currentState == ParsingState.ParsingAssemblyQualifiedGenericArgName) { - // We are parsing a generic list (potentialy nested lists in the case where a generic param is itself generic) - EnsureGenericArgList(); + while (curPos != name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier) + { + curPos++; + } -#pragma warning disable CS8602 // EnsureGenericArgList call above ensures that genericArgs is never null here - genericArgs.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingGenericArgListDepth)); -#pragma warning restore CS8602 + // Since we are now pointing at the end of the assembly qualified name (the ]), advance past it so the checks below will be correct for both + // assembly qualified and non-assembly qualified names + if (curPos != name.Length) + { + curPos++; + } + } + + // We shouldn't have exhausted our input, if we have, fail + if (curPos == name.Length) + { + // We are done with this arg, so pop our entry + isCurrentArgAssemblyQualified.Pop(); + currentState = ParsingState.Error; + break; } - if (parsingNestedClassDepth != 0) - parsingNestedClassDepth--; + // We're done with the arg list, so just call into DetermineNextStateAndPos which will do the right thing + if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier) + { + // NOTE: We don't pop our entry because DetermineNextStateAndPos will put us into ResolveParsedGenericList, and we need to know + // that info in there. That code will clean up our entry when it finishes processing the arg. + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + break; + } - (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + // We have encountered another argument, so figure out our new state and yield back to the main loop + if (name[curPos] == ArgSeparator) + { + // We are done with this arg, so pop our entry + isCurrentArgAssemblyQualified.Pop(); + + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + break; + } + + // Should never get here + // + // We are done with this arg, so pop our entry + isCurrentArgAssemblyQualified.Pop(); + currentState = ParsingState.Error; break; } case ParsingState.ParsingNestedClass: @@ -158,37 +264,76 @@ void EnsureGenericArgList() break; } + // Double check that we aren't looking at non-closed generic type (see Github bug #897). + if (curPos != name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier && name[curPos] != NestedClassSpecifier) + { + int targetIndex = targetList.Count - 1; + TypeNameSegment seg = targetList[targetIndex]; + seg.MarkAsNonClosedGenericType(); + targetList[targetIndex] = seg; + + if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek()) + { + Debug.Assert(name[curPos] == ArgSeparator, $"{nameof(isCurrentArgAssemblyQualified)} is true but the non-closed generic we are processing was not followed by a '{ArgSeparator}'."); + if (name[curPos] != ArgSeparator) + { + currentState = ParsingState.Error; + break; + } + + currentState = ParsingState.ParsingGenericArgAssemblySpecifier; + break; + } + } + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } case ParsingState.ParsingGenericArgAssemblySpecifier: { // Nothing to do here, really, just skip the assembly name specified in the generic arg type - while (curPos < name.Length && name[curPos] != ']') + while (curPos < name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier) curPos++; + if (curPos != name.Length) + curPos++; // we hit the ] which closes the fully qualified name, so advance to the next char + + // Done with this arg, so clean up our stack + isCurrentArgAssemblyQualified.Pop(); + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } case ParsingState.ParsingGenericArgs: { - // Start parsing the list of generic types, this just entails marking that we are parsing a generic arg list. NOTE: to support nested generic arg lsits we + // Start parsing the list of generic types, this just entails marking that we are parsing a generic arg list. NOTE: to support nested generic arg lists we // have to keep track of list count, not just a simple bool are/aren't parsing. parsingGenericArgListDepth++; - currentState = ParsingState.ParsingTypeName; + + if (name[curPos + 1] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + currentState = ParsingState.ParsingAssemblyQualifiedGenericArgName; + curPos += 2; + } + else + { + currentState = ParsingState.ParsingNonAssemblyQualifiedGenericArgName; + curPos += 1; + } + break; } case ParsingState.ParsingArraySpecifier: { // Parse the array specifier, this mainly is to catch multi-dimensional arrays - // There is always at least a single dimenision in arrays, every comma counts as one more + // There is always at least a single dimension in arrays, every comma counts as one more int arrayDimensions = 1; // Calculate the array dimensions while ((curPos < name.Length) && (name[curPos] != ']')) { - if (name[curPos] == ',') + if (name[curPos] == ArgSeparator) arrayDimensions++; curPos++; @@ -242,15 +387,97 @@ void EnsureGenericArgList() // top-level). parsingGenericArgListDepth--; + bool isArgAssemblyQualified = isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek(); + if (genericArgs == null || genericArgs.Count == 0) { + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + // For top-level types with multiple-level generic arg lists (so a type with a generic arg which itself is a generic type) as we unwind the nested generic args // lists we can end up wth no more work to do upon exiting a level (because we already propagated the info backwards before getting here), in which case, do nothing. (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } - (currentState, curPos) = ResolveParsedGenericList(name, curPos, parsingGenericArgListDepth, nameSegments, genericArgs); + bool succeeded = ResolveParsedGenericList(parsingGenericArgListDepth, nameSegments, genericArgs); + if (!succeeded) + { + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + currentState = ParsingState.Error; + break; + } + + if (curPos == name.Length) + { + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + currentState = ParsingState.Done; + break; + } + + if (name[curPos] == ArgSeparator) + { + // If we are in the middle of parsing some number of possibly nested assembly qualified type names, and we close out a generic arg list and land on a , it either means + // it will be followed by the assembly name OR by another type name that itself is or is not qualified. So, account for both cases. + if (isArgAssemblyQualified) + { + // NOTE: We will pop isCurrentArgAssemblyQualified in the handler for ParsingGenericArgAssemblySpecifier + // + // Should be an assembly name, so skip it + currentState = ParsingState.ParsingGenericArgAssemblySpecifier; + break; + } + + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + // move past the comma + curPos++; + + // Advance past any whitespace in the param list + int potentiallyNewPos = MoveCurPosPastWhitespaceOrFail(name, curPos); + if (potentiallyNewPos < 0) + { + currentState = ParsingState.Error; + break; + } + + curPos = potentiallyNewPos; + + // We are moving on from the resolved generic and we have encountered a comma. We popped the state of the generic argument already, but it's possible + // that beyond this comma lies the assembly qualified name of the previous param (which could be a generic type that the just resolved generic was a param for). + // So if we see there is another param still (isCurrentArgAssemblyQualified.Count != 0) and that param is assembly qualified (isCurrentArgAssemblyQualified.Peek()) + // then we need to enter a state to consume that assembly name + if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek()) + { + // NOTE: The handler for ParsingGenericArgAssemblySpecifier will handle popping the isCurrentArgAssemblyQualified state for the arg + // it will be working on. + currentState = ParsingState.ParsingGenericArgAssemblySpecifier; + break; + } + + // we either have another qualified name, or a non-qualified one + if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + currentState = ParsingState.ParsingAssemblyQualifiedGenericArgName; + curPos += 1; + break; + } + else + { + currentState = ParsingState.ParsingNonAssemblyQualifiedGenericArgName; + break; + } + } + + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } } @@ -277,16 +504,24 @@ void EnsureGenericArgList() } } - private static (ParsingState State, int CurrentPosition) ResolveParsedGenericList(string name, int currentPosition, int parsingGenericArgListDepth, List? nameSegments, List? genericArgs) + private static int MoveCurPosPastWhitespaceOrFail(string name, int curPos) + { + while (curPos < name.Length && name[curPos] == ' ') + curPos++; + + return (curPos != name.Length ? curPos : -1); + } + + private static bool ResolveParsedGenericList(int parsingGenericArgListDepth, List? nameSegments, List? genericArgs) { if (genericArgs == null) { - return (ParsingState.Error, currentPosition); + return false; } // This is the most complicated part of the state machine, it involves back-propagating completed generic argument types into previous types they belong to. // It has to take care to propagate both amongst the genericArgs list as well as into the nameSegments list, it also has to ensure it unifies nested classes that - // exist seperate from their parent type, before back-propagating that parent type. + // exist separate from their parent type, before back-propagating that parent type. // // NOTE: This is called one time per generic list, so in the case of nested generics (where a param to a generic is itself another generic) this will be called // twice (and so on and so forth for arbitrary levels of nesting). What that means is that each call we only want to clear as many generics off our queue @@ -306,7 +541,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis // (also waiting for its params) and one for ActionResponse, which is the arg to pair with the IEnumerable. So we have handle this arg rollup before we can // propagate the args to the nameSegments list // - // NOTE: It is important to do this walk backwards since our list is being used like a queue and later entries bind with entries before them during genric arg + // NOTE: It is important to do this walk backwards since our list is being used like a queue and later entries bind with entries before them during generic arg // back-propagation // // NOTE: Purposely not using FindLastIndexOf because want to avoid allocation cost of lambda + indirection cost of callback during search @@ -328,7 +563,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis propagatedTypesToGenericArgs = true; if (!TryPatchUnfulfilledGenericArgs(genericTargetIndex, genericArgs)) - return (ParsingState.Error, currentPosition); + return false; int previousTarget = genericTargetIndex; genericTargetIndex = -1; @@ -354,7 +589,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis if (nameSegments == null) { Debug.Fail("Ended resolving generic arg list but no top-level types to propagate them to."); - return (ParsingState.Error, currentPosition); + return false; } // Fill the nameSegment generics with args, in order, from the genericArgs list. This works correctly whether the nameSegments list is a single generic or a @@ -388,7 +623,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis targetSegmentIndex = nameSegments.FindIndex(targetSegmentIndex, (tns) => tns.HasUnfulfilledGenericArgs); if (targetSegmentIndex == -1) { - return (ParsingState.Error, currentPosition); + return false; } targetSegment = nameSegments[targetSegmentIndex]; @@ -400,18 +635,14 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis nameSegments[targetSegmentIndex] = targetSegment; DebugOnly.Assert(genericArgs.Count == 0, "Back-propagation to top-level generic types ended with generic args still in the genericArgs list."); - - return DetermineNextStateAndPos(name, currentPosition); } else { - return (ParsingState.Error, currentPosition); + return false; } } - else - { - return DetermineNextStateAndPos(name, currentPosition); - } + + return true; } private static bool TryPatchUnfulfilledGenericArgs(int genericTargetIndex, List genericArgs) @@ -420,7 +651,7 @@ private static bool TryPatchUnfulfilledGenericArgs(int genericTargetIndex, List< DebugOnly.Assert(targetTypeToPatch.HasUnfulfilledGenericArgs, "Called TryPatchUnfulfilledGenericArgs with an index pointing at a TypeNameSegment that does not have unfulfilled generic params"); // Patch ALL missing args from the target type, but no more. Any types before the target that need filling in will trigger another ResolveParsedGenericList state - // to be entered as we unwind the parse, and at that point we will progate types back another level. This is very important to correctly parse horrific types like: + // to be entered as we unwind the parse, and at that point we will propagate types back another level. This is very important to correctly parse horrific types like: // // Microsoft.CodeAnalysis.PooledObjects.ObjectPool+TagNode>+Node, System.Boolean>>>+Factory int nextTargetFulfillmentIndex = -1; @@ -491,7 +722,7 @@ private static bool TryPatchUnfulfilledGenericArgs(int genericTargetIndex, List< // Add the located argument as a generic arg to our previous generic type targetTypeToPatch.AddGenericArg(genericArgs[nextTargetFulfillmentIndex]); - // Remove the back-propagaeted argument from our argument list since it is now contained within targetTypeToPatch + // Remove the back-propagated argument from our argument list since it is now contained within targetTypeToPatch genericArgs.RemoveAt(nextTargetFulfillmentIndex); } @@ -519,7 +750,7 @@ private static void UnifyNestedClasses(int parsingGenericArgListDepth, List // while (cur < name.Length && - name[cur] != '[' && - (name[cur] != ',' || !parsingGenericArgList) && - (name[cur] != '+' || parsingGenericArgList) && // allow + when parsing generic arg type names because generic arg names can be nested types - (name[cur] != '`' || ((cur + 1 < name.Length) && !char.IsDigit(name[cur + 1])))) + name[cur] != GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier && + name[cur] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier && + (name[cur] != ArgSeparator || !parsingGenericArgList) && + (name[cur] != NestedClassSpecifier || parsingGenericArgList) && // allow + when parsing generic arg type names because generic arg names can be nested types + (name[cur] != GenericAritySpecifier || ((cur + 1 < name.Length) && !char.IsDigit(name[cur + 1])))) { cur++; } @@ -610,9 +842,26 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam switch (name[curPos]) { - case ArgSeperator: + case ArgSeparator: { - return (ParsingState.ParsingGenericArgAssemblySpecifier, curPos + 1); + curPos++; // move past the comma + + int potentiallyNewPos = MoveCurPosPastWhitespaceOrFail(name, curPos); + if (potentiallyNewPos < 0) + { + return (ParsingState.Error, curPos); + } + + curPos = potentiallyNewPos; + + if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + return (ParsingState.ParsingAssemblyQualifiedGenericArgName, curPos + 1); + } + else + { + return (ParsingState.ParsingNonAssemblyQualifiedGenericArgName, curPos); + } } case GenericAritySpecifier: { @@ -622,33 +871,23 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam { return (ParsingState.ParsingNestedClass, curPos + 1); } - case GenericArgListOrArrayStartSpecifier: + case GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier: { if (curPos + 1 != name.Length) { - if (name[curPos + 1] is GenericArgListOrArrayEndSpecifier or ArgSeperator) + if (name[curPos + 1] is GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier or ArgSeparator) return (ParsingState.ParsingArraySpecifier, curPos); - else if (name[curPos + 1] == GenericArgListOrArrayStartSpecifier) - return (ParsingState.ParsingGenericArgs, curPos + 2); + else + return (ParsingState.ParsingGenericArgs, curPos); } return (ParsingState.Error, curPos); } - case GenericArgListOrArrayEndSpecifier: + case GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier: { if (curPos != name.Length) { - if (name[curPos + 1] == GenericArgListOrArrayEndSpecifier) - return (ParsingState.ResolveParsedGenericList, curPos + 2); - else if (name[curPos + 1] == ArgSeperator) - { - // +3 because cur pos == ']' and next position is a ',', which means we must have a list like this: - // - // [[Microsoft.VisualStudio.Threading.IJoinableTaskDependent, Microsoft.VisualStudio.Threading],[System.Int32, mscorlib],[Microsoft.VisualStudio.Threading.IJoinableTaskDependent, Microsoft.VisualStudio.Threading]] - // - // and thus curPos + 3 == '[', which we want to skip as well. - return (ParsingState.ParsingTypeName, curPos + 3); - } + return (ParsingState.ResolveParsedGenericList, curPos + 1); } return (ParsingState.Error, curPos); @@ -657,9 +896,12 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam return (ParsingState.Error, curPos); } } + private enum ParsingState { ParsingTypeName, + ParsingAssemblyQualifiedGenericArgName, + ParsingNonAssemblyQualifiedGenericArgName, ParsingArraySpecifier, ParsingNestedClass, ParsingGenericArgCount, @@ -679,6 +921,7 @@ private struct TypeNameSegment private int _nextUnfulfilledGenericArgSlot; private int _arrayDimensions; private int _arrayOfArraysCount; + private bool _isNonClosedGenericType; // NOTE: This is only an array to defeat the cycle detection stuff in the compiler. Since this is a struct if I have a field that is TypeNameSegment or // TypeNameSegment? it complains there is a cycle in the layout since to determine the size of the struct it must determine the size of the struct. To @@ -696,6 +939,7 @@ public TypeNameSegment(string input, (int Start, int End) extent, bool isNestedC _arrayDimensions = 0; _arrayOfArraysCount = 0; ParsingArgDepth = parsingArgDepth; + _isNonClosedGenericType = false; _nestedClass = null; } @@ -711,17 +955,17 @@ public TypeNameSegment(string input, (int Start, int End) extent, bool isNestedC public bool IsGenericClass => _expectedGenericArgCount != 0; - public bool HasUnfulfilledGenericArgs => IsGenericClass && (_typeArgSegments == null || _nextUnfulfilledGenericArgSlot < _typeArgSegments.Length); + public bool HasUnfulfilledGenericArgs => IsGenericClass && !_isNonClosedGenericType && (_typeArgSegments == null || _nextUnfulfilledGenericArgSlot < _typeArgSegments.Length); public int ExpectedGenericArgsCount => _expectedGenericArgCount; - public int RemainingUnfulfilledGenericArgCount => _expectedGenericArgCount - _nextUnfulfilledGenericArgSlot; + public int RemainingUnfulfilledGenericArgCount => _isNonClosedGenericType ? 0 : _expectedGenericArgCount - _nextUnfulfilledGenericArgSlot; public void SetArrayDimensions(int dimensions) { // There are two distinct versions of multi-dimensional arrays we represent in a single TypeNameSegment. There // is the classic multidimensional array (ala int[,]) and then there is the array of arrays specifier (int[][]). - // Instead of creating a seperate TypeNameSegment for the latter that is just indicating 'an array of the previous + // Instead of creating a separate TypeNameSegment for the latter that is just indicating 'an array of the previous // name segment', we just pile them all into the original TypeNameSegment and differentiate when we are printing // out the names at the end (to either print [,] or [][]). @@ -731,6 +975,11 @@ public void SetArrayDimensions(int dimensions) _arrayOfArraysCount++; } + public void MarkAsNonClosedGenericType() + { + _isNonClosedGenericType = true; + } + public void SetExpectedGenericArgCount(int expectedCount) { _expectedGenericArgCount = expectedCount; @@ -771,7 +1020,7 @@ public void ToString(StringBuilder destination) if (IsGenericClass) { - OutputTypeArguments(destination, _nextUnfulfilledGenericArgSlot, _expectedGenericArgCount, _typeArgSegments); + OutputTypeArguments(destination, _isNonClosedGenericType, _nextUnfulfilledGenericArgSlot, _expectedGenericArgCount, _typeArgSegments); } _nestedClass?[0].ToString(destination); @@ -785,18 +1034,18 @@ public void ToString(StringBuilder destination) } else if (_arrayDimensions != 0) { - destination.Append('['); + destination.Append(GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier); if (_arrayDimensions > 1) - destination.Append(',', _arrayDimensions - 1); - destination.Append(']'); + destination.Append(ArgSeparator, _arrayDimensions - 1); + destination.Append(GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier); } } private bool HasNestedClassSegment => _nestedClass != null; - private static void OutputTypeArguments(StringBuilder destination, int firstMissingGenericSlot, int expectedArgCount, TypeNameSegment[]? typeArgs) + private static void OutputTypeArguments(StringBuilder destination, bool isNonClosedGenericType, int firstMissingGenericSlot, int expectedArgCount, TypeNameSegment[]? typeArgs) { - if (typeArgs == null) + if (typeArgs == null || isNonClosedGenericType) { destination.Append('<'); AddMissingArgumentInfo(destination, firstMissingGenericSlot, expectedArgCount); @@ -822,7 +1071,7 @@ private static void OutputTypeArguments(StringBuilder destination, int firstMiss private static void AddMissingArgumentInfo(StringBuilder result, int firstMissingGenericSlot, int expectedArgCount) { - // This is an error case where the DAC gave no generic args or incomplete generic args for a type + // This is either an error case where the DAC gave no generic args or incomplete generic args for a type or a use of a non-closed generic type for (int i = firstMissingGenericSlot; i < expectedArgCount; i++) { if (i != 0)