diff --git a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs index 5c0875ed8..5769e5d41 100644 --- a/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs +++ b/src/Microsoft.Diagnostics.Runtime.Tests/src/DACNameParserTests.cs @@ -188,5 +188,75 @@ public void ParseArrayOfNestedGenericType() Assert.Equal(expectedResult, DACNameParser.Parse(input)); } + + // Parse a generic arg list with some assembly-qualified names, and some non-qualified names, moving the location of the assembly-qualified arg throughout the list + // in the various tests (i.e. ParseValueTupleWithoutAllFullyQualifiedNames1, ParseValueTupleWithoutAllFullyQualifiedNames2, ...) + [Fact] + public void ParseValueTupleWithoutAllFullyQualifiedNames1() + { + string input = "System.ValueTuple`3[[System.String, mscorlib],TCheapVersion,TExpensiveVersion]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseValueTupleWithoutAllFullyQualifiedNames2() + { + string input = "System.ValueTuple`3[TCheapVersion,[System.String, mscorlib],TExpensiveVersion]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void ParseValueTupleWithoutAllFullyQualifiedNames3() + { + string input = "System.ValueTuple`3[TCheapVersion,TExpensiveVersion,[System.String, mscorlib]]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + // Test with NO qualified names + [Fact] + public void ParseValueTupleWithNoFullyQualifiedNames() + { + string input = "System.ValueTuple`3[TType1,TType2,TType3]"; + string expectedResult = "System.ValueTuple"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void Bug897() + { + // https://github.com/microsoft/clrmd/issues/897 + + // This input is unusual both in that T and TPluginType are not assembly qualified, but also LambdaInstance is a non-closed generic type. + string input = "StructureMap.Pipeline.ExpressedInstance`3[[StructureMap.Pipeline.LambdaInstance`2, EnforceHttpsModule],T,TPluginType]"; + string expectedResult = "StructureMap.Pipeline.ExpressedInstance, T, TPluginType>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + // Some simple variants on Bug897 + [Fact] + public void Bug897_Variant1() + { + string input = "StructureMap.Pipeline.ExpressedInstance`3[[StructureMap.Pipeline.LambdaInstance`2[[System.String, mscorlib],SomeFakeType], SomeFakeAssembly],T,TPluginType]"; + string expectedResult = "StructureMap.Pipeline.ExpressedInstance, T, TPluginType>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } + + [Fact] + public void Bug897_Variant2() + { + string input = "StructureMap.Pipeline.ExpressedInstance`3[[StructureMap.Pipeline.LambdaInstance`2[[List`1[[System.ValueType`2[[System.Boolean, mscorlib],SomeOtherFakeType], mscorlib]], mscorlib],SomeFakeType], SomeFakeAssembly],T,TPluginType]"; + string expectedResult = "StructureMap.Pipeline.ExpressedInstance>, SomeFakeType>, T, TPluginType>"; + + Assert.Equal(expectedResult, DACNameParser.Parse(input)); + } } } \ No newline at end of file diff --git a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs index 119964016..3b2ceeb4e 100644 --- a/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs +++ b/src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs @@ -23,14 +23,14 @@ namespace Microsoft.Diagnostics.Runtime.Implementation // 1) Minimal heap allocation. // 2) Fast, non-recursive parsing. // 3) Accuracy in the face of complex nesting of types, nesting of generic args, etc... - // 4) Resiliancy to failure (i.e. returns the original DAC name if errors are encountered instead of throwing). + // 4) Resiliency to failure (i.e. returns the original DAC name if errors are encountered instead of throwing). internal static class DACNameParser { private const char GenericAritySpecifier = '`'; - private const char ArgSeperator = ','; + private const char ArgSeparator = ','; private const char NestedClassSpecifier = '+'; - private const char GenericArgListOrArrayStartSpecifier = '['; - private const char GenericArgListOrArrayEndSpecifier = ']'; + private const char GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier = '['; + private const char GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier = ']'; [return: NotNullIfNotNull(nameof(name))] public static string? Parse(string? name) @@ -43,7 +43,6 @@ internal static class DACNameParser try { - // This is the primary method of the parser. It operates as a simple state machine storing information about types encountered as // they are encountered and back-propagating information to types as it is discovered. // @@ -76,6 +75,54 @@ void EnsureGenericArgList() int curPos = 0; int parsingNestedClassDepth = 0; int parsingGenericArgListDepth = 0; + Stack isCurrentArgAssemblyQualified = new Stack(); + + // local type name parsing method that doesn't modify the currentState variable that drive the state machine. Used by a few of the typename parsing paths that need to + // have knowledge of their context to operate properly, but don't want to duplicate this code. Returns a non-empty string in the special case where the input string + // should be the output string, returns string.Empty in all other cases and expects callers to continue to parsing progress by setting currentState/curPos directly + // or calling DetermineNextStateAndPos. + string ParseTypeNameNoStateAdvance() + { + // We are parsing a type name, this is the initial state as well as one entered into every time we are extracting a single type name through the course of parsing. + // This handles parsing nested types as well as generic param types. + // + // Parsing of type names is non-extractive, i.e. we don't substring the input string or create any new heap allocations to track them (apart from the two local + // Lists), we simply remember the extents of the name (start/end). + + int start; + (start, curPos) = GetTypeNameExtent(name, curPos, parsingGenericArgList: parsingGenericArgListDepth != 0); + + // Special case: Check if after parsing the type name we have exhausted the string, if so it means the input string is the output string, so just return it + // without allocating a copy. + if (ReturnOriginalDACString(curPos, name.Length, nameSegments)) + return name; + + bool typeIsNestedClass = (parsingNestedClassDepth != 0); + if (parsingGenericArgListDepth == 0) + { + // We are parsing a top-level type name/sequence + EnsureNameSegmentList(); + +#pragma warning disable CS8602 // EnsureNameSegmentList call above ensures that nameSegments is never null here + nameSegments.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingArgDepth: 0)); +#pragma warning restore CS8602 + } + else + { + // We are parsing a generic list (potentially nested lists in the case where a generic param is itself generic) + EnsureGenericArgList(); + +#pragma warning disable CS8602 // EnsureGenericArgList call above ensures that genericArgs is never null here + genericArgs.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingGenericArgListDepth)); +#pragma warning restore CS8602 + + } + + if (parsingNestedClassDepth != 0) + parsingNestedClassDepth--; + + return string.Empty; + } while (ShouldContinueParsing(currentState)) { @@ -83,45 +130,104 @@ void EnsureGenericArgList() { case ParsingState.ParsingTypeName: { - // We are parsing a type name, this is the initial state as well as one entered into every time we are extracting a single type name through the course of parsing. - // This handles parsing nested types as well as generic param types. - // - // Parsing of type names is non-extractive, i.e. we don't substring the input string or create any new heap allocations to track them (apart from the two local - // Lists), we simply remember the extents of the name (start/end). + string parsedResult = ParseTypeNameNoStateAdvance(); + if (!string.IsNullOrEmpty(parsedResult)) + return parsedResult; + + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + break; + } + case ParsingState.ParsingAssemblyQualifiedGenericArgName: + case ParsingState.ParsingNonAssemblyQualifiedGenericArgName: + { + // Generic params come in two flavors, one is assembly qualified, like this List`1[[System.Boolean, mscorlib]], the other is non-assembly qualified which will lack the + // [ and ] and just be a type name, like this List`1[TFoo]. Of course in multi argument generics these styles can be mixed and of course we can nest generics whose + // args are generic whose args are generic, etc... arbitrarily deeply. The primary tricky bit is when consuming a param from the input we need to consume the type name + // (in both cases) and potentially the trailing assembly name. We use isCurrentArgAssemblyQualified as a way to track whether the arg we are currently processing + // will have a trailing assembly name after it. We need a stack since we can, and do, frequently, generics whose args are generic (often numerous levels deep). Each + // arg on the descent of processing such types has its own unique 'is assembly qualified name' state. - int start; - (start, curPos) = GetTypeNameExtent(name, curPos, parsingGenericArgList: parsingGenericArgListDepth != 0); + // NOTE: We ignore the return type, the special case it handles isn't possible here in a well-formed name and we will only get well formed names from the DAC + ParseTypeNameNoStateAdvance(); - // Special case: Check if after parsing the type name we have exhausted the string, if so it means the input string is the output string, so just return it - // without allocating a copy. - if (ReturnOriginalDACString(curPos, name.Length, nameSegments)) - return name; + isCurrentArgAssemblyQualified.Push(currentState == ParsingState.ParsingAssemblyQualifiedGenericArgName); - bool typeIsNestedClass = (parsingNestedClassDepth != 0); - if (parsingGenericArgListDepth == 0) + // After a generic arg type name the only legit input states are: + // + // 1) We are entirely done with the string (curPos == name.Length). + // 2) The next token is an argument separator (,) + // 3) The next token is the end of the argument list (]) + // 4) The next token is a generic arity specifier for this arg (`) + // + // Anything else is an error + if (curPos == name.Length || + (name[curPos] != ArgSeparator && + name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier && + name[curPos] != GenericAritySpecifier)) { - // We are parsing a top-level type name/sequence - EnsureNameSegmentList(); + currentState = ParsingState.Error; + break; + } -#pragma warning disable CS8602 // EnsureNameSegmentList call above ensures that nameSegments is never null here - nameSegments.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingArgDepth: 0)); -#pragma warning restore CS8602 + // an argument can itself be generic, so we may encounter an arity specifier while parsing the arg name, make sure we don't get tripped up by that + if (name[curPos] == GenericAritySpecifier) + { + // NOTE: NOT done with this arg so leave our entry on the isCurrentArgAssemblyQualified in place + currentState = ParsingState.ParsingGenericArgCount; + curPos += 1; + break; } - else + + // Skip the assembly name portion if one exists + if (currentState == ParsingState.ParsingAssemblyQualifiedGenericArgName) { - // We are parsing a generic list (potentialy nested lists in the case where a generic param is itself generic) - EnsureGenericArgList(); + while (curPos != name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier) + { + curPos++; + } -#pragma warning disable CS8602 // EnsureGenericArgList call above ensures that genericArgs is never null here - genericArgs.Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingGenericArgListDepth)); -#pragma warning restore CS8602 + // Since we are now pointing at the end of the assembly qualified name (the ]), advance past it so the checks below will be correct for both + // assembly qualified and non-assembly qualified names + if (curPos != name.Length) + { + curPos++; + } + } + + // We shouldn't have exhausted our input, if we have, fail + if (curPos == name.Length) + { + // We are done with this arg, so pop our entry + isCurrentArgAssemblyQualified.Pop(); + currentState = ParsingState.Error; + break; } - if (parsingNestedClassDepth != 0) - parsingNestedClassDepth--; + // We're done with the arg list, so just call into DetermineNextStateAndPos which will do the right thing + if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier) + { + // NOTE: We don't pop our entry because DetermineNextStateAndPos will put us into ResolveParsedGenericList, and we need to know + // that info in there. That code will clean up our entry when it finishes processing the arg. + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + break; + } - (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + // We have encountered another argument, so figure out our new state and yield back to the main loop + if (name[curPos] == ArgSeparator) + { + // We are done with this arg, so pop our entry + isCurrentArgAssemblyQualified.Pop(); + + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); + break; + } + + // Should never get here + // + // We are done with this arg, so pop our entry + isCurrentArgAssemblyQualified.Pop(); + currentState = ParsingState.Error; break; } case ParsingState.ParsingNestedClass: @@ -158,37 +264,76 @@ void EnsureGenericArgList() break; } + // Double check that we aren't looking at non-closed generic type (see Github bug #897). + if (curPos != name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier && name[curPos] != NestedClassSpecifier) + { + int targetIndex = targetList.Count - 1; + TypeNameSegment seg = targetList[targetIndex]; + seg.MarkAsNonClosedGenericType(); + targetList[targetIndex] = seg; + + if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek()) + { + Debug.Assert(name[curPos] == ArgSeparator, $"{nameof(isCurrentArgAssemblyQualified)} is true but the non-closed generic we are processing was not followed by a '{ArgSeparator}'."); + if (name[curPos] != ArgSeparator) + { + currentState = ParsingState.Error; + break; + } + + currentState = ParsingState.ParsingGenericArgAssemblySpecifier; + break; + } + } + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } case ParsingState.ParsingGenericArgAssemblySpecifier: { // Nothing to do here, really, just skip the assembly name specified in the generic arg type - while (curPos < name.Length && name[curPos] != ']') + while (curPos < name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier) curPos++; + if (curPos != name.Length) + curPos++; // we hit the ] which closes the fully qualified name, so advance to the next char + + // Done with this arg, so clean up our stack + isCurrentArgAssemblyQualified.Pop(); + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } case ParsingState.ParsingGenericArgs: { - // Start parsing the list of generic types, this just entails marking that we are parsing a generic arg list. NOTE: to support nested generic arg lsits we + // Start parsing the list of generic types, this just entails marking that we are parsing a generic arg list. NOTE: to support nested generic arg lists we // have to keep track of list count, not just a simple bool are/aren't parsing. parsingGenericArgListDepth++; - currentState = ParsingState.ParsingTypeName; + + if (name[curPos + 1] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + currentState = ParsingState.ParsingAssemblyQualifiedGenericArgName; + curPos += 2; + } + else + { + currentState = ParsingState.ParsingNonAssemblyQualifiedGenericArgName; + curPos += 1; + } + break; } case ParsingState.ParsingArraySpecifier: { // Parse the array specifier, this mainly is to catch multi-dimensional arrays - // There is always at least a single dimenision in arrays, every comma counts as one more + // There is always at least a single dimension in arrays, every comma counts as one more int arrayDimensions = 1; // Calculate the array dimensions while ((curPos < name.Length) && (name[curPos] != ']')) { - if (name[curPos] == ',') + if (name[curPos] == ArgSeparator) arrayDimensions++; curPos++; @@ -242,15 +387,97 @@ void EnsureGenericArgList() // top-level). parsingGenericArgListDepth--; + bool isArgAssemblyQualified = isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek(); + if (genericArgs == null || genericArgs.Count == 0) { + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + // For top-level types with multiple-level generic arg lists (so a type with a generic arg which itself is a generic type) as we unwind the nested generic args // lists we can end up wth no more work to do upon exiting a level (because we already propagated the info backwards before getting here), in which case, do nothing. (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } - (currentState, curPos) = ResolveParsedGenericList(name, curPos, parsingGenericArgListDepth, nameSegments, genericArgs); + bool succeeded = ResolveParsedGenericList(parsingGenericArgListDepth, nameSegments, genericArgs); + if (!succeeded) + { + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + currentState = ParsingState.Error; + break; + } + + if (curPos == name.Length) + { + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + currentState = ParsingState.Done; + break; + } + + if (name[curPos] == ArgSeparator) + { + // If we are in the middle of parsing some number of possibly nested assembly qualified type names, and we close out a generic arg list and land on a , it either means + // it will be followed by the assembly name OR by another type name that itself is or is not qualified. So, account for both cases. + if (isArgAssemblyQualified) + { + // NOTE: We will pop isCurrentArgAssemblyQualified in the handler for ParsingGenericArgAssemblySpecifier + // + // Should be an assembly name, so skip it + currentState = ParsingState.ParsingGenericArgAssemblySpecifier; + break; + } + + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + // move past the comma + curPos++; + + // Advance past any whitespace in the param list + int potentiallyNewPos = MoveCurPosPastWhitespaceOrFail(name, curPos); + if (potentiallyNewPos < 0) + { + currentState = ParsingState.Error; + break; + } + + curPos = potentiallyNewPos; + + // We are moving on from the resolved generic and we have encountered a comma. We popped the state of the generic argument already, but it's possible + // that beyond this comma lies the assembly qualified name of the previous param (which could be a generic type that the just resolved generic was a param for). + // So if we see there is another param still (isCurrentArgAssemblyQualified.Count != 0) and that param is assembly qualified (isCurrentArgAssemblyQualified.Peek()) + // then we need to enter a state to consume that assembly name + if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek()) + { + // NOTE: The handler for ParsingGenericArgAssemblySpecifier will handle popping the isCurrentArgAssemblyQualified state for the arg + // it will be working on. + currentState = ParsingState.ParsingGenericArgAssemblySpecifier; + break; + } + + // we either have another qualified name, or a non-qualified one + if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + currentState = ParsingState.ParsingAssemblyQualifiedGenericArgName; + curPos += 1; + break; + } + else + { + currentState = ParsingState.ParsingNonAssemblyQualifiedGenericArgName; + break; + } + } + + // Done with this arg so clean up its entry + isCurrentArgAssemblyQualified.Pop(); + + (currentState, curPos) = DetermineNextStateAndPos(name, curPos); break; } } @@ -277,16 +504,24 @@ void EnsureGenericArgList() } } - private static (ParsingState State, int CurrentPosition) ResolveParsedGenericList(string name, int currentPosition, int parsingGenericArgListDepth, List? nameSegments, List? genericArgs) + private static int MoveCurPosPastWhitespaceOrFail(string name, int curPos) + { + while (curPos < name.Length && name[curPos] == ' ') + curPos++; + + return (curPos != name.Length ? curPos : -1); + } + + private static bool ResolveParsedGenericList(int parsingGenericArgListDepth, List? nameSegments, List? genericArgs) { if (genericArgs == null) { - return (ParsingState.Error, currentPosition); + return false; } // This is the most complicated part of the state machine, it involves back-propagating completed generic argument types into previous types they belong to. // It has to take care to propagate both amongst the genericArgs list as well as into the nameSegments list, it also has to ensure it unifies nested classes that - // exist seperate from their parent type, before back-propagating that parent type. + // exist separate from their parent type, before back-propagating that parent type. // // NOTE: This is called one time per generic list, so in the case of nested generics (where a param to a generic is itself another generic) this will be called // twice (and so on and so forth for arbitrary levels of nesting). What that means is that each call we only want to clear as many generics off our queue @@ -306,7 +541,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis // (also waiting for its params) and one for ActionResponse, which is the arg to pair with the IEnumerable. So we have handle this arg rollup before we can // propagate the args to the nameSegments list // - // NOTE: It is important to do this walk backwards since our list is being used like a queue and later entries bind with entries before them during genric arg + // NOTE: It is important to do this walk backwards since our list is being used like a queue and later entries bind with entries before them during generic arg // back-propagation // // NOTE: Purposely not using FindLastIndexOf because want to avoid allocation cost of lambda + indirection cost of callback during search @@ -328,7 +563,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis propagatedTypesToGenericArgs = true; if (!TryPatchUnfulfilledGenericArgs(genericTargetIndex, genericArgs)) - return (ParsingState.Error, currentPosition); + return false; int previousTarget = genericTargetIndex; genericTargetIndex = -1; @@ -354,7 +589,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis if (nameSegments == null) { Debug.Fail("Ended resolving generic arg list but no top-level types to propagate them to."); - return (ParsingState.Error, currentPosition); + return false; } // Fill the nameSegment generics with args, in order, from the genericArgs list. This works correctly whether the nameSegments list is a single generic or a @@ -388,7 +623,7 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis targetSegmentIndex = nameSegments.FindIndex(targetSegmentIndex, (tns) => tns.HasUnfulfilledGenericArgs); if (targetSegmentIndex == -1) { - return (ParsingState.Error, currentPosition); + return false; } targetSegment = nameSegments[targetSegmentIndex]; @@ -400,18 +635,14 @@ private static (ParsingState State, int CurrentPosition) ResolveParsedGenericLis nameSegments[targetSegmentIndex] = targetSegment; DebugOnly.Assert(genericArgs.Count == 0, "Back-propagation to top-level generic types ended with generic args still in the genericArgs list."); - - return DetermineNextStateAndPos(name, currentPosition); } else { - return (ParsingState.Error, currentPosition); + return false; } } - else - { - return DetermineNextStateAndPos(name, currentPosition); - } + + return true; } private static bool TryPatchUnfulfilledGenericArgs(int genericTargetIndex, List genericArgs) @@ -420,7 +651,7 @@ private static bool TryPatchUnfulfilledGenericArgs(int genericTargetIndex, List< DebugOnly.Assert(targetTypeToPatch.HasUnfulfilledGenericArgs, "Called TryPatchUnfulfilledGenericArgs with an index pointing at a TypeNameSegment that does not have unfulfilled generic params"); // Patch ALL missing args from the target type, but no more. Any types before the target that need filling in will trigger another ResolveParsedGenericList state - // to be entered as we unwind the parse, and at that point we will progate types back another level. This is very important to correctly parse horrific types like: + // to be entered as we unwind the parse, and at that point we will propagate types back another level. This is very important to correctly parse horrific types like: // // Microsoft.CodeAnalysis.PooledObjects.ObjectPool+TagNode>+Node, System.Boolean>>>+Factory int nextTargetFulfillmentIndex = -1; @@ -491,7 +722,7 @@ private static bool TryPatchUnfulfilledGenericArgs(int genericTargetIndex, List< // Add the located argument as a generic arg to our previous generic type targetTypeToPatch.AddGenericArg(genericArgs[nextTargetFulfillmentIndex]); - // Remove the back-propagaeted argument from our argument list since it is now contained within targetTypeToPatch + // Remove the back-propagated argument from our argument list since it is now contained within targetTypeToPatch genericArgs.RemoveAt(nextTargetFulfillmentIndex); } @@ -519,7 +750,7 @@ private static void UnifyNestedClasses(int parsingGenericArgListDepth, List // while (cur < name.Length && - name[cur] != '[' && - (name[cur] != ',' || !parsingGenericArgList) && - (name[cur] != '+' || parsingGenericArgList) && // allow + when parsing generic arg type names because generic arg names can be nested types - (name[cur] != '`' || ((cur + 1 < name.Length) && !char.IsDigit(name[cur + 1])))) + name[cur] != GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier && + name[cur] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier && + (name[cur] != ArgSeparator || !parsingGenericArgList) && + (name[cur] != NestedClassSpecifier || parsingGenericArgList) && // allow + when parsing generic arg type names because generic arg names can be nested types + (name[cur] != GenericAritySpecifier || ((cur + 1 < name.Length) && !char.IsDigit(name[cur + 1])))) { cur++; } @@ -610,9 +842,26 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam switch (name[curPos]) { - case ArgSeperator: + case ArgSeparator: { - return (ParsingState.ParsingGenericArgAssemblySpecifier, curPos + 1); + curPos++; // move past the comma + + int potentiallyNewPos = MoveCurPosPastWhitespaceOrFail(name, curPos); + if (potentiallyNewPos < 0) + { + return (ParsingState.Error, curPos); + } + + curPos = potentiallyNewPos; + + if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier) + { + return (ParsingState.ParsingAssemblyQualifiedGenericArgName, curPos + 1); + } + else + { + return (ParsingState.ParsingNonAssemblyQualifiedGenericArgName, curPos); + } } case GenericAritySpecifier: { @@ -622,33 +871,23 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam { return (ParsingState.ParsingNestedClass, curPos + 1); } - case GenericArgListOrArrayStartSpecifier: + case GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier: { if (curPos + 1 != name.Length) { - if (name[curPos + 1] is GenericArgListOrArrayEndSpecifier or ArgSeperator) + if (name[curPos + 1] is GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier or ArgSeparator) return (ParsingState.ParsingArraySpecifier, curPos); - else if (name[curPos + 1] == GenericArgListOrArrayStartSpecifier) - return (ParsingState.ParsingGenericArgs, curPos + 2); + else + return (ParsingState.ParsingGenericArgs, curPos); } return (ParsingState.Error, curPos); } - case GenericArgListOrArrayEndSpecifier: + case GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier: { if (curPos != name.Length) { - if (name[curPos + 1] == GenericArgListOrArrayEndSpecifier) - return (ParsingState.ResolveParsedGenericList, curPos + 2); - else if (name[curPos + 1] == ArgSeperator) - { - // +3 because cur pos == ']' and next position is a ',', which means we must have a list like this: - // - // [[Microsoft.VisualStudio.Threading.IJoinableTaskDependent, Microsoft.VisualStudio.Threading],[System.Int32, mscorlib],[Microsoft.VisualStudio.Threading.IJoinableTaskDependent, Microsoft.VisualStudio.Threading]] - // - // and thus curPos + 3 == '[', which we want to skip as well. - return (ParsingState.ParsingTypeName, curPos + 3); - } + return (ParsingState.ResolveParsedGenericList, curPos + 1); } return (ParsingState.Error, curPos); @@ -657,9 +896,12 @@ private static (ParsingState State, int Pos) DetermineNextStateAndPos(string nam return (ParsingState.Error, curPos); } } + private enum ParsingState { ParsingTypeName, + ParsingAssemblyQualifiedGenericArgName, + ParsingNonAssemblyQualifiedGenericArgName, ParsingArraySpecifier, ParsingNestedClass, ParsingGenericArgCount, @@ -679,6 +921,7 @@ private struct TypeNameSegment private int _nextUnfulfilledGenericArgSlot; private int _arrayDimensions; private int _arrayOfArraysCount; + private bool _isNonClosedGenericType; // NOTE: This is only an array to defeat the cycle detection stuff in the compiler. Since this is a struct if I have a field that is TypeNameSegment or // TypeNameSegment? it complains there is a cycle in the layout since to determine the size of the struct it must determine the size of the struct. To @@ -696,6 +939,7 @@ public TypeNameSegment(string input, (int Start, int End) extent, bool isNestedC _arrayDimensions = 0; _arrayOfArraysCount = 0; ParsingArgDepth = parsingArgDepth; + _isNonClosedGenericType = false; _nestedClass = null; } @@ -711,17 +955,17 @@ public TypeNameSegment(string input, (int Start, int End) extent, bool isNestedC public bool IsGenericClass => _expectedGenericArgCount != 0; - public bool HasUnfulfilledGenericArgs => IsGenericClass && (_typeArgSegments == null || _nextUnfulfilledGenericArgSlot < _typeArgSegments.Length); + public bool HasUnfulfilledGenericArgs => IsGenericClass && !_isNonClosedGenericType && (_typeArgSegments == null || _nextUnfulfilledGenericArgSlot < _typeArgSegments.Length); public int ExpectedGenericArgsCount => _expectedGenericArgCount; - public int RemainingUnfulfilledGenericArgCount => _expectedGenericArgCount - _nextUnfulfilledGenericArgSlot; + public int RemainingUnfulfilledGenericArgCount => _isNonClosedGenericType ? 0 : _expectedGenericArgCount - _nextUnfulfilledGenericArgSlot; public void SetArrayDimensions(int dimensions) { // There are two distinct versions of multi-dimensional arrays we represent in a single TypeNameSegment. There // is the classic multidimensional array (ala int[,]) and then there is the array of arrays specifier (int[][]). - // Instead of creating a seperate TypeNameSegment for the latter that is just indicating 'an array of the previous + // Instead of creating a separate TypeNameSegment for the latter that is just indicating 'an array of the previous // name segment', we just pile them all into the original TypeNameSegment and differentiate when we are printing // out the names at the end (to either print [,] or [][]). @@ -731,6 +975,11 @@ public void SetArrayDimensions(int dimensions) _arrayOfArraysCount++; } + public void MarkAsNonClosedGenericType() + { + _isNonClosedGenericType = true; + } + public void SetExpectedGenericArgCount(int expectedCount) { _expectedGenericArgCount = expectedCount; @@ -771,7 +1020,7 @@ public void ToString(StringBuilder destination) if (IsGenericClass) { - OutputTypeArguments(destination, _nextUnfulfilledGenericArgSlot, _expectedGenericArgCount, _typeArgSegments); + OutputTypeArguments(destination, _isNonClosedGenericType, _nextUnfulfilledGenericArgSlot, _expectedGenericArgCount, _typeArgSegments); } _nestedClass?[0].ToString(destination); @@ -785,18 +1034,18 @@ public void ToString(StringBuilder destination) } else if (_arrayDimensions != 0) { - destination.Append('['); + destination.Append(GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier); if (_arrayDimensions > 1) - destination.Append(',', _arrayDimensions - 1); - destination.Append(']'); + destination.Append(ArgSeparator, _arrayDimensions - 1); + destination.Append(GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier); } } private bool HasNestedClassSegment => _nestedClass != null; - private static void OutputTypeArguments(StringBuilder destination, int firstMissingGenericSlot, int expectedArgCount, TypeNameSegment[]? typeArgs) + private static void OutputTypeArguments(StringBuilder destination, bool isNonClosedGenericType, int firstMissingGenericSlot, int expectedArgCount, TypeNameSegment[]? typeArgs) { - if (typeArgs == null) + if (typeArgs == null || isNonClosedGenericType) { destination.Append('<'); AddMissingArgumentInfo(destination, firstMissingGenericSlot, expectedArgCount); @@ -822,7 +1071,7 @@ private static void OutputTypeArguments(StringBuilder destination, int firstMiss private static void AddMissingArgumentInfo(StringBuilder result, int firstMissingGenericSlot, int expectedArgCount) { - // This is an error case where the DAC gave no generic args or incomplete generic args for a type + // This is either an error case where the DAC gave no generic args or incomplete generic args for a type or a use of a non-closed generic type for (int i = firstMissingGenericSlot; i < expectedArgCount; i++) { if (i != 0)