Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove double tree traversal in checksum syncing. #70302

Merged
merged 14 commits into from
Oct 12, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Diagnostics;
using Microsoft.CodeAnalysis.Host;
using Microsoft.CodeAnalysis.Remote;
using Microsoft.CodeAnalysis.Serialization;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Test.Utilities;
using Roslyn.Utilities;
using Xunit;
Expand Down Expand Up @@ -78,9 +76,16 @@ public SerializationValidator(HostWorkspaceServices services)
Services = services;
}

public async Task<T> GetValueAsync<T>(Checksum checksum)
private async Task<SolutionAsset> GetRequiredAssetAsync(Checksum checksum)
{
var data = await AssetStorage.GetTestAccessor().GetRequiredAssetAsync(checksum, CancellationToken.None).ConfigureAwait(false);
Contract.ThrowIfNull(data);
return new(checksum, data);
}

public async Task<T> GetValueAsync<T>(Checksum checksum)
{
var data = await GetRequiredAssetAsync(checksum).ConfigureAwait(false);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is all test code, which expects to always find these values. so it's ok for it to put in these contract calls and requirements.

Contract.ThrowIfNull(data.Value);

using var context = new SolutionReplicationContext();
Expand Down Expand Up @@ -197,7 +202,7 @@ internal async Task<T> VerifyAssetSerializationAsync<T>(
Func<T, WellKnownSynchronizationKind, ISerializerService, SolutionAsset> assetGetter)
{
// re-create asset from object
var syncObject = await AssetStorage.GetTestAccessor().GetRequiredAssetAsync(checksum, CancellationToken.None).ConfigureAwait(false);
var syncObject = await GetRequiredAssetAsync(checksum).ConfigureAwait(false);

var recoveredValue = await GetValueAsync<T>(checksum).ConfigureAwait(false);
var recreatedSyncObject = assetGetter(recoveredValue, kind, Serializer);
Expand Down Expand Up @@ -339,7 +344,7 @@ internal async Task VerifySynchronizationObjectInServiceAsync<T>(ChecksumObjectC
internal async Task VerifyChecksumInServiceAsync(Checksum checksum, WellKnownSynchronizationKind kind)
{
Assert.NotNull(checksum);
var otherObject = await AssetStorage.GetTestAccessor().GetRequiredAssetAsync(checksum, CancellationToken.None).ConfigureAwait(false);
var otherObject = await GetRequiredAssetAsync(checksum).ConfigureAwait(false);

ChecksumEqual(checksum, kind, otherObject.Checksum, otherObject.Kind);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ internal static Solution CreateFullSolution(Workspace workspace)
loader: TextLoader.From(TextAndVersion.Create(SourceText.From("root = true"), VersionStamp.Create())))));
}

private static async Task<SolutionAsset> GetRequiredAssetAsync(SolutionAssetStorage.Scope scope, Checksum checksum)
{
var data = await scope.GetTestAccessor().GetAssetAsync(checksum, CancellationToken.None).ConfigureAwait(false);
Contract.ThrowIfNull(data);
return new(checksum, data);
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same. also tests.


[Fact]
public async Task CreateSolutionSnapshotId_Empty()
{
Expand All @@ -83,16 +90,13 @@ public async Task CreateSolutionSnapshotId_Empty()

using var scope = await validator.AssetStorage.StoreAssetsAsync(solution, CancellationToken.None).ConfigureAwait(false);
var checksum = scope.SolutionChecksum;
var solutionSyncObject = await scope.GetTestAccessor().GetAssetAsync(checksum, CancellationToken.None).ConfigureAwait(false);
var solutionSyncObject = await GetRequiredAssetAsync(scope, checksum).ConfigureAwait(false);

await validator.VerifySynchronizationObjectInServiceAsync(solutionSyncObject).ConfigureAwait(false);

var solutionObject = await validator.GetValueAsync<SolutionStateChecksums>(checksum).ConfigureAwait(false);
await validator.VerifyChecksumInServiceAsync(solutionObject.Attributes, WellKnownSynchronizationKind.SolutionAttributes).ConfigureAwait(false);

var projectsSyncObject = await scope.GetTestAccessor().GetAssetAsync(solutionObject.Projects.Checksum, CancellationToken.None).ConfigureAwait(false);
await validator.VerifySynchronizationObjectInServiceAsync(projectsSyncObject).ConfigureAwait(false);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will be explained below.


Assert.Equal(0, solutionObject.Projects.Count);
}

Expand All @@ -118,19 +122,15 @@ public async Task CreateSolutionSnapshotId_Project()

using var scope = await validator.AssetStorage.StoreAssetsAsync(project.Solution, CancellationToken.None).ConfigureAwait(false);
var checksum = scope.SolutionChecksum;
var solutionSyncObject = await scope.GetTestAccessor().GetAssetAsync(checksum, CancellationToken.None).ConfigureAwait(false);
var solutionSyncObject = await GetRequiredAssetAsync(scope, checksum).ConfigureAwait(false);

await validator.VerifySynchronizationObjectInServiceAsync(solutionSyncObject).ConfigureAwait(false);

var solutionObject = await validator.GetValueAsync<SolutionStateChecksums>(checksum).ConfigureAwait(false);

await validator.VerifyChecksumInServiceAsync(solutionObject.Attributes, WellKnownSynchronizationKind.SolutionAttributes);

var projectSyncObject = await scope.GetTestAccessor().GetAssetAsync(solutionObject.Projects.Checksum, CancellationToken.None).ConfigureAwait(false);
await validator.VerifySynchronizationObjectInServiceAsync(projectSyncObject).ConfigureAwait(false);

Assert.Equal(1, solutionObject.Projects.Count);
await validator.VerifySnapshotInServiceAsync(validator.ToProjectObjects(solutionObject.Projects)[0], 0, 0, 0, 0, 0).ConfigureAwait(false);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will be explained below.

}

[Fact]
Expand All @@ -156,15 +156,11 @@ public async Task CreateSolutionSnapshotId()
var validator = new SerializationValidator(workspace.Services);

using var scope = await validator.AssetStorage.StoreAssetsAsync(document.Project.Solution, CancellationToken.None).ConfigureAwait(false);
var syncObject = await scope.GetTestAccessor().GetAssetAsync(scope.SolutionChecksum, CancellationToken.None).ConfigureAwait(false);
var syncObject = await GetRequiredAssetAsync(scope, scope.SolutionChecksum).ConfigureAwait(false);
var solutionObject = await validator.GetValueAsync<SolutionStateChecksums>(syncObject.Checksum).ConfigureAwait(false);

await validator.VerifySynchronizationObjectInServiceAsync(syncObject).ConfigureAwait(false);
await validator.VerifyChecksumInServiceAsync(solutionObject.Attributes, WellKnownSynchronizationKind.SolutionAttributes).ConfigureAwait(false);
await validator.VerifyChecksumInServiceAsync(solutionObject.Projects.Checksum, WellKnownSynchronizationKind.ChecksumCollection).ConfigureAwait(false);

Assert.Equal(1, solutionObject.Projects.Count);
await validator.VerifySnapshotInServiceAsync(validator.ToProjectObjects(solutionObject.Projects)[0], 1, 0, 0, 0, 0).ConfigureAwait(false);
}

[Fact]
Expand Down Expand Up @@ -194,18 +190,13 @@ public async Task CreateSolutionSnapshotId_Full()
var validator = new SerializationValidator(workspace.Services);

using var scope = await validator.AssetStorage.StoreAssetsAsync(solution, CancellationToken.None).ConfigureAwait(false);
var syncObject = await scope.GetTestAccessor().GetAssetAsync(scope.SolutionChecksum, CancellationToken.None).ConfigureAwait(false);
var syncObject = await GetRequiredAssetAsync(scope, scope.SolutionChecksum).ConfigureAwait(false);
var solutionObject = await validator.GetValueAsync<SolutionStateChecksums>(syncObject.Checksum).ConfigureAwait(false);

await validator.VerifySynchronizationObjectInServiceAsync(syncObject).ConfigureAwait(false);
await validator.VerifyChecksumInServiceAsync(solutionObject.Attributes, WellKnownSynchronizationKind.SolutionAttributes).ConfigureAwait(false);
await validator.VerifyChecksumInServiceAsync(solutionObject.Projects.Checksum, WellKnownSynchronizationKind.ChecksumCollection).ConfigureAwait(false);

Assert.Equal(2, solutionObject.Projects.Count);

var projects = validator.ToProjectObjects(solutionObject.Projects);
await validator.VerifySnapshotInServiceAsync(projects.Where(p => p.Checksum == firstProjectChecksum).First(), 1, 1, 1, 1, 1).ConfigureAwait(false);
await validator.VerifySnapshotInServiceAsync(projects.Where(p => p.Checksum == secondProjectChecksum).First(), 1, 0, 0, 0, 0).ConfigureAwait(false);
}

[Fact]
Expand Down
43 changes: 43 additions & 0 deletions src/VisualStudio/Core/Test.Next/Remote/SolutionAsset.cs
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: this was a move. it was previously a product concept, but i made it into a test-only concept.

Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.CodeAnalysis.Serialization;
using Roslyn.Utilities;

namespace Microsoft.CodeAnalysis.Remote;

/// <summary>
/// Represents a part of solution snapshot along with its checksum.
/// </summary>
internal readonly struct SolutionAsset
{
/// <summary>
/// Indicates what kind of object it.
///
/// Used in transportation framework and deserialization service to hand shake how to send over data and
/// deserialize serialized data.
/// </summary>
public readonly WellKnownSynchronizationKind Kind;

/// <summary>
/// Checksum of <see cref="Value"/>.
/// </summary>
public readonly Checksum Checksum;

public readonly object? Value;

public SolutionAsset(Checksum checksum, object value)
{
var kind = value.GetWellKnownSynchronizationKind();
// SolutionAsset is not allowed to hold strong references to SourceText. SerializableSourceText is used
// instead to allow data to be released from process address space when it is also held in temporary
// storage.
// https://github.com/dotnet/roslyn/issues/43802
Contract.ThrowIfTrue(kind is WellKnownSynchronizationKind.SourceText);

Checksum = checksum;
Kind = kind;
Value = value;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,13 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#nullable disable

namespace Microsoft.CodeAnalysis.Serialization
{
// TODO: Kind might not actually needed. see whether we can get rid of this
internal enum WellKnownSynchronizationKind
{
Null,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removing this concept entirely. it was not allowed, and we had so many confusing checks everywhere around it.

// Start at a different value from 0 so that if we ever get 0 we know it's a bug.

SolutionState,
SolutionState = 1,
ProjectState,
DocumentState,

Expand Down
8 changes: 6 additions & 2 deletions src/Workspaces/Core/Portable/Serialization/PooledList.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ public static PooledObject<HashSet<Checksum>> CreateChecksumSet(Checksum checksu
public static PooledObject<List<T>> CreateList<T>()
=> SharedPools.Default<List<T>>().GetPooledObject();

public static PooledObject<Dictionary<Checksum, object>> CreateResultSet()
=> SharedPools.Default<Dictionary<Checksum, object>>().GetPooledObject();
public static PooledObject<Dictionary<Checksum, object>> CreateResultMap(out Dictionary<Checksum, object> result)
{
var pooled = SharedPools.Default<Dictionary<Checksum, object>>().GetPooledObject();
result = pooled.Object;
return pooled;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

using System.Collections.Immutable;
using Microsoft.CodeAnalysis.Diagnostics;
using Microsoft.CodeAnalysis.Options;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,6 @@ public Checksum CreateChecksum(object value, CancellationToken cancellationToken

switch (kind)
{
case WellKnownSynchronizationKind.Null:
return Checksum.Null;

case WellKnownSynchronizationKind.CompilationOptions:
case WellKnownSynchronizationKind.ParseOptions:
case WellKnownSynchronizationKind.ProjectReference:
Expand Down Expand Up @@ -110,10 +107,6 @@ public void Serialize(object value, ObjectWriter writer, SolutionReplicationCont

switch (kind)
{
case WellKnownSynchronizationKind.Null:
// do nothing
return;

case WellKnownSynchronizationKind.SolutionAttributes:
case WellKnownSynchronizationKind.ProjectAttributes:
case WellKnownSynchronizationKind.DocumentAttributes:
Expand Down Expand Up @@ -174,17 +167,14 @@ public void Serialize(object value, ObjectWriter writer, SolutionReplicationCont
}
}

public T? Deserialize<T>(WellKnownSynchronizationKind kind, ObjectReader reader, CancellationToken cancellationToken)
public T Deserialize<T>(WellKnownSynchronizationKind kind, ObjectReader reader, CancellationToken cancellationToken)
{
using (Logger.LogBlock(FunctionId.Serializer_Deserialize, s_logKind, kind, cancellationToken))
{
cancellationToken.ThrowIfCancellationRequested();

switch (kind)
{
case WellKnownSynchronizationKind.Null:
return default;

case WellKnownSynchronizationKind.SolutionState:
return (T)(object)SolutionStateChecksums.Deserialize(reader);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ IEnumerator IEnumerable.GetEnumerator()

public void AddAllTo(HashSet<Checksum> checksums)
{
checksums.AddIfNotNullChecksum(this.Checksum);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will also be explained below.

foreach (var checksum in this)
checksums.AddIfNotNullChecksum(checksum);
}
Expand All @@ -48,7 +47,7 @@ public void AddAllTo(HashSet<Checksum> checksums)
internal static async Task FindAsync<TState>(
TextDocumentStates<TState> documentStates,
HashSet<Checksum> searchingChecksumsLeft,
Dictionary<Checksum, object?> result,
Dictionary<Checksum, object> result,
CancellationToken cancellationToken) where TState : TextDocumentState
{
foreach (var (_, state) in documentStates.States)
Expand All @@ -67,8 +66,8 @@ internal static void Find<T>(
IReadOnlyList<T> values,
ChecksumCollection checksums,
HashSet<Checksum> searchingChecksumsLeft,
Dictionary<Checksum, object?> result,
CancellationToken cancellationToken)
Dictionary<Checksum, object> result,
CancellationToken cancellationToken) where T : class
{
Contract.ThrowIfFalse(values.Count == checksums.Children.Length);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Github doesn't seem to allow comments in sections of the file that weren't changed? For the two comments that were unclear, they were both in the loop below and were a bit nit-picky, so I don't care too much if they aren't taken:

  1. No need to check the cancellation token in the loop, nothing expensive going on in there.
  2. The .Count check could just be done inside the .Remove condition as it's status can't change unless the Remove condition succeeds.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair. I can revise!


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public static SolutionStateChecksums Deserialize(ObjectReader reader)
public async Task FindAsync(
SolutionState state,
HashSet<Checksum> searchingChecksumsLeft,
Dictionary<Checksum, object?> result,
Dictionary<Checksum, object> result,
CancellationToken cancellationToken)
{
cancellationToken.ThrowIfCancellationRequested();
Expand All @@ -98,16 +98,6 @@ public async Task FindAsync(
result[FrozenSourceGeneratedDocumentText] = await SerializableSourceText.FromTextDocumentStateAsync(state.FrozenSourceGeneratedDocumentState, cancellationToken).ConfigureAwait(false);
}

if (searchingChecksumsLeft.Remove(Projects.Checksum))
{
result[Projects.Checksum] = Projects;
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this it he meat of the change. The prior logic was simply crazy, and deeply complex confusing. This it the logic that says "hey... you give me a checksum, and i will find the corresponding real object associated with it in the actual solution/project/document state.

except, lines like thsi were basically saying "oh... your checksum itself refers to a checksum object (like a SolutionChecksums/ProjectChecksums/DocumentChecksums instance), so i'll add my own checksum child back into the result.

This was one of the source of multiple passes and expensiveness with checksums. When looking up project info, we'd both recurse into child projects as real 'Project' instances, and then again as checksum instances. Insanity.

All of htat is gone now. we just pass in checksums, and we get real workspace objects back out.

Copy link
Member Author

@CyrusNajmabadi CyrusNajmabadi Oct 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is also what affected tests. which were testing a non-real scenario of asking for these checksums, then validating the checksum objects that were returned. all bogus things that don't reflect real scenarios.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there an incoming PR that will make it so that this doesn't do the depth first search? (as you mentioned in the office the other day)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. That comes in a follow-up


if (searchingChecksumsLeft.Remove(AnalyzerReferences.Checksum))
{
result[AnalyzerReferences.Checksum] = AnalyzerReferences;
}

foreach (var (_, projectState) in state.ProjectStates)
{
if (searchingChecksumsLeft.Count == 0)
Expand Down Expand Up @@ -209,7 +199,7 @@ public static ProjectStateChecksums Deserialize(ObjectReader reader)
public async Task FindAsync(
ProjectState state,
HashSet<Checksum> searchingChecksumsLeft,
Dictionary<Checksum, object?> result,
Dictionary<Checksum, object> result,
CancellationToken cancellationToken)
{
cancellationToken.ThrowIfCancellationRequested();
Expand Down Expand Up @@ -242,36 +232,6 @@ public async Task FindAsync(
result[ParseOptions] = state.ParseOptions;
}

if (searchingChecksumsLeft.Remove(Documents.Checksum))
{
result[Documents.Checksum] = Documents;
}

if (searchingChecksumsLeft.Remove(ProjectReferences.Checksum))
{
result[ProjectReferences.Checksum] = ProjectReferences;
}

if (searchingChecksumsLeft.Remove(MetadataReferences.Checksum))
{
result[MetadataReferences.Checksum] = MetadataReferences;
}

if (searchingChecksumsLeft.Remove(AnalyzerReferences.Checksum))
{
result[AnalyzerReferences.Checksum] = AnalyzerReferences;
}

if (searchingChecksumsLeft.Remove(AdditionalDocuments.Checksum))
{
result[AdditionalDocuments.Checksum] = AdditionalDocuments;
}

if (searchingChecksumsLeft.Remove(AnalyzerConfigDocuments.Checksum))
{
result[AnalyzerConfigDocuments.Checksum] = AnalyzerConfigDocuments;
}

ChecksumCollection.Find(state.ProjectReferences, ProjectReferences, searchingChecksumsLeft, result, cancellationToken);
ChecksumCollection.Find(state.MetadataReferences, MetadataReferences, searchingChecksumsLeft, result, cancellationToken);
ChecksumCollection.Find(state.AnalyzerReferences, AnalyzerReferences, searchingChecksumsLeft, result, cancellationToken);
Expand Down Expand Up @@ -313,7 +273,7 @@ public static DocumentStateChecksums Deserialize(ObjectReader reader)
public async Task FindAsync(
TextDocumentState state,
HashSet<Checksum> searchingChecksumsLeft,
Dictionary<Checksum, object?> result,
Dictionary<Checksum, object> result,
CancellationToken cancellationToken)
{
Debug.Assert(state.TryGetStateChecksums(out var stateChecksum) && this == stateChecksum);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public ValueTask<ImmutableArray<object>> GetAssetsAsync(
stream.Position = 0;
using var reader = ObjectReader.GetReader(stream, leaveOpen: true, cancellationToken);
var asset = deserializerService.Deserialize<object>(data.GetWellKnownSynchronizationKind(), reader, cancellationToken);
Contract.ThrowIfTrue(asset is null);
Contract.ThrowIfNull(asset);
results.Add(asset);
}
else
Expand Down
Loading
Loading