Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Be resilient to asset-cache mutating while retrieving assets from the host #72599

Merged
merged 10 commits into from
Mar 21, 2024
77 changes: 54 additions & 23 deletions src/Workspaces/Remote/ServiceHub/Host/AssetProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,39 +29,34 @@ internal sealed partial class AssetProvider(Checksum solutionChecksum, SolutionA
private readonly SolutionAssetCache _assetCache = assetCache;
private readonly IAssetSource _assetSource = assetSource;

private T GetRequiredAsset<T>(Checksum checksum)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this operation si fundamentally not supported. there is no way, ever, to ensure that a checksum you're asking for is actually in the asset provider (unles sit was pinned. but that's a vanishingly small number of cases).

{
Contract.ThrowIfTrue(checksum == Checksum.Null);
Contract.ThrowIfFalse(_assetCache.TryGetAsset<T>(checksum, out var asset));
return asset;
}

public override async ValueTask<T> GetAssetAsync<T>(
AssetHint assetHint, Checksum checksum, CancellationToken cancellationToken)
{
Contract.ThrowIfTrue(checksum == Checksum.Null);
if (_assetCache.TryGetAsset<T>(checksum, out var asset))
return asset;

using var _ = PooledHashSet<Checksum>.GetInstance(out var checksums);
using var _1 = PooledHashSet<Checksum>.GetInstance(out var checksums);
checksums.Add(checksum);

await this.SynchronizeAssetsAsync(assetHint, checksums, cancellationToken).ConfigureAwait(false);
using var _2 = PooledDictionary<Checksum, object>.GetInstance(out var results);
await this.SynchronizeAssetsAsync(assetHint, checksums, results, cancellationToken).ConfigureAwait(false);

return GetRequiredAsset<T>(checksum);
return (T)results[checksum];
}

public async ValueTask<ImmutableArray<ValueTuple<Checksum, T>>> GetAssetsAsync<T>(
public async ValueTask<ImmutableArray<(Checksum checksum, T asset)>> GetAssetsAsync<T>(
AssetHint assetHint, HashSet<Checksum> checksums, CancellationToken cancellationToken)
{
using var _1 = PooledDictionary<Checksum, object>.GetInstance(out var results);

// bulk synchronize checksums first
var syncer = new ChecksumSynchronizer(this);
await syncer.SynchronizeAssetsAsync(assetHint, checksums, cancellationToken).ConfigureAwait(false);
await syncer.SynchronizeAssetsAsync(assetHint, checksums, results, cancellationToken).ConfigureAwait(false);

// this will be fast since we actually synchronized the checksums above.
using var _ = ArrayBuilder<ValueTuple<Checksum, T>>.GetInstance(checksums.Count, out var list);
foreach (var checksum in checksums)
list.Add(ValueTuple.Create(checksum, GetRequiredAsset<T>(checksum)));
using var _2 = ArrayBuilder<ValueTuple<Checksum, T>>.GetInstance(checksums.Count, out var list);
CyrusNajmabadi marked this conversation as resolved.
Show resolved Hide resolved
foreach (var (checksum, assetObject) in results)
list.Add((checksum, (T)assetObject));

return list.ToImmutableAndClear();
}
Expand Down Expand Up @@ -113,7 +108,7 @@ public async ValueTask SynchronizeProjectAssetsAsync(ProjectStateChecksums proje
}

public async ValueTask SynchronizeAssetsAsync(
AssetHint assetHint, HashSet<Checksum> checksums, CancellationToken cancellationToken)
AssetHint assetHint, HashSet<Checksum> checksums, Dictionary<Checksum, object>? results, CancellationToken cancellationToken)
{
Contract.ThrowIfTrue(checksums.Contains(Checksum.Null));
if (checksums.Count == 0)
Expand All @@ -136,24 +131,60 @@ public async ValueTask SynchronizeAssetsAsync(
missingChecksumsCount = 0;
foreach (var checksum in checksums)
{
if (!_assetCache.TryGetAsset<object>(checksum, out _))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

view with whitespace off.

if (_assetCache.TryGetAsset<object>(checksum, out var existing))
{
AddResult(checksum, existing);
}
else
{
if (missingChecksumsCount == missingChecksums.Length)
{
// This can happen if the asset cache has been modified by another thread during this method's execution.
var newMissingChecksums = new Checksum[missingChecksumsCount * 2];
Array.Copy(missingChecksums, newMissingChecksums, missingChecksumsCount);

if (usePool)
{
s_checksumPool.Free(missingChecksums);
usePool = false;
}

missingChecksums = newMissingChecksums;
}

missingChecksums[missingChecksumsCount] = checksum;
missingChecksumsCount++;
}
}

var missingChecksumsMemory = new ReadOnlyMemory<Checksum>(missingChecksums, 0, missingChecksumsCount);
var assets = await RequestAssetsAsync(assetHint, missingChecksumsMemory, cancellationToken).ConfigureAwait(false);
if (missingChecksumsCount > 0)
{
var missingChecksumsMemory = new ReadOnlyMemory<Checksum>(missingChecksums, 0, missingChecksumsCount);
var missingAssets = await RequestAssetsAsync(assetHint, missingChecksumsMemory, cancellationToken).ConfigureAwait(false);

Contract.ThrowIfTrue(missingChecksumsMemory.Length != assets.Length);
Contract.ThrowIfTrue(missingChecksumsMemory.Length != missingAssets.Length);

for (int i = 0, n = assets.Length; i < n; i++)
_assetCache.GetOrAdd(missingChecksums[i], assets[i]);
for (int i = 0, n = missingAssets.Length; i < n; i++)
{
var missingChecksum = missingChecksums[i];
var missingAsset = missingAssets[i];

AddResult(missingChecksum, missingAsset);
_assetCache.GetOrAdd(missingChecksum, missingAsset);
}
}

if (usePool)
s_checksumPool.Free(missingChecksums);
}

return;

void AddResult(Checksum checksum, object result)
{
if (results != null)
results[checksum] = result;
}
}

private async Task<ImmutableArray<object>> RequestAssetsAsync(
Expand Down
38 changes: 25 additions & 13 deletions src/Workspaces/Remote/ServiceHub/Host/ChecksumSynchronizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,22 @@ private readonly struct ChecksumSynchronizer(AssetProvider assetProvider)

private readonly AssetProvider _assetProvider = assetProvider;

public async ValueTask SynchronizeAssetsAsync(AssetHint assetHint, HashSet<Checksum> checksums, CancellationToken cancellationToken)
public async ValueTask SynchronizeAssetsAsync(
AssetHint assetHint,
HashSet<Checksum> checksums,
Dictionary<Checksum, object>? results,
CancellationToken cancellationToken)
{
using (await s_gate.DisposableWaitAsync(cancellationToken).ConfigureAwait(false))
{
await _assetProvider.SynchronizeAssetsAsync(assetHint, checksums, cancellationToken).ConfigureAwait(false);
await _assetProvider.SynchronizeAssetsAsync(assetHint, checksums, results, cancellationToken).ConfigureAwait(false);
}
}

public async ValueTask SynchronizeSolutionAssetsAsync(Checksum solutionChecksum, CancellationToken cancellationToken)
{
SolutionStateChecksums solutionChecksumObject;

CyrusNajmabadi marked this conversation as resolved.
Show resolved Hide resolved
using (await s_gate.DisposableWaitAsync(cancellationToken).ConfigureAwait(false))
{
// this will make 4 round trip to data source (VS) to get all assets that belong to the given solution checksum
Expand All @@ -47,14 +52,17 @@ public async ValueTask SynchronizeSolutionAssetsAsync(Checksum solutionChecksum,

solutionChecksumObject.AddAllTo(checksums);
checksums.Remove(solutionChecksumObject.Checksum);
await _assetProvider.SynchronizeAssetsAsync(assetHint: AssetHint.None, checksums, cancellationToken).ConfigureAwait(false);
await _assetProvider.SynchronizeAssetsAsync(assetHint: AssetHint.None, checksums, results: null, cancellationToken).ConfigureAwait(false);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so we could pass a dictionary here that we then read in below. but i didn't feel like it was worth it. the data will normally always stay in the asset provider. and the cahnge below to use GetAssetAsync will normally find it, or rarely then go and reretrieve it in the rare unlucky case. that saves a potentially large dictionary alloc here (even if pooled).

}
}

// third and last get direct children for all projects and documents in the solution
foreach (var project in solutionChecksumObject.Projects)
foreach (var (projectChecksum, _) in solutionChecksumObject.Projects)
{
var projectStateChecksums = _assetProvider.GetRequiredAsset<ProjectStateChecksums>(project.checksum);
// These GetAssetAsync calls should be fast since they were just retrieved above. There's a small
// chance the asset-cache GC pass may have cleaned them up, but that should be exceedingly rare.
var projectStateChecksums = await _assetProvider.GetAssetAsync<ProjectStateChecksums>(
assetHint: AssetHint.None, projectChecksum, cancellationToken).ConfigureAwait(false);
await SynchronizeProjectAssetsAsync(projectStateChecksums, cancellationToken).ConfigureAwait(false);
}
}
Expand Down Expand Up @@ -82,24 +90,28 @@ private async ValueTask SynchronizeProjectAssets_NoLockAsync(ProjectStateChecksu
AddAll(checksums, projectChecksum.AdditionalDocuments.Checksums);
AddAll(checksums, projectChecksum.AnalyzerConfigDocuments.Checksums);

// First synchronize all the top-level info about this project.
await _assetProvider.SynchronizeAssetsAsync(
assetHint: projectChecksum.ProjectId, checksums, cancellationToken).ConfigureAwait(false);
assetHint: projectChecksum.ProjectId, checksums, results: null, cancellationToken).ConfigureAwait(false);

checksums.Clear();

CollectChecksumChildren(this, projectChecksum.Documents.Checksums);
CollectChecksumChildren(this, projectChecksum.AdditionalDocuments.Checksums);
CollectChecksumChildren(this, projectChecksum.AnalyzerConfigDocuments.Checksums);
// Then synchronize the info about all the documents within.
await CollectChecksumChildrenAsync(this, projectChecksum.Documents.Checksums).ConfigureAwait(false);
await CollectChecksumChildrenAsync(this, projectChecksum.AdditionalDocuments.Checksums).ConfigureAwait(false);
await CollectChecksumChildrenAsync(this, projectChecksum.AnalyzerConfigDocuments.Checksums).ConfigureAwait(false);

await _assetProvider.SynchronizeAssetsAsync(
assetHint: projectChecksum.ProjectId, checksums, cancellationToken).ConfigureAwait(false);
assetHint: projectChecksum.ProjectId, checksums, results: null, cancellationToken).ConfigureAwait(false);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here.


void CollectChecksumChildren(ChecksumSynchronizer @this, ChecksumCollection collection)
async ValueTask CollectChecksumChildrenAsync(ChecksumSynchronizer @this, ChecksumCollection collection)
{
foreach (var checksum in collection)
{
// These DocumentStateChecksums must be here due to the synchronizing step that just happened above.
var checksumObject = @this._assetProvider.GetRequiredAsset<DocumentStateChecksums>(checksum);
// These GetAssetAsync calls should be fast since they were just retrieved above. There's a small
// chance the asset-cache GC pass may have cleaned them up, but that should be exceedingly rare.
var checksumObject = await @this._assetProvider.GetAssetAsync<DocumentStateChecksums>(
assetHint: projectChecksum.ProjectId, checksum, cancellationToken).ConfigureAwait(false);
checksums.Add(checksumObject.Info);
checksums.Add(checksumObject.Text);
}
Expand Down