Skip to content

Commit

Permalink
Merge pull request #21 from microsoft/CacheWholeFiles
Browse files Browse the repository at this point in the history
Cache whole files in PipelineCaching for hardlinks and CoW
  • Loading branch information
johnterickson authored Dec 7, 2023
2 parents 308062c + 8609a4a commit 0f685fe
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 62 deletions.
181 changes: 133 additions & 48 deletions src/AzurePipelines/PipelineCachingCacheClient.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) Microsoft. All rights reserved.
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System;
Expand Down Expand Up @@ -63,8 +63,6 @@ internal sealed class PipelineCachingCacheClient : CacheClient
#pragma warning restore CS1570 // XML comment has badly formed XML
{
private const string InternalMetadataPathPrefix = "/???";
private static readonly string InternalMetadataPathExcludeMinimatchFilter = // "!\\?\\?\\?/**";
"!" + InternalMetadataPathPrefix.TrimStart('/').Replace("?", "\\?", StringComparison.Ordinal) + "/**";

private const string NodeBuildResultRelativePath = $"{InternalMetadataPathPrefix}/NodeBuildResult";
private const string PathSetRelativePathBase = $"{InternalMetadataPathPrefix}/PathSets";
Expand Down Expand Up @@ -159,7 +157,7 @@ public PipelineCachingCacheClient(
_azureDevopsTracer);

// seed the OPTIONS call
_startupTask = Task.Run(() => QueryPipelineCaching(rootContext, new Microsoft.VisualStudio.Services.PipelineCache.WebApi.Fingerprint("init"), CancellationToken.None));
_startupTask = Task.Run(() => QueryPipelineCaching(rootContext, new VisualStudio.Services.PipelineCache.WebApi.Fingerprint("init"), CancellationToken.None));
}

protected override async Task AddNodeAsync(
Expand Down Expand Up @@ -210,41 +208,30 @@ protected override async Task AddNodeAsync(

// 2. Link out unique content to the temp folder

Dictionary<ContentHash, string> tempFilesPerHash = outputs.Values.Distinct().ToDictionary(
Dictionary<ContentHash, AbsolutePath> tempFilesPerHash = outputs.Values.Distinct().ToDictionary(
hash => hash,
hash =>
{
string tempFilePath = Path.Combine(TempFolder, Guid.NewGuid().ToString("N") + ".tmp");
tempFilePaths.Add(tempFilePath);
return tempFilePath;
return new AbsolutePath(tempFilePath);
});

List<ContentHashWithPath> tempFiles = tempFilesPerHash
.Select(kvp => new ContentHashWithPath(kvp.Key, new AbsolutePath(kvp.Value)))
.Select(kvp => new ContentHashWithPath(kvp.Key, kvp.Value))
.ToList();

foreach (IGrouping<FileRealizationMode, ContentHashWithPath>? tempFilesByRealizationMode in tempFiles.GroupBy(f => GetFileRealizationMode(f.Path.Path)))
Dictionary<AbsolutePath, PlaceFileResult> placeResults = await TryPlaceFilesFromCacheAsync(context, tempFiles, cancellationToken);
foreach (PlaceFileResult placeResult in placeResults.Values)
{
FileRealizationMode realizationMode = tempFilesByRealizationMode.Key;
FileAccessMode accessMode = realizationMode == FileRealizationMode.CopyNoVerify
? FileAccessMode.Write
: FileAccessMode.ReadOnly;

IEnumerable<Task<Indexed<PlaceFileResult>>> placeResults = await _localCAS.PlaceFileAsync(
context, tempFilesByRealizationMode.ToList(), accessMode, FileReplacementMode.FailIfExists, realizationMode, cancellationToken);

foreach (Task<Indexed<PlaceFileResult>> placeResultTask in placeResults)
{
Indexed<PlaceFileResult> placeResult = await placeResultTask;
placeResult.Item.ThrowIfFailure();
}
placeResult.ThrowIfFailure();
}

// 3. map all the relative paths to the temp files
foreach (KeyValuePair<AbsolutePath, ContentHash> output in outputs)
{
string relativePath = output.Key.Path.Replace(RepoRoot.Path, "", StringComparison.OrdinalIgnoreCase);
extras.Add(relativePath.Replace("\\", "/", StringComparison.Ordinal), new FileInfo(tempFilesPerHash[output.Value]));
extras.Add(relativePath.Replace("\\", "/", StringComparison.Ordinal), new FileInfo(tempFilesPerHash[output.Value].Path));
}
}
else
Expand All @@ -268,7 +255,7 @@ protected override async Task AddNodeAsync(

var key = ComputeKey(fingerprint, forWrite: true);
var entry = new CreatePipelineCacheArtifactContract(
new Microsoft.VisualStudio.Services.PipelineCache.WebApi.Fingerprint(key.Split(KeySegmentSeperator)),
new VisualStudio.Services.PipelineCache.WebApi.Fingerprint(key.Split(KeySegmentSeperator)),
result.ManifestId,
result.RootId,
result.ProofNodes,
Expand Down Expand Up @@ -350,7 +337,7 @@ protected override async Task AddNodeAsync(
cancellationToken);

var entry = new CreatePipelineCacheArtifactContract(
new Microsoft.VisualStudio.Services.PipelineCache.WebApi.Fingerprint(key.Split(KeySegmentSeperator)),
new VisualStudio.Services.PipelineCache.WebApi.Fingerprint(key.Split(KeySegmentSeperator)),
result.ManifestId,
result.RootId,
result.ProofNodes,
Expand All @@ -372,12 +359,75 @@ protected override async Task AddNodeAsync(
}
}

/// <summary>
/// Returns the trailing byte of a dedup content hash, which callers compare against
/// <c>ChunkDedupIdentifier.ChunkAlgorithmId</c> to recognize single-chunk content.
/// </summary>
/// <param name="hash">The content hash to inspect; only <c>Dedup1024K</c> and <c>Dedup64K</c> hashes are accepted.</param>
/// <returns>The byte at index <c>hash.Length - 1</c>.</returns>
/// <exception cref="NotSupportedException">The hash is of any other hash type.</exception>
// NOTE(review): `_hashType` and `Length` are ContentHash members declared outside this file - confirm they are accessible from here.
private static byte GetAlgorithmId(ContentHash hash) => hash._hashType switch
{
    HashType.Dedup1024K or HashType.Dedup64K => hash[hash.Length - 1],
    _ => throw new NotSupportedException($"Hash type {hash._hashType} is not supported"),
};

/// <summary>
/// Attempts to place every requested file from the local CAS, replacing any existing file at the destination.
/// </summary>
/// <remarks>
/// Files are processed in batches that share the same algorithm id and file realization mode. For the batch whose
/// algorithm id equals <c>ChunkDedupIdentifier.ChunkAlgorithmId</c>, each failed placement is retried once with the
/// same hash bytes re-typed as <c>HashType.DedupSingleChunk</c>.
/// </remarks>
/// <param name="context">Tracing context forwarded to the local CAS.</param>
/// <param name="files">Hash/destination pairs to place.</param>
/// <param name="cancellationToken">Token forwarded to every placement call.</param>
/// <returns>The final placement result for each destination path; results are not checked for success here.</returns>
private async Task<Dictionary<AbsolutePath, PlaceFileResult>> TryPlaceFilesFromCacheAsync(Context context, IReadOnlyList<ContentHashWithPath> files, CancellationToken cancellationToken)
{
    // cache expects destination directories already exist
    foreach (ContentHashWithPath destination in files)
    {
        CreateParentDirectory(destination.Path);
    }

    var resultsByPath = new Dictionary<AbsolutePath, PlaceFileResult>();

    // Scratch list reused for every batch; all tasks of a batch are awaited before it is refilled.
    var batch = new List<ContentHashWithPath>();

    var batches = files.GroupBy(f => (algoId: GetAlgorithmId(f.Hash), mode: GetFileRealizationMode(f.Path.Path)));
    foreach (var currentBatch in batches)
    {
        (byte algoId, FileRealizationMode realizationMode) = currentBatch.Key;

        FileAccessMode accessMode;
        if (realizationMode == FileRealizationMode.CopyNoVerify)
        {
            accessMode = FileAccessMode.Write;
        }
        else
        {
            accessMode = FileAccessMode.ReadOnly;
        }

        batch.Clear();
        batch.AddRange(currentBatch);

        var started = await _localCAS.PlaceFileAsync(
            context,
            batch,
            accessMode,
            FileReplacementMode.ReplaceExisting,
            realizationMode,
            cancellationToken);
        List<Task<Indexed<PlaceFileResult>>> pending = started.ToList();

        // try to pull single-chunk files from chunk store
        if (algoId == ChunkDedupIdentifier.ChunkAlgorithmId)
        {
            for (int slot = 0; slot < pending.Count; slot++)
            {
                Indexed<PlaceFileResult> firstAttempt = await pending[slot];
                if (firstAttempt.Item.Succeeded)
                {
                    continue;
                }

                byte[] hashBytes = batch[firstAttempt.Index].Hash.ToHashByteArray();

                // Swap the failed task for a retry that carries the same index into the batch.
                pending[slot] = Task.Run(async () =>
                {
                    var retried = await _localCAS.PlaceFileAsync(
                        context,
                        new ContentHash(HashType.DedupSingleChunk, hashBytes),
                        batch[firstAttempt.Index].Path,
                        accessMode,
                        FileReplacementMode.ReplaceExisting,
                        realizationMode,
                        cancellationToken);
                    return retried.WithIndex(firstAttempt.Index);
                });
            }
        }

        foreach (Task<Indexed<PlaceFileResult>> pendingTask in pending)
        {
            Indexed<PlaceFileResult> completed = await pendingTask;
            resultsByPath.Add(batch[completed.Index].Path, completed.Item);
        }
    }

    return resultsByPath;
}

protected override async Task<ICacheEntry?> GetCacheEntryAsync(Context context, StrongFingerprint cacheStrongFingerprint, CancellationToken cancellationToken)
{
string key = ComputeKey(cacheStrongFingerprint, forWrite: false);
PipelineCacheArtifact? result = await QueryPipelineCaching(
context,
new Microsoft.VisualStudio.Services.PipelineCache.WebApi.Fingerprint(key.Split(KeySegmentSeperator)),
new VisualStudio.Services.PipelineCache.WebApi.Fingerprint(key.Split(KeySegmentSeperator)),
cancellationToken);

if (result == null)
Expand Down Expand Up @@ -421,36 +471,71 @@ public void Dispose() { }
public Task<Stream?> GetNodeBuildResultAsync(Context context, CancellationToken cancellationToken) =>
Task.FromResult((Stream?)new MemoryStream(_nodeBuildResultBytes));

public Task PlaceFilesAsync(Context context, IReadOnlyDictionary<AbsolutePath, ContentHash> files, CancellationToken cancellationToken)
public async Task PlaceFilesAsync(Context context, IReadOnlyDictionary<AbsolutePath, ContentHash> files, CancellationToken cancellationToken)
{
_client.Tracer.Debug(context, $"Placing manifest `{_manifestId}`.");

var manifestFiles = CreateNormalizedManifest(_manifest);
var requestFiles = _client.CreateNormalizedManifest(files);
ThrowIfDifferent(manifestFiles, requestFiles, $"Manifest `{_manifestId}` and PlaceFiles don't match:");

var manifestOptions = DownloadDedupManifestArtifactOptions.CreateWithManifestId(
_manifestId,
_client.RepoRoot.Path,
minimatchPatterns: new[] { InternalMetadataPathExcludeMinimatchFilter },
customMinimatchOptions: new Minimatch.Options()
// try to pull whole files from the cache
var places = files.Select(f => new ContentHashWithPath(f.Value, f.Key)).ToList();

Dictionary<AbsolutePath, PlaceFileResult> placeResults = await _client.TryPlaceFilesFromCacheAsync(context, places, cancellationToken);

Dictionary<AbsolutePath, ManifestItem> manifestItems = _manifest.Items.ToDictionary(i => _client.RepoRoot / new RelativePath(i.Path), i => i);
var itemsToDownload = new List<ManifestItem>();
var toAddToCacheAsWholeFile = new Dictionary<ContentHash, AbsolutePath>();
foreach (KeyValuePair<AbsolutePath, PlaceFileResult> placeResult in placeResults)
{
if (!placeResult.Value.Succeeded)
{
Dot = true,
NoBrace = true,
NoCase = false,
// From comments on GitHub as of 08/04/2019
// "If true, backslahes in patterns and paths will be treated as forward slashes. This disables escape characters."
// https://github.com/SLaks/Minimatch/blob/5a5bd62444005689d8ba71541ac36dcfc775e0c7/Minimatch/Minimatcher.cs#L37
AllowWindowsPaths = false,
});

return _client.WithHttpRetries(async () =>
AbsolutePath path = placeResult.Key;
itemsToDownload.Add(manifestItems[path]);

ContentHash hash = files[path];
// We don't need to add single-chunk files as whole files because they are already stored as a chunk
if (GetAlgorithmId(hash) != ChunkDedupIdentifier.ChunkAlgorithmId)
{
toAddToCacheAsWholeFile.TryAdd(hash, path);
}
}
}

if (itemsToDownload.Count == 0)
{
await _client._manifestClient.DownloadAsync(manifestOptions, cancellationToken);
return 0;
},
context: context.ToString()!,
cancellationToken);
return;
}

using var tempManifestFile = new TempFile(FileSystem.Instance, TempFolder);
var tempManifest = new Manifest(itemsToDownload);

#if NETFRAMEWORK
File.WriteAllText(tempManifestFile.Path, JsonSerializer.Serialize(tempManifest));
#else
await File.WriteAllTextAsync(tempManifestFile.Path, JsonSerializer.Serialize(tempManifest), cancellationToken);
#endif

var manifestOptions = DownloadDedupManifestArtifactOptions.CreateWithManifestPath(
tempManifestFile.Path,
_client.RepoRoot.Path);

await _client.WithHttpRetries(
async () =>
{
await _client._manifestClient.DownloadAsyncWithManifestPath(manifestOptions, cancellationToken);
return 0;
},
context: context.ToString()!,
cancellationToken);

foreach (KeyValuePair<ContentHash, AbsolutePath> addToCache in toAddToCacheAsWholeFile)
{
ContentHash hash = addToCache.Key;
AbsolutePath path = addToCache.Value;
await _client._localCAS.PutFileAsync(context, hash, path, _client.GetFileRealizationMode(path.Path), cancellationToken);
}
}
}

Expand All @@ -464,7 +549,7 @@ protected override async IAsyncEnumerable<Selector> GetSelectors(
string key = ComputeSelectorsKey(fingerprint, forWrite: false);
PipelineCacheArtifact? result = await QueryPipelineCaching(
context,
new Microsoft.VisualStudio.Services.PipelineCache.WebApi.Fingerprint(key.Split(KeySegmentSeperator)),
new VisualStudio.Services.PipelineCache.WebApi.Fingerprint(key.Split(KeySegmentSeperator)),
cancellationToken);

if (result == null)
Expand Down Expand Up @@ -570,7 +655,7 @@ private string ComputeSelectorsKey(BuildXL.Cache.MemoizationStore.Interfaces.Ses
? $"selector{InternalSeed}{KeySegmentSeperator}{_universe}{KeySegmentSeperator}{wfp.Serialize()}{KeySegmentSeperator}{DateTime.UtcNow.Ticks}"
: $"selector{InternalSeed}{KeySegmentSeperator}{_universe}{KeySegmentSeperator}{wfp.Serialize()}{KeySegmentSeperator}**";

private Task<PipelineCacheArtifact?> QueryPipelineCaching(Context context, Microsoft.VisualStudio.Services.PipelineCache.WebApi.Fingerprint key, CancellationToken cancellationToken)
private Task<PipelineCacheArtifact?> QueryPipelineCaching(Context context, VisualStudio.Services.PipelineCache.WebApi.Fingerprint key, CancellationToken cancellationToken)
{
return WithHttpRetries(
async () =>
Expand Down
16 changes: 16 additions & 0 deletions src/Common/Caching/CacheClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public abstract class CacheClient : ICacheClient
private readonly OutputHasher _outputHasher;
private readonly ConcurrentDictionary<NodeContext, Task> _publishingTasks = new();
private readonly ConcurrentDictionary<NodeContext, Task> _materializationTasks = new();
private readonly ConcurrentDictionary<AbsolutePath, bool> _directoryCreationCache = new();
private readonly IContentHasher _hasher;
private readonly IFingerprintFactory _fingerprintFactory;
private readonly INodeContextRepository _nodeContextRepository;
Expand Down Expand Up @@ -167,6 +168,21 @@ async Task DrainTasksAsync(ConcurrentDictionary<NodeContext, Task> tasks, string
}
}

/// <summary>
/// Creates the parent directory of <paramref name="filePath"/> and records it in the directory-creation cache.
/// </summary>
/// <remarks>
/// Creation goes through <c>_directoryCreationCache.GetOrAdd</c>, so a directory already recorded there is not
/// created again. Does nothing when the path has no parent.
/// </remarks>
/// <param name="filePath">Path of the file whose containing directory is needed.</param>
protected void CreateParentDirectory(AbsolutePath filePath)
{
    if (filePath.Parent is not { } parentDirectory)
    {
        return;
    }

    _directoryCreationCache.GetOrAdd(parentDirectory, CreateAndRecord);

    // Value factory: create the directory on disk, then store `true` as the cache marker.
    static bool CreateAndRecord(AbsolutePath directory)
    {
        Directory.CreateDirectory(directory.Path);
        return true;
    }
}

private async Task<IReadOnlyDictionary<string, ContentHash>> AddContentAsync(IReadOnlyCollection<string> paths, CancellationToken cancellationToken)
{
Context context = new(RootContext);
Expand Down
15 changes: 1 addition & 14 deletions src/Common/Caching/CasCacheClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ public sealed class CasCacheClient : CacheClient

private readonly ConcurrentDictionary<ContentHash, Task<PutFileOperation>> _putRemoteTaskCache = new();

private readonly ConcurrentDictionary<AbsolutePath, bool> _directoryCreationCache = new();

private readonly ICache? _remoteCache;
private readonly ICache _localCache;

Expand Down Expand Up @@ -441,18 +439,7 @@ private async Task<PlaceFileResult> PlaceFileCoreAsync(
? FileAccessMode.Write
: FileAccessMode.ReadOnly;

// The cache doesn't create the directory for us.
AbsolutePath? parentDirectory = filePath.Parent;
if (parentDirectory is not null)
{
_directoryCreationCache.GetOrAdd(
parentDirectory,
dir =>
{
Directory.CreateDirectory(dir.Path);
return true;
});
}
CreateParentDirectory(filePath);

PlaceFileResult placeResult = await _twoLevelCacheSession.PlaceFileAsync(
context,
Expand Down

0 comments on commit 0f685fe

Please sign in to comment.