-
Notifications
You must be signed in to change notification settings - Fork 4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fork FingerprintValueCache from NestedSetSerializationCache.
This will be used in a generalized cache for shared values. Its semantics differ in two ways. * FingerprintValueCache works for arbitrary objects and not just Object[]. * FingerprintValueCache uses a future for the fingerprint computation so clients deduplicate fingerprint calculations in addition to uploads. This makes the two API methods slightly more uniforrm. PiperOrigin-RevId: 609877485 Change-Id: Ied2b490858fc6eaaac3ca716a7562560b2770570
- Loading branch information
1 parent
96c7d78
commit 3ebf7da
Showing
4 changed files
with
488 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
284 changes: 284 additions & 0 deletions
284
...main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCache.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,284 @@ | ||
// Copyright 2024 The Bazel Authors. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
package com.google.devtools.build.lib.skyframe.serialization; | ||
|
||
import static com.google.common.util.concurrent.MoreExecutors.directExecutor; | ||
|
||
import com.github.benmanes.caffeine.cache.Cache; | ||
import com.github.benmanes.caffeine.cache.Caffeine; | ||
import com.google.auto.value.AutoValue; | ||
import com.google.common.annotations.VisibleForTesting; | ||
import com.google.common.util.concurrent.FutureCallback; | ||
import com.google.common.util.concurrent.Futures; | ||
import com.google.common.util.concurrent.ListenableFuture; | ||
import com.google.devtools.build.lib.skyframe.serialization.FingerprintValueStore.MissingFingerprintValueException; | ||
import com.google.protobuf.ByteString; | ||
import java.io.IOException; | ||
import java.util.concurrent.ExecutionException; | ||
import javax.annotation.Nullable; | ||
|
||
/** | ||
* A bidirectional, in-memory, weak cache storing fingerprint ↔ value associations for the {@link | ||
* FingerprintValueStore}. | ||
* | ||
* <p>The cache supports the possibility of semantically different values having the same serialized | ||
* representation. For this reason, a distinguisher object can be included in the key for the | ||
* fingerprint ⇒ value mapping. This object should encapsulate all additional context necessary to | ||
* deserialize a value. The value ⇒ fingerprint mapping, on the other hand, is expected to be | ||
* deterministic. See {@link FingerprintWithDistinguisher}. | ||
*/ | ||
public final class FingerprintValueCache { | ||
/** | ||
* Fingerprint to value cache. | ||
* | ||
* <p>Used to deduplicate fetches, or in some cases, where the object to be fetched was already | ||
* serialized, retrieves the already existing object. | ||
* | ||
* <p>The keys can either be a {@link ByteString} or a {@link FingerprintWithDistinguisher}. | ||
* | ||
* <p>The values in this cache are always {@code Object} or {@code ListenableFuture<Object>}. We | ||
* avoid a common wrapper object both for memory efficiency and because our cache eviction policy | ||
* is based on value GC, and wrapper objects would defeat that. | ||
* | ||
* <p>While a fetch for the contents is outstanding, the value in the cache will be a {@link | ||
* ListenableFuture}. When it is resolved, it is replaced with the unwrapped {@code Object}. | ||
*/ | ||
private final Cache<Object, Object> deserializationCache = | ||
Caffeine.newBuilder() | ||
.initialCapacity(SerializationConstants.DESERIALIZATION_POOL_SIZE) | ||
.weakValues() | ||
.build(); | ||
|
||
/** | ||
* {@link Object} contents to store result mapping, eventually a fingerprint, but a future while | ||
* in-flight or in case of errors. | ||
* | ||
* <p>This cache deduplicates serializing the same contents to the {@link FingerprintValueStore}. | ||
* Its entries are as follows. | ||
* | ||
* <ul> | ||
* <li>key: the content value object, using reference equality | ||
* <li>value: either a {@code ListenableFuture<PutOperation>} when the operation is in flight or | ||
* a {@link ByteString} fingerprint when it is complete | ||
* </ul> | ||
* | ||
* <p>{@code ListenableFuture<PutOperation>} contains two distinct asynchronous operations. | ||
* | ||
* <ul> | ||
* <li><em>Outer {@code ListenableFuture}</em>: represents the asynchronous completion of | ||
* serialization, fingerprinting and the initialization of the {@link | ||
* FingerprintValueStore#put} operation. | ||
* <li><em>{@link PutOperation#writeStatus}</em>: represents the completion of the {@link | ||
* FingerprintValueStore#put} operation. | ||
* </ul> | ||
*/ | ||
private final Cache<Object, Object> serializationCache = | ||
Caffeine.newBuilder() | ||
.initialCapacity(SerializationConstants.DESERIALIZATION_POOL_SIZE) | ||
.weakKeys() | ||
.build(); | ||
|
||
/** | ||
* Disables population of {@link #deserializationCache} during serialization. | ||
* | ||
* <p>When serialization completes, the usual behavior is to populate the deserialization cache so | ||
* that any subsequent deserialization of the value will hit the cache. This blocks the round-trip | ||
* serialization testing of deserialization code paths as a side effect. Setting this true | ||
* disables population of the deserialization cache on serialization. | ||
*/ | ||
private final boolean exerciseDeserializationForTesting; | ||
|
||
public FingerprintValueCache() { | ||
this(/* exerciseDeserializationForTesting= */ false); | ||
} | ||
|
||
@VisibleForTesting | ||
public FingerprintValueCache(boolean exerciseDeserializationForTesting) { | ||
this.exerciseDeserializationForTesting = exerciseDeserializationForTesting; | ||
} | ||
|
||
/** | ||
* Gets the result of a previous {@code putOperation} or registers a new one for {@code obj}, the | ||
* {@link #serializationCache} key. | ||
* | ||
* <p>If the {@code obj} has already been serialized or if its serialization is in-flight, returns | ||
* a non-null object that may be either: | ||
* | ||
* <ul> | ||
* <li>a {@code ListenableFuture<PutOperation>} if it is still in flight; or | ||
* <li>a {@link ByteString} fingerprint if writing to remote storage is successful. | ||
* </ul> | ||
* | ||
* <p>If a {@code ListenableFuture<PutOperation>} is returned, its expected {@link | ||
* ExecutionException} causes are {@link SerializationException} and {@link IOException}. The | ||
* caller must ensure that these are the only possible causes. | ||
* | ||
* <p>If a previous operation is returned, {@code putOperation} is ignored. Otherwise, if a null | ||
* value is returned, the caller owns the {@code putOperation} and must ensure it completes and | ||
* handle its errors. | ||
* | ||
* @param distinguisher an optional key distinguisher, see {@link FingerprintWithDistinguisher} | ||
*/ | ||
@Nullable | ||
Object getOrClaimPutOperation( | ||
Object obj, @Nullable Object distinguisher, ListenableFuture<PutOperation> putOperation) { | ||
// Any contention here is caused by two threads racing to serialize the same object. Since | ||
// the serialization is pure CPU work, it's tempting to simplify this code by using | ||
// `computeIfAbsent` instead. That unfortunately leads to recursive `ConcurrentMap` updates, | ||
// which isn't supported. | ||
Object previous = serializationCache.asMap().putIfAbsent(obj, putOperation); | ||
if (previous != null) { | ||
return previous; | ||
} | ||
unwrapFingerprintWhenDone(obj, distinguisher, putOperation); | ||
return null; | ||
} | ||
|
||
/** | ||
* Gets the result of a previous {@code getOperation} or registers a new one for {@code | ||
* fingerprint}, the {@link #deserializationCache} key. | ||
* | ||
* <p>This is used to avoid deduplicate fetches or fetching an object that had already been | ||
* serialized from this cache. If the key is for an already stored or retrieved object, or one | ||
* where retrievial is in-flight, returns a non-null value that can be one of the following. | ||
* | ||
* <ul> | ||
* <li>a {@code ListenableFuture<Object>} if retrieval is in-flight; or | ||
* <li>an {@link Object} if it is already known for the key. | ||
* </ul> | ||
* | ||
* <p>If a {@code ListenableFuture<Object>} is returned, its possible {@link ExecutionException} | ||
* causes are {@link SerializationException}, {@link IOException} and {@link | ||
* MissingFingerprintValueException}. The caller must ensure these are the only possible causes. | ||
* | ||
* <p>If a non-null value is returned, {@code getOperation} is ignored. Otherwise, when this | ||
* returns null, the caller must ensure that {@code getOperation} is eventually completed with the | ||
* or an error. The caller is responsible for handling errors. | ||
* | ||
* @param distinguisher an optional distinguisher, see {@link FingerprintWithDistinguisher} | ||
*/ | ||
@Nullable | ||
Object getOrClaimGetOperation( | ||
ByteString fingerprint, | ||
@Nullable Object distinguisher, | ||
ListenableFuture<Object> getOperation) { | ||
Object key = createKey(fingerprint, distinguisher); | ||
Object previous = deserializationCache.asMap().putIfAbsent(key, getOperation); | ||
if (previous != null) { | ||
return previous; | ||
} | ||
unwrapValueWhenDone(fingerprint, key, getOperation); | ||
return null; | ||
} | ||
|
||
/** Populates the reverse mapping and unwraps futures when they are no longer needed. */ | ||
private void unwrapFingerprintWhenDone( | ||
Object obj, @Nullable Object distinguisher, ListenableFuture<PutOperation> putOperation) { | ||
Futures.addCallback( | ||
putOperation, | ||
new FutureCallback<PutOperation>() { | ||
@Override | ||
public void onSuccess(PutOperation operation) { | ||
// Serialization and fingerprinting has succeeded and storing the bytes in the | ||
// FingerprintValueStore has started. | ||
|
||
// Stores the reverse mapping in `deserializationCache`. | ||
if (!exerciseDeserializationForTesting) { | ||
deserializationCache.put(createKey(operation.fingerprint(), distinguisher), obj); | ||
} | ||
|
||
// It's possible to discard the outermost future at this point and only keep the | ||
// PutOperation instead, but for simplicity, discards both once both succeed. | ||
|
||
Futures.addCallback( | ||
operation.writeStatus(), | ||
new FutureCallback<Void>() { | ||
@Override | ||
public void onSuccess(Void unused) { | ||
// The object has been successfully written to remote storage. Discards all the | ||
// wrappers. | ||
serializationCache.put(obj, operation.fingerprint()); | ||
} | ||
|
||
@Override | ||
public void onFailure(Throwable t) { | ||
// Failure will be reported by the owner of `putOperation`. | ||
} | ||
}, | ||
directExecutor()); | ||
} | ||
|
||
@Override | ||
public void onFailure(Throwable t) { | ||
// Failure will be reported by the owner of `putOperation`. | ||
} | ||
}, | ||
directExecutor()); | ||
} | ||
|
||
/** Unwraps the future and populates the reverse mapping when done. */ | ||
private void unwrapValueWhenDone( | ||
ByteString fingerprint, Object key, ListenableFuture<Object> getOperation) { | ||
Futures.addCallback( | ||
getOperation, | ||
new FutureCallback<Object>() { | ||
@Override | ||
public void onSuccess(Object value) { | ||
deserializationCache.put(key, value); | ||
serializationCache.put(value, fingerprint); | ||
} | ||
|
||
@Override | ||
public void onFailure(Throwable t) { | ||
// Failure will be reported by the owner of the `getOperation`. | ||
} | ||
}, | ||
directExecutor()); | ||
} | ||
|
||
private static Object createKey(ByteString fingerprint, @Nullable Object distinguisher) { | ||
if (distinguisher == null) { | ||
return fingerprint; | ||
} | ||
return FingerprintWithDistinguisher.of(fingerprint, distinguisher); | ||
} | ||
|
||
/** | ||
* An extended {@link #deserializationCache} key, needed when the fingerprint alone is not enough. | ||
* | ||
* <p>The mapping stores a bidirectional fingerprint to value associations. However, there can be | ||
* multiple values for the same fingerprint. For example, consider the parent and child objects | ||
* (A, B). Suppose that both A and B share a common value S. When serializing B, S may be omitted | ||
* because it is already known to A and can be reinjected during deserialization. | ||
* | ||
* <p>The problem is that the fingerprint of B does not include anything about the shared value S. | ||
* So it could collide on fingerprint with some other (C, D) with a different shared value T. | ||
* | ||
* <p>Including a <em>distinguisher</em> in the key to account for the contextual value can be | ||
* used to avoid conflicts in cases like this. | ||
*/ | ||
@AutoValue | ||
abstract static class FingerprintWithDistinguisher { | ||
/** The primary key for a {@link #deserializationCache} entry. */ | ||
abstract ByteString fingerprint(); | ||
|
||
/** A secondary key, sometimes needed to resolve ambiguity. */ | ||
abstract Object distinguisher(); | ||
|
||
static FingerprintWithDistinguisher of(ByteString fingerprint, Object distinguisher) { | ||
return new AutoValue_FingerprintValueCache_FingerprintWithDistinguisher( | ||
fingerprint, distinguisher); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.