diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/BUILD
index b35f4041af1d64..04826fa562441b 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/BUILD
@@ -34,11 +34,13 @@ java_library(
],
deps = [
":codec-scanning-constants",
+ ":constants",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec:registered-singleton",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec:serialization-constant",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/unsafe:unsafe-provider",
"//third_party:auto_value",
+ "//third_party:caffeine",
"//third_party:error_prone_annotations",
"//third_party:fastutil",
"//third_party:flogger",
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCache.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCache.java
new file mode 100644
index 00000000000000..11b866421005d1
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCache.java
@@ -0,0 +1,284 @@
+// Copyright 2024 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.skyframe.serialization;
+
+import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
+
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.google.auto.value.AutoValue;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.util.concurrent.FutureCallback;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.devtools.build.lib.skyframe.serialization.FingerprintValueStore.MissingFingerprintValueException;
+import com.google.protobuf.ByteString;
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+import javax.annotation.Nullable;
+
+/**
+ * A bidirectional, in-memory, weak cache storing fingerprint ↔ value associations for the {@link
+ * FingerprintValueStore}.
+ *
+ *
The cache supports the possibility of semantically different values having the same serialized
+ * representation. For this reason, a distinguisher object can be included in the key for the
+ * fingerprint ⇒ value mapping. This object should encapsulate all additional context necessary to
+ * deserialize a value. The value ⇒ fingerprint mapping, on the other hand, is expected to be
+ * deterministic. See {@link FingerprintWithDistinguisher}.
+ */
+public final class FingerprintValueCache {
+ /**
+ * Fingerprint to value cache.
+ *
+ *
Used to deduplicate fetches, or in some cases, where the object to be fetched was already
+ * serialized, retrieves the already existing object.
+ *
+ *
The keys can either be a {@link ByteString} or a {@link FingerprintWithDistinguisher}.
+ *
+ *
The values in this cache are always {@code Object} or {@code ListenableFuture}. We
+ * avoid a common wrapper object both for memory efficiency and because our cache eviction policy
+ * is based on value GC, and wrapper objects would defeat that.
+ *
+ * While a fetch for the contents is outstanding, the value in the cache will be a {@link
+ * ListenableFuture}. When it is resolved, it is replaced with the unwrapped {@code Object}.
+ */
+ private final Cache deserializationCache =
+ Caffeine.newBuilder()
+ .initialCapacity(SerializationConstants.DESERIALIZATION_POOL_SIZE)
+ .weakValues()
+ .build();
+
+ /**
+ * {@link Object} contents to store result mapping, eventually a fingerprint, but a future while
+ * in-flight or in case of errors.
+ *
+ * This cache deduplicates serializing the same contents to the {@link FingerprintValueStore}.
+ * Its entries are as follows.
+ *
+ *
+ * key: the content value object, using reference equality
+ * value: either a {@code ListenableFuture} when the operation is in flight or
+ * a {@link ByteString} fingerprint when it is complete
+ *
+ *
+ * {@code ListenableFuture} contains two distinct asynchronous operations.
+ *
+ *
+ * Outer {@code ListenableFuture} : represents the asynchronous completion of
+ * serialization, fingerprinting and the initialization of the {@link
+ * FingerprintValueStore#put} operation.
+ * {@link PutOperation#writeStatus} : represents the completion of the {@link
+ * FingerprintValueStore#put} operation.
+ *
+ */
+ private final Cache serializationCache =
+ Caffeine.newBuilder()
+ .initialCapacity(SerializationConstants.DESERIALIZATION_POOL_SIZE)
+ .weakKeys()
+ .build();
+
+ /**
+ * Disables population of {@link #deserializationCache} during serialization.
+ *
+ * When serialization completes, the usual behavior is to populate the deserialization cache so
+ * that any subsequent deserialization of the value will hit the cache. This blocks the round-trip
+ * serialization testing of deserialization code paths as a side effect. Setting this true
+ * disables population of the deserialization cache on serialization.
+ */
+ private final boolean exerciseDeserializationForTesting;
+
+ public FingerprintValueCache() {
+ this(/* exerciseDeserializationForTesting= */ false);
+ }
+
+ @VisibleForTesting
+ public FingerprintValueCache(boolean exerciseDeserializationForTesting) {
+ this.exerciseDeserializationForTesting = exerciseDeserializationForTesting;
+ }
+
+ /**
+ * Gets the result of a previous {@code putOperation} or registers a new one for {@code obj}, the
+ * {@link #serializationCache} key.
+ *
+ *
If the {@code obj} has already been serialized or if its serialization is in-flight, returns
+ * a non-null object that may be either:
+ *
+ *
+ * a {@code ListenableFuture} if it is still in flight; or
+ * a {@link ByteString} fingerprint if writing to remote storage is successful.
+ *
+ *
+ * If a {@code ListenableFuture} is returned, its expected {@link
+ * ExecutionException} causes are {@link SerializationException} and {@link IOException}. The
+ * caller must ensure that these are the only possible causes.
+ *
+ * If a previous operation is returned, {@code putOperation} is ignored. Otherwise, if a null
+ * value is returned, the caller owns the {@code putOperation} and must ensure it completes and
+ * handle its errors.
+ *
+ * @param distinguisher an optional key distinguisher, see {@link FingerprintWithDistinguisher}
+ */
+ @Nullable
+ Object getOrClaimPutOperation(
+ Object obj, @Nullable Object distinguisher, ListenableFuture putOperation) {
+ // Any contention here is caused by two threads racing to serialize the same object. Since
+ // the serialization is pure CPU work, it's tempting to simplify this code by using
+ // `computeIfAbsent` instead. That unfortunately leads to recursive `ConcurrentMap` updates,
+ // which isn't supported.
+ Object previous = serializationCache.asMap().putIfAbsent(obj, putOperation);
+ if (previous != null) {
+ return previous;
+ }
+ unwrapFingerprintWhenDone(obj, distinguisher, putOperation);
+ return null;
+ }
+
+ /**
+ * Gets the result of a previous {@code getOperation} or registers a new one for {@code
+ * fingerprint}, the {@link #deserializationCache} key.
+ *
+ * This is used to avoid deduplicate fetches or fetching an object that had already been
+ * serialized from this cache. If the key is for an already stored or retrieved object, or one
+ * where retrievial is in-flight, returns a non-null value that can be one of the following.
+ *
+ *
+ * a {@code ListenableFuture} if retrieval is in-flight; or
+ * an {@link Object} if it is already known for the key.
+ *
+ *
+ * If a {@code ListenableFuture} is returned, its possible {@link ExecutionException}
+ * causes are {@link SerializationException}, {@link IOException} and {@link
+ * MissingFingerprintValueException}. The caller must ensure these are the only possible causes.
+ *
+ * If a non-null value is returned, {@code getOperation} is ignored. Otherwise, when this
+ * returns null, the caller must ensure that {@code getOperation} is eventually completed with the
+ * or an error. The caller is responsible for handling errors.
+ *
+ * @param distinguisher an optional distinguisher, see {@link FingerprintWithDistinguisher}
+ */
+ @Nullable
+ Object getOrClaimGetOperation(
+ ByteString fingerprint,
+ @Nullable Object distinguisher,
+ ListenableFuture getOperation) {
+ Object key = createKey(fingerprint, distinguisher);
+ Object previous = deserializationCache.asMap().putIfAbsent(key, getOperation);
+ if (previous != null) {
+ return previous;
+ }
+ unwrapValueWhenDone(fingerprint, key, getOperation);
+ return null;
+ }
+
+ /** Populates the reverse mapping and unwraps futures when they are no longer needed. */
+ private void unwrapFingerprintWhenDone(
+ Object obj, @Nullable Object distinguisher, ListenableFuture putOperation) {
+ Futures.addCallback(
+ putOperation,
+ new FutureCallback() {
+ @Override
+ public void onSuccess(PutOperation operation) {
+ // Serialization and fingerprinting has succeeded and storing the bytes in the
+ // FingerprintValueStore has started.
+
+ // Stores the reverse mapping in `deserializationCache`.
+ if (!exerciseDeserializationForTesting) {
+ deserializationCache.put(createKey(operation.fingerprint(), distinguisher), obj);
+ }
+
+ // It's possible to discard the outermost future at this point and only keep the
+ // PutOperation instead, but for simplicity, discards both once both succeed.
+
+ Futures.addCallback(
+ operation.writeStatus(),
+ new FutureCallback() {
+ @Override
+ public void onSuccess(Void unused) {
+ // The object has been successfully written to remote storage. Discards all the
+ // wrappers.
+ serializationCache.put(obj, operation.fingerprint());
+ }
+
+ @Override
+ public void onFailure(Throwable t) {
+ // Failure will be reported by the owner of `putOperation`.
+ }
+ },
+ directExecutor());
+ }
+
+ @Override
+ public void onFailure(Throwable t) {
+ // Failure will be reported by the owner of `putOperation`.
+ }
+ },
+ directExecutor());
+ }
+
+ /** Unwraps the future and populates the reverse mapping when done. */
+ private void unwrapValueWhenDone(
+ ByteString fingerprint, Object key, ListenableFuture getOperation) {
+ Futures.addCallback(
+ getOperation,
+ new FutureCallback() {
+ @Override
+ public void onSuccess(Object value) {
+ deserializationCache.put(key, value);
+ serializationCache.put(value, fingerprint);
+ }
+
+ @Override
+ public void onFailure(Throwable t) {
+ // Failure will be reported by the owner of the `getOperation`.
+ }
+ },
+ directExecutor());
+ }
+
+ private static Object createKey(ByteString fingerprint, @Nullable Object distinguisher) {
+ if (distinguisher == null) {
+ return fingerprint;
+ }
+ return FingerprintWithDistinguisher.of(fingerprint, distinguisher);
+ }
+
+ /**
+ * An extended {@link #deserializationCache} key, needed when the fingerprint alone is not enough.
+ *
+ * The mapping stores a bidirectional fingerprint to value associations. However, there can be
+ * multiple values for the same fingerprint. For example, consider the parent and child objects
+ * (A, B). Suppose that both A and B share a common value S. When serializing B, S may be omitted
+ * because it is already known to A and can be reinjected during deserialization.
+ *
+ *
The problem is that the fingerprint of B does not include anything about the shared value S.
+ * So it could collide on fingerprint with some other (C, D) with a different shared value T.
+ *
+ *
Including a distinguisher in the key to account for the contextual value can be
+ * used to avoid conflicts in cases like this.
+ */
+ @AutoValue
+ abstract static class FingerprintWithDistinguisher {
+ /** The primary key for a {@link #deserializationCache} entry. */
+ abstract ByteString fingerprint();
+
+ /** A secondary key, sometimes needed to resolve ambiguity. */
+ abstract Object distinguisher();
+
+ static FingerprintWithDistinguisher of(ByteString fingerprint, Object distinguisher) {
+ return new AutoValue_FingerprintValueCache_FingerprintWithDistinguisher(
+ fingerprint, distinguisher);
+ }
+ }
+}
diff --git a/src/test/java/com/google/devtools/build/lib/skyframe/serialization/BUILD b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/BUILD
index b9e014765542e1..68dd7147040974 100644
--- a/src/test/java/com/google/devtools/build/lib/skyframe/serialization/BUILD
+++ b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/BUILD
@@ -409,6 +409,7 @@ java_library(
"//src/main/protobuf:build_java_proto",
"//third_party:auto_value",
"//third_party:guava",
+ "//third_party:jsr305",
"//third_party:junit4",
"//third_party:mockito",
"//third_party:truth",
diff --git a/src/test/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCacheTest.java b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCacheTest.java
new file mode 100644
index 00000000000000..7c39a989776331
--- /dev/null
+++ b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCacheTest.java
@@ -0,0 +1,201 @@
+// Copyright 2024 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.skyframe.serialization;
+
+import static com.google.common.truth.Truth.assertThat;
+import static com.google.common.util.concurrent.Futures.immediateFuture;
+import static com.google.common.util.concurrent.Futures.immediateVoidFuture;
+
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.SettableFuture;
+import com.google.protobuf.ByteString;
+import com.google.testing.junit.testparameterinjector.TestParameter;
+import com.google.testing.junit.testparameterinjector.TestParameterInjector;
+import javax.annotation.Nullable;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+@RunWith(TestParameterInjector.class)
+public final class FingerprintValueCacheTest {
+ enum Distinguisher {
+ NULL_DISTINGUISHER(/* value= */ null),
+ NON_NULL_DISTINGUISHER(/* value= */ new Object());
+
+ @SuppressWarnings("ImmutableEnumChecker") // always an immutable instance
+ @Nullable
+ private final Object value;
+
+ Distinguisher(@Nullable Object value) {
+ this.value = value;
+ }
+
+ @Nullable
+ Object value() {
+ return value;
+ }
+ }
+
+ @Test
+ public void putOperation_isCached(@TestParameter Distinguisher distinguisher) {
+ FingerprintValueCache cache = new FingerprintValueCache();
+
+ Object value = new Object();
+
+ SettableFuture op1 = SettableFuture.create();
+ Object result = cache.getOrClaimPutOperation(value, distinguisher.value(), op1);
+ assertThat(result).isNull();
+
+ SettableFuture op2 = SettableFuture.create();
+ result = cache.getOrClaimPutOperation(value, distinguisher.value(), op2);
+ assertThat(result).isSameInstanceAs(op1);
+ }
+
+ @Test
+ public void getOperation_isCached(@TestParameter Distinguisher distinguisher) {
+ FingerprintValueCache cache = new FingerprintValueCache();
+
+ ByteString fingerprint = ByteString.copyFromUtf8("foo");
+
+ SettableFuture op1 = SettableFuture.create();
+ Object result = cache.getOrClaimGetOperation(fingerprint, distinguisher.value(), op1);
+ assertThat(result).isNull();
+
+ SettableFuture op2 = SettableFuture.create();
+ result = cache.getOrClaimGetOperation(fingerprint, distinguisher.value(), op2);
+ assertThat(result).isSameInstanceAs(op1);
+ }
+
+ @Test
+ public void putOperation_isUnwrapped(@TestParameter Distinguisher distinguisher) {
+ FingerprintValueCache cache = new FingerprintValueCache();
+
+ Object value = new Object();
+
+ SettableFuture putOp1 = SettableFuture.create();
+ Object putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp1);
+ assertThat(putResult).isNull();
+
+ // Sets the `PutOperation` in `putOp1`, which triggers the first stage of unwrapping and
+ // populates the reverse cache.
+ ByteString fingerprint = ByteString.copyFromUtf8("foo");
+ SettableFuture writeStatus = SettableFuture.create();
+ putOp1.set(PutOperation.create(fingerprint, writeStatus));
+
+ // A get of `fingerprint` now returns `value` immediately.
+ SettableFuture getOp = SettableFuture.create();
+ Object getResult = cache.getOrClaimGetOperation(fingerprint, distinguisher.value(), getOp);
+ assertThat(getResult).isSameInstanceAs(value);
+
+ // A second "put" of `value' sees the original, wrapped `putOp1`.
+ SettableFuture putOp2 = SettableFuture.create();
+ putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp2);
+ assertThat(putResult).isSameInstanceAs(putOp1);
+
+ // Setting the write status fully unwraps the value.
+ writeStatus.set(null);
+ putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp2);
+ assertThat(putResult).isSameInstanceAs(fingerprint);
+ }
+
+ @Test
+ public void getOperation_isUnwrapped(@TestParameter Distinguisher distinguisher) {
+ FingerprintValueCache cache = new FingerprintValueCache();
+
+ ByteString fingerprint = ByteString.copyFromUtf8("foo");
+
+ SettableFuture getOp = SettableFuture.create();
+ Object result = cache.getOrClaimGetOperation(fingerprint, distinguisher.value(), getOp);
+ assertThat(result).isNull();
+
+ // The first put operation is owned by the caller.
+ Object value = new Object();
+ SettableFuture putOp = SettableFuture.create();
+ Object putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp);
+ assertThat(putResult).isNull();
+
+ // Completes the `getOp`, causing it to be unwrapped.
+ getOp.set(value);
+
+ // The next put operation gets the unwrapped fingerprint.
+ SettableFuture putOp2 = SettableFuture.create();
+ putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp2);
+ assertThat(putResult).isSameInstanceAs(fingerprint);
+
+ // Completing `putOp` overwrites values, but this is benign because `value`s fingerprint should
+ // be deterministic.
+ ByteString fingerprint2 = ByteString.copyFromUtf8("foo");
+ putOp.set(PutOperation.create(fingerprint2, immediateVoidFuture()));
+
+ SettableFuture putOp3 = SettableFuture.create();
+ putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp3);
+ assertThat(putResult).isSameInstanceAs(fingerprint2);
+ }
+
+ @Test
+ public void distinguisher_distinguishesSameFingerprint() {
+ // Puts two values with the same fingerprint, but different distinguishers, then verifies that
+ // they are distinguishable on retrieval.
+ FingerprintValueCache cache = new FingerprintValueCache();
+
+ ByteString fingerprint = ByteString.copyFromUtf8("foo");
+
+ ListenableFuture put =
+ immediateFuture(PutOperation.create(fingerprint, immediateVoidFuture()));
+
+ Object value1 = new Object();
+ Object distinguisher1 = new Object();
+ Object result = cache.getOrClaimPutOperation(value1, distinguisher1, put);
+ assertThat(result).isNull();
+
+ Object value2 = new Object();
+ Object distinguisher2 = new Object();
+ // Reusing `put` here is fine because it's the same fingerprint.
+ result = cache.getOrClaimPutOperation(value2, distinguisher2, put);
+ assertThat(result).isNull();
+
+ // The correct values are returned for the distinguisher values.
+ SettableFuture unusedGetOperation = SettableFuture.create();
+ result = cache.getOrClaimGetOperation(fingerprint, distinguisher1, unusedGetOperation);
+ assertThat(result).isSameInstanceAs(value1);
+ result = cache.getOrClaimGetOperation(fingerprint, distinguisher2, unusedGetOperation);
+ assertThat(result).isSameInstanceAs(value2);
+ }
+
+ @Test
+ public void exerciseDeserializationForTesting_doesNotAddReverseEntry(
+ @TestParameter Distinguisher distinguisher,
+ @TestParameter boolean exerciseDeserializationForTesting) {
+ FingerprintValueCache cache = new FingerprintValueCache(exerciseDeserializationForTesting);
+
+ // Puts the `fingerprint` to `value` association in the cache.
+ ByteString fingerprint = ByteString.copyFromUtf8("foo");
+ Object value = new Object();
+ Object result =
+ cache.getOrClaimPutOperation(
+ value,
+ distinguisher.value(),
+ immediateFuture(PutOperation.create(fingerprint, immediateVoidFuture())));
+ assertThat(result).isNull();
+
+ SettableFuture getOperation = SettableFuture.create();
+ result = cache.getOrClaimGetOperation(fingerprint, distinguisher.value(), getOperation);
+ if (exerciseDeserializationForTesting) {
+ // Ordinarily, the reverse `value` to `fingerprint` would also be added to the cache, but it's
+ // not because `exerciseDeserializationForTesting` is true.
+ assertThat(result).isNull();
+ } else {
+ assertThat(result).isSameInstanceAs(value);
+ }
+ }
+}