diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/BUILD index b35f4041af1d64..04826fa562441b 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/BUILD +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/BUILD @@ -34,11 +34,13 @@ java_library( ], deps = [ ":codec-scanning-constants", + ":constants", "//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec:registered-singleton", "//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec:serialization-constant", "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/unsafe:unsafe-provider", "//third_party:auto_value", + "//third_party:caffeine", "//third_party:error_prone_annotations", "//third_party:fastutil", "//third_party:flogger", diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCache.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCache.java new file mode 100644 index 00000000000000..11b866421005d1 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCache.java @@ -0,0 +1,284 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.skyframe.serialization; + +import static com.google.common.util.concurrent.MoreExecutors.directExecutor; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.google.auto.value.AutoValue; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.util.concurrent.FutureCallback; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.devtools.build.lib.skyframe.serialization.FingerprintValueStore.MissingFingerprintValueException; +import com.google.protobuf.ByteString; +import java.io.IOException; +import java.util.concurrent.ExecutionException; +import javax.annotation.Nullable; + +/** + * A bidirectional, in-memory, weak cache storing fingerprint ↔ value associations for the {@link + * FingerprintValueStore}. + * + *

The cache supports the possibility of semantically different values having the same serialized + * representation. For this reason, a distinguisher object can be included in the key for the + * fingerprint ⇒ value mapping. This object should encapsulate all additional context necessary to + * deserialize a value. The value ⇒ fingerprint mapping, on the other hand, is expected to be + * deterministic. See {@link FingerprintWithDistinguisher}. + */ +public final class FingerprintValueCache { + /** + * Fingerprint to value cache. + * + *

Used to deduplicate fetches, or in some cases, where the object to be fetched was already + * serialized, retrieves the already existing object. + * + *

The keys can either be a {@link ByteString} or a {@link FingerprintWithDistinguisher}. + * + *

The values in this cache are always {@code Object} or {@code ListenableFuture}. We + * avoid a common wrapper object both for memory efficiency and because our cache eviction policy + * is based on value GC, and wrapper objects would defeat that. + * + *

While a fetch for the contents is outstanding, the value in the cache will be a {@link + * ListenableFuture}. When it is resolved, it is replaced with the unwrapped {@code Object}. + */ + private final Cache deserializationCache = + Caffeine.newBuilder() + .initialCapacity(SerializationConstants.DESERIALIZATION_POOL_SIZE) + .weakValues() + .build(); + + /** + * {@link Object} contents to store result mapping, eventually a fingerprint, but a future while + * in-flight or in case of errors. + * + *

This cache deduplicates serializing the same contents to the {@link FingerprintValueStore}. + * Its entries are as follows. + * + *

+ * + *

{@code ListenableFuture} contains two distinct asynchronous operations. + * + *

+ */ + private final Cache serializationCache = + Caffeine.newBuilder() + .initialCapacity(SerializationConstants.DESERIALIZATION_POOL_SIZE) + .weakKeys() + .build(); + + /** + * Disables population of {@link #deserializationCache} during serialization. + * + *

When serialization completes, the usual behavior is to populate the deserialization cache so + * that any subsequent deserialization of the value will hit the cache. This blocks the round-trip + * serialization testing of deserialization code paths as a side effect. Setting this true + * disables population of the deserialization cache on serialization. + */ + private final boolean exerciseDeserializationForTesting; + + public FingerprintValueCache() { + this(/* exerciseDeserializationForTesting= */ false); + } + + @VisibleForTesting + public FingerprintValueCache(boolean exerciseDeserializationForTesting) { + this.exerciseDeserializationForTesting = exerciseDeserializationForTesting; + } + + /** + * Gets the result of a previous {@code putOperation} or registers a new one for {@code obj}, the + * {@link #serializationCache} key. + * + *

If the {@code obj} has already been serialized or if its serialization is in-flight, returns + * a non-null object that may be either: + * + *

    + *
  • a {@code ListenableFuture} if it is still in flight; or + *
  • a {@link ByteString} fingerprint if writing to remote storage is successful. + *
+ * + *

If a {@code ListenableFuture} is returned, its expected {@link + * ExecutionException} causes are {@link SerializationException} and {@link IOException}. The + * caller must ensure that these are the only possible causes. + * + *

If a previous operation is returned, {@code putOperation} is ignored. Otherwise, if a null + * value is returned, the caller owns the {@code putOperation} and must ensure it completes and + * handle its errors. + * + * @param distinguisher an optional key distinguisher, see {@link FingerprintWithDistinguisher} + */ + @Nullable + Object getOrClaimPutOperation( + Object obj, @Nullable Object distinguisher, ListenableFuture putOperation) { + // Any contention here is caused by two threads racing to serialize the same object. Since + // the serialization is pure CPU work, it's tempting to simplify this code by using + // `computeIfAbsent` instead. That unfortunately leads to recursive `ConcurrentMap` updates, + // which isn't supported. + Object previous = serializationCache.asMap().putIfAbsent(obj, putOperation); + if (previous != null) { + return previous; + } + unwrapFingerprintWhenDone(obj, distinguisher, putOperation); + return null; + } + + /** + * Gets the result of a previous {@code getOperation} or registers a new one for {@code + * fingerprint}, the {@link #deserializationCache} key. + * + *

This is used to avoid deduplicate fetches or fetching an object that had already been + * serialized from this cache. If the key is for an already stored or retrieved object, or one + * where retrievial is in-flight, returns a non-null value that can be one of the following. + * + *

    + *
  • a {@code ListenableFuture} if retrieval is in-flight; or + *
  • an {@link Object} if it is already known for the key. + * + * + *

    If a {@code ListenableFuture} is returned, its possible {@link ExecutionException} + * causes are {@link SerializationException}, {@link IOException} and {@link + * MissingFingerprintValueException}. The caller must ensure these are the only possible causes. + * + *

    If a non-null value is returned, {@code getOperation} is ignored. Otherwise, when this + * returns null, the caller must ensure that {@code getOperation} is eventually completed with the + * or an error. The caller is responsible for handling errors. + * + * @param distinguisher an optional distinguisher, see {@link FingerprintWithDistinguisher} + */ + @Nullable + Object getOrClaimGetOperation( + ByteString fingerprint, + @Nullable Object distinguisher, + ListenableFuture getOperation) { + Object key = createKey(fingerprint, distinguisher); + Object previous = deserializationCache.asMap().putIfAbsent(key, getOperation); + if (previous != null) { + return previous; + } + unwrapValueWhenDone(fingerprint, key, getOperation); + return null; + } + + /** Populates the reverse mapping and unwraps futures when they are no longer needed. */ + private void unwrapFingerprintWhenDone( + Object obj, @Nullable Object distinguisher, ListenableFuture putOperation) { + Futures.addCallback( + putOperation, + new FutureCallback() { + @Override + public void onSuccess(PutOperation operation) { + // Serialization and fingerprinting has succeeded and storing the bytes in the + // FingerprintValueStore has started. + + // Stores the reverse mapping in `deserializationCache`. + if (!exerciseDeserializationForTesting) { + deserializationCache.put(createKey(operation.fingerprint(), distinguisher), obj); + } + + // It's possible to discard the outermost future at this point and only keep the + // PutOperation instead, but for simplicity, discards both once both succeed. + + Futures.addCallback( + operation.writeStatus(), + new FutureCallback() { + @Override + public void onSuccess(Void unused) { + // The object has been successfully written to remote storage. Discards all the + // wrappers. + serializationCache.put(obj, operation.fingerprint()); + } + + @Override + public void onFailure(Throwable t) { + // Failure will be reported by the owner of `putOperation`. + } + }, + directExecutor()); + } + + @Override + public void onFailure(Throwable t) { + // Failure will be reported by the owner of `putOperation`. + } + }, + directExecutor()); + } + + /** Unwraps the future and populates the reverse mapping when done. */ + private void unwrapValueWhenDone( + ByteString fingerprint, Object key, ListenableFuture getOperation) { + Futures.addCallback( + getOperation, + new FutureCallback() { + @Override + public void onSuccess(Object value) { + deserializationCache.put(key, value); + serializationCache.put(value, fingerprint); + } + + @Override + public void onFailure(Throwable t) { + // Failure will be reported by the owner of the `getOperation`. + } + }, + directExecutor()); + } + + private static Object createKey(ByteString fingerprint, @Nullable Object distinguisher) { + if (distinguisher == null) { + return fingerprint; + } + return FingerprintWithDistinguisher.of(fingerprint, distinguisher); + } + + /** + * An extended {@link #deserializationCache} key, needed when the fingerprint alone is not enough. + * + *

    The mapping stores a bidirectional fingerprint to value associations. However, there can be + * multiple values for the same fingerprint. For example, consider the parent and child objects + * (A, B). Suppose that both A and B share a common value S. When serializing B, S may be omitted + * because it is already known to A and can be reinjected during deserialization. + * + *

    The problem is that the fingerprint of B does not include anything about the shared value S. + * So it could collide on fingerprint with some other (C, D) with a different shared value T. + * + *

    Including a distinguisher in the key to account for the contextual value can be + * used to avoid conflicts in cases like this. + */ + @AutoValue + abstract static class FingerprintWithDistinguisher { + /** The primary key for a {@link #deserializationCache} entry. */ + abstract ByteString fingerprint(); + + /** A secondary key, sometimes needed to resolve ambiguity. */ + abstract Object distinguisher(); + + static FingerprintWithDistinguisher of(ByteString fingerprint, Object distinguisher) { + return new AutoValue_FingerprintValueCache_FingerprintWithDistinguisher( + fingerprint, distinguisher); + } + } +} diff --git a/src/test/java/com/google/devtools/build/lib/skyframe/serialization/BUILD b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/BUILD index b9e014765542e1..68dd7147040974 100644 --- a/src/test/java/com/google/devtools/build/lib/skyframe/serialization/BUILD +++ b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/BUILD @@ -409,6 +409,7 @@ java_library( "//src/main/protobuf:build_java_proto", "//third_party:auto_value", "//third_party:guava", + "//third_party:jsr305", "//third_party:junit4", "//third_party:mockito", "//third_party:truth", diff --git a/src/test/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCacheTest.java b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCacheTest.java new file mode 100644 index 00000000000000..7c39a989776331 --- /dev/null +++ b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueCacheTest.java @@ -0,0 +1,201 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.skyframe.serialization; + +import static com.google.common.truth.Truth.assertThat; +import static com.google.common.util.concurrent.Futures.immediateFuture; +import static com.google.common.util.concurrent.Futures.immediateVoidFuture; + +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.SettableFuture; +import com.google.protobuf.ByteString; +import com.google.testing.junit.testparameterinjector.TestParameter; +import com.google.testing.junit.testparameterinjector.TestParameterInjector; +import javax.annotation.Nullable; +import org.junit.Test; +import org.junit.runner.RunWith; + +@RunWith(TestParameterInjector.class) +public final class FingerprintValueCacheTest { + enum Distinguisher { + NULL_DISTINGUISHER(/* value= */ null), + NON_NULL_DISTINGUISHER(/* value= */ new Object()); + + @SuppressWarnings("ImmutableEnumChecker") // always an immutable instance + @Nullable + private final Object value; + + Distinguisher(@Nullable Object value) { + this.value = value; + } + + @Nullable + Object value() { + return value; + } + } + + @Test + public void putOperation_isCached(@TestParameter Distinguisher distinguisher) { + FingerprintValueCache cache = new FingerprintValueCache(); + + Object value = new Object(); + + SettableFuture op1 = SettableFuture.create(); + Object result = cache.getOrClaimPutOperation(value, distinguisher.value(), op1); + assertThat(result).isNull(); + + SettableFuture op2 = SettableFuture.create(); + result = cache.getOrClaimPutOperation(value, distinguisher.value(), op2); + assertThat(result).isSameInstanceAs(op1); + } + + @Test + public void getOperation_isCached(@TestParameter Distinguisher distinguisher) { + FingerprintValueCache cache = new FingerprintValueCache(); + + ByteString fingerprint = ByteString.copyFromUtf8("foo"); + + SettableFuture op1 = SettableFuture.create(); + Object result = cache.getOrClaimGetOperation(fingerprint, distinguisher.value(), op1); + assertThat(result).isNull(); + + SettableFuture op2 = SettableFuture.create(); + result = cache.getOrClaimGetOperation(fingerprint, distinguisher.value(), op2); + assertThat(result).isSameInstanceAs(op1); + } + + @Test + public void putOperation_isUnwrapped(@TestParameter Distinguisher distinguisher) { + FingerprintValueCache cache = new FingerprintValueCache(); + + Object value = new Object(); + + SettableFuture putOp1 = SettableFuture.create(); + Object putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp1); + assertThat(putResult).isNull(); + + // Sets the `PutOperation` in `putOp1`, which triggers the first stage of unwrapping and + // populates the reverse cache. + ByteString fingerprint = ByteString.copyFromUtf8("foo"); + SettableFuture writeStatus = SettableFuture.create(); + putOp1.set(PutOperation.create(fingerprint, writeStatus)); + + // A get of `fingerprint` now returns `value` immediately. + SettableFuture getOp = SettableFuture.create(); + Object getResult = cache.getOrClaimGetOperation(fingerprint, distinguisher.value(), getOp); + assertThat(getResult).isSameInstanceAs(value); + + // A second "put" of `value' sees the original, wrapped `putOp1`. + SettableFuture putOp2 = SettableFuture.create(); + putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp2); + assertThat(putResult).isSameInstanceAs(putOp1); + + // Setting the write status fully unwraps the value. + writeStatus.set(null); + putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp2); + assertThat(putResult).isSameInstanceAs(fingerprint); + } + + @Test + public void getOperation_isUnwrapped(@TestParameter Distinguisher distinguisher) { + FingerprintValueCache cache = new FingerprintValueCache(); + + ByteString fingerprint = ByteString.copyFromUtf8("foo"); + + SettableFuture getOp = SettableFuture.create(); + Object result = cache.getOrClaimGetOperation(fingerprint, distinguisher.value(), getOp); + assertThat(result).isNull(); + + // The first put operation is owned by the caller. + Object value = new Object(); + SettableFuture putOp = SettableFuture.create(); + Object putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp); + assertThat(putResult).isNull(); + + // Completes the `getOp`, causing it to be unwrapped. + getOp.set(value); + + // The next put operation gets the unwrapped fingerprint. + SettableFuture putOp2 = SettableFuture.create(); + putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp2); + assertThat(putResult).isSameInstanceAs(fingerprint); + + // Completing `putOp` overwrites values, but this is benign because `value`s fingerprint should + // be deterministic. + ByteString fingerprint2 = ByteString.copyFromUtf8("foo"); + putOp.set(PutOperation.create(fingerprint2, immediateVoidFuture())); + + SettableFuture putOp3 = SettableFuture.create(); + putResult = cache.getOrClaimPutOperation(value, distinguisher.value(), putOp3); + assertThat(putResult).isSameInstanceAs(fingerprint2); + } + + @Test + public void distinguisher_distinguishesSameFingerprint() { + // Puts two values with the same fingerprint, but different distinguishers, then verifies that + // they are distinguishable on retrieval. + FingerprintValueCache cache = new FingerprintValueCache(); + + ByteString fingerprint = ByteString.copyFromUtf8("foo"); + + ListenableFuture put = + immediateFuture(PutOperation.create(fingerprint, immediateVoidFuture())); + + Object value1 = new Object(); + Object distinguisher1 = new Object(); + Object result = cache.getOrClaimPutOperation(value1, distinguisher1, put); + assertThat(result).isNull(); + + Object value2 = new Object(); + Object distinguisher2 = new Object(); + // Reusing `put` here is fine because it's the same fingerprint. + result = cache.getOrClaimPutOperation(value2, distinguisher2, put); + assertThat(result).isNull(); + + // The correct values are returned for the distinguisher values. + SettableFuture unusedGetOperation = SettableFuture.create(); + result = cache.getOrClaimGetOperation(fingerprint, distinguisher1, unusedGetOperation); + assertThat(result).isSameInstanceAs(value1); + result = cache.getOrClaimGetOperation(fingerprint, distinguisher2, unusedGetOperation); + assertThat(result).isSameInstanceAs(value2); + } + + @Test + public void exerciseDeserializationForTesting_doesNotAddReverseEntry( + @TestParameter Distinguisher distinguisher, + @TestParameter boolean exerciseDeserializationForTesting) { + FingerprintValueCache cache = new FingerprintValueCache(exerciseDeserializationForTesting); + + // Puts the `fingerprint` to `value` association in the cache. + ByteString fingerprint = ByteString.copyFromUtf8("foo"); + Object value = new Object(); + Object result = + cache.getOrClaimPutOperation( + value, + distinguisher.value(), + immediateFuture(PutOperation.create(fingerprint, immediateVoidFuture()))); + assertThat(result).isNull(); + + SettableFuture getOperation = SettableFuture.create(); + result = cache.getOrClaimGetOperation(fingerprint, distinguisher.value(), getOperation); + if (exerciseDeserializationForTesting) { + // Ordinarily, the reverse `value` to `fingerprint` would also be added to the cache, but it's + // not because `exerciseDeserializationForTesting` is true. + assertThat(result).isNull(); + } else { + assertThat(result).isSameInstanceAs(value); + } + } +}