From 9925a5b68657d85df50c4dfbb31e31a2899279c2 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 3 Oct 2023 19:26:19 -0700 Subject: [PATCH] Codec for CcCompilationContext.HeaderInfo. HeaderInfo requires cross-value memoization, like NestedSets. PiperOrigin-RevId: 570562294 Change-Id: I57b666ac876ffa2c6a64634ad7c725dcd4117291 --- .../google/devtools/build/lib/rules/cpp/BUILD | 17 +- .../lib/rules/cpp/CcCompilationContext.java | 22 +- .../build/lib/rules/cpp/HeaderInfoCodec.java | 309 ++++++++++++++++++ 3 files changed, 337 insertions(+), 11 deletions(-) create mode 100644 src/main/java/com/google/devtools/build/lib/rules/cpp/HeaderInfoCodec.java diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD b/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD index ba02ddcab5709a..0da6a4f4cc4ed8 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD @@ -22,13 +22,28 @@ INTERFACE_SOURCES = [ "IncludeScannable.java", ] +CODEC_SOURCES = ["HeaderInfoCodec.java"] + +java_library( + name = "header_info_codec", + srcs = CODEC_SOURCES, + deps = [ + ":cpp", + "//src/main/java/com/google/devtools/build/lib/actions:artifacts", + "//src/main/java/com/google/devtools/build/lib/collect/nestedset", + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization", + "//third_party:guava", + "//third_party/protobuf:protobuf_java", + ], +) + # Description: # C++ rule support java_library( name = "cpp", srcs = glob( ["*.java"], - exclude = INTERFACE_SOURCES, + exclude = INTERFACE_SOURCES + CODEC_SOURCES, ), deps = [ ":cpp_interface", diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/CcCompilationContext.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/CcCompilationContext.java index d129df66c05959..fb548877d541a8 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/CcCompilationContext.java +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/CcCompilationContext.java @@ -1281,35 +1281,37 @@ public ImmutableList getMergedResult() { * The transitive collection can be iterated without materialization in memory. */ @Immutable - private static final class HeaderInfo { + static final class HeaderInfo { + // This class has non-private visibility testing and HeaderInfoCodec. + /** * The modules built for this context. If null, then no module is being compiled for this * context. */ - private final DerivedArtifact headerModule; + final DerivedArtifact headerModule; - private final DerivedArtifact picHeaderModule; + final DerivedArtifact picHeaderModule; /** All public header files that are compiled into this module. */ - private final ImmutableList modularPublicHeaders; + final ImmutableList modularPublicHeaders; /** All private header files that are compiled into this module. */ - private final ImmutableList modularPrivateHeaders; + final ImmutableList modularPrivateHeaders; /** All header files that are compiled into this module. */ private final ImmutableList modularHeaders; /** All textual header files that are contained in this module. */ - private final ImmutableList textualHeaders; + final ImmutableList textualHeaders; /** Headers that can be compiled into a separate, smaller module for performance reasons. */ - private final ImmutableList separateModuleHeaders; + final ImmutableList separateModuleHeaders; - private final DerivedArtifact separateModule; - private final DerivedArtifact separatePicModule; + final DerivedArtifact separateModule; + final DerivedArtifact separatePicModule; /** HeaderInfos of direct dependencies of C++ target represented by this context. */ - private final ImmutableList deps; + final ImmutableList deps; /** Collection representing the memoized form of transitive information, set by flatten(). */ private TransitiveHeaderCollection memo = null; diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/HeaderInfoCodec.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/HeaderInfoCodec.java new file mode 100644 index 00000000000000..a5ee684e05e01e --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/HeaderInfoCodec.java @@ -0,0 +1,309 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.rules.cpp; + +import static com.google.common.base.Throwables.throwIfInstanceOf; +import static com.google.common.util.concurrent.Futures.immediateFuture; +import static com.google.common.util.concurrent.MoreExecutors.directExecutor; + +import com.google.common.collect.ImmutableList; +import com.google.common.hash.Hashing; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.SettableFuture; +import com.google.devtools.build.lib.actions.Artifact; +import com.google.devtools.build.lib.actions.Artifact.DerivedArtifact; +import com.google.devtools.build.lib.collect.nestedset.NestedSetStore.NestedSetStorageEndpoint; +import com.google.devtools.build.lib.rules.cpp.CcCompilationContext.HeaderInfo; +import com.google.devtools.build.lib.skyframe.serialization.DeserializationContext; +import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec; +import com.google.devtools.build.lib.skyframe.serialization.SerializationContext; +import com.google.devtools.build.lib.skyframe.serialization.SerializationException; +import com.google.protobuf.ByteString; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.util.ArrayList; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executor; + +/** + * A codec for {@link HeaderInfo} with intra-value memoization. + * + *

{@link HeaderInfo} recursively refers to other {@link HeaderInfo} instances that must be + * serialized in memoized fashion to avoid quadratic storage costs. + */ +public final class HeaderInfoCodec implements ObjectCodec { + private final NestedSetStorageEndpoint storageEndpoint; + + /** + * An executor that performs deserialization computations when bytes are fetched from storage. + * + *

This exists to avoid tying up an RPC callback thread. + */ + private final Executor executor; + + // TODO(b/297857068): while this is only in test code, a ConcurrentHashMap is fine. In production, + // we will need to carefully consider how entries in these maps are invalidated. This could be as + // simple as wiping them between invocations. + // TODO(b/297857068): in both of the maps below, it is possible to unwrap the futures after they + // complete to reduce memory consumption. + private final ConcurrentHashMap> fingerprintToValue = + new ConcurrentHashMap<>(); + private final ConcurrentHashMap> referenceToFingerprint = + new ConcurrentHashMap<>(); + + /** + * A parameter set to improve coverage in round-trip testing. + * + *

When false, completion of serialization populates the {@link #fingerprintToValue} entries, + * which is used to look up values from fingerprints during deserialization. However, this means + * that when round-tripping in test scenarios, the deserialization isn't really exercised as it + * always hits the populated {@link #fingerprintToValue} entries. Setting this flag true disables + * populating {@link #fingerprintToValue} from serialization so the deserialization code can be + * exercised. + */ + private final boolean exerciseDeserializationForTesting; + + public HeaderInfoCodec( + NestedSetStorageEndpoint storageEndpoint, + Executor executor, + boolean exerciseDeserializationForTesting) { + this.storageEndpoint = storageEndpoint; + this.executor = executor; + this.exerciseDeserializationForTesting = exerciseDeserializationForTesting; + } + + @Override + public Class getEncodedClass() { + return HeaderInfo.class; + } + + @Override + public void serialize(SerializationContext context, HeaderInfo obj, CodedOutputStream codedOut) + throws SerializationException, IOException { + ByteString fingerprint; + try { + fingerprint = serializeToStorage(context, obj).get(); + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } catch (ExecutionException e) { + Throwable cause = e.getCause(); + throwIfInstanceOf(cause, SerializationException.class); + throwIfInstanceOf(cause, IOException.class); + throw new SerializationException("Unexpected execution exception", cause); + } + codedOut.writeBytesNoTag(fingerprint); + } + + @Override + public HeaderInfo deserialize(DeserializationContext context, CodedInputStream codedIn) + throws SerializationException, IOException { + ByteString fingerprint = codedIn.readBytes(); + try { + return deserializeFromStorage(context, fingerprint).get(); + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } catch (ExecutionException e) { + Throwable cause = e.getCause(); + throwIfInstanceOf(cause, SerializationException.class); + throwIfInstanceOf(cause, IOException.class); + throw new SerializationException("Unexpected execution exception", cause); + } + } + + /** + * Serializes {@code headerInfo} to storage. + * + *

{@link HeaderInfo} is a recursive data structure that may include an entire DAG of {@link + * HeaderInfo} values. This method is called recursively as subvalues occur. + * + * @return a unique fingerprint mapped to the {@link HeaderInfo} value + */ + private ListenableFuture serializeToStorage( + SerializationContext baseContext, HeaderInfo headerInfo) + throws SerializationException, IOException { + var settableFingerprint = SettableFuture.create(); + var previousFingerprint = referenceToFingerprint.putIfAbsent(headerInfo, settableFingerprint); + if (previousFingerprint != null) { + return previousFingerprint; + } + + // The context must be created fresh each time to reset the memoization state. The result should + // be deserializable on its own, without additional context. + SerializationContext context = baseContext.getNewMemoizingContext(); + // Care must be taken to ensure the SettableFuture is actually set to avoid hanging elsewhere. + boolean futureWasSet = false; + try { + var bytesOut = new ByteArrayOutputStream(); + var codedOut = CodedOutputStream.newInstance(bytesOut); + context.serialize(headerInfo.headerModule, codedOut); + context.serialize(headerInfo.picHeaderModule, codedOut); + context.serialize(headerInfo.modularPublicHeaders, codedOut); + context.serialize(headerInfo.modularPrivateHeaders, codedOut); + context.serialize(headerInfo.textualHeaders, codedOut); + context.serialize(headerInfo.separateModuleHeaders, codedOut); + context.serialize(headerInfo.separateModule, codedOut); + context.serialize(headerInfo.separatePicModule, codedOut); + + ImmutableList deps = headerInfo.deps; + codedOut.writeInt32NoTag(deps.size()); + + // In both branches below, the future waits until the bytes are stored in the NestedSetStore. + // It may be possible to achieve higher throughput by making the fingerprints available as + // soon as possible, without waiting for storage to complete, but that would require + // additional coordination to be built somewhere. + if (deps.isEmpty()) { + codedOut.flush(); + settableFingerprint.setFuture(storeByFingerprint(headerInfo, bytesOut.toByteArray())); + } else { + var depFutures = new ArrayList>(deps.size()); + for (HeaderInfo dep : deps) { + depFutures.add(serializeToStorage(baseContext, dep)); + } + settableFingerprint.setFuture( + Futures.whenAllSucceed(depFutures) + .callAsync( + () -> { + for (var depFuture : depFutures) { + codedOut.writeBytesNoTag(Futures.getDone(depFuture)); + } + codedOut.flush(); + return storeByFingerprint(headerInfo, bytesOut.toByteArray()); + }, + directExecutor())); + } + futureWasSet = true; + } catch (SerializationException | IOException e) { + settableFingerprint.setException(e); + futureWasSet = true; + throw e; + } finally { + if (!futureWasSet) { + // Fails fast (instead of potentially causing a client to hang) if there's an unanticipated + // runtime exception. + settableFingerprint.setException(new IllegalStateException("The future was not set!")); + } + } + + return settableFingerprint; + } + + private ListenableFuture deserializeFromStorage( + DeserializationContext context, ByteString fingerprint) throws IOException { + var settableValue = SettableFuture.create(); + var previousValue = fingerprintToValue.putIfAbsent(fingerprint, settableValue); + if (previousValue != null) { + return previousValue; + } + + boolean futureWasSet = false; + try { + settableValue.setFuture( + Futures.transformAsync( + storageEndpoint.get(fingerprint), + bytes -> deserializeBytes(context, fingerprint, bytes), + executor)); + futureWasSet = true; + } catch (IOException e) { + settableValue.setException(e); + futureWasSet = true; + throw e; + } finally { + if (!futureWasSet) { + // Fails fast (instead of potentially causing a client to hang) if there's an unanticipated + // runtime exception. + settableValue.setException(new IllegalStateException("The future was not set!")); + } + } + + return settableValue; + } + + private ListenableFuture deserializeBytes( + DeserializationContext baseContext, ByteString fingerprint, byte[] bytes) + throws SerializationException, IOException { + var codedIn = CodedInputStream.newInstance(bytes); + var context = baseContext.getNewMemoizingContext(); + + DerivedArtifact headerModule = context.deserialize(codedIn); + DerivedArtifact picHeaderModule = context.deserialize(codedIn); + ImmutableList modularPublicHeaders = context.deserialize(codedIn); + ImmutableList modularPrivateHeaders = context.deserialize(codedIn); + ImmutableList textualHeaders = context.deserialize(codedIn); + ImmutableList separateModuleHeaders = context.deserialize(codedIn); + DerivedArtifact separateModule = context.deserialize(codedIn); + DerivedArtifact separatePicModule = context.deserialize(codedIn); + + int depCount = codedIn.readInt32(); + if (depCount == 0) { + var headerInfo = + new HeaderInfo( + headerModule, + picHeaderModule, + modularPublicHeaders, + modularPrivateHeaders, + textualHeaders, + separateModuleHeaders, + separateModule, + separatePicModule, + ImmutableList.of()); + referenceToFingerprint.put(headerInfo, immediateFuture(fingerprint)); + return immediateFuture(headerInfo); + } + + var futureDeps = new ArrayList>(depCount); + for (int i = 0; i < depCount; ++i) { + futureDeps.add(deserializeFromStorage(baseContext, codedIn.readBytes())); + } + return Futures.whenAllSucceed(futureDeps) + .call( + () -> { + var deps = ImmutableList.builderWithExpectedSize(depCount); + for (var futureDep : futureDeps) { + deps.add(Futures.getDone(futureDep)); + } + var headerInfo = + new HeaderInfo( + headerModule, + picHeaderModule, + modularPublicHeaders, + modularPrivateHeaders, + textualHeaders, + separateModuleHeaders, + separateModule, + separatePicModule, + deps.build()); + referenceToFingerprint.put(headerInfo, immediateFuture(fingerprint)); + return headerInfo; + }, + directExecutor()); + } + + private ListenableFuture storeByFingerprint(HeaderInfo info, byte[] bytes) + throws IOException { + // TODO(b/297857068): observe that the fingerprint value is not made available until after a + // round trip to through the storage service. This means that it should be possible to replace + // the locally generated fingerprint with a storage-service generated UUID. + ByteString fingerprint = ByteString.copyFrom(Hashing.md5().hashBytes(bytes).asBytes()); + if (!exerciseDeserializationForTesting) { + fingerprintToValue.putIfAbsent(fingerprint, immediateFuture(info)); + } + return Futures.transform( + storageEndpoint.put(fingerprint, bytes), unusedVoid -> fingerprint, directExecutor()); + } +}