Skip to content

Commit

Permalink
Construct TreeArtifactValues on multiple threads. (bazelbuild#18194)
Browse files Browse the repository at this point in the history
This makes it possible to use every core available for checksumming, which
makes a huge difference for large tree artifacts.

Fixes bazelbuild#17009.

RELNOTES: None.
PiperOrigin-RevId: 525085502
Change-Id: I2a995d3445940333c21eeb89b4ba60887f99e51b
(cherry picked from commit 368bf11)

# Conflicts:
#	src/main/java/com/google/devtools/build/lib/skyframe/TreeArtifactValue.java

Co-authored-by: Googler <lberki@google.com>
  • Loading branch information
BalestraPatrick and lberki authored Apr 25, 2023
1 parent e4682f6 commit d94dee2
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,10 @@ private TreeArtifactValue constructTreeArtifactValueFromFilesystem(SpecialArtifa
throw new IOException(errorMessage, e);
}

tree.putChild(child, metadata);
// visitTree() uses multiple threads and putChild() is not thread-safe
synchronized (tree) {
tree.putChild(child, metadata);
}
});

if (archivedTreeArtifactsEnabled) {
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/skyframe/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2842,6 +2842,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata",
"//src/main/java/com/google/devtools/build/lib/actions:has_digest",
"//src/main/java/com/google/devtools/build/lib/concurrent",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec:serialization-constant",
"//src/main/java/com/google/devtools/build/lib/util",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,8 @@ private boolean treeArtifactIsDirty(Artifact artifact, TreeArtifactValue value)

// This could be improved by short-circuiting as soon as we see a child that is not present in
// the TreeArtifactValue, but it doesn't seem to be a major source of overhead.
Set<PathFragment> currentChildren = new HashSet<>();
// visitTree() is called from multiple threads in parallel so this need to be a hash set
Set<PathFragment> currentChildren = Sets.newConcurrentHashSet();
try {
TreeArtifactValue.visitTree(
path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,15 @@
import com.google.devtools.build.lib.actions.FileStateType;
import com.google.devtools.build.lib.actions.HasDigest;
import com.google.devtools.build.lib.actions.cache.MetadataDigestUtils;
import com.google.devtools.build.lib.concurrent.AbstractQueueVisitor;
import com.google.devtools.build.lib.concurrent.ErrorClassifier;
import com.google.devtools.build.lib.concurrent.NamedForkJoinPool;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec;
import com.google.devtools.build.lib.skyframe.serialization.autocodec.SerializationConstant;
import com.google.devtools.build.lib.util.Fingerprint;
import com.google.devtools.build.lib.vfs.Dirent;
import com.google.devtools.build.lib.vfs.IORuntimeException;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.Symlinks;
Expand All @@ -51,14 +56,17 @@
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.ForkJoinPool;
import javax.annotation.Nullable;

/**
* Value for TreeArtifacts, which contains a digest and the {@link FileArtifactValue}s of its child
* {@link TreeFileArtifact}s.
*/
public class TreeArtifactValue implements HasDigest, SkyValue {

private static final ForkJoinPool VISITOR_POOL =
NamedForkJoinPool.newNamedPool(
"tree-artifact-visitor", Runtime.getRuntime().availableProcessors());
/**
* Comparator based on exec path which works on {@link ActionInput} as opposed to {@link
* com.google.devtools.build.lib.actions.Artifact}. This way, we can use an {@link ActionInput} to
Expand Down Expand Up @@ -487,10 +495,71 @@ public interface TreeArtifactVisitor {
*
* <p>If the implementation throws {@link IOException}, traversal is immediately halted and the
* exception is propagated.
*
* <p>This method can be called from multiple threads in parallel during a single call of {@link
* TreeArtifactVisitor#visitTree(Path, TreeArtifactVisitor)}.
*/
@ThreadSafe
void visit(PathFragment parentRelativePath, Dirent.Type type) throws IOException;
}

/** An {@link AbstractQueueVisitor} that visits every file in the tree artifact. */
static class Visitor extends AbstractQueueVisitor {
private final Path parentDir;
private final TreeArtifactVisitor visitor;

Visitor(Path parentDir, TreeArtifactVisitor visitor) {
super(
VISITOR_POOL,
/* shutdownOnCompletion= */ false,
/* failFastOnException= */ true,
ErrorClassifier.DEFAULT);
this.parentDir = checkNotNull(parentDir);
this.visitor = checkNotNull(visitor);
}

void run() throws IOException, InterruptedException {
execute(() -> visitTree(PathFragment.EMPTY_FRAGMENT));
try {
awaitQuiescence(true);
} catch (IORuntimeException e) {
throw e.getCauseIOException();
}
}

// IOExceptions are wrapped in IORuntimeException so that it can be propagated to the main
// thread
private void visitTree(PathFragment subdir) {
try {
for (Dirent dirent : parentDir.getRelative(subdir).readdir(Symlinks.NOFOLLOW)) {
PathFragment parentRelativePath = subdir.getChild(dirent.getName());
Dirent.Type type = dirent.getType();

if (type == Dirent.Type.UNKNOWN) {
throw new IOException(
"Could not determine type of file for "
+ parentRelativePath
+ " under "
+ parentDir);
}

if (type == Dirent.Type.SYMLINK) {
checkSymlink(subdir, parentDir.getRelative(parentRelativePath));
}

visitor.visit(parentRelativePath, type);

if (type == Dirent.Type.DIRECTORY) {
execute(() -> visitTree(parentRelativePath));
}
}
} catch (IOException e) {
// We can't throw checked exceptions here since AQV expects Runnables
throw new IORuntimeException(e);
}
}
}

/**
* Recursively visits all descendants under a directory.
*
Expand All @@ -502,33 +571,18 @@ public interface TreeArtifactVisitor {
* symlink that traverses outside of the tree artifact and any entry of {@link
* Dirent.Type#UNKNOWN}, such as a named pipe.
*
* <p>The visitor will be called on multiple threads in parallel. Accordingly, it must be
* thread-safe.
*
* @throws IOException if there is any problem reading or validating outputs under the given tree
* artifact directory, or if {@link TreeArtifactVisitor#visit} throws {@link IOException}
*/
public static void visitTree(Path parentDir, TreeArtifactVisitor visitor) throws IOException {
visitTree(parentDir, PathFragment.EMPTY_FRAGMENT, checkNotNull(visitor));
}

private static void visitTree(Path parentDir, PathFragment subdir, TreeArtifactVisitor visitor)
throws IOException {
for (Dirent dirent : parentDir.getRelative(subdir).readdir(Symlinks.NOFOLLOW)) {
PathFragment parentRelativePath = subdir.getChild(dirent.getName());
Dirent.Type type = dirent.getType();

if (type == Dirent.Type.UNKNOWN) {
throw new IOException(
"Could not determine type of file for " + parentRelativePath + " under " + parentDir);
}

if (type == Dirent.Type.SYMLINK) {
checkSymlink(subdir, parentDir.getRelative(parentRelativePath));
}

visitor.visit(parentRelativePath, type);

if (type == Dirent.Type.DIRECTORY) {
visitTree(parentDir, parentRelativePath, visitor);
}
public static void visitTree(Path parentDir, TreeArtifactVisitor treeArtifactVisitor) throws IOException {
try {
Visitor visitor = new Visitor(parentDir, treeArtifactVisitor);
visitor.run();
} catch (InterruptedException e) {
throw new IOException(e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1240,7 +1240,9 @@ public TreeArtifactValue getTreeArtifactValue(SpecialArtifact output) throws IOE
Artifact.TreeFileArtifact.createTreeOutput(output, parentRelativePath);
FileArtifactValue metadata =
FileArtifactValue.createForTesting(treeDir.getRelative(parentRelativePath));
tree.putChild(child, metadata);
synchronized (tree) {
tree.putChild(child, metadata);
}
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,13 @@ public void visitTree_visitsEachChild() throws Exception {
scratch.resolve("tree/a/b/dangling_link").createSymbolicLink(PathFragment.create("?"));
List<Pair<PathFragment, Dirent.Type>> children = new ArrayList<>();

TreeArtifactValue.visitTree(treeDir, (child, type) -> children.add(Pair.of(child, type)));
TreeArtifactValue.visitTree(
treeDir,
(child, type) -> {
synchronized (children) {
children.add(Pair.of(child, type));
}
});

assertThat(children)
.containsExactly(
Expand Down Expand Up @@ -435,7 +441,13 @@ public void visitTree_pemitsUpLevelSymlinkInsideTree() throws Exception {
scratch.resolve("tree/a/up_link").createSymbolicLink(PathFragment.create("../file"));
List<Pair<PathFragment, Dirent.Type>> children = new ArrayList<>();

TreeArtifactValue.visitTree(treeDir, (child, type) -> children.add(Pair.of(child, type)));
TreeArtifactValue.visitTree(
treeDir,
(child, type) -> {
synchronized (children) {
children.add(Pair.of(child, type));
}
});

assertThat(children)
.containsExactly(
Expand All @@ -450,7 +462,13 @@ public void visitTree_permitsAbsoluteSymlink() throws Exception {
scratch.resolve("tree/absolute_link").createSymbolicLink(PathFragment.create("/tmp"));
List<Pair<PathFragment, Dirent.Type>> children = new ArrayList<>();

TreeArtifactValue.visitTree(treeDir, (child, type) -> children.add(Pair.of(child, type)));
TreeArtifactValue.visitTree(
treeDir,
(child, type) -> {
synchronized (children) {
children.add(Pair.of(child, type));
}
});

assertThat(children)
.containsExactly(Pair.of(PathFragment.create("absolute_link"), Dirent.Type.SYMLINK));
Expand Down

0 comments on commit d94dee2

Please sign in to comment.