Skip to content

Commit

Permalink
Construct TreeArtifactValues on multiple threads.
Browse files Browse the repository at this point in the history
This makes it possible to use every core available for checksumming, which
makes a huge difference for large tree artifacts.

Fixes bazelbuild#17009.

RELNOTES: None.
PiperOrigin-RevId: 525085502
Change-Id: I2a995d3445940333c21eeb89b4ba60887f99e51b
  • Loading branch information
lberki authored and copybara-github committed Apr 18, 2023
1 parent 91584dd commit 368bf11
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,10 @@ private TreeArtifactValue constructTreeArtifactValueFromFilesystem(SpecialArtifa
throw new IOException(errorMessage, e);
}

tree.putChild(child, metadata);
// visitTree() uses multiple threads and putChild() is not thread-safe
synchronized (tree) {
tree.putChild(child, metadata);
}
});

if (archivedTreeArtifactsEnabled) {
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/skyframe/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2893,6 +2893,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata",
"//src/main/java/com/google/devtools/build/lib/actions:has_digest",
"//src/main/java/com/google/devtools/build/lib/concurrent",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec:serialization-constant",
"//src/main/java/com/google/devtools/build/lib/util",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,8 @@ private boolean treeArtifactIsDirty(Artifact artifact, TreeArtifactValue value)

// This could be improved by short-circuiting as soon as we see a child that is not present in
// the TreeArtifactValue, but it doesn't seem to be a major source of overhead.
Set<PathFragment> currentChildren = new HashSet<>();
// visitTree() is called from multiple threads in parallel so this need to be a hash set
Set<PathFragment> currentChildren = Sets.newConcurrentHashSet();
try {
TreeArtifactValue.visitTree(
path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,15 @@
import com.google.devtools.build.lib.actions.FileStateType;
import com.google.devtools.build.lib.actions.HasDigest;
import com.google.devtools.build.lib.actions.cache.MetadataDigestUtils;
import com.google.devtools.build.lib.concurrent.AbstractQueueVisitor;
import com.google.devtools.build.lib.concurrent.ErrorClassifier;
import com.google.devtools.build.lib.concurrent.NamedForkJoinPool;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec;
import com.google.devtools.build.lib.skyframe.serialization.autocodec.SerializationConstant;
import com.google.devtools.build.lib.util.Fingerprint;
import com.google.devtools.build.lib.vfs.Dirent;
import com.google.devtools.build.lib.vfs.IORuntimeException;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.Symlinks;
Expand All @@ -51,14 +56,17 @@
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.ForkJoinPool;
import javax.annotation.Nullable;

/**
* Value for TreeArtifacts, which contains a digest and the {@link FileArtifactValue}s of its child
* {@link TreeFileArtifact}s.
*/
public class TreeArtifactValue implements HasDigest, SkyValue {

private static final ForkJoinPool VISITOR_POOL =
NamedForkJoinPool.newNamedPool(
"tree-artifact-visitor", Runtime.getRuntime().availableProcessors());
/**
* Comparator based on exec path which works on {@link ActionInput} as opposed to {@link
* com.google.devtools.build.lib.actions.Artifact}. This way, we can use an {@link ActionInput} to
Expand Down Expand Up @@ -486,10 +494,71 @@ public interface TreeArtifactVisitor {
*
* <p>If the implementation throws {@link IOException}, traversal is immediately halted and the
* exception is propagated.
*
* <p>This method can be called from multiple threads in parallel during a single call of {@link
* TreeArtifactVisitor#visitTree(Path, TreeArtifactVisitor)}.
*/
@ThreadSafe
void visit(PathFragment parentRelativePath, Dirent.Type type) throws IOException;
}

/** An {@link AbstractQueueVisitor} that visits every file in the tree artifact. */
static class Visitor extends AbstractQueueVisitor {
private final Path parentDir;
private final TreeArtifactVisitor visitor;

Visitor(Path parentDir, TreeArtifactVisitor visitor) {
super(
VISITOR_POOL,
/* shutdownOnCompletion= */ false,
/* failFastOnException= */ true,
ErrorClassifier.DEFAULT);
this.parentDir = checkNotNull(parentDir);
this.visitor = checkNotNull(visitor);
}

void run() throws IOException, InterruptedException {
execute(() -> visitTree(PathFragment.EMPTY_FRAGMENT));
try {
awaitQuiescence(true);
} catch (IORuntimeException e) {
throw e.getCauseIOException();
}
}

// IOExceptions are wrapped in IORuntimeException so that it can be propagated to the main
// thread
private void visitTree(PathFragment subdir) {
try {
for (Dirent dirent : parentDir.getRelative(subdir).readdir(Symlinks.NOFOLLOW)) {
PathFragment parentRelativePath = subdir.getChild(dirent.getName());
Dirent.Type type = dirent.getType();

if (type == Dirent.Type.UNKNOWN) {
throw new IOException(
"Could not determine type of file for "
+ parentRelativePath
+ " under "
+ parentDir);
}

if (type == Dirent.Type.SYMLINK) {
checkSymlink(subdir, parentDir.getRelative(parentRelativePath));
}

visitor.visit(parentRelativePath, type);

if (type == Dirent.Type.DIRECTORY) {
execute(() -> visitTree(parentRelativePath));
}
}
} catch (IOException e) {
// We can't throw checked exceptions here since AQV expects Runnables
throw new IORuntimeException(e);
}
}
}

/**
* Recursively visits all descendants under a directory.
*
Expand All @@ -501,35 +570,16 @@ public interface TreeArtifactVisitor {
* symlink that traverses outside of the tree artifact and any entry of {@link
* Dirent.Type#UNKNOWN}, such as a named pipe.
*
* <p>The visitor will be called on multiple threads in parallel. Accordingly, it must be
* thread-safe.
*
* @throws IOException if there is any problem reading or validating outputs under the given tree
* artifact directory, or if {@link TreeArtifactVisitor#visit} throws {@link IOException}
*/
public static void visitTree(Path parentDir, TreeArtifactVisitor visitor)
public static void visitTree(Path parentDir, TreeArtifactVisitor treeArtifactVisitor)
throws IOException, InterruptedException {
visitTree(parentDir, PathFragment.EMPTY_FRAGMENT, checkNotNull(visitor));
}

private static void visitTree(Path parentDir, PathFragment subdir, TreeArtifactVisitor visitor)
throws IOException {
for (Dirent dirent : parentDir.getRelative(subdir).readdir(Symlinks.NOFOLLOW)) {
PathFragment parentRelativePath = subdir.getChild(dirent.getName());
Dirent.Type type = dirent.getType();

if (type == Dirent.Type.UNKNOWN) {
throw new IOException(
"Could not determine type of file for " + parentRelativePath + " under " + parentDir);
}

if (type == Dirent.Type.SYMLINK) {
checkSymlink(subdir, parentDir.getRelative(parentRelativePath));
}

visitor.visit(parentRelativePath, type);

if (type == Dirent.Type.DIRECTORY) {
visitTree(parentDir, parentRelativePath, visitor);
}
}
Visitor visitor = new Visitor(parentDir, treeArtifactVisitor);
visitor.run();
}

private static void checkSymlink(PathFragment subDir, Path path) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1393,7 +1393,9 @@ public TreeArtifactValue getTreeArtifactValue(SpecialArtifact output)
Artifact.TreeFileArtifact.createTreeOutput(output, parentRelativePath);
FileArtifactValue metadata =
FileArtifactValue.createForTesting(treeDir.getRelative(parentRelativePath));
tree.putChild(child, metadata);
synchronized (tree) {
tree.putChild(child, metadata);
}
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,13 @@ public void visitTree_visitsEachChild() throws Exception {
scratch.resolve("tree/a/b/dangling_link").createSymbolicLink(PathFragment.create("?"));
List<Pair<PathFragment, Dirent.Type>> children = new ArrayList<>();

TreeArtifactValue.visitTree(treeDir, (child, type) -> children.add(Pair.of(child, type)));
TreeArtifactValue.visitTree(
treeDir,
(child, type) -> {
synchronized (children) {
children.add(Pair.of(child, type));
}
});

assertThat(children)
.containsExactly(
Expand Down Expand Up @@ -435,7 +441,13 @@ public void visitTree_pemitsUpLevelSymlinkInsideTree() throws Exception {
scratch.resolve("tree/a/up_link").createSymbolicLink(PathFragment.create("../file"));
List<Pair<PathFragment, Dirent.Type>> children = new ArrayList<>();

TreeArtifactValue.visitTree(treeDir, (child, type) -> children.add(Pair.of(child, type)));
TreeArtifactValue.visitTree(
treeDir,
(child, type) -> {
synchronized (children) {
children.add(Pair.of(child, type));
}
});

assertThat(children)
.containsExactly(
Expand All @@ -450,7 +462,13 @@ public void visitTree_permitsAbsoluteSymlink() throws Exception {
scratch.resolve("tree/absolute_link").createSymbolicLink(PathFragment.create("/tmp"));
List<Pair<PathFragment, Dirent.Type>> children = new ArrayList<>();

TreeArtifactValue.visitTree(treeDir, (child, type) -> children.add(Pair.of(child, type)));
TreeArtifactValue.visitTree(
treeDir,
(child, type) -> {
synchronized (children) {
children.add(Pair.of(child, type));
}
});

assertThat(children)
.containsExactly(Pair.of(PathFragment.create("absolute_link"), Dirent.Type.SYMLINK));
Expand Down

0 comments on commit 368bf11

Please sign in to comment.