Skip to content

Commit

Permalink
No public description
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 588441468
Change-Id: Ib98b3e661efcc230b079c4653344b6b3e1683c8a
  • Loading branch information
yuyue730 authored and copybara-github committed Dec 6, 2023
1 parent d160b12 commit 962db94
Show file tree
Hide file tree
Showing 22 changed files with 2,023 additions and 560 deletions.
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ filegroup(
"//src/main/java/com/google/devtools/build/lib/network:srcs",
"//src/main/java/com/google/devtools/build/lib/packages:srcs",
"//src/main/java/com/google/devtools/build/lib/packages/metrics:srcs",
"//src/main/java/com/google/devtools/build/lib/packages/producers:srcs",
"//src/main/java/com/google/devtools/build/lib/packages/semantics:srcs",
"//src/main/java/com/google/devtools/build/lib/pkgcache:srcs",
"//src/main/java/com/google/devtools/build/lib/platform:srcs",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ java_library(
"//src/main/java/net/starlark/java/syntax",
"//src/main/protobuf:failure_details_java_proto",
"//third_party:auto_value",
"//third_party:error_prone_annotations",
"//third_party:guava",
"//third_party:jsr305",
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ ImmutableSet<Artifact> getPathLevelHintedInclusions(
globKey);
continue;
}
for (PathFragment file : globValue.getMatches().toList()) {
for (PathFragment file : globValue.getMatches()) {
hints.add(
artifactFactory.getSourceArtifact(
packageFragment.getRelative(file), globKey.getPackageRoot()));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
load("@rules_java//java:defs.bzl", "java_library")

package(
default_applicable_licenses = ["//:license"],
default_visibility = ["//src:__subpackages__"],
)

filegroup(
name = "srcs",
srcs = glob(["**"]),
visibility = ["//src:__subpackages__"],
)

java_library(
name = "producers",
srcs = glob(["*.java"]),
deps = [
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata",
"//src/main/java/com/google/devtools/build/lib/cmdline",
"//src/main/java/com/google/devtools/build/lib/io:file_symlink_infinite_expansion_exception",
"//src/main/java/com/google/devtools/build/lib/io:file_symlink_infinite_expansion_uniqueness_function",
"//src/main/java/com/google/devtools/build/lib/io:inconsistent_filesystem_exception",
"//src/main/java/com/google/devtools/build/lib/packages:globber",
"//src/main/java/com/google/devtools/build/lib/skyframe:directory_listing_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:glob_descriptor",
"//src/main/java/com/google/devtools/build/lib/skyframe:ignored_package_prefixes_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_value",
"//src/main/java/com/google/devtools/build/lib/util:pair",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//src/main/java/com/google/devtools/build/skyframe",
"//src/main/java/com/google/devtools/build/skyframe:skyframe-objects",
"//third_party:auto_value",
"//third_party:guava",
"//third_party:jsr305",
],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// Copyright 2023 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.packages.producers;

import com.google.common.base.Preconditions;
import com.google.devtools.build.lib.cmdline.PackageIdentifier;
import com.google.devtools.build.lib.packages.Globber.Operation;
import com.google.devtools.build.lib.packages.producers.GlobComputationProducer.GlobDetail;
import com.google.devtools.build.lib.skyframe.PackageLookupValue;
import com.google.devtools.build.lib.util.Pair;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.skyframe.SkyValue;
import com.google.devtools.build.skyframe.state.StateMachine;
import java.util.Set;
import java.util.function.Consumer;
import javax.annotation.Nullable;

/**
* Handles package (sub-)directory entries which matches part or all of the glob pattern fragments.
*
* <p>Created by {@link PatternWithWildcardProducer} or {@link PatternWithoutWildcardProducer} after
* they confirm that {@link #direntPath} matches part or all of the glob pattern fragments.
*
* <p>Checks whether {@link #direntPath} is a qualified glob matching result, and add to result if
* so. Before creating the next {@link FragmentProducer} also checks whether (1) there are pattern
* fragment(s) left to match and (2) subpackage crossing exists .
*/
final class DirentProducer implements StateMachine, Consumer<SkyValue> {

// -------------------- Input --------------------
private final GlobDetail globDetail;

/** The {@link PathFragment} of the dirent containing the package fragments. */
private final PathFragment direntPath;

private final int fragmentIndex;
private final boolean isDir;

// -------------------- Internal State --------------------
private PackageLookupValue packageLookupValue = null;
@Nullable private final Set<Pair<PathFragment, Integer>> visitedGlobSubTasks;

// -------------------- Output --------------------
private final FragmentProducer.ResultSink resultSink;

DirentProducer(
GlobDetail globDetail,
PathFragment direntPath,
int fragmentIndex,
boolean isDir,
FragmentProducer.ResultSink resultSink,
@Nullable Set<Pair<PathFragment, Integer>> visitedGlobSubTasks) {
// Upstream logic should already have appended some dirent to the package fragment when
// constructing this `direntPath`.
Preconditions.checkArgument(
!direntPath.equals(globDetail.packageIdentifier().getPackageFragment()));
this.direntPath = direntPath;
this.globDetail = globDetail;
this.fragmentIndex = fragmentIndex;
this.isDir = isDir;
this.resultSink = resultSink;
this.visitedGlobSubTasks = visitedGlobSubTasks;
}

@Override
public StateMachine step(Tasks tasks) {
if (!isDir) {
// The dirent is just a file, we need to check whether this is a glob matching before
// returning.
if (shouldAddResult(/* isDir= */ false, /* isSubpackage= */ false)) {
resultSink.acceptPathFragmentWithPackageFragment(direntPath);
}
return DONE;
}

// Check whether the next directory path matches any `IgnoredPackagePrefix`.
for (PathFragment ignoredPrefix : globDetail.ignoredPackagePrefixesPatterns()) {
if (direntPath.startsWith(ignoredPrefix)) {
return DONE;
}
}

tasks.lookUp(
PackageLookupValue.key(
PackageIdentifier.create(globDetail.packageIdentifier().getRepository(), direntPath)),
(Consumer<SkyValue>) this);
return this::checkSubpackageExistence;
}

@Override
public void accept(SkyValue skyValue) {
packageLookupValue = (PackageLookupValue) skyValue;
}

private StateMachine checkSubpackageExistence(Tasks tasks) {
Preconditions.checkNotNull(packageLookupValue);
if (packageLookupValue
instanceof PackageLookupValue.IncorrectRepositoryReferencePackageLookupValue) {
// Cross repository boundary, so glob expansion should not descend into that subdir.
return DONE;
}

if (packageLookupValue.packageExists()) {
// Cross the package boundary. The subdir contains a BUILD file and thus defines another
// package, so glob expansion should not descend into that subdir.
if (globDetail.globOperation().equals(Operation.SUBPACKAGES)) {
// If this is a subpackages() call, we need to check whether this subpackage is a glob
// matching before returning.
if (shouldAddResult(/* isDir= */ true, /* isSubpackage= */ true)) {
resultSink.acceptPathFragmentWithPackageFragment(direntPath);
}
}
return DONE;
}

return addResultsOrCreateNextFragmentProducer(tasks);
}

private StateMachine addResultsOrCreateNextFragmentProducer(Tasks tasks) {
// Even for directory dirent, we need to check whether this path is a matching result.
if (shouldAddResult(/* isDir= */ true, /* isSubpackage= */ false)) {
resultSink.acceptPathFragmentWithPackageFragment(direntPath);
}

int nextFragmentIndex =
globDetail.patternFragments().get(fragmentIndex).equals("**")
? fragmentIndex
: fragmentIndex + 1;
if (nextFragmentIndex == globDetail.patternFragments().size()) {
// When the last glob pattern is not double star, we have already processed all pattern
// fragments, so execution enters this block and immediately returns.
return DONE;
}

if (visitedGlobSubTasks == null
|| visitedGlobSubTasks.add(Pair.of(direntPath, nextFragmentIndex))) {
// Create the next unvisited `FragmentProducer` if it has not been processed.
tasks.enqueue(
new FragmentProducer(
globDetail, direntPath, nextFragmentIndex, visitedGlobSubTasks, resultSink));
}
return DONE;
}

/** Returns {@code true} if {@code path} can be added as a glob matching result. */
private boolean shouldAddResult(boolean isDir, boolean isSubpackage) {
if (fragmentIndex < globDetail.patternFragments().size() - 1) {
// Early return because this dirent path has not matched all pattern fragments.
return false;
}

switch (globDetail.globOperation()) {
case FILES:
return !isDir;
case SUBPACKAGES:
return isDir && isSubpackage;
case FILES_AND_DIRS:
return !isSubpackage;
}
throw new IllegalStateException(
"Unexpected globber globberOperation = [" + globDetail.globOperation() + "]");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// Copyright 2023 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.packages.producers;

import com.google.common.base.Preconditions;
import com.google.devtools.build.lib.cmdline.PackageIdentifier;
import com.google.devtools.build.lib.packages.Globber.Operation;
import com.google.devtools.build.lib.packages.producers.GlobComputationProducer.GlobDetail;
import com.google.devtools.build.lib.util.Pair;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.skyframe.state.StateMachine;
import java.util.Set;
import javax.annotation.Nullable;

/**
* Recursively created to start handling each pattern fragment. Based on whether wildcard character
* exists in the pattern, it creates either {@link PatternWithoutWildcardProducer} or {@link
* PatternWithWildcardProducer} producer.
*
* <p>{@link FragmentProducer} also handles special condition when the pattern is {@code **} by
* immediately skipping the {@code **} and creating the next {@link FragmentProducer}.
*/
final class FragmentProducer implements StateMachine {

/** Accepts matching {@link PathFragment}s or any exceptions wrapped in {@link GlobError}. */
interface ResultSink {

void acceptPathFragmentWithPackageFragment(PathFragment pathFragment);

void acceptGlobError(GlobError error);
}

// -------------------- Input --------------------
private final GlobDetail globDetail;

/**
* Contains package fragments of the {@link PackageIdentifier}. It is guaranteed that:
*
* <ul>
* <li>{@link #base} is a directory;
* <li>there is no subpackage under {@link #base}, when {@link #base} is not the package
* fragment.
* </ul>
*/
private final PathFragment base;

/** Position of the pattern in {@link GlobDetail#patternFragments()} to be processed. */
private final int fragmentIndex;

/**
* The visited set is created to prevent potential duplicate work when handling glob pattern
* containing multiple {@code **}s.
*
* <p>Each pair in the {@link #visitedGlobSubTasks} reflects that some previous {@link
* FragmentProducer} has already processed a state when the {@link #base} is at the {@code
* pair.getFirst()} location and {@link #fragmentIndex} at {@code pair.getSecond()} position in
* the {@link GlobDetail#patternFragments()}.
*
* <p>Consider this concrete example: {@code glob(['**\/a/**\/foo.txt'])} with the only file being
* {@code a/a/foo.txt}.
*
* <p>There are multiple routes to reach a point when a {@code FragmentProducer} whose base is
* {@code a/a/foo.txt} and fragmentIndex is 3 (at "foo.txt") should be created.
*
* <ul>
* <li>One route starts by recursively globbing 'a/**\/foo.txt' in the base directory of the
* package.
* <li>Another route starts by recursively globbing '**\/a/**\/foo.txt' in subdirectory 'a'.
* </ul>
*
* <p>{@link #visitedGlobSubTasks} prevents such a {@code FragmentProducer} from being created and
* processed for the second time, and thus reduces duplicate computation.
*/
@Nullable private final Set<Pair<PathFragment, Integer>> visitedGlobSubTasks;

// -------------------- Output --------------------
final ResultSink resultSink;

FragmentProducer(
GlobDetail globDetail,
PathFragment base,
int fragmentIndex,
@Nullable Set<Pair<PathFragment, Integer>> visitedGlobSubTasks,
ResultSink resultSink) {
// Make sure condition (1) glob patterns contains multiple `**`s and condition (2)
// `visitedGlobSubTasks` is null should be the same.
Preconditions.checkState(
globDetail.containsMultipleDoubleStars() == (visitedGlobSubTasks != null));
this.globDetail = globDetail;
this.base = base;
this.fragmentIndex = fragmentIndex;
this.visitedGlobSubTasks = visitedGlobSubTasks;
this.resultSink = resultSink;
}

@Override
public StateMachine step(Tasks tasks) {
Preconditions.checkState(fragmentIndex < globDetail.patternFragments().size());

String patternFragment = globDetail.patternFragments().get(fragmentIndex);
if (!patternFragment.equals("**")) {
return handlePatternFragment(patternFragment);
}

// It is valid that `**` matches nothing, which is handled in the if-else block below.
if (fragmentIndex < globDetail.patternFragments().size() - 1) {
// In the case when `**` is not the last pattern, skip `**` and directly move onto the next
// pattern fragment.
if (visitedGlobSubTasks == null
|| visitedGlobSubTasks.add(Pair.of(base, fragmentIndex + 1))) {
tasks.enqueue(
new FragmentProducer(
globDetail, base, fragmentIndex + 1, visitedGlobSubTasks, resultSink));
}
} else {
// In the case when `**` is the last pattern, add `base` to result when operator is
// FILES_AND_DIRS.
if (globDetail.globOperation().equals(Operation.FILES_AND_DIRS)
&& !base.equals(globDetail.packageIdentifier().getPackageFragment())) {
resultSink.acceptPathFragmentWithPackageFragment(base);
}
}

// Handle the case when `**` does not match an empty fragment.
return handlePatternFragment(patternFragment);
}

private StateMachine handlePatternFragment(String patternFragment) {
if (!patternFragment.contains("*") && !patternFragment.contains("?")) {
return new PatternWithoutWildcardProducer(
globDetail,
base.getChild(patternFragment),
fragmentIndex,
resultSink,
visitedGlobSubTasks);
}
return new PatternWithWildcardProducer(
globDetail, base, fragmentIndex, resultSink, visitedGlobSubTasks);
}
}
Loading

1 comment on commit 962db94

@yuyue730
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Below are messages for this commit: Add GlobFunctionWithStateMachine implementation and apply it to most of the SkyframeExecutors.

Problem with canonical recursive implementation:

The canonical GlobFunction implementation recursively creates sub-glob nodes until the full glob pattern is evaluated. If the glob pattern contains many fragments and/or the package contains multiple sub-directories from vertical and/or horizontal dimensions. The sub-glob nodes tree can potentially become giant, leading to significant memory overhead.

This commit intends to eliminate the potentially giant sub-glob nodes tree for cases when incrementality performance is not strict.

Work done in this commit:

  • Refactor GlobFunction and GlobValue into two pairs of subclasses:
    • GlobFunctionWithMultipleRecursiveFunctions and GlobValueWithNestedSet: This pair is a copy of the canonical implementation of GlobFunction. It is still preferred to be used when incrementality performance is strict.
    • GlobFunctionWithRecursionInSingleFunction and GlobValueWithImmutableSet: This pair is the new GlobFunction implementation using Skyframe StateMachine mechanism (https://bazel.build/contribute/statemachine-guide).
  • Implement multiple StateMachine producers under third_party/bazel/src/main/java/com/google/devtools/build/lib/packages/producers directory to support GlobFunctionWithRecursionInSingleFunction computation;
  • Switch to use GlobFunctionWithRecursionInSingleFunction for all conditions which does not require strict incremental evaluation performance;
  • Refactor and add more tests in GlobFunctionTest to cover both recursive and State Machine globbing logic.

Benefits of using GlobFunctionWithRecursionInSingleFunction:

  • GlobFunctionWithRecursionInSingleFunction uses StateMachines as the recursive globbing units instead of Glob nodes. The StateMachines are discarded after each blaze build or query invocation. And thus, it eliminates giant sub-Glob nodes tree cached in Skyframe.
  • The number of skyframe restarting times is also significantly reduced. This is due to StateMachine enables structured concurrency when querying dependent SkyKeys.

Limitations of current GlobFunctionWithRecursionInSingleFunction:

Current GlobFunctionWithRecursionInSingleFunction is not performance friendly for incremental blaze evaluation. Due to the lose of the sub-glob nodes tree, skyframe incrementality is totally lost. So the computation cost of clean and non-clean glob evaluation is the same with GlobFunctionWithRecursionInSingleFunction, even when this is just a simple filesystem change in the package.

On the other hand, with GlobFunctionWithMultipleRecursiveFunctions, the sub-Glob nodes tree cached in skyframe guarantees incrementality. A simple filesystem change in the package leads to re-evaluation a very small subset of the sub-Glob nodes tree.

Experiments have also shown significant performance regression for certain incremental evaluations.

Please sign in to comment.