-
Notifications
You must be signed in to change notification settings - Fork 4.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PiperOrigin-RevId: 588441468 Change-Id: Ib98b3e661efcc230b079c4653344b6b3e1683c8a
- Loading branch information
1 parent
d160b12
commit 962db94
Showing
22 changed files
with
2,023 additions
and
560 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
37 changes: 37 additions & 0 deletions
37
src/main/java/com/google/devtools/build/lib/packages/producers/BUILD
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
load("@rules_java//java:defs.bzl", "java_library") | ||
|
||
package( | ||
default_applicable_licenses = ["//:license"], | ||
default_visibility = ["//src:__subpackages__"], | ||
) | ||
|
||
filegroup( | ||
name = "srcs", | ||
srcs = glob(["**"]), | ||
visibility = ["//src:__subpackages__"], | ||
) | ||
|
||
java_library( | ||
name = "producers", | ||
srcs = glob(["*.java"]), | ||
deps = [ | ||
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata", | ||
"//src/main/java/com/google/devtools/build/lib/cmdline", | ||
"//src/main/java/com/google/devtools/build/lib/io:file_symlink_infinite_expansion_exception", | ||
"//src/main/java/com/google/devtools/build/lib/io:file_symlink_infinite_expansion_uniqueness_function", | ||
"//src/main/java/com/google/devtools/build/lib/io:inconsistent_filesystem_exception", | ||
"//src/main/java/com/google/devtools/build/lib/packages:globber", | ||
"//src/main/java/com/google/devtools/build/lib/skyframe:directory_listing_value", | ||
"//src/main/java/com/google/devtools/build/lib/skyframe:glob_descriptor", | ||
"//src/main/java/com/google/devtools/build/lib/skyframe:ignored_package_prefixes_value", | ||
"//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_value", | ||
"//src/main/java/com/google/devtools/build/lib/util:pair", | ||
"//src/main/java/com/google/devtools/build/lib/vfs", | ||
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", | ||
"//src/main/java/com/google/devtools/build/skyframe", | ||
"//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", | ||
"//third_party:auto_value", | ||
"//third_party:guava", | ||
"//third_party:jsr305", | ||
], | ||
) |
174 changes: 174 additions & 0 deletions
174
src/main/java/com/google/devtools/build/lib/packages/producers/DirentProducer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
// Copyright 2023 The Bazel Authors. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
package com.google.devtools.build.lib.packages.producers; | ||
|
||
import com.google.common.base.Preconditions; | ||
import com.google.devtools.build.lib.cmdline.PackageIdentifier; | ||
import com.google.devtools.build.lib.packages.Globber.Operation; | ||
import com.google.devtools.build.lib.packages.producers.GlobComputationProducer.GlobDetail; | ||
import com.google.devtools.build.lib.skyframe.PackageLookupValue; | ||
import com.google.devtools.build.lib.util.Pair; | ||
import com.google.devtools.build.lib.vfs.PathFragment; | ||
import com.google.devtools.build.skyframe.SkyValue; | ||
import com.google.devtools.build.skyframe.state.StateMachine; | ||
import java.util.Set; | ||
import java.util.function.Consumer; | ||
import javax.annotation.Nullable; | ||
|
||
/** | ||
* Handles package (sub-)directory entries which matches part or all of the glob pattern fragments. | ||
* | ||
* <p>Created by {@link PatternWithWildcardProducer} or {@link PatternWithoutWildcardProducer} after | ||
* they confirm that {@link #direntPath} matches part or all of the glob pattern fragments. | ||
* | ||
* <p>Checks whether {@link #direntPath} is a qualified glob matching result, and add to result if | ||
* so. Before creating the next {@link FragmentProducer} also checks whether (1) there are pattern | ||
* fragment(s) left to match and (2) subpackage crossing exists . | ||
*/ | ||
final class DirentProducer implements StateMachine, Consumer<SkyValue> { | ||
|
||
// -------------------- Input -------------------- | ||
private final GlobDetail globDetail; | ||
|
||
/** The {@link PathFragment} of the dirent containing the package fragments. */ | ||
private final PathFragment direntPath; | ||
|
||
private final int fragmentIndex; | ||
private final boolean isDir; | ||
|
||
// -------------------- Internal State -------------------- | ||
private PackageLookupValue packageLookupValue = null; | ||
@Nullable private final Set<Pair<PathFragment, Integer>> visitedGlobSubTasks; | ||
|
||
// -------------------- Output -------------------- | ||
private final FragmentProducer.ResultSink resultSink; | ||
|
||
DirentProducer( | ||
GlobDetail globDetail, | ||
PathFragment direntPath, | ||
int fragmentIndex, | ||
boolean isDir, | ||
FragmentProducer.ResultSink resultSink, | ||
@Nullable Set<Pair<PathFragment, Integer>> visitedGlobSubTasks) { | ||
// Upstream logic should already have appended some dirent to the package fragment when | ||
// constructing this `direntPath`. | ||
Preconditions.checkArgument( | ||
!direntPath.equals(globDetail.packageIdentifier().getPackageFragment())); | ||
this.direntPath = direntPath; | ||
this.globDetail = globDetail; | ||
this.fragmentIndex = fragmentIndex; | ||
this.isDir = isDir; | ||
this.resultSink = resultSink; | ||
this.visitedGlobSubTasks = visitedGlobSubTasks; | ||
} | ||
|
||
@Override | ||
public StateMachine step(Tasks tasks) { | ||
if (!isDir) { | ||
// The dirent is just a file, we need to check whether this is a glob matching before | ||
// returning. | ||
if (shouldAddResult(/* isDir= */ false, /* isSubpackage= */ false)) { | ||
resultSink.acceptPathFragmentWithPackageFragment(direntPath); | ||
} | ||
return DONE; | ||
} | ||
|
||
// Check whether the next directory path matches any `IgnoredPackagePrefix`. | ||
for (PathFragment ignoredPrefix : globDetail.ignoredPackagePrefixesPatterns()) { | ||
if (direntPath.startsWith(ignoredPrefix)) { | ||
return DONE; | ||
} | ||
} | ||
|
||
tasks.lookUp( | ||
PackageLookupValue.key( | ||
PackageIdentifier.create(globDetail.packageIdentifier().getRepository(), direntPath)), | ||
(Consumer<SkyValue>) this); | ||
return this::checkSubpackageExistence; | ||
} | ||
|
||
@Override | ||
public void accept(SkyValue skyValue) { | ||
packageLookupValue = (PackageLookupValue) skyValue; | ||
} | ||
|
||
private StateMachine checkSubpackageExistence(Tasks tasks) { | ||
Preconditions.checkNotNull(packageLookupValue); | ||
if (packageLookupValue | ||
instanceof PackageLookupValue.IncorrectRepositoryReferencePackageLookupValue) { | ||
// Cross repository boundary, so glob expansion should not descend into that subdir. | ||
return DONE; | ||
} | ||
|
||
if (packageLookupValue.packageExists()) { | ||
// Cross the package boundary. The subdir contains a BUILD file and thus defines another | ||
// package, so glob expansion should not descend into that subdir. | ||
if (globDetail.globOperation().equals(Operation.SUBPACKAGES)) { | ||
// If this is a subpackages() call, we need to check whether this subpackage is a glob | ||
// matching before returning. | ||
if (shouldAddResult(/* isDir= */ true, /* isSubpackage= */ true)) { | ||
resultSink.acceptPathFragmentWithPackageFragment(direntPath); | ||
} | ||
} | ||
return DONE; | ||
} | ||
|
||
return addResultsOrCreateNextFragmentProducer(tasks); | ||
} | ||
|
||
private StateMachine addResultsOrCreateNextFragmentProducer(Tasks tasks) { | ||
// Even for directory dirent, we need to check whether this path is a matching result. | ||
if (shouldAddResult(/* isDir= */ true, /* isSubpackage= */ false)) { | ||
resultSink.acceptPathFragmentWithPackageFragment(direntPath); | ||
} | ||
|
||
int nextFragmentIndex = | ||
globDetail.patternFragments().get(fragmentIndex).equals("**") | ||
? fragmentIndex | ||
: fragmentIndex + 1; | ||
if (nextFragmentIndex == globDetail.patternFragments().size()) { | ||
// When the last glob pattern is not double star, we have already processed all pattern | ||
// fragments, so execution enters this block and immediately returns. | ||
return DONE; | ||
} | ||
|
||
if (visitedGlobSubTasks == null | ||
|| visitedGlobSubTasks.add(Pair.of(direntPath, nextFragmentIndex))) { | ||
// Create the next unvisited `FragmentProducer` if it has not been processed. | ||
tasks.enqueue( | ||
new FragmentProducer( | ||
globDetail, direntPath, nextFragmentIndex, visitedGlobSubTasks, resultSink)); | ||
} | ||
return DONE; | ||
} | ||
|
||
/** Returns {@code true} if {@code path} can be added as a glob matching result. */ | ||
private boolean shouldAddResult(boolean isDir, boolean isSubpackage) { | ||
if (fragmentIndex < globDetail.patternFragments().size() - 1) { | ||
// Early return because this dirent path has not matched all pattern fragments. | ||
return false; | ||
} | ||
|
||
switch (globDetail.globOperation()) { | ||
case FILES: | ||
return !isDir; | ||
case SUBPACKAGES: | ||
return isDir && isSubpackage; | ||
case FILES_AND_DIRS: | ||
return !isSubpackage; | ||
} | ||
throw new IllegalStateException( | ||
"Unexpected globber globberOperation = [" + globDetail.globOperation() + "]"); | ||
} | ||
} |
151 changes: 151 additions & 0 deletions
151
src/main/java/com/google/devtools/build/lib/packages/producers/FragmentProducer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
// Copyright 2023 The Bazel Authors. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
package com.google.devtools.build.lib.packages.producers; | ||
|
||
import com.google.common.base.Preconditions; | ||
import com.google.devtools.build.lib.cmdline.PackageIdentifier; | ||
import com.google.devtools.build.lib.packages.Globber.Operation; | ||
import com.google.devtools.build.lib.packages.producers.GlobComputationProducer.GlobDetail; | ||
import com.google.devtools.build.lib.util.Pair; | ||
import com.google.devtools.build.lib.vfs.PathFragment; | ||
import com.google.devtools.build.skyframe.state.StateMachine; | ||
import java.util.Set; | ||
import javax.annotation.Nullable; | ||
|
||
/** | ||
* Recursively created to start handling each pattern fragment. Based on whether wildcard character | ||
* exists in the pattern, it creates either {@link PatternWithoutWildcardProducer} or {@link | ||
* PatternWithWildcardProducer} producer. | ||
* | ||
* <p>{@link FragmentProducer} also handles special condition when the pattern is {@code **} by | ||
* immediately skipping the {@code **} and creating the next {@link FragmentProducer}. | ||
*/ | ||
final class FragmentProducer implements StateMachine { | ||
|
||
/** Accepts matching {@link PathFragment}s or any exceptions wrapped in {@link GlobError}. */ | ||
interface ResultSink { | ||
|
||
void acceptPathFragmentWithPackageFragment(PathFragment pathFragment); | ||
|
||
void acceptGlobError(GlobError error); | ||
} | ||
|
||
// -------------------- Input -------------------- | ||
private final GlobDetail globDetail; | ||
|
||
/** | ||
* Contains package fragments of the {@link PackageIdentifier}. It is guaranteed that: | ||
* | ||
* <ul> | ||
* <li>{@link #base} is a directory; | ||
* <li>there is no subpackage under {@link #base}, when {@link #base} is not the package | ||
* fragment. | ||
* </ul> | ||
*/ | ||
private final PathFragment base; | ||
|
||
/** Position of the pattern in {@link GlobDetail#patternFragments()} to be processed. */ | ||
private final int fragmentIndex; | ||
|
||
/** | ||
* The visited set is created to prevent potential duplicate work when handling glob pattern | ||
* containing multiple {@code **}s. | ||
* | ||
* <p>Each pair in the {@link #visitedGlobSubTasks} reflects that some previous {@link | ||
* FragmentProducer} has already processed a state when the {@link #base} is at the {@code | ||
* pair.getFirst()} location and {@link #fragmentIndex} at {@code pair.getSecond()} position in | ||
* the {@link GlobDetail#patternFragments()}. | ||
* | ||
* <p>Consider this concrete example: {@code glob(['**\/a/**\/foo.txt'])} with the only file being | ||
* {@code a/a/foo.txt}. | ||
* | ||
* <p>There are multiple routes to reach a point when a {@code FragmentProducer} whose base is | ||
* {@code a/a/foo.txt} and fragmentIndex is 3 (at "foo.txt") should be created. | ||
* | ||
* <ul> | ||
* <li>One route starts by recursively globbing 'a/**\/foo.txt' in the base directory of the | ||
* package. | ||
* <li>Another route starts by recursively globbing '**\/a/**\/foo.txt' in subdirectory 'a'. | ||
* </ul> | ||
* | ||
* <p>{@link #visitedGlobSubTasks} prevents such a {@code FragmentProducer} from being created and | ||
* processed for the second time, and thus reduces duplicate computation. | ||
*/ | ||
@Nullable private final Set<Pair<PathFragment, Integer>> visitedGlobSubTasks; | ||
|
||
// -------------------- Output -------------------- | ||
final ResultSink resultSink; | ||
|
||
FragmentProducer( | ||
GlobDetail globDetail, | ||
PathFragment base, | ||
int fragmentIndex, | ||
@Nullable Set<Pair<PathFragment, Integer>> visitedGlobSubTasks, | ||
ResultSink resultSink) { | ||
// Make sure condition (1) glob patterns contains multiple `**`s and condition (2) | ||
// `visitedGlobSubTasks` is null should be the same. | ||
Preconditions.checkState( | ||
globDetail.containsMultipleDoubleStars() == (visitedGlobSubTasks != null)); | ||
this.globDetail = globDetail; | ||
this.base = base; | ||
this.fragmentIndex = fragmentIndex; | ||
this.visitedGlobSubTasks = visitedGlobSubTasks; | ||
this.resultSink = resultSink; | ||
} | ||
|
||
@Override | ||
public StateMachine step(Tasks tasks) { | ||
Preconditions.checkState(fragmentIndex < globDetail.patternFragments().size()); | ||
|
||
String patternFragment = globDetail.patternFragments().get(fragmentIndex); | ||
if (!patternFragment.equals("**")) { | ||
return handlePatternFragment(patternFragment); | ||
} | ||
|
||
// It is valid that `**` matches nothing, which is handled in the if-else block below. | ||
if (fragmentIndex < globDetail.patternFragments().size() - 1) { | ||
// In the case when `**` is not the last pattern, skip `**` and directly move onto the next | ||
// pattern fragment. | ||
if (visitedGlobSubTasks == null | ||
|| visitedGlobSubTasks.add(Pair.of(base, fragmentIndex + 1))) { | ||
tasks.enqueue( | ||
new FragmentProducer( | ||
globDetail, base, fragmentIndex + 1, visitedGlobSubTasks, resultSink)); | ||
} | ||
} else { | ||
// In the case when `**` is the last pattern, add `base` to result when operator is | ||
// FILES_AND_DIRS. | ||
if (globDetail.globOperation().equals(Operation.FILES_AND_DIRS) | ||
&& !base.equals(globDetail.packageIdentifier().getPackageFragment())) { | ||
resultSink.acceptPathFragmentWithPackageFragment(base); | ||
} | ||
} | ||
|
||
// Handle the case when `**` does not match an empty fragment. | ||
return handlePatternFragment(patternFragment); | ||
} | ||
|
||
private StateMachine handlePatternFragment(String patternFragment) { | ||
if (!patternFragment.contains("*") && !patternFragment.contains("?")) { | ||
return new PatternWithoutWildcardProducer( | ||
globDetail, | ||
base.getChild(patternFragment), | ||
fragmentIndex, | ||
resultSink, | ||
visitedGlobSubTasks); | ||
} | ||
return new PatternWithWildcardProducer( | ||
globDetail, base, fragmentIndex, resultSink, visitedGlobSubTasks); | ||
} | ||
} |
Oops, something went wrong.
962db94
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Below are messages for this commit: Add
GlobFunctionWithStateMachine
implementation and apply it to most of theSkyframeExecutors
.Problem with canonical recursive implementation:
The canonical
GlobFunction
implementation recursively creates sub-glob nodes until the full glob pattern is evaluated. If the glob pattern contains many fragments and/or the package contains multiple sub-directories from vertical and/or horizontal dimensions. The sub-glob nodes tree can potentially become giant, leading to significant memory overhead.This commit intends to eliminate the potentially giant sub-glob nodes tree for cases when incrementality performance is not strict.
Work done in this commit:
GlobFunction
andGlobValue
into two pairs of subclasses:GlobFunctionWithMultipleRecursiveFunctions
andGlobValueWithNestedSet
: This pair is a copy of the canonical implementation ofGlobFunction
. It is still preferred to be used when incrementality performance is strict.GlobFunctionWithRecursionInSingleFunction
andGlobValueWithImmutableSet
: This pair is the newGlobFunction
implementation using Skyframe StateMachine mechanism (https://bazel.build/contribute/statemachine-guide).StateMachine
producers underthird_party/bazel/src/main/java/com/google/devtools/build/lib/packages/producers
directory to supportGlobFunctionWithRecursionInSingleFunction
computation;GlobFunctionWithRecursionInSingleFunction
for all conditions which does not require strict incremental evaluation performance;GlobFunctionTest
to cover both recursive and State Machine globbing logic.Benefits of using GlobFunctionWithRecursionInSingleFunction:
GlobFunctionWithRecursionInSingleFunction
usesStateMachine
s as the recursive globbing units instead of Glob nodes. TheStateMachine
s are discarded after each blaze build or query invocation. And thus, it eliminates giant sub-Glob nodes tree cached in Skyframe.StateMachine
enables structured concurrency when querying dependentSkyKey
s.Limitations of current
GlobFunctionWithRecursionInSingleFunction
:Current
GlobFunctionWithRecursionInSingleFunction
is not performance friendly for incremental blaze evaluation. Due to the lose of the sub-glob nodes tree, skyframe incrementality is totally lost. So the computation cost of clean and non-clean glob evaluation is the same withGlobFunctionWithRecursionInSingleFunction
, even when this is just a simple filesystem change in the package.On the other hand, with
GlobFunctionWithMultipleRecursiveFunctions
, the sub-Glob nodes tree cached in skyframe guarantees incrementality. A simple filesystem change in the package leads to re-evaluation a very small subset of the sub-Glob nodes tree.Experiments have also shown significant performance regression for certain incremental evaluations.