Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add simplified critical path scheduler to improve build times #2177

Merged
merged 22 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
4af9fc5
support explicit build order
nico Sep 21, 2014
12b5b7c
Use explicit std:: style and remove debug print statements
peterbell10 Aug 25, 2021
8e23200
Change priority_list_ into a std::priority_queue of ready edges
peterbell10 Aug 25, 2021
2fcf403
Fix critical time calculation
peterbell10 Aug 25, 2021
c5d355c
clang-format diff
peterbell10 Aug 25, 2021
63b0a9a
Address review comments
peterbell10 Aug 25, 2021
5b8d19b
Fix total_time computation
peterbell10 Aug 25, 2021
fe80637
Address review comments
peterbell10 Aug 27, 2021
c83167f
Improve heuristic for unknown cost edges
peterbell10 Aug 27, 2021
77448b4
Remove redundant include
peterbell10 Oct 8, 2021
24d1f5f
Address review comments
peterbell10 Mar 7, 2022
1af6daf
Merge remote-tracking branch 'upstream/master' into cpsched
peterbell10 Mar 7, 2022
6ee9049
Remove unnecessary whitespace
peterbell10 Mar 7, 2022
1128a56
Add simple test for EdgeQueue
peterbell10 Mar 7, 2022
a861164
Improve comments and retrieve edges into ready_queue directly
peterbell10 Mar 7, 2022
026498f
Add run_time_ms accessors and more comments
peterbell10 Mar 7, 2022
4bd8db1
Add test and fix priority bug
peterbell10 Mar 8, 2022
a643af2
Pool: sort equally-weighted edges by priority
peterbell10 Mar 8, 2022
f2333b7
Rename critical_time to critical_time_ms
peterbell10 Mar 8, 2022
09d4faa
Clarify the purpose of active_edges in back-propagation
peterbell10 Mar 8, 2022
18220a3
Merge remote-tracking branch 'upstream/master' into cpsched-2
peterbell10 Aug 10, 2022
29fe3ef
Simplify scheduler to not use build log/execution time
peterbell10 Aug 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 122 additions & 7 deletions src/build.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ void Plan::Reset() {
}

// Registers a user-requested target with the plan.
// The target is appended to targets_ (read later by ComputeCriticalPath) and
// then handed to AddSubTarget with no dependent and no dyndep walk set.
// Returns whatever AddSubTarget returns; |err| is forwarded to it unchanged.
bool Plan::AddTarget(const Node* target, string* err) {
targets_.push_back(target);
return AddSubTarget(target, NULL, err, NULL);
}

Expand Down Expand Up @@ -123,8 +124,6 @@ bool Plan::AddSubTarget(const Node* node, const Node* dependent, string* err,
if (node->dirty() && want == kWantNothing) {
want = kWantToStart;
EdgeWanted(edge);
if (!dyndep_walk && edge->AllInputsReady())
ScheduleWork(want_ins.first);
}

if (dyndep_walk)
Expand All @@ -151,10 +150,10 @@ void Plan::EdgeWanted(const Edge* edge) {
Edge* Plan::FindWork() {
if (ready_.empty())
return NULL;
EdgeSet::iterator e = ready_.begin();
Edge* edge = *e;
ready_.erase(e);
return edge;

Edge* work = ready_.top();
ready_.pop();
return work;
}

void Plan::ScheduleWork(map<Edge*, Want>::iterator want_e) {
Expand All @@ -175,7 +174,7 @@ void Plan::ScheduleWork(map<Edge*, Want>::iterator want_e) {
pool->RetrieveReadyEdges(&ready_);
} else {
pool->EdgeScheduled(*edge);
ready_.insert(edge);
ready_.push(edge);
}
}

Expand Down Expand Up @@ -437,6 +436,121 @@ void Plan::UnmarkDependents(const Node* node, set<Node*>* dependents) {
}
}

namespace {

// Predicate backed by an externally owned set of pointers.
// Each call records the pointer in the set and reports whether it was
// already there, so it can be used both to filter duplicates and to test
// membership-with-insert in a single step.
template <typename T>
struct SeenBefore {
  std::set<const T*>* seen_;

  SeenBefore(std::set<const T*>* seen) : seen_(seen) {}

  bool operator() (const T* item) {
    // insert() reports through .second whether a new element was added;
    // "not newly added" means the item had been seen before.
    const bool newly_added = seen_->insert(item).second;
    return !newly_added;
  }
};

// Weight an edge contributes when computing critical paths.
// A phony edge costs nothing (0); every other edge counts the same (1).
int64_t EdgeWeightHeuristic(Edge *edge) {
  if (edge->is_phony())
    return 0;
  return 1;
}

} // namespace

void Plan::ComputeCriticalPath() {
METRIC_RECORD("ComputeCriticalPath");
// Remove duplicate targets
{
std::set<const Node*> seen;
SeenBefore<Node> seen_before(&seen);
targets_.erase(std::remove_if(targets_.begin(), targets_.end(), seen_before),
targets_.end());
}

// Use backflow algorithm to compute the critical path for all
// nodes, starting from the destination nodes.
// XXX: ignores pools
std::queue<Edge*> work_queue; // Queue, for breadth-first traversal
// The set of edges currently in work_queue, to avoid duplicates.
std::set<const Edge*> active_edges;
SeenBefore<Edge> seen_edge(&active_edges);

for (size_t i = 0; i < targets_.size(); ++i) {
const Node* target = targets_[i];
if (Edge* in = target->in_edge()) {
int64_t edge_weight = EdgeWeightHeuristic(in);
in->set_critical_path_weight(
std::max<int64_t>(edge_weight, in->critical_path_weight()));
if (!seen_edge(in)) {
work_queue.push(in);
}
}
}

while (!work_queue.empty()) {
Edge* e = work_queue.front();
work_queue.pop();
// If the critical path of any dependent edges is updated, this
// edge may need to be processed again. So re-allow insertion.
active_edges.erase(e);

for (std::vector<Node*>::iterator it = e->inputs_.begin(),
end = e->inputs_.end();
it != end; ++it) {
Edge* in = (*it)->in_edge();
if (!in) {
continue;
}
// Only process edge if this node offers a higher weighted path
const int64_t edge_weight = EdgeWeightHeuristic(in);
const int64_t proposed_weight = e->critical_path_weight() + edge_weight;
if (proposed_weight > in->critical_path_weight()) {
in->set_critical_path_weight(proposed_weight);
if (!seen_edge(in)) {
work_queue.push(in);
}
}
}
}
}

void Plan::ScheduleInitialEdges() {
// Add ready edges to queue.
assert(ready_.empty());
std::set<Pool*> pools;

for (std::map<Edge*, Plan::Want>::iterator it = want_.begin(),
end = want_.end(); it != end; ++it) {
Edge* edge = it->first;
Plan::Want want = it->second;
if (!(want == kWantToStart && edge->AllInputsReady())) {
continue;
}

Pool* pool = edge->pool();
if (pool->ShouldDelayEdge()) {
pool->DelayEdge(edge);
pools.insert(pool);
} else {
ScheduleWork(it);
}
}

// Call RetrieveReadyEdges only once at the end so higher priority
// edges are retrieved first, not the ones that happen to be first
// in the want_ map.
for (std::set<Pool*>::iterator it=pools.begin(),
end = pools.end(); it != end; ++it) {
(*it)->RetrieveReadyEdges(&ready_);
}
}

// Prepares the ready queue once all targets have been added: first assigns
// critical path weights to the edges, then queues the edges that can start.
void Plan::PrepareQueue() {
// Order matters: ScheduleInitialEdges is declared as having to run after
// ComputeCriticalPath.
ComputeCriticalPath();
ScheduleInitialEdges();
}

void Plan::Dump() const {
printf("pending: %d\n", (int)want_.size());
for (map<Edge*, Want>::const_iterator e = want_.begin(); e != want_.end(); ++e) {
Expand Down Expand Up @@ -606,6 +720,7 @@ bool Builder::AlreadyUpToDate() const {

bool Builder::Build(string* err) {
assert(!AlreadyUpToDate());
plan_.PrepareQueue();

status_->PlanHasTotalEdges(plan_.command_edge_count());
int pending_commands = 0;
Expand Down
38 changes: 24 additions & 14 deletions src/build.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,11 @@
#include <cstdio>
#include <map>
#include <memory>
#include <queue>
#include <string>
#include <vector>

#include "depfile_parser.h"
#include "graph.h" // XXX needed for DependencyScan; should rearrange.
#include "graph.h"
#include "exit_status.h"
#include "util.h" // int64_t

Expand Down Expand Up @@ -76,21 +75,13 @@ struct Plan {
/// Reset state. Clears want and ready sets.
void Reset();

// After all targets have been added, prepares the ready queue for FindWork().
void PrepareQueue();

/// Update the build plan to account for modifications made to the graph
/// by information loaded from a dyndep file.
bool DyndepsLoaded(DependencyScan* scan, const Node* node,
const DyndepFile& ddf, std::string* err);
private:
bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err);
void UnmarkDependents(const Node* node, std::set<Node*>* dependents);
bool AddSubTarget(const Node* node, const Node* dependent, std::string* err,
std::set<Edge*>* dyndep_walk);

/// Update plan with knowledge that the given node is up to date.
/// If the node is a dyndep binding on any of its dependents, this
/// loads dynamic dependencies from the node's path.
/// Returns 'false' if loading dyndep info fails and 'true' otherwise.
bool NodeFinished(Node* node, std::string* err);

/// Enumerate possible steps we want for an edge.
enum Want
Expand All @@ -105,6 +96,23 @@ struct Plan {
kWantToFinish
};

private:
void ComputeCriticalPath();
bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err);
void UnmarkDependents(const Node* node, std::set<Node*>* dependents);
bool AddSubTarget(const Node* node, const Node* dependent, std::string* err,
std::set<Edge*>* dyndep_walk);

// Add edges that are in the kWantToStart state to the ready queue.
// Must be called after ComputeCriticalPath and before FindWork.
void ScheduleInitialEdges();

/// Update plan with knowledge that the given node is up to date.
/// If the node is a dyndep binding on any of its dependents, this
/// loads dynamic dependencies from the node's path.
/// Returns 'false' if loading dyndep info fails and 'true' otherwise.
bool NodeFinished(Node* node, std::string* err);

void EdgeWanted(const Edge* edge);
bool EdgeMaybeReady(std::map<Edge*, Want>::iterator want_e, std::string* err);

Expand All @@ -119,9 +127,11 @@ struct Plan {
/// we want for the edge.
std::map<Edge*, Want> want_;

EdgeSet ready_;
EdgePriorityQueue ready_;

Builder* builder_;
/// User-provided targets in build order; earlier ones have higher priority.
std::vector<const Node*> targets_;

/// Total number of edges that have commands (not phony).
int command_edges_;
Expand Down
Loading