Skip to content

Commit

Permalink
Add WorkerPool metrics in blaze.invocations table.
Browse files Browse the repository at this point in the history
Added metrics counting the workers created and destroyed per worker pool hash during an invocation. This could help to analyze eviction strategies.

However, these metrics need to be interpreted with care, because workers can be destroyed at the beginning of a build if the configuration has changed.

PiperOrigin-RevId: 526936395
Change-Id: I58f9fab5935b2ff627da8098f44221aaa94f82cf
  • Loading branch information
Googler authored and fweikert committed May 25, 2023
1 parent 85e1242 commit f4d27d4
Show file tree
Hide file tree
Showing 9 changed files with 204 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,27 @@ message BuildMetrics {
}

NetworkMetrics network_metrics = 10;

// Information about worker pools: how many workers were created and destroyed
// during an invocation, grouped by worker pool hash.
message WorkerPoolMetrics {
// Statistics of worker pools, one entry per worker pool hash. Conceptually
// this is a map from worker pool hash to statistics, encoded as a repeated
// field.
repeated WorkerPoolStats worker_pool_stats = 1;

// Creation/destruction counters for the workers sharing one worker pool hash.
message WorkerPoolStats {
// Hash of worker pool these stats are for. Contains information about
// startup flags.
int32 hash = 1;
// Mnemonic of workers these stats are for.
string mnemonic = 2;
// Number of workers created during a build.
int64 created_count = 3;
// Number of workers destroyed during a build. Interpret with care: workers
// may be destroyed at the beginning of a build if the configuration changed.
int64 destroyed_count = 4;
}
}

WorkerPoolMetrics worker_pool_metrics = 11;
}

// Event providing additional statistics/logs after completion of the build.
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/metrics/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/profiler:network_metrics_collector",
"//src/main/java/com/google/devtools/build/lib/skyframe:execution_finished_event",
"//src/main/java/com/google/devtools/build/lib/skyframe:top_level_status_events",
"//src/main/java/com/google/devtools/build/lib/worker:worker_events",
"//src/main/java/com/google/devtools/build/lib/worker:worker_metric",
"//src/main/java/com/google/devtools/build/skyframe:skyframe_graph_stats_event",
"//src/main/java/com/google/devtools/common/options",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import com.google.devtools.build.lib.buildeventstream.BuildEventStreamProtos.BuildMetrics.PackageMetrics;
import com.google.devtools.build.lib.buildeventstream.BuildEventStreamProtos.BuildMetrics.TargetMetrics;
import com.google.devtools.build.lib.buildeventstream.BuildEventStreamProtos.BuildMetrics.TimingMetrics;
import com.google.devtools.build.lib.buildeventstream.BuildEventStreamProtos.BuildMetrics.WorkerPoolMetrics;
import com.google.devtools.build.lib.buildtool.BuildPrecompleteEvent;
import com.google.devtools.build.lib.buildtool.buildevent.ExecutionStartingEvent;
import com.google.devtools.build.lib.clock.BlazeClock;
Expand All @@ -51,11 +52,14 @@
import com.google.devtools.build.lib.runtime.SpawnStats;
import com.google.devtools.build.lib.skyframe.ExecutionFinishedEvent;
import com.google.devtools.build.lib.skyframe.TopLevelStatusEvents.TopLevelTargetPendingExecutionEvent;
import com.google.devtools.build.lib.worker.WorkerCreatedEvent;
import com.google.devtools.build.lib.worker.WorkerDestroyedEvent;
import com.google.devtools.build.lib.worker.WorkerMetricsCollector;
import com.google.devtools.build.skyframe.SkyframeGraphStatsEvent;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.time.Duration;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
Expand All @@ -69,6 +73,8 @@ class MetricsCollector {
private final boolean recordMetricsForAllMnemonics;
// For ActionSummary.
private final ConcurrentHashMap<String, ActionStats> actionStatsMap = new ConcurrentHashMap<>();
// Mapping from worker pool hash, to statistics which we collect during a build.
private final HashMap<Integer, WorkerPoolStats> workerPoolStats = new HashMap<>();

// For CumulativeMetrics.
private final AtomicInteger numAnalyses;
Expand Down Expand Up @@ -154,6 +160,34 @@ public synchronized void accountForBuild(
}
}

/**
 * Records that a worker was destroyed by bumping the destroyed counter of the worker pool the
 * event refers to. The pool's stats entry is created on first use.
 */
@Subscribe
public void onWorkerDestroyedAction(WorkerDestroyedEvent event) {
  // The stats map is a plain HashMap, so every access is guarded by this object's monitor.
  synchronized (this) {
    int poolHash = event.getWorkerPoolHash();
    String poolMnemonic = event.getMnemonic();
    getWorkerPoolStatsOrInsert(poolHash, poolMnemonic).incrementDestroyedCount();
  }
}

/**
 * Records that a worker was created by bumping the created counter of the worker pool the event
 * refers to. The pool's stats entry is created on first use.
 */
@Subscribe
public void onWorkerCreatedAction(WorkerCreatedEvent event) {
  // The stats map is a plain HashMap, so every access is guarded by this object's monitor.
  synchronized (this) {
    int poolHash = event.getWorkerPoolHash();
    String poolMnemonic = event.getMnemonic();
    getWorkerPoolStatsOrInsert(poolHash, poolMnemonic).incrementCreatedCount();
  }
}

/**
 * Returns the stats entry registered for {@code workerPoolHash}, first inserting a fresh entry
 * labelled with {@code mnemonic} if none exists yet. An existing entry keeps the mnemonic it was
 * created with.
 */
private WorkerPoolStats getWorkerPoolStatsOrInsert(int workerPoolHash, String mnemonic) {
  return workerPoolStats.computeIfAbsent(
      workerPoolHash, unusedHash -> new WorkerPoolStats(mnemonic));
}

@SuppressWarnings("unused")
@Subscribe
@AllowConcurrentEvents
Expand Down Expand Up @@ -215,7 +249,8 @@ private BuildMetrics createBuildMetrics() {
.setCumulativeMetrics(createCumulativeMetrics())
.setArtifactMetrics(artifactMetrics.build())
.setBuildGraphMetrics(buildGraphMetrics.build())
.addAllWorkerMetrics(WorkerMetricsCollector.instance().createWorkerMetricsProto());
.addAllWorkerMetrics(WorkerMetricsCollector.instance().createWorkerMetricsProto())
.setWorkerPoolMetrics(createWorkerPoolMetrics());

NetworkMetrics networkMetrics = NetworkMetricsCollector.instance().collectMetrics();
if (networkMetrics != null) {
Expand Down Expand Up @@ -318,6 +353,52 @@ private TimingMetrics finishTimingMetrics() {
return timingMetrics.build();
}

/**
 * Converts the collected per-pool counters into a {@link WorkerPoolMetrics} proto, emitting one
 * {@code WorkerPoolStats} message per worker pool hash seen so far.
 */
private WorkerPoolMetrics createWorkerPoolMetrics() {
  WorkerPoolMetrics.Builder result = WorkerPoolMetrics.newBuilder();

  for (Map.Entry<Integer, WorkerPoolStats> entry : workerPoolStats.entrySet()) {
    WorkerPoolStats collected = entry.getValue();
    WorkerPoolMetrics.WorkerPoolStats.Builder statsProto =
        WorkerPoolMetrics.WorkerPoolStats.newBuilder();
    statsProto.setHash(entry.getKey());
    statsProto.setMnemonic(collected.getMnemonic());
    statsProto.setCreatedCount(collected.getCreatedCount());
    statsProto.setDestroyedCount(collected.getDestroyedCount());
    result.addWorkerPoolStats(statsProto.build());
  }

  return result.build();
}

/**
 * Mutable counters for a single worker pool: how many workers were created and how many were
 * destroyed. Performs no synchronization of its own.
 */
private static class WorkerPoolStats {
  /** Mnemonic supplied when this entry was first created. */
  private final String mnemonic;

  private int createdCount;
  private int destroyedCount;

  WorkerPoolStats(String mnemonic) {
    this.mnemonic = mnemonic;
  }

  public String getMnemonic() {
    return this.mnemonic;
  }

  public int getCreatedCount() {
    return this.createdCount;
  }

  public int getDestroyedCount() {
    return this.destroyedCount;
  }

  /** Adds one to the number of created workers. */
  void incrementCreatedCount() {
    this.createdCount += 1;
  }

  /** Adds one to the number of destroyed workers. */
  void incrementDestroyedCount() {
    this.destroyedCount += 1;
  }
}

private static class ActionStats {
final LongAccumulator firstStarted;
final LongAccumulator lastEnded;
Expand Down
10 changes: 10 additions & 0 deletions src/main/java/com/google/devtools/build/lib/worker/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ java_library(
":multiplex_worker",
":singleplex_worker",
":worker",
":worker_events",
":worker_key",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/vfs",
Expand All @@ -192,6 +193,15 @@ java_library(
],
)

# Event classes posted when a worker is created or destroyed. Kept in their own
# small target so that consumers can depend on the events without pulling in the
# rest of the worker implementation.
java_library(
name = "worker_events",
srcs = [
"WorkerCreatedEvent.java",
"WorkerDestroyedEvent.java",
],
deps = ["//src/main/java/com/google/devtools/build/lib/events"],
)

java_library(
name = "worker_pool",
srcs = [
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright 2015 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.worker;

import com.google.devtools.build.lib.events.ExtendedEventHandler.Postable;

/**
 * An event fired during execution, when a worker was created.
 *
 * <p>Carries the hash of the worker pool the new worker belongs to and the mnemonic string
 * supplied by the poster.
 */
public final class WorkerCreatedEvent implements Postable {
  private final int workerPoolHash;
  private final String mnemonic;

  /**
   * @param workerPoolHash hash identifying the worker pool the created worker belongs to
   * @param mnemonic mnemonic describing the created worker
   */
  public WorkerCreatedEvent(int workerPoolHash, String mnemonic) {
    this.workerPoolHash = workerPoolHash;
    this.mnemonic = mnemonic;
  }

  /** Returns the mnemonic passed to the constructor. */
  public String getMnemonic() {
    return mnemonic;
  }

  /** Returns the worker pool hash passed to the constructor. */
  public int getWorkerPoolHash() {
    return workerPoolHash;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright 2015 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.worker;

import com.google.devtools.build.lib.events.ExtendedEventHandler.Postable;

/**
 * Event posted during execution once a worker has been destroyed.
 *
 * <p>Carries the hash of the worker pool the worker belonged to and the mnemonic string supplied
 * by the poster.
 */
public final class WorkerDestroyedEvent implements Postable {
  private final String mnemonic;
  private final int workerPoolHash;

  /**
   * @param workerPoolHash hash identifying the worker pool the destroyed worker belonged to
   * @param mnemonic mnemonic describing the destroyed worker
   */
  public WorkerDestroyedEvent(int workerPoolHash, String mnemonic) {
    this.mnemonic = mnemonic;
    this.workerPoolHash = workerPoolHash;
  }

  /** Returns the worker pool hash passed to the constructor. */
  public int getWorkerPoolHash() {
    return this.workerPoolHash;
  }

  /** Returns the mnemonic passed to the constructor. */
  public String getMnemonic() {
    return this.mnemonic;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.worker;

import com.google.common.eventbus.EventBus;
import com.google.common.flogger.GoogleLogger;
import com.google.common.io.BaseEncoding;
import com.google.devtools.build.lib.events.Event;
Expand Down Expand Up @@ -46,6 +47,8 @@ public class WorkerFactory extends BaseKeyedPooledObjectFactory<WorkerKey, Worke

private final Path workerBaseDir;
private Reporter reporter;
private EventBus eventBus;

/**
* Options specific to hardened sandbox. Null if {@code --experimental_worker_sandbox_hardening}
* is not set.
Expand All @@ -65,6 +68,10 @@ public void setReporter(Reporter reporter) {
this.reporter = reporter;
}

/**
 * Sets the event bus on which {@link WorkerCreatedEvent} and {@link WorkerDestroyedEvent} are
 * posted. When {@code null}, no such events are posted.
 */
public void setEventBus(EventBus eventBus) {
this.eventBus = eventBus;
}

@Override
public Worker create(WorkerKey key) throws IOException {
int workerId = pidCounter.getAndIncrement();
Expand Down Expand Up @@ -103,6 +110,9 @@ public Worker create(WorkerKey key) throws IOException {
workerId,
worker.getLogFile());
WorkerLoggingHelper.logMessage(reporter, WorkerLoggingHelper.LogLevel.INFO, msg);
if (eventBus != null) {
eventBus.post(new WorkerCreatedEvent(key.hashCode(), key.toString()));
}
return worker;
}

Expand Down Expand Up @@ -135,6 +145,9 @@ public void destroyObject(WorkerKey key, PooledObject<Worker> p) {
"Destroying %s %s (id %d)", key.getMnemonic(), key.getWorkerTypeName(), workerId);
WorkerLoggingHelper.logMessage(reporter, WorkerLoggingHelper.LogLevel.INFO, msg);
p.getObject().destroy();
if (eventBus != null) {
eventBus.post(new WorkerDestroyedEvent(key.hashCode(), key.toString()));
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public void cleanStarting(CleanStartingEvent event) {
if (workerPool != null) {
WorkerOptions options = event.getOptionsProvider().getOptions(WorkerOptions.class);
workerFactory.setReporter(options.workerVerbose ? env.getReporter() : null);
workerFactory.setEventBus(env.getEventBus());
shutdownPool(
"Clean command is running, shutting down worker pool...",
/* alwaysLog= */ false,
Expand All @@ -85,6 +86,7 @@ public void buildStarting(BuildStartingEvent event) {
WorkerOptions options = checkNotNull(event.request().getOptions(WorkerOptions.class));
if (workerFactory != null) {
workerFactory.setReporter(options.workerVerbose ? env.getReporter() : null);
workerFactory.setEventBus(env.getEventBus());
}
Path workerDir =
env.getOutputBase().getRelative(env.getRuntime().getProductName() + "-workers");
Expand Down Expand Up @@ -141,6 +143,7 @@ public void buildStarting(BuildStartingEvent event) {
options.workerVerbose);
workerFactory = newWorkerFactory;
workerFactory.setReporter(options.workerVerbose ? env.getReporter() : null);
workerFactory.setEventBus(env.getEventBus());
}

WorkerPoolConfig newConfig =
Expand Down Expand Up @@ -235,6 +238,7 @@ public void afterCommand() {

if (this.workerFactory != null) {
this.workerFactory.setReporter(null);
this.workerFactory.setEventBus(null);
}
WorkerMultiplexerManager.afterCommand();
}
Expand Down
6 changes: 3 additions & 3 deletions src/test/shell/integration/minimal_jdk_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ export BAZEL_SUFFIX="_jdk_minimal"
source "$(rlocation "io_bazel/src/test/shell/integration_test_setup.sh")" \
|| { echo "integration_test_setup.sh not found!" >&2; exit 1; }

# Bazel's install base is < 342MB with minimal JDK and > 342MB with an all
# Bazel's install base is < 345MB with minimal JDK and > 342MB with an all
# modules JDK.
function test_size_less_than_342MB() {
function test_size_less_than_345MB() {
bazel info
ib=$(bazel info install_base)
size=$(du -s "$ib" | cut -d\ -f1)
maxsize=$((1024*342))
maxsize=$((1024*345))
if [ $size -gt $maxsize ]; then
echo "$ib was too big:" 1>&2
du -a "$ib" 1>&2
Expand Down

0 comments on commit f4d27d4

Please sign in to comment.