Skip to content

Commit

Permalink
RELNOTES: Refactor system suspend event handling.
Browse files Browse the repository at this point in the history
We are looking to add more build anomaly reporting, so refactor the current system suspend handling to match the new model where we have a module and an event that we can record.

Note this deprecates the AnomalyRecord out of BES for the time being.

PiperOrigin-RevId: 409502784
  • Loading branch information
dmaclach authored and copybara-github committed Nov 12, 2021
1 parent 72428ec commit 286fb80
Show file tree
Hide file tree
Showing 27 changed files with 414 additions and 232 deletions.
1 change: 0 additions & 1 deletion src/main/java/com/google/devtools/build/lib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,6 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/packages",
"//src/main/java/com/google/devtools/build/lib/packages/semantics",
"//src/main/java/com/google/devtools/build/lib/pkgcache",
"//src/main/java/com/google/devtools/build/lib/platform:suspend_counter",
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/profiler:google-auto-profiler-utils",
"//src/main/java/com/google/devtools/build/lib/profiler/memory:allocationtracker",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@
/** {@link BuildEvent} indicating that a request that does not involve building as finished. */
public final class NoBuildRequestFinishedEvent extends BuildCompletingEvent {
public NoBuildRequestFinishedEvent(ExitCode exitCode, long finishTimeMillis) {
super(exitCode, finishTimeMillis, false);
super(exitCode, finishTimeMillis);
}
}
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/bazel/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/outputfilter",
"//src/main/java/com/google/devtools/build/lib/packages/metrics",
"//src/main/java/com/google/devtools/build/lib/platform:sleep_prevention_module",
"//src/main/java/com/google/devtools/build/lib/platform:system_suspension_module",
"//src/main/java/com/google/devtools/build/lib/profiler/callcounts:callcounts_module",
"//src/main/java/com/google/devtools/build/lib/profiler/memory:allocationtracker_module",
"//src/main/java/com/google/devtools/build/lib/remote",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public final class Bazel {
com.google.devtools.build.lib.runtime.NoSpawnCacheModule.class,
com.google.devtools.build.lib.runtime.CommandLogModule.class,
com.google.devtools.build.lib.platform.SleepPreventionModule.class,
com.google.devtools.build.lib.platform.SystemSuspensionModule.class,
com.google.devtools.build.lib.runtime.BazelFileSystemModule.class,
com.google.devtools.build.lib.runtime.mobileinstall.MobileInstallModule.class,
com.google.devtools.build.lib.bazel.BazelWorkspaceStatusModule.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,17 @@ public abstract class BuildCompletingEvent implements BuildEvent {
private final ExitCode exitCode;
private final long finishTimeMillis;

/** Was the build suspended mid-build (e.g. hardware sleep, SIGSTOP). */
private final boolean wasSuspended;

private final Collection<BuildEventId> children;

public BuildCompletingEvent(
ExitCode exitCode,
long finishTimeMillis,
Collection<BuildEventId> children,
boolean wasSuspended) {
ExitCode exitCode, long finishTimeMillis, Collection<BuildEventId> children) {
this.exitCode = exitCode;
this.finishTimeMillis = finishTimeMillis;
this.children = children;
this.wasSuspended = wasSuspended;
}

public BuildCompletingEvent(ExitCode exitCode, long finishTimeMillis, boolean wasSuspended) {
this(exitCode, finishTimeMillis, ImmutableList.of(), wasSuspended);
public BuildCompletingEvent(ExitCode exitCode, long finishTimeMillis) {
this(exitCode, finishTimeMillis, ImmutableList.of());
}

public ExitCode getExitCode() {
Expand All @@ -72,18 +65,12 @@ public BuildEventStreamProtos.BuildEvent asStreamProto(BuildEventContext convert
.setCode(exitCode.getNumericExitCode())
.build();

BuildEventStreamProtos.BuildFinished.AnomalyReport protoAnomalyReport =
BuildEventStreamProtos.BuildFinished.AnomalyReport.newBuilder()
.setWasSuspended(wasSuspended)
.build();

BuildEventStreamProtos.BuildFinished finished =
BuildEventStreamProtos.BuildFinished.newBuilder()
.setOverallSuccess(ExitCode.SUCCESS.equals(exitCode))
.setExitCode(protoExitCode)
.setFinishTime(Timestamps.fromMillis(finishTimeMillis))
.setFinishTimeMillis(finishTimeMillis)
.setAnomalyReport(protoAnomalyReport)
.build();
return GenericBuildEvent.protoChaining(this).setFinished(finished).build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,7 @@ message BuildFinished {
// Examples of suspensions are SIGSTOP, or the hardware being put to sleep.
// If was_suspended is true, then most of the timings for this build are
// suspect.
// NOTE: This is no longer set and is deprecated.
bool was_suspended = 1;
}

Expand All @@ -819,7 +820,7 @@ message BuildFinished {
// End of the build.
google.protobuf.Timestamp finish_time = 5;

AnomalyReport anomaly_report = 4;
AnomalyReport anomaly_report = 4 [deprecated = true];
}

message BuildMetrics {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ public final class BuildResult {
private long startTimeMillis = 0; // milliseconds since UNIX epoch.
private long stopTimeMillis = 0;

private boolean wasSuspended = false;

private Throwable crash = null;
private boolean catastrophe = false;
private boolean stopOnFirstFailure;
Expand Down Expand Up @@ -97,16 +95,6 @@ public double getElapsedSeconds() {
return (stopTimeMillis - startTimeMillis) / 1000.0;
}

/** Record if the build was suspended (SIGSTOP or hardware put to sleep). */
public void setWasSuspended(boolean wasSuspended) {
this.wasSuspended = wasSuspended;
}

/** Whether the build was suspended (SIGSTOP or hardware put to sleep). */
public boolean getWasSuspended() {
return wasSuspended;
}

public void setDetailedExitCode(DetailedExitCode detailedExitCode) {
this.detailedExitCode = detailedExitCode;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
// limitations under the License.
package com.google.devtools.build.lib.buildtool;

import static com.google.devtools.build.lib.platform.SuspendCounter.suspendCount;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
Expand Down Expand Up @@ -438,7 +436,6 @@ public BuildResult processRequest(
BuildRequest request, TargetValidator validator, PostBuildCallback postBuildCallback) {
BuildResult result = new BuildResult(request.getStartTime());
maybeSetStopOnFirstFailure(request, result);
int startSuspendCount = suspendCount();
Throwable crash = null;
DetailedExitCode detailedExitCode = null;
try {
Expand Down Expand Up @@ -536,7 +533,7 @@ public BuildResult processRequest(
new IllegalStateException("Unspecified DetailedExitCode"));
}
try (SilentCloseable c = Profiler.instance().profile("stopRequest")) {
stopRequest(result, crash, detailedExitCode, startSuspendCount);
stopRequest(result, crash, detailedExitCode);
}
}

Expand Down Expand Up @@ -576,16 +573,10 @@ private static boolean needsExecutionPhase(BuildRequestOptions options) {
* @param crash any unexpected {@link RuntimeException} or {@link Error}, may be null
* @param detailedExitCode describes the exit code and an optional detailed failure value to add
* to {@code result}
* @param startSuspendCount number of suspensions before the build started
*/
public void stopRequest(
BuildResult result,
@Nullable Throwable crash,
DetailedExitCode detailedExitCode,
int startSuspendCount) {
BuildResult result, @Nullable Throwable crash, DetailedExitCode detailedExitCode) {
Preconditions.checkState((crash == null) || !detailedExitCode.isSuccess());
int stopSuspendCount = suspendCount();
Preconditions.checkState(startSuspendCount <= stopSuspendCount);
result.setUnhandledThrowable(crash);
result.setDetailedExitCode(detailedExitCode);

Expand All @@ -598,7 +589,6 @@ public void stopRequest(
}
// The stop time has to be captured before we send the BuildCompleteEvent.
result.setStopTime(runtime.getClock().currentTimeMillis());
result.setWasSuspended(stopSuspendCount > startSuspendCount);

// Skip the build complete events so that modules can run blazeShutdownOnCrash without thinking
// that the build completed normally. BlazeCommandDispatcher will call handleCrash.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,7 @@ public final class BuildCompleteEvent extends BuildCompletingEvent {

/** Construct the BuildCompleteEvent. */
public BuildCompleteEvent(BuildResult result, Collection<BuildEventId> children) {
super(
result.getDetailedExitCode().getExitCode(),
result.getStopTime(),
children,
result.getWasSuspended());
super(result.getDetailedExitCode().getExitCode(), result.getStopTime(), children);
this.result = checkNotNull(result);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright 2021 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.lib.buildtool.buildevent;

import com.google.devtools.build.lib.events.ExtendedEventHandler;

/**
* This event is fired from {@link
* com.google.devtools.build.lib.platform.SystemSuspensionModule#suspendCallback} to indicate that
* the user either suspended the build with a signal or put their computer to sleep.
*/
public class SystemSuspensionEvent implements ExtendedEventHandler.Postable {

/** The possible reasons a system could be suspended. */
public enum Reason {
SIGTSTP("Signal SIGTSTP"),
SIGCONT("Signal SIGCONT"),
SLEEP("Computer put to sleep"),
WAKE("Computer woken up");

private final String logString;

Reason(String logString) {
this.logString = logString;
}

public String logString() {
return logString;
}

static Reason fromInt(int number) {
switch (number) {
case 0:
return SIGTSTP;
case 1:
return SIGCONT;
case 2:
return SLEEP;
case 3:
return WAKE;
default:
throw new IllegalStateException("Unknown suspension reason: " + number);
}
}
};

/** These constants are mapped to enum in third_party/bazel/src/main/native/unix_jni.h. */
private final Reason reason;

public SystemSuspensionEvent(int reason) {
this.reason = Reason.fromInt(reason);
}

public Reason getReason() {
return reason;
}

public String logString() {
return "SystemSuspensionEvent: " + reason.logString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,8 @@ public class TestingCompleteEvent extends BuildCompletingEvent {
*
* @param exitCode the overall exit code of "bazel test".
* @param finishTimeMillis the finish time in milliseconds since the epoch.
* @param wasSuspended was the build suspended at any point.
*/
public TestingCompleteEvent(ExitCode exitCode, long finishTimeMillis, boolean wasSuspended) {
super(
exitCode,
finishTimeMillis,
ImmutableList.of(BuildEventIdUtil.buildToolLogs()),
wasSuspended);
public TestingCompleteEvent(ExitCode exitCode, long finishTimeMillis) {
super(exitCode, finishTimeMillis, ImmutableList.of(BuildEventIdUtil.buildToolLogs()));
}
}
8 changes: 6 additions & 2 deletions src/main/java/com/google/devtools/build/lib/platform/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,16 @@ java_library(
)

java_library(
name = "suspend_counter",
name = "system_suspension_module",
srcs = [
"SuspendCounter.java",
"SystemSuspensionModule.java",
],
deps = [
"//src/main/java/com/google/devtools/build/lib:runtime",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/jni",
"//third_party:flogger",
"//third_party:jsr305",
],
)

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright 2019 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.lib.platform;

import com.google.common.flogger.GoogleLogger;
import com.google.devtools.build.lib.buildtool.buildevent.SystemSuspensionEvent;
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.events.Reporter;
import com.google.devtools.build.lib.jni.JniLoader;
import com.google.devtools.build.lib.runtime.BlazeModule;
import com.google.devtools.build.lib.runtime.CommandEnvironment;
import javax.annotation.concurrent.GuardedBy;

/** Detects suspension events. */
public final class SystemSuspensionModule extends BlazeModule {
private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();

static {
JniLoader.loadJni();
}

@GuardedBy("this")
private Reporter reporter;

private native void registerJNI();

public SystemSuspensionModule() {
if (JniLoader.isJniAvailable()) {
registerJNI();
}
}

@Override
public synchronized void beforeCommand(CommandEnvironment env) {
this.reporter = env.getReporter();
}

@Override
public synchronized void afterCommand() {
this.reporter = null;
}

/** Callback method called from JNI whenever a suspension event occurs. */
synchronized void suspendCallback(int reason) {
if (reporter != null) {
SystemSuspensionEvent event = new SystemSuspensionEvent(reason);
String logString = event.logString();
reporter.handle(Event.info(logString));
reporter.post(event);
logger.atInfo().log(logString);
}
}
}
Loading

0 comments on commit 286fb80

Please sign in to comment.