Skip to content

Commit

Permalink
Check exception type to set retryCountsAgainstLimit for alarms
Browse files Browse the repository at this point in the history
  • Loading branch information
jqmmes committed Jul 24, 2024
1 parent d1b6269 commit e7520d9
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 3 deletions.
25 changes: 23 additions & 2 deletions src/workerd/api/global-scope.c++
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,10 @@ kj::Promise<WorkerInterface::AlarmResult> ServiceWorkerGlobalScope::runAlarm(
}
}

// We only want to retry against limits if it's a user error. By default let's check if the
// output gate is broken.
auto shouldRetryCountsAgainstLimits = !context.isOutputGateBroken();

// We want to alert if we aren't going to count this alarm retry against limits
if (auto desc = e.getDescription();
!jsg::isTunneledException(desc) && !jsg::isDoNotLogException(desc)
Expand All @@ -476,10 +480,17 @@ kj::Promise<WorkerInterface::AlarmResult> ServiceWorkerGlobalScope::runAlarm(
// We don't usually log these messages, but it's useful to know the real reason we failed
// to correctly investigate stuck alarms.
LOG_NOSENTRY(ERROR, "output lock broke during alarm execution without an interesting error description", actorId, e);
if (e.getType() == kj::Exception::Type::OVERLOADED) {
if (e.getDetail(jsg::EXCEPTION_IS_USER_ERROR) != kj::none) {
// The handler failed because the user overloaded the object. It's their fault, we'll not
// retry forever.
shouldRetryCountsAgainstLimits = true;
}
}
}
return WorkerInterface::AlarmResult {
.retry = true,
.retryCountsAgainstLimit = !context.isOutputGateBroken(),
.retryCountsAgainstLimit = shouldRetryCountsAgainstLimits,
.outcome = outcome
};
})
Expand All @@ -497,6 +508,9 @@ kj::Promise<WorkerInterface::AlarmResult> ServiceWorkerGlobalScope::runAlarm(
actorId = kj::str(s);
}
}
// We only want to retry against limits if it's a user error. By default let's assume it's our
// fault.
auto shouldRetryCountsAgainstLimits = false;
if (auto desc = e.getDescription();
!jsg::isTunneledException(desc) && !jsg::isDoNotLogException(desc)) {
if (isInterestingException(e)) {
Expand All @@ -508,10 +522,17 @@ kj::Promise<WorkerInterface::AlarmResult> ServiceWorkerGlobalScope::runAlarm(
// We don't usually log these messages, but it's useful to know the real reason we failed
// to correctly investigate stuck alarms.
LOG_NOSENTRY(ERROR, "output lock broke after executing alarm without an interesting error description", actorId, e);
if (e.getType() == kj::Exception::Type::OVERLOADED) {
if (e.getDetail(jsg::EXCEPTION_IS_USER_ERROR) != kj::none) {
// The handler failed because the user overloaded the object. It's their fault, we'll not
// retry forever.
shouldRetryCountsAgainstLimits = true;
}
}
}
return WorkerInterface::AlarmResult {
.retry = true,
.retryCountsAgainstLimit = false,
.retryCountsAgainstLimit = shouldRetryCountsAgainstLimits,
.outcome = EventOutcome::EXCEPTION
};
});
Expand Down
2 changes: 1 addition & 1 deletion src/workerd/io/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -321,4 +321,4 @@ kj_test(
":observer",
"@capnp-cpp//src/capnp:capnpc",
],
)
)
3 changes: 3 additions & 0 deletions src/workerd/io/actor-cache.c++
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ void ActorCache::evictOrOomIfNeeded(Lock& lock) {
// Add trace info sufficient to tell us which operation caused the failure.
exception.addTraceHere();
exception.addTrace(__builtin_return_address(0));
// We know this exeption happens due to user error. Let's add an exception detail so we can
// parse it later.
exception.setDetail(jsg::EXCEPTION_IS_USER_ERROR, kj::heapArray<byte>(0));

if (maybeTerminalException == kj::none) {
maybeTerminalException.emplace(kj::cp(exception));
Expand Down
2 changes: 2 additions & 0 deletions src/workerd/jsg/exception.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,6 @@ TunneledErrorType tunneledErrorType(kj::StringPtr internalMessage);
// Annotate an internal message with the corresponding brokenness reason.
kj::String annotateBroken(kj::StringPtr internalMessage, kj::StringPtr brokennessReason);

constexpr kj::Exception::DetailTypeId EXCEPTION_IS_USER_ERROR = 0x82aff7d637c30e47ull;

} // namespace workerd::jsg

0 comments on commit e7520d9

Please sign in to comment.