Skip to content

Commit

Permalink
emit failure type in attempt_failure_by_origin (#20349)
Browse files Browse the repository at this point in the history
  • Loading branch information
cgardens authored Dec 12, 2022
1 parent c9fb128 commit 0af6bd0
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ public static final class Tags {
*/
public static final String FAILURE_ORIGINS_KEY = "failure_origins";

/**
* Name of the APM trace tag that holds the failure type(s) associated with the trace.
*/
public static final String FAILURE_TYPES_KEY = "failure_types";

/**
* Name of the APM trace tag that holds the job ID value associated with the trace.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package io.airbyte.metrics.lib;

import io.airbyte.config.FailureReason.FailureOrigin;
import io.airbyte.config.FailureReason.FailureType;
import io.airbyte.db.instance.configs.jooq.generated.enums.ReleaseStage;
import io.airbyte.db.instance.jobs.jooq.generated.enums.JobStatus;

Expand All @@ -15,6 +16,7 @@ public class MetricTags {

public static final String CONNECTION_ID = "connection_id";
public static final String FAILURE_ORIGIN = "failure_origin";
public static final String FAILURE_TYPE = "failure_type";
public static final String JOB_ID = "job_id";
public static final String JOB_STATUS = "job_status";
public static final String RELEASE_STAGE = "release_stage";
Expand All @@ -32,6 +34,10 @@ public static String getFailureOrigin(final FailureOrigin origin) {
return origin != null ? origin.value() : FailureOrigin.UNKNOWN.value();
}

/**
 * Converts a failure type into the string used as its metric tag value.
 *
 * @param type the failure type to convert; may be {@code null}
 * @return {@code type.value()}, or {@code UNKNOWN} when {@code type} is {@code null}
 */
public static String getFailureType(final FailureType type) {
  return type != null ? type.value() : UNKNOWN;
}

/**
 * Resolves the metric tag value for a job status.
 *
 * @param status the job status to convert; may be {@code null}
 * @return the status literal, or {@code UNKNOWN} when {@code status} is {@code null}
 */
public static String getJobStatus(final JobStatus status) {
  if (status == null) {
    return UNKNOWN;
  }
  return status.getLiteral();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public enum OssMetricsRegistry implements MetricsRegistry {
ATTEMPT_FAILED_BY_FAILURE_ORIGIN(
MetricEmittingApps.WORKER,
"attempt_failed_by_failure_origin",
"increments for every failure origin a failed attempt has. since a failure can have multiple origins, a single failure can be counted more than once. tagged by failure origin."),
"increments for every failure origin a failed attempt has. since a failure can have multiple origins, a single failure can be counted more than once. tagged by failure origin and failure type."),
ATTEMPT_SUCCEEDED_BY_RELEASE_STAGE(
MetricEmittingApps.WORKER,
"attempt_succeeded_by_release_stage",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import static io.airbyte.metrics.lib.ApmTraceConstants.Tags.ATTEMPT_NUMBER_KEY;
import static io.airbyte.metrics.lib.ApmTraceConstants.Tags.CONNECTION_ID_KEY;
import static io.airbyte.metrics.lib.ApmTraceConstants.Tags.FAILURE_ORIGINS_KEY;
import static io.airbyte.metrics.lib.ApmTraceConstants.Tags.FAILURE_TYPES_KEY;
import static io.airbyte.metrics.lib.ApmTraceConstants.Tags.JOB_ID_KEY;
import static io.airbyte.persistence.job.models.AttemptStatus.FAILED;

Expand Down Expand Up @@ -504,11 +505,24 @@ private void trackCompletionForInternalFailure(final Long jobId,
/**
 * Passes failure information from an attempt's failure summary to
 * {@code ApmTraceUtils.addTagsToTrace}.
 *
 * <ul>
 * <li>Summary present and {@code CollectionUtils.isNotEmpty(getFailures())} is true: the failure
 * types and failure origins of all failures are each joined with "," and added under
 * {@code FAILURE_TYPES_KEY} / {@code FAILURE_ORIGINS_KEY}.</li>
 * <li>Summary present but the failures check is false: no tags are added.</li>
 * <li>Summary {@code null}: the tags are set from {@code MetricTags.getFailureType(null)} and
 * {@code FailureOrigin.UNKNOWN.value()}.</li>
 * </ul>
 *
 * @param failureSummary the failure summary of the attempt; may be {@code null}
 */
private void traceFailures(final AttemptFailureSummary failureSummary) {
if (failureSummary != null) {
if (CollectionUtils.isNotEmpty(failureSummary.getFailures())) {
// NOTE(review): this single-key call and the two-key call after it look like the removed and
// added sides of a diff rendered without +/- markers -- confirm only one is meant to remain.
ApmTraceUtils.addTagsToTrace(Map.of(FAILURE_ORIGINS_KEY, failureSummary.getFailures().stream().map(FailureReason::getFailureOrigin).map(
FailureOrigin::name).collect(Collectors.joining(","))));
ApmTraceUtils.addTagsToTrace(Map.of(
FAILURE_TYPES_KEY,
failureSummary.getFailures()
.stream()
.map(FailureReason::getFailureType)
// Failure types are converted through MetricTags.getFailureType before joining.
.map(MetricTags::getFailureType)
.collect(Collectors.joining(",")),
FAILURE_ORIGINS_KEY,
failureSummary.getFailures()
.stream()
.map(FailureReason::getFailureOrigin)
// NOTE(review): origins are joined by enum name() here, while the null-summary branch below
// uses value() -- confirm the difference is intended.
.map(FailureOrigin::name)
.collect(Collectors.joining(","))));
}
} else {
// NOTE(review): as above, the single-key call and the two-key call look like the old and new
// sides of the same diff hunk -- confirm.
ApmTraceUtils.addTagsToTrace(Map.of(FAILURE_ORIGINS_KEY, FailureOrigin.UNKNOWN.value()));
ApmTraceUtils.addTagsToTrace(Map.of(
FAILURE_TYPES_KEY, MetricTags.getFailureType(null),
FAILURE_ORIGINS_KEY, FailureOrigin.UNKNOWN.value()));
}
}

Expand All @@ -521,11 +535,13 @@ private void trackFailures(final AttemptFailureSummary failureSummary) {
if (failureSummary != null) {
for (final FailureReason reason : failureSummary.getFailures()) {
MetricClientFactory.getMetricClient().count(OssMetricsRegistry.ATTEMPT_FAILED_BY_FAILURE_ORIGIN, 1,
new MetricAttribute(MetricTags.FAILURE_ORIGIN, MetricTags.getFailureOrigin(reason.getFailureOrigin())));
new MetricAttribute(MetricTags.FAILURE_ORIGIN, MetricTags.getFailureOrigin(reason.getFailureOrigin())),
new MetricAttribute(MetricTags.FAILURE_TYPE, MetricTags.getFailureType(reason.getFailureType())));
}
} else {
MetricClientFactory.getMetricClient().count(OssMetricsRegistry.ATTEMPT_FAILED_BY_FAILURE_ORIGIN, 1,
new MetricAttribute(MetricTags.FAILURE_ORIGIN, FailureOrigin.UNKNOWN.value()));
new MetricAttribute(MetricTags.FAILURE_ORIGIN, FailureOrigin.UNKNOWN.value()),
new MetricAttribute(MetricTags.FAILURE_TYPE, MetricTags.getFailureType(null)));
}
}

Expand Down

0 comments on commit 0af6bd0

Please sign in to comment.