-
Notifications
You must be signed in to change notification settings - Fork 4.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add and persist job failures for Normalization #14790
Changes from all commits
9f33750
f9d7844
ff954d1
7ed5164
f730f77
d92674c
77ee33d
1e1b1b1
23efe1c
af2ee53
d2d0a2a
bb99589
9a7efec
3d442ad
16b588d
6890ec4
d076969
01ed748
9138bfa
745f7c4
33707f1
9073cc5
29e138c
001da52
0416e54
ea39d1e
eb09ec8
9c951ae
f6eba9c
5564ecd
63095ad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,3 +13,7 @@ properties: | |
type: integer | ||
endTime: | ||
type: integer | ||
failures: | ||
type: array | ||
items: | ||
"$ref": FailureReason.yaml |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,24 +8,34 @@ | |
import com.google.common.annotations.VisibleForTesting; | ||
import com.google.common.base.Strings; | ||
import com.google.common.collect.ImmutableMap; | ||
import io.airbyte.commons.io.IOs; | ||
import io.airbyte.commons.io.LineGobbler; | ||
import io.airbyte.commons.json.Jsons; | ||
import io.airbyte.commons.logging.LoggingHelper.Color; | ||
import io.airbyte.commons.logging.MdcScope; | ||
import io.airbyte.commons.logging.MdcScope.Builder; | ||
import io.airbyte.config.OperatorDbt; | ||
import io.airbyte.config.ResourceRequirements; | ||
import io.airbyte.protocol.models.AirbyteErrorTraceMessage; | ||
import io.airbyte.protocol.models.AirbyteErrorTraceMessage.FailureType; | ||
import io.airbyte.protocol.models.AirbyteMessage; | ||
import io.airbyte.protocol.models.AirbyteMessage.Type; | ||
import io.airbyte.protocol.models.AirbyteTraceMessage; | ||
import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; | ||
import io.airbyte.workers.WorkerConfigs; | ||
import io.airbyte.workers.WorkerConstants; | ||
import io.airbyte.workers.WorkerUtils; | ||
import io.airbyte.workers.exception.WorkerException; | ||
import io.airbyte.workers.process.AirbyteIntegrationLauncher; | ||
import io.airbyte.workers.process.ProcessFactory; | ||
import java.io.InputStream; | ||
import java.nio.file.Path; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
|
@@ -40,6 +50,8 @@ public class DefaultNormalizationRunner implements NormalizationRunner { | |
private final DestinationType destinationType; | ||
private final ProcessFactory processFactory; | ||
private final String normalizationImageName; | ||
private final NormalizationAirbyteStreamFactory streamFactory = new NormalizationAirbyteStreamFactory(CONTAINER_LOG_MDC_BUILDER); | ||
private Map<Type, List<AirbyteMessage>> airbyteMessagesByType; | ||
|
||
private Process process = null; | ||
|
||
|
@@ -135,7 +147,30 @@ private boolean runProcess(final String jobId, | |
Collections.emptyMap(), | ||
args); | ||
|
||
LineGobbler.gobble(process.getInputStream(), LOGGER::info, CONTAINER_LOG_MDC_BUILDER); | ||
try (final InputStream stdout = process.getInputStream()) { | ||
// finds and collects any AirbyteMessages from stdout | ||
// also builds a list of raw dbt errors and stores in streamFactory | ||
airbyteMessagesByType = streamFactory.create(IOs.newBufferedReader(stdout)) | ||
.collect(Collectors.groupingBy(AirbyteMessage::getType)); | ||
|
||
// picks up error logs from dbt | ||
String dbtErrorStack = String.join("\n\t", streamFactory.getDbtErrors()); | ||
|
||
if (!"".equals(dbtErrorStack)) { | ||
AirbyteMessage dbtTraceMessage = new AirbyteMessage() | ||
.withType(Type.TRACE) | ||
.withTrace(new AirbyteTraceMessage() | ||
.withType(AirbyteTraceMessage.Type.ERROR) | ||
.withEmittedAt((double) System.currentTimeMillis()) | ||
.withError(new AirbyteErrorTraceMessage() | ||
.withFailureType(FailureType.SYSTEM_ERROR) // TODO: decide on best FailureType for this | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we should use The reason for an error from dbt is not necessarily clear, e.g. it could be a problem with the source data or it could be a system error from a bug we've introduced or it could be an issue with the destination (and other ors)... |
||
.withMessage("Normalization failed during the dbt run. This may indicate a problem with the data itself.") | ||
.withInternalMessage(dbtErrorStack) | ||
.withStackTrace(dbtErrorStack))); | ||
Comment on lines
+160
to
+169
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (curious) why not create a FailureReason directly? I think this is the first time we're creating a trace message platform side rather than coming from connectors There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At this point we could have TRACE messages in Due to the way we run and consume output from dbt, it wouldn't be trivial to create trace messages directly from normalization. |
||
|
||
airbyteMessagesByType.putIfAbsent(Type.TRACE, List.of(dbtTraceMessage)); | ||
} | ||
} | ||
LineGobbler.gobble(process.getErrorStream(), LOGGER::error, CONTAINER_LOG_MDC_BUILDER); | ||
|
||
WorkerUtils.wait(process); | ||
|
@@ -163,6 +198,14 @@ public void close() throws Exception { | |
} | ||
} | ||
|
||
@Override | ||
public Stream<AirbyteTraceMessage> getTraceMessages() { | ||
if (airbyteMessagesByType != null && airbyteMessagesByType.get(Type.TRACE) != null) { | ||
return airbyteMessagesByType.get(Type.TRACE).stream().map(AirbyteMessage::getTrace); | ||
} | ||
return Stream.empty(); | ||
} | ||
|
||
@VisibleForTesting | ||
DestinationType getDestinationType() { | ||
return destinationType; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
/* | ||
* Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.workers.normalization; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.fasterxml.jackson.databind.node.JsonNodeType; | ||
import io.airbyte.commons.json.Jsons; | ||
import io.airbyte.commons.logging.MdcScope; | ||
import io.airbyte.protocol.models.AirbyteLogMessage; | ||
import io.airbyte.protocol.models.AirbyteMessage; | ||
import io.airbyte.workers.internal.AirbyteStreamFactory; | ||
import java.io.BufferedReader; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Optional; | ||
import java.util.stream.Stream; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* Creates a stream from an input stream. The produced stream attempts to parse each line of the | ||
* InputStream into a AirbyteMessage. If the line cannot be parsed into a AirbyteMessage it is | ||
* assumed to be from dbt. dbt [error] messages are also parsed | ||
* | ||
* <p> | ||
* If a line starts with a AirbyteMessage and then has other characters after it, that | ||
* AirbyteMessage will still be parsed. If there are multiple AirbyteMessage records on the same | ||
* line, only the first will be parsed. | ||
*/ | ||
public class NormalizationAirbyteStreamFactory implements AirbyteStreamFactory { | ||
|
||
private static final Logger LOGGER = LoggerFactory.getLogger(NormalizationAirbyteStreamFactory.class); | ||
|
||
private final MdcScope.Builder containerLogMdcBuilder; | ||
private final Logger logger; | ||
private final List<String> dbtErrors = new ArrayList<>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not so sure about storing dbtErrors as data within an instance of this object, does that seem fine or is there a better approach? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the getter use anywhere? creating it in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the getter is used here |
||
|
||
public NormalizationAirbyteStreamFactory(final MdcScope.Builder containerLogMdcBuilder) { | ||
this(LOGGER, containerLogMdcBuilder); | ||
} | ||
|
||
NormalizationAirbyteStreamFactory(final Logger logger, final MdcScope.Builder containerLogMdcBuilder) { | ||
this.logger = logger; | ||
this.containerLogMdcBuilder = containerLogMdcBuilder; | ||
} | ||
|
||
@Override | ||
public Stream<AirbyteMessage> create(final BufferedReader bufferedReader) { | ||
return bufferedReader | ||
.lines() | ||
.flatMap(this::filterOutAndHandleNonJsonLines) | ||
.flatMap(this::filterOutAndHandleNonAirbyteMessageLines) | ||
// so now we are just left with AirbyteMessages | ||
.filter(airbyteMessage -> { | ||
final boolean isLog = airbyteMessage.getType() == AirbyteMessage.Type.LOG; | ||
if (isLog) { | ||
try (final var mdcScope = containerLogMdcBuilder.build()) { | ||
internalLog(airbyteMessage.getLog()); | ||
} | ||
} | ||
return !isLog; | ||
}); | ||
} | ||
|
||
private Stream<JsonNode> filterOutAndHandleNonJsonLines(String line) { | ||
final Optional<JsonNode> jsonLine = Jsons.tryDeserialize(line); | ||
if (jsonLine.isEmpty()) { | ||
// we log as info all the lines that are not valid json. | ||
try (final var mdcScope = containerLogMdcBuilder.build()) { | ||
logger.info(line); | ||
// this is really hacky and vulnerable to picking up lines we don't want, | ||
// however it is only for destinations that are using dbt version < 1.0. | ||
// For v1 + we switch on JSON logging and parse those in the next block. | ||
if (line.contains("[error]")) { | ||
dbtErrors.add(line); | ||
} | ||
} | ||
} | ||
return jsonLine.stream(); | ||
} | ||
|
||
private Stream<AirbyteMessage> filterOutAndHandleNonAirbyteMessageLines(JsonNode jsonLine) { | ||
final Optional<AirbyteMessage> m = Jsons.tryObject(jsonLine, AirbyteMessage.class); | ||
if (m.isEmpty()) { | ||
// valid JSON but not an AirbyteMessage, so we assume this is a dbt json log | ||
try { | ||
final String logLevel = (jsonLine.getNodeType() == JsonNodeType.NULL || jsonLine.get("level").isNull()) | ||
? "" | ||
: jsonLine.get("level").asText(); | ||
final String logMsg = jsonLine.get("msg").isNull() ? "" : jsonLine.get("msg").asText(); | ||
try (final var mdcScope = containerLogMdcBuilder.build()) { | ||
switch (logLevel) { | ||
case "debug" -> logger.debug(logMsg); | ||
case "info" -> logger.info(logMsg); | ||
case "warn" -> logger.warn(logMsg); | ||
case "error" -> logAndCollectErrorMessage(logMsg); | ||
default -> logger.info(jsonLine.asText()); // this shouldn't happen but logging it to avoid hiding unexpected lines. | ||
} | ||
} | ||
} catch (final Exception e) { | ||
logger.info(jsonLine.asText()); | ||
} | ||
} | ||
return m.stream(); | ||
} | ||
|
||
private void logAndCollectErrorMessage(String logMessage) { | ||
logger.error(logMessage); | ||
dbtErrors.add(logMessage); | ||
} | ||
|
||
public List<String> getDbtErrors() { | ||
return dbtErrors; | ||
} | ||
|
||
private void internalLog(final AirbyteLogMessage logMessage) { | ||
switch (logMessage.getLevel()) { | ||
case FATAL, ERROR -> logger.error(logMessage.getMessage()); | ||
case WARN -> logger.warn(logMessage.getMessage()); | ||
case DEBUG -> logger.debug(logMessage.getMessage()); | ||
case TRACE -> logger.trace(logMessage.getMessage()); | ||
default -> logger.info(logMessage.getMessage()); | ||
} | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's way better than trying to parse line breaks!