From 15ee54c8f6b1f16923a1a1a64787bb5c1eb906b0 Mon Sep 17 00:00:00 2001 From: Mert Buran Date: Mon, 27 Jul 2020 14:08:43 +0200 Subject: [PATCH] RUMM-513 Validating JSON payload before sending to intake We observe non-parseable logs being sent to intake. In a certain org which has high volumes of logs, 0.3-0.5% of all logs are not parsed properly in the dashboard. Hypothesis #1: The data being read is malformed, because every malformed log has its malformed part at the very end. So if we read the file later, we can have a valid payload. This commit implements this hypothesis. Hypothesis #2: The write op corrupts the file, and we need to watch write ops closely so that we can catch and recover from errors. This requires major refactoring in File.swift and related files. --- Sources/Datadog/Core/Persistence/FileReader.swift | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Sources/Datadog/Core/Persistence/FileReader.swift b/Sources/Datadog/Core/Persistence/FileReader.swift index 741676d8cd..6a6e744117 100644 --- a/Sources/Datadog/Core/Persistence/FileReader.swift +++ b/Sources/Datadog/Core/Persistence/FileReader.swift @@ -38,11 +38,17 @@ internal final class FileReader { } } + private static let utf8NewlineData = "\n".data(using: .utf8)!.first! // swiftlint:disable:this force_unwrapping private func synchronizedReadNextBatch() -> Batch? { if let file = orchestrator.getReadableFile(excludingFilesNamed: Set(filesRead.map { $0.name })) { do { let fileData = try file.read() let batchData = dataFormat.prefixData + fileData + dataFormat.suffixData + // Validate data here, return nil if corrupt + // NOTE: dataFormat.validate(batchData) would make more sense + try batchData.split(separator: Self.utf8NewlineData).forEach { + try JSONSerialization.jsonObject(with: $0, options: []) + } return Batch(data: batchData, file: file) } catch { developerLogger?.error("🔥 Failed to read file: \(error)")