Skip to content

Commit

Permalink
RUM-3461 Write fatal App Hang to RUM data store, read upon restart
Browse files Browse the repository at this point in the history
  • Loading branch information
ncreated committed Mar 28, 2024
1 parent 3af24b5 commit 77a811a
Show file tree
Hide file tree
Showing 28 changed files with 898 additions and 182 deletions.
54 changes: 48 additions & 6 deletions Datadog/Datadog.xcodeproj/project.pbxproj

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions Datadog/IntegrationUnitTests/RUM/AppHangsMonitoringTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ class AppHangsMonitoringTests: XCTestCase {
let appHangError = try XCTUnwrap(errors.first)
let actualHangDuration = try XCTUnwrap(appHangError.freeze?.duration)

XCTAssertEqual(appHangError.error.message, AppHangsObserver.Constants.appHangErrorMessage)
XCTAssertEqual(appHangError.error.type, AppHangsObserver.Constants.appHangErrorType)
XCTAssertEqual(appHangError.error.message, AppHangsMonitor.Constants.appHangErrorMessage)
XCTAssertEqual(appHangError.error.type, AppHangsMonitor.Constants.appHangErrorType)
XCTAssertEqual(appHangError.error.category, .appHang)
XCTAssertTrue(expectedHangDurationRangeNs.contains(actualHangDuration))
}
Expand All @@ -74,8 +74,8 @@ class AppHangsMonitoringTests: XCTestCase {
let appHangError = try XCTUnwrap(errors.first)
let actualHangDuration = try XCTUnwrap(appHangError.freeze?.duration)

XCTAssertEqual(appHangError.error.message, AppHangsObserver.Constants.appHangErrorMessage)
XCTAssertEqual(appHangError.error.type, AppHangsObserver.Constants.appHangErrorType)
XCTAssertEqual(appHangError.error.message, AppHangsMonitor.Constants.appHangErrorMessage)
XCTAssertEqual(appHangError.error.type, AppHangsMonitor.Constants.appHangErrorType)
XCTAssertEqual(appHangError.error.category, .appHang)
XCTAssertTrue(expectedHangDurationRangeNs.contains(actualHangDuration))
}
Expand Down Expand Up @@ -104,8 +104,8 @@ class AppHangsMonitoringTests: XCTestCase {
let appHangError = try XCTUnwrap(errors.first)
let mainThreadStack = try XCTUnwrap(appHangError.error.stack)

XCTAssertEqual(appHangError.error.message, AppHangsObserver.Constants.appHangErrorMessage)
XCTAssertEqual(appHangError.error.type, AppHangsObserver.Constants.appHangErrorType)
XCTAssertEqual(appHangError.error.message, AppHangsMonitor.Constants.appHangErrorMessage)
XCTAssertEqual(appHangError.error.type, AppHangsMonitor.Constants.appHangErrorType)
XCTAssertTrue(mainThreadStack.contains(uiKitLibraryName), "Main thread stack should include UIKit symbols")
XCTAssertEqual(appHangError.error.source, .source)
XCTAssertNotNil(appHangError.error.threads, "Other threads should be available")
Expand All @@ -128,9 +128,9 @@ class AppHangsMonitoringTests: XCTestCase {
let errors = core.waitAndReturnEvents(ofFeature: RUMFeature.name, ofType: RUMErrorEvent.self)
let appHangError = try XCTUnwrap(errors.first)

XCTAssertEqual(appHangError.error.message, AppHangsObserver.Constants.appHangErrorMessage)
XCTAssertEqual(appHangError.error.type, AppHangsObserver.Constants.appHangErrorType)
XCTAssertEqual(appHangError.error.stack, AppHangsObserver.Constants.appHangStackNotAvailableErrorMessage)
XCTAssertEqual(appHangError.error.message, AppHangsMonitor.Constants.appHangErrorMessage)
XCTAssertEqual(appHangError.error.type, AppHangsMonitor.Constants.appHangErrorType)
XCTAssertEqual(appHangError.error.stack, AppHangsMonitor.Constants.appHangStackNotAvailableErrorMessage)
XCTAssertEqual(appHangError.error.source, .source)
XCTAssertNil(appHangError.error.threads, "Threads should be unavailable as CrashReporting was not enabled")
XCTAssertNil(appHangError.error.binaryImages, "Binary Images should be unavailable as CrashReporting was not enabled")
Expand Down
3 changes: 3 additions & 0 deletions DatadogInternal/Sources/DataStore/DataStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ public protocol DataStore {
/// - Parameters:
/// - key: The unique identifier for the data. Must be a valid file name, as it will be persisted in files.
/// - callback: A closure providing the result asynchronously on an internal queue.
///
/// Note: The implementation must log errors to the console and report them through telemetry. Callers are not
/// required to log errors themselves when they receive an `.error()` result.
func value(forKey key: String, callback: @escaping (DataStoreValueResult) -> Void)

/// Deletes the value associated with the specified key from the data store.
Expand Down
77 changes: 77 additions & 0 deletions DatadogRUM/Sources/Feature/RUMDataStore.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0.
* This product includes software developed at Datadog (https://www.datadoghq.com/).
* Copyright 2019-Present Datadog, Inc.
*/

import Foundation
import DatadogInternal

internal extension FeatureScope {
    /// A RUM-flavoured data store endpoint backed by this feature scope.
    var rumDataStore: RUMDataStore {
        return RUMDataStore(featureScope: self)
    }

    /// Provides the RUM data store together with the SDK context.
    ///
    /// Forwards to `dataStoreContext` and calls `block` with the context it delivers and a `RUMDataStore`
    /// built on this scope. The data store passed by `dataStoreContext` is not used directly.
    ///
    /// - Parameter block: The closure receiving the SDK context and the RUM data store.
    func rumDataStoreContext(_ block: @escaping (DatadogContext, RUMDataStore) -> Void) {
        dataStoreContext { context, _ in
            block(context, self.rumDataStore)
        }
    }
}

/// Type-safe facade over the feature's data store, used by RUM.
///
/// Values are serialized as JSON and addressed through `Key` cases instead of raw strings.
/// Encoding and decoding failures are printed with `DD.logger` and reported to telemetry.
internal struct RUMDataStore {
    /// Keys under which RUM persists its values.
    internal enum Key: String {
        /// References pending App Hang information.
        /// If found during app start it is considered a fatal hang in previous process.
        case fatalAppHangKey = "fatal-app-hang"
    }

    /// Shared encoder for values written to the RUM data store.
    private static let encoder = JSONEncoder()
    /// Shared decoder for values read from the RUM data store.
    private static let decoder = JSONDecoder()

    /// RUM feature scope that provides the underlying data store and telemetry.
    let featureScope: FeatureScope

    /// Encodes `value` as JSON and writes it under `key`.
    ///
    /// If encoding fails, nothing is written; the failure is logged and sent to telemetry.
    ///
    /// - Parameters:
    ///   - value: The value to persist.
    ///   - key: The key to store the value under.
    ///   - version: The version to tag the stored value with.
    func setValue<V: Codable>(_ value: V, forKey key: Key, version: DataStoreKeyVersion = dataStoreDefaultKeyVersion) {
        do {
            let encoded = try Self.encoder.encode(value)
            featureScope.dataStore.setValue(encoded, forKey: key.rawValue, version: version)
        } catch {
            let message = "Failed to encode \(type(of: value)) in RUM Data Store"
            DD.logger.error(message)
            featureScope.telemetry.error(message, error: error)
        }
    }

    /// Reads the value stored under `key` and decodes it from JSON.
    ///
    /// - Parameters:
    ///   - key: The key to read.
    ///   - version: The version the stored value is expected to have.
    ///   - callback: Receives the decoded value, or `nil` when no data is available for the expected
    ///     version or when the data cannot be decoded as `V`.
    func value<V: Codable>(forKey key: Key, version: DataStoreKeyVersion = dataStoreDefaultKeyVersion, callback: @escaping (V?) -> Void) {
        featureScope.dataStore.value(forKey: key.rawValue) { result in
            // No data means one of:
            // - there is no value for this key,
            // - the value exists but in a different version (skipped),
            // - reading failed (already logged in telemetry by `store`).
            guard let data = result.data(expectedVersion: version) else {
                callback(nil)
                return
            }

            let decoded: V?
            do {
                decoded = try Self.decoder.decode(V.self, from: data)
            } catch {
                let message = "Failed to decode \(V.self) from RUM Data Store"
                DD.logger.error(message)
                featureScope.telemetry.error(message, error: error)
                decoded = nil
            }
            callback(decoded)
        }
    }

    /// Removes the value stored under `key`.
    func removeValue(forKey key: Key) {
        featureScope.dataStore.removeValue(forKey: key.rawValue)
    }
}
4 changes: 3 additions & 1 deletion DatadogRUM/Sources/Feature/RUMFeature.swift
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,16 @@ internal final class RUMFeature: DatadogRemoteFeature {
)

self.instrumentation = RUMInstrumentation(
featureScope: featureScope,
uiKitRUMViewsPredicate: configuration.uiKitViewsPredicate,
uiKitRUMActionsPredicate: configuration.uiKitActionsPredicate,
longTaskThreshold: configuration.longTaskThreshold,
appHangThreshold: configuration.appHangThreshold,
mainQueue: configuration.mainQueue,
dateProvider: configuration.dateProvider,
backtraceReporter: core.backtraceReporter,
telemetry: core.telemetry
fatalErrorContext: dependencies.fatalErrorContext,
processID: configuration.processID
)
self.requestBuilder = RequestBuilder(
customIntakeURL: configuration.customEndpoint,
Expand Down
15 changes: 15 additions & 0 deletions DatadogRUM/Sources/Instrumentation/AppHangs/AppHang.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,22 @@ internal struct AppHang: Codable {
}

/// The date of hang start.
/// It is defined as device time, without considering NTP offset.
let startDate: Date
/// The result of generating backtrace for the hang.
let backtraceResult: BacktraceGenerationResult
}

/// Persisted information on an App Hang that may become fatal.
///
/// It encodes all the information necessary to report the error on app restart.
internal struct FatalAppHang: Codable {
/// An identifier of the process that the hang was recorded in.
let processID: UUID
/// The actual hang that was recorded.
let hang: AppHang
/// Interval between device and server time.
let serverTimeOffset: TimeInterval
/// The last RUM view at the moment of the hang's recording.
let lastRUMView: RUMViewEvent
}
99 changes: 99 additions & 0 deletions DatadogRUM/Sources/Instrumentation/AppHangs/AppHangsMonitor.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0.
* This product includes software developed at Datadog (https://www.datadoghq.com/).
* Copyright 2019-Present Datadog, Inc.
*/

import Foundation
import DatadogInternal

/// Wires the App Hangs watchdog thread to the handlers of fatal and non-fatal hangs.
internal final class AppHangsMonitor {
    enum Constants {
        /// The standardized `error.message` for RUM errors describing an app hang.
        static let appHangErrorMessage = "App Hang"
        /// The standardized `error.type` for RUM errors describing an app hang.
        static let appHangErrorType = "AppHang"
        /// The standardized `error.stack` when backtrace generation was not available.
        static let appHangStackNotAvailableErrorMessage = "Stack trace was not generated because `DatadogCrashReporting` had not been enabled."
        /// The standardized `error.stack` when backtrace generation failed due to an internal error.
        static let appHangStackGenerationFailedErrorMessage = "Failed to generate stack trace. This is a known issue and we work on it."
    }

    /// Watchdog thread that monitors the main queue for App Hangs.
    private let watchdogThread: AppHangsObservingThread
    /// Handles non-fatal App Hangs.
    internal let nonFatalHangsHandler: NonFatalAppHangsHandler
    /// Handles fatal App Hangs.
    internal let fatalHangsHandler: FatalAppHangsHandler

    /// Creates the monitor with an `AppHangsWatchdogThread` observing `observedQueue`.
    ///
    /// - Parameters:
    ///   - featureScope: RUM feature scope; also the source of telemetry for the watchdog thread.
    ///   - appHangThreshold: The hang threshold passed to the watchdog thread.
    ///   - observedQueue: The queue passed to the watchdog thread for observation.
    ///   - backtraceReporter: Backtrace reporter passed to the watchdog thread.
    ///   - fatalErrorContext: Passed to the fatal App Hangs handler.
    ///   - dateProvider: Date provider passed to the watchdog thread.
    ///   - processID: Identifier of the current process, passed to the fatal App Hangs handler.
    convenience init(
        featureScope: FeatureScope,
        appHangThreshold: TimeInterval,
        observedQueue: DispatchQueue,
        backtraceReporter: BacktraceReporting,
        fatalErrorContext: FatalErrorContextNotifier,
        dateProvider: DateProvider,
        processID: UUID
    ) {
        let watchdog = AppHangsWatchdogThread(
            appHangThreshold: appHangThreshold,
            queue: observedQueue,
            dateProvider: dateProvider,
            backtraceReporter: backtraceReporter,
            telemetry: featureScope.telemetry
        )
        self.init(
            featureScope: featureScope,
            watchdogThread: watchdog,
            fatalErrorContext: fatalErrorContext,
            processID: processID
        )
    }

    /// Creates the monitor around an already constructed observing thread.
    ///
    /// - Parameters:
    ///   - featureScope: RUM feature scope, passed to the fatal App Hangs handler.
    ///   - watchdogThread: The thread whose hang callbacks are routed to the handlers.
    ///   - fatalErrorContext: Passed to the fatal App Hangs handler.
    ///   - processID: Identifier of the current process, passed to the fatal App Hangs handler.
    init(
        featureScope: FeatureScope,
        watchdogThread: AppHangsObservingThread,
        fatalErrorContext: FatalErrorContextNotifier,
        processID: UUID
    ) {
        self.watchdogThread = watchdogThread
        self.nonFatalHangsHandler = NonFatalAppHangsHandler()
        self.fatalHangsHandler = FatalAppHangsHandler(
            featureScope: featureScope,
            fatalErrorContext: fatalErrorContext,
            processID: processID
        )
    }

    /// Asks the fatal handler to report a previously recorded fatal App Hang (if found), then
    /// installs the hang callbacks on the watchdog thread and starts it.
    func start() {
        fatalHangsHandler.reportFatalAppHangIfFound()

        watchdogThread.onHangStarted = { [weak self] startedHang in
            self?.fatalHangsHandler.startHang(hang: startedHang)
        }
        watchdogThread.onHangCancelled = { [weak self] _ in
            self?.fatalHangsHandler.cancelHang()
        }
        watchdogThread.onHangEnded = { [weak self] endedHang, hangDuration in
            guard let self = self else {
                return
            }
            // The fatal handler is notified first, then the ended hang goes to the non-fatal handler.
            self.fatalHangsHandler.endHang()
            self.nonFatalHangsHandler.endHang(appHang: endedHang, duration: hangDuration)
        }

        watchdogThread.start()
    }

    /// Stops the watchdog thread and removes the hang callbacks installed by `start()`.
    func stop() {
        watchdogThread.stop()
        watchdogThread.onHangStarted = nil
        watchdogThread.onHangCancelled = nil
        watchdogThread.onHangEnded = nil
    }
}

extension AppHangsMonitor {
    /// Waits until the pending app hang has been processed.
    ///
    /// Installs an `onBeforeSleep` callback on the watchdog thread and blocks until it is invoked.
    ///
    /// Note: This method is synchronous and will block the caller thread, in the worst case for up to `appHangThreshold`.
    func flush() {
        let beforeSleepReached = DispatchSemaphore(value: 0)
        watchdogThread.onBeforeSleep = {
            beforeSleepReached.signal()
        }
        beforeSleepReached.wait()
    }
}
119 changes: 0 additions & 119 deletions DatadogRUM/Sources/Instrumentation/AppHangs/AppHangsObserver.swift

This file was deleted.

Loading

0 comments on commit 77a811a

Please sign in to comment.