diff --git a/components/service/firefox-accounts/src/main/java/mozilla/components/service/fxa/sync/WorkManagerSyncManager.kt b/components/service/firefox-accounts/src/main/java/mozilla/components/service/fxa/sync/WorkManagerSyncManager.kt index f6701831da8..b4715955ae3 100644 --- a/components/service/firefox-accounts/src/main/java/mozilla/components/service/fxa/sync/WorkManagerSyncManager.kt +++ b/components/service/firefox-accounts/src/main/java/mozilla/components/service/fxa/sync/WorkManagerSyncManager.kt @@ -435,22 +435,7 @@ class WorkManagerSyncWorker( } // Process telemetry. - syncResult.telemetry?.let { - // Yes, this is non-ideal... - // But, what this does: individual 'process' function will report global sync errors - // as part of its corresponding ping. We don't want to report a global sync error multiple times, - // so we check for the boolean flag that indicates if this happened or not. - // There's a complete mismatch between what Glean supports and what we need it to do here. - // Glean doesn't support "nested metrics" and so we resort to these hacks. - // It shouldn't matter in which order these 'process' functions are called. - var noGlobalErrorsReported = SyncTelemetry.processBookmarksPing(it) - if (noGlobalErrorsReported) { - noGlobalErrorsReported = SyncTelemetry.processHistoryPing(it) - } - if (noGlobalErrorsReported) { - SyncTelemetry.processPasswordsPing(it) - } - } + syncResult.telemetry?.let { SyncTelemetry.processSyncTelemetry(it) } // Finally, declare success, failure or request a retry based on 'sync status'. return when (syncResult.status) { diff --git a/components/service/sync-logins/src/main/java/mozilla/components/service/sync/logins/AsyncLoginsStorage.kt b/components/service/sync-logins/src/main/java/mozilla/components/service/sync/logins/AsyncLoginsStorage.kt index 31503a090d7..d5d2f28d887 100644 --- a/components/service/sync-logins/src/main/java/mozilla/components/service/sync/logins/AsyncLoginsStorage.kt +++ b/components/service/sync-logins/src/main/java/mozilla/components/service/sync/logins/AsyncLoginsStorage.kt @@ -333,7 +333,7 @@ open class AsyncLoginsStorageAdapter(private val wrapped: T) override fun sync(syncInfo: SyncUnlockInfo): Deferred { return scope.async { val ping = wrapped.sync(syncInfo) - SyncTelemetry.processPasswordsPing(ping) + SyncTelemetry.processLoginsPing(ping) ping } } diff --git a/components/support/sync-telemetry/docs/metrics.md b/components/support/sync-telemetry/docs/metrics.md index a87af317312..aff9345c5f9 100644 --- a/components/support/sync-telemetry/docs/metrics.md +++ b/components/support/sync-telemetry/docs/metrics.md @@ -10,6 +10,7 @@ This means you might have to go searching through the dependency tree to get a f - [bookmarks-sync](#bookmarks-sync) - [history-sync](#history-sync) - [logins-sync](#logins-sync) + - [sync](#sync) ## bookmarks-sync @@ -50,13 +51,26 @@ The following metrics are added to the ping: | Name | Type | Description | Data reviews | Extras | Expiration | | --- | --- | --- | --- | --- | --- | +| bookmarks_sync.sync_uuid |[uuid](https://mozilla.github.io/glean/book/user/metrics/uuid.html) |Unique identifier for the overarching sync, used to correlate this ping together with other individual sync pings (history, logins) and the global sync ping. |[1](TODO)||never | +| history_sync.sync_uuid |[uuid](https://mozilla.github.io/glean/book/user/metrics/uuid.html) |Unique identifier for the overarching sync, used to correlate this ping together with other individual sync pings (bookmarks, logins) and the global sync ping. |[1](TODO)||never | | logins_sync.failure_reason |[labeled_string](https://mozilla.github.io/glean/book/user/metrics/labeled_strings.html) |Records why the passwords sync failed: either due to an authentication error, unexpected exception, or other error. The error strings are truncated and sanitized to omit PII, like usernames and passwords. |[1](https://github.com/mozilla-mobile/android-components/pull/5294)||never | | logins_sync.finished_at |[datetime](https://mozilla.github.io/glean/book/user/metrics/datetime.html) |Records when the passwords sync finished. This includes the time to download, apply, and upload all records. |[1](https://github.com/mozilla-mobile/android-components/pull/5294)||never | | logins_sync.incoming |[labeled_counter](https://mozilla.github.io/glean/book/user/metrics/labeled_counters.html) |Records incoming passwords record counts. `applied` is the number of incoming passwords entries that were successfully stored or updated in the local database. `failed_to_apply` is the number of entries that were ignored due to errors. `reconciled` is the number of entries with changes both locally and remotely that were merged. |[1](https://github.com/mozilla-mobile/android-components/pull/5294)||never | | logins_sync.outgoing |[labeled_counter](https://mozilla.github.io/glean/book/user/metrics/labeled_counters.html) |Records outgoing passwords record counts. `uploaded` is the number of records that were successfully sent to the server. `failed_to_upload` is the number of records that weren't uploaded, and will be retried on the next sync. |[1](https://github.com/mozilla-mobile/android-components/pull/5294)||never | | logins_sync.outgoing_batches |[counter](https://mozilla.github.io/glean/book/user/metrics/counter.html) |Records the number of batches needed to upload all outgoing records. The Sync server has a hard limit on the number of records (and request body bytes) on the number of records that can fit into a single batch, and large syncs may require multiple batches. |[1](https://github.com/mozilla-mobile/android-components/pull/5294)||never | | logins_sync.started_at |[datetime](https://mozilla.github.io/glean/book/user/metrics/datetime.html) |Records when the passwords sync started. |[1](https://github.com/mozilla-mobile/android-components/pull/5294)||never | +| logins_sync.sync_uuid |[uuid](https://mozilla.github.io/glean/book/user/metrics/uuid.html) |Unique identifier for the overarching sync, used to correlate this ping together with other individual sync pings (bookmarks, history) and the global sync ping. |[1](TODO)||never | | logins_sync.uid |[string](https://mozilla.github.io/glean/book/user/metrics/string.html) |The user's hashed Firefox Account ID. |[1](https://github.com/mozilla-mobile/android-components/pull/5294)||never | +| sync.sync_uuid |[uuid](https://mozilla.github.io/glean/book/user/metrics/uuid.html) |Unique identifier for this sync, used to correlate together individual sync pings (history, bookmarks, logins). |[1](TODO)||never | + +## sync +A summary sync ping, sent for every sync. It doesn't include the `client_id` because it reports a hashed version of the user's Firefox Account ID. + +The following metrics are added to the ping: + +| Name | Type | Description | Data reviews | Extras | Expiration | +| --- | --- | --- | --- | --- | --- | +| sync.failure_reason |[labeled_string](https://mozilla.github.io/glean/book/user/metrics/labeled_strings.html) |Records a global sync failure: either due to an authentication error, unexpected exception, or other error. The error strings are truncated and sanitized to omit PII, like URLs and file system paths. |[1](https://github.com/mozilla-mobile/android-components/pull/3092)||never | diff --git a/components/support/sync-telemetry/metrics.yaml b/components/support/sync-telemetry/metrics.yaml index 48abd33bc23..0dade2237ab 100644 --- a/components/support/sync-telemetry/metrics.yaml +++ b/components/support/sync-telemetry/metrics.yaml @@ -4,11 +4,61 @@ $schema: moz://mozilla.org/schemas/glean/metrics/1-0-0 +sync: + sync_uuid: + type: uuid + description: > + Unique identifier for this sync, used to correlate together individual sync pings (history, bookmarks, logins). + send_in_pings: + - logins-sync + bugs: + - https://github.com/mozilla-mobile/android-components/issues/5371 + data_reviews: + - TODO + notification_emails: + - sync-core@mozilla.com + expires: never + lifetime: ping + failure_reason: + type: labeled_string + labels: + - other + - unexpected + - auth + description: > + Records a global sync failure: either due to an authentication error, unexpected exception, + or other error that caused the sync to fail. Error strings are truncated and sanitized to omit + PII, like URLs and file system paths. + send_in_pings: + - sync + bugs: + - https://github.com/mozilla-mobile/android-components/pull/3092 + data_reviews: + - https://github.com/mozilla-mobile/android-components/pull/3092 + notification_emails: + - sync-core@mozilla.com + expires: never + lifetime: ping + # `history-sync`, `logins-sync` and `bookmarks-sync` metrics all use the same structure, # but must be specified individually. We can't define them once and use # `send_in_pings` because the stores might be synced in parallel, and we can't # guarantee that a ping for one store would be sent before the others. history_sync: + sync_uuid: + type: uuid + description: > + Unique identifier for the overarching sync, used to correlate this ping together with other individual sync pings (bookmarks, logins) and the global sync ping. + send_in_pings: + - logins-sync + bugs: + - https://github.com/mozilla-mobile/android-components/issues/5371 + data_reviews: + - TODO + notification_emails: + - sync-core@mozilla.com + expires: never + lifetime: ping uid: type: string description: > @@ -135,6 +185,20 @@ history_sync: lifetime: ping logins_sync: + sync_uuid: + type: uuid + description: > + Unique identifier for the overarching sync, used to correlate this ping together with other individual sync pings (bookmarks, history) and the global sync ping. + send_in_pings: + - logins-sync + bugs: + - https://github.com/mozilla-mobile/android-components/issues/5371 + data_reviews: + - TODO + notification_emails: + - sync-core@mozilla.com + expires: never + lifetime: ping uid: type: string description: > @@ -261,6 +325,20 @@ logins_sync: lifetime: ping bookmarks_sync: + sync_uuid: + type: uuid + description: > + Unique identifier for the overarching sync, used to correlate this ping together with other individual sync pings (history, logins) and the global sync ping. + send_in_pings: + - logins-sync + bugs: + - https://github.com/mozilla-mobile/android-components/issues/5371 + data_reviews: + - TODO + notification_emails: + - sync-core@mozilla.com + expires: never + lifetime: ping uid: type: string description: > diff --git a/components/support/sync-telemetry/pings.yaml b/components/support/sync-telemetry/pings.yaml index b6ba738522a..1db1bb74e25 100644 --- a/components/support/sync-telemetry/pings.yaml +++ b/components/support/sync-telemetry/pings.yaml @@ -4,6 +4,17 @@ $schema: moz://mozilla.org/schemas/glean/pings/1-0-0 +sync: + description: > + A summary sync ping, sent for every sync. + It doesn't include the `client_id` because it reports a hashed version of the user's Firefox Account ID. + include_client_id: false + bugs: + - https://github.com/mozilla-mobile/android-components/issues/5371 + notification_emails: + - sync-core@mozilla.com + data_reviews: + - TODO history-sync: description: > A ping sent for every history sync. It doesn't include the `client_id` diff --git a/components/support/sync-telemetry/src/main/java/mozilla/components/support/sync/telemetry/SyncTelemetry.kt b/components/support/sync-telemetry/src/main/java/mozilla/components/support/sync/telemetry/SyncTelemetry.kt index edc6dee2461..68a9e19a36b 100644 --- a/components/support/sync-telemetry/src/main/java/mozilla/components/support/sync/telemetry/SyncTelemetry.kt +++ b/components/support/sync-telemetry/src/main/java/mozilla/components/support/sync/telemetry/SyncTelemetry.kt @@ -4,28 +4,78 @@ package mozilla.components.support.sync.telemetry +import mozilla.appservices.sync15.EngineInfo import mozilla.appservices.sync15.FailureName import mozilla.appservices.sync15.FailureReason import mozilla.appservices.sync15.SyncTelemetryPing import mozilla.components.service.glean.private.LabeledMetricType import mozilla.components.service.glean.private.StringMetricType +import mozilla.components.support.base.log.logger.Logger import mozilla.components.support.sync.telemetry.GleanMetrics.BookmarksSync import mozilla.components.support.sync.telemetry.GleanMetrics.HistorySync import mozilla.components.support.sync.telemetry.GleanMetrics.LoginsSync +import mozilla.components.support.sync.telemetry.GleanMetrics.Sync import mozilla.components.support.sync.telemetry.GleanMetrics.Pings +import java.util.UUID const val MAX_FAILURE_REASON_LENGTH = 100 /** * Contains functionality necessary to process instances of [SyncTelemetryPing]. */ +@Suppress("LargeClass") object SyncTelemetry { + private val logger = Logger("SyncTelemetry") + + /** + * Process [SyncTelemetryPing] as returned from [mozilla.appservices.syncmanager.SyncManager]. + */ + fun processSyncTelemetry(syncTelemetry: SyncTelemetryPing) { + syncTelemetry.syncs.forEach { syncInfo -> + val syncUuid = UUID.randomUUID() + Sync.syncUuid.set(syncUuid) + + // It's possible for us to sync some engines, and then get a hard error that fails the + // entire sync. Examples of such errors are an HTTP server error, token authentication + // error, or other kind of network error. + // We can have some engines that succeed (and others that fail, with different reasons) + // and still have a global failure_reason. + syncInfo.failureReason?.let { failureReason -> + recordFailureReason(failureReason, Sync.failureReason) + } + + syncInfo.engines.forEach { engineInfo -> + when (engineInfo.name) { + "bookmarks" -> { + individualBookmarksSync(syncTelemetry.uid, syncUuid, engineInfo) + Pings.bookmarksSync.send() + } + "history" -> { + individualHistorySync(syncTelemetry.uid, syncUuid, engineInfo) + Pings.historySync.send() + } + "passwords" -> { + individualLoginsSync(syncTelemetry.uid, syncUuid, engineInfo) + Pings.loginsSync.send() + } + else -> logger.warn("Ignoring telemetry for engine ${engineInfo.name}") + } + } + + Pings.sync.send() + } + } + /** * Processes a history-related ping information from the [ping]. * @return 'false' if global error was encountered, 'true' otherwise. */ @Suppress("ComplexMethod", "NestedBlockDepth") - fun processHistoryPing(ping: SyncTelemetryPing, sendPing: () -> Unit = { Pings.historySync.send() }): Boolean { + fun processHistoryPing( + ping: SyncTelemetryPing, + globalSyncUuid: UUID = UUID.randomUUID(), + sendPing: () -> Unit = { Pings.historySync.send() } + ): Boolean { ping.syncs.forEach eachSync@{ sync -> sync.failureReason?.let { recordFailureReason(it, HistorySync.failureReason) @@ -36,36 +86,7 @@ object SyncTelemetry { if (engine.name != "history") { return@eachEngine } - HistorySync.apply { - val base = BaseGleanSyncPing.fromEngineInfo(ping.uid, engine) - uid.set(base.uid) - startedAt.set(base.startedAt) - finishedAt.set(base.finishedAt) - if (base.applied > 0) { - // Since all Sync ping counters have `lifetime: ping`, and - // we send the ping immediately after, we don't need to - // reset the counters before calling `add`. - incoming["applied"].add(base.applied) - } - if (base.failedToApply > 0) { - incoming["failed_to_apply"].add(base.failedToApply) - } - if (base.reconciled > 0) { - incoming["reconciled"].add(base.reconciled) - } - if (base.uploaded > 0) { - outgoing["uploaded"].add(base.uploaded) - } - if (base.failedToUpload > 0) { - outgoing["failed_to_upload"].add(base.failedToUpload) - } - if (base.outgoingBatches > 0) { - outgoingBatches.add(base.outgoingBatches) - } - base.failureReason?.let { - recordFailureReason(it, failureReason) - } - } + individualHistorySync(ping.uid, globalSyncUuid, engine) sendPing() } } @@ -77,7 +98,11 @@ object SyncTelemetry { * @return 'false' if global error was encountered, 'true' otherwise. */ @Suppress("ComplexMethod", "NestedBlockDepth") - fun processPasswordsPing(ping: SyncTelemetryPing, sendPing: () -> Unit = { Pings.loginsSync.send() }): Boolean { + fun processLoginsPing( + ping: SyncTelemetryPing, + globalSyncUuid: UUID = UUID.randomUUID(), + sendPing: () -> Unit = { Pings.loginsSync.send() } + ): Boolean { ping.syncs.forEach eachSync@{ sync -> sync.failureReason?.let { recordFailureReason(it, LoginsSync.failureReason) @@ -88,36 +113,7 @@ object SyncTelemetry { if (engine.name != "passwords") { return@eachEngine } - LoginsSync.apply { - val base = BaseGleanSyncPing.fromEngineInfo(ping.uid, engine) - uid.set(base.uid) - startedAt.set(base.startedAt) - finishedAt.set(base.finishedAt) - if (base.applied > 0) { - // Since all Sync ping counters have `lifetime: ping`, and - // we send the ping immediately after, we don't need to - // reset the counters before calling `add`. - incoming["applied"].add(base.applied) - } - if (base.failedToApply > 0) { - incoming["failed_to_apply"].add(base.failedToApply) - } - if (base.reconciled > 0) { - incoming["reconciled"].add(base.reconciled) - } - if (base.uploaded > 0) { - outgoing["uploaded"].add(base.uploaded) - } - if (base.failedToUpload > 0) { - outgoing["failed_to_upload"].add(base.failedToUpload) - } - if (base.outgoingBatches > 0) { - outgoingBatches.add(base.outgoingBatches) - } - base.failureReason?.let { - recordFailureReason(it, failureReason) - } - } + individualLoginsSync(ping.uid, globalSyncUuid, engine) sendPing() } } @@ -129,7 +125,11 @@ object SyncTelemetry { * @return 'false' if global error was encountered, 'true' otherwise. */ @Suppress("ComplexMethod", "NestedBlockDepth") - fun processBookmarksPing(ping: SyncTelemetryPing, sendPing: () -> Unit = { Pings.bookmarksSync.send() }): Boolean { + fun processBookmarksPing( + ping: SyncTelemetryPing, + globalSyncUuid: UUID = UUID.randomUUID(), + sendPing: () -> Unit = { Pings.bookmarksSync.send() } + ): Boolean { // This function is almost identical to `recordHistoryPing`, with additional // reporting for validation problems. Unfortunately, since the // `BookmarksSync` and `HistorySync` metrics are two separate objects, we @@ -146,44 +146,124 @@ object SyncTelemetry { if (engine.name != "bookmarks") { return@eachEngine } - BookmarksSync.apply { - val base = BaseGleanSyncPing.fromEngineInfo(ping.uid, engine) - uid.set(base.uid) - startedAt.set(base.startedAt) - finishedAt.set(base.finishedAt) - if (base.applied > 0) { - incoming["applied"].add(base.applied) - } - if (base.failedToApply > 0) { - incoming["failed_to_apply"].add(base.failedToApply) - } - if (base.reconciled > 0) { - incoming["reconciled"].add(base.reconciled) - } - if (base.uploaded > 0) { - outgoing["uploaded"].add(base.uploaded) - } - if (base.failedToUpload > 0) { - outgoing["failed_to_upload"].add(base.failedToUpload) - } - if (base.outgoingBatches > 0) { - outgoingBatches.add(base.outgoingBatches) - } - base.failureReason?.let { - recordFailureReason(it, failureReason) - } - engine.validation?.let { - it.problems.forEach { - remoteTreeProblems[it.name].add(it.count) - } - } - } + individualBookmarksSync(ping.uid, globalSyncUuid, engine) sendPing() } } return true } + private fun individualLoginsSync(hashedFxaUid: String, globalSyncUuid: UUID, engineInfo: EngineInfo) { + require(engineInfo.name == "passwords") { "Expected 'passwords', got ${engineInfo.name}" } + + LoginsSync.apply { + syncUuid.set(globalSyncUuid) + val base = BaseGleanSyncPing.fromEngineInfo(hashedFxaUid, engineInfo) + uid.set(base.uid) + startedAt.set(base.startedAt) + finishedAt.set(base.finishedAt) + if (base.applied > 0) { + // Since all Sync ping counters have `lifetime: ping`, and + // we send the ping immediately after, we don't need to + // reset the counters before calling `add`. + incoming["applied"].add(base.applied) + } + if (base.failedToApply > 0) { + incoming["failed_to_apply"].add(base.failedToApply) + } + if (base.reconciled > 0) { + incoming["reconciled"].add(base.reconciled) + } + if (base.uploaded > 0) { + outgoing["uploaded"].add(base.uploaded) + } + if (base.failedToUpload > 0) { + outgoing["failed_to_upload"].add(base.failedToUpload) + } + if (base.outgoingBatches > 0) { + outgoingBatches.add(base.outgoingBatches) + } + base.failureReason?.let { + recordFailureReason(it, failureReason) + } + } + } + + @Suppress("ComplexMethod") + private fun individualBookmarksSync(hashedFxaUid: String, globalSyncUuid: UUID, engineInfo: EngineInfo) { + require(engineInfo.name == "bookmarks") { "Expected 'bookmarks', got ${engineInfo.name}" } + + BookmarksSync.apply { + syncUuid.set(globalSyncUuid) + val base = BaseGleanSyncPing.fromEngineInfo(hashedFxaUid, engineInfo) + uid.set(base.uid) + startedAt.set(base.startedAt) + finishedAt.set(base.finishedAt) + if (base.applied > 0) { + incoming["applied"].add(base.applied) + } + if (base.failedToApply > 0) { + incoming["failed_to_apply"].add(base.failedToApply) + } + if (base.reconciled > 0) { + incoming["reconciled"].add(base.reconciled) + } + if (base.uploaded > 0) { + outgoing["uploaded"].add(base.uploaded) + } + if (base.failedToUpload > 0) { + outgoing["failed_to_upload"].add(base.failedToUpload) + } + if (base.outgoingBatches > 0) { + outgoingBatches.add(base.outgoingBatches) + } + base.failureReason?.let { + recordFailureReason(it, failureReason) + } + engineInfo.validation?.let { + it.problems.forEach { problemInfo -> + remoteTreeProblems[problemInfo.name].add(problemInfo.count) + } + } + } + } + + private fun individualHistorySync(hashedFxaUid: String, globalSyncUuid: UUID, engineInfo: EngineInfo) { + require(engineInfo.name == "history") { "Expected 'history', got ${engineInfo.name}" } + + HistorySync.apply { + syncUuid.set(globalSyncUuid) + val base = BaseGleanSyncPing.fromEngineInfo(hashedFxaUid, engineInfo) + uid.set(base.uid) + startedAt.set(base.startedAt) + finishedAt.set(base.finishedAt) + if (base.applied > 0) { + // Since all Sync ping counters have `lifetime: ping`, and + // we send the ping immediately after, we don't need to + // reset the counters before calling `add`. + incoming["applied"].add(base.applied) + } + if (base.failedToApply > 0) { + incoming["failed_to_apply"].add(base.failedToApply) + } + if (base.reconciled > 0) { + incoming["reconciled"].add(base.reconciled) + } + if (base.uploaded > 0) { + outgoing["uploaded"].add(base.uploaded) + } + if (base.failedToUpload > 0) { + outgoing["failed_to_upload"].add(base.failedToUpload) + } + if (base.outgoingBatches > 0) { + outgoingBatches.add(base.outgoingBatches) + } + base.failureReason?.let { + recordFailureReason(it, failureReason) + } + } + } + private fun recordFailureReason(reason: FailureReason, failureReasonMetric: LabeledMetricType) { val metric = when (reason.name) { FailureName.Other, FailureName.Unknown -> failureReasonMetric["other"] diff --git a/components/support/sync-telemetry/src/test/java/mozilla/components/support/sync/telemetry/SyncTelemetryTest.kt b/components/support/sync-telemetry/src/test/java/mozilla/components/support/sync/telemetry/SyncTelemetryTest.kt index 16506d68bcf..eea424818b0 100644 --- a/components/support/sync-telemetry/src/test/java/mozilla/components/support/sync/telemetry/SyncTelemetryTest.kt +++ b/components/support/sync-telemetry/src/test/java/mozilla/components/support/sync/telemetry/SyncTelemetryTest.kt @@ -327,7 +327,7 @@ class SyncTelemetryTest { @Test fun `sends passwords telemetry pings on success`() { - val noGlobalError = SyncTelemetry.processPasswordsPing(SyncTelemetryPing( + val noGlobalError = SyncTelemetry.processLoginsPing(SyncTelemetryPing( version = 1, uid = "abc123", syncs = listOf( @@ -437,7 +437,7 @@ class SyncTelemetryTest { @Test fun `sends passwords telemetry pings on engine failure`() { - val noGlobalError = SyncTelemetry.processPasswordsPing(SyncTelemetryPing( + val noGlobalError = SyncTelemetry.processLoginsPing(SyncTelemetryPing( version = 1, uid = "abc123", syncs = listOf( @@ -578,7 +578,7 @@ class SyncTelemetryTest { @Test fun `sends passwords telemetry pings on sync failure`() { - val noGlobalError = SyncTelemetry.processPasswordsPing(SyncTelemetryPing( + val noGlobalError = SyncTelemetry.processLoginsPing(SyncTelemetryPing( version = 1, uid = "abc123", syncs = listOf(