diff --git a/changelog.d/88.feature b/changelog.d/88.feature new file mode 100644 index 00000000..725ff1c9 --- /dev/null +++ b/changelog.d/88.feature @@ -0,0 +1 @@ +Add prometheus metrics to track pushkin things. diff --git a/sygnal/apnspushkin.py b/sygnal/apnspushkin.py index 8b4fcad4..3d875de5 100644 --- a/sygnal/apnspushkin.py +++ b/sygnal/apnspushkin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2014 OpenMarket Ltd # Copyright 2017 Vector Creations Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2019-2020 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ import aioapns from aioapns import APNs, NotificationRequest from opentracing import logs, tags -from prometheus_client import Histogram, Counter +from prometheus_client import Histogram, Counter, Gauge from twisted.internet.defer import Deferred from sygnal import apnstruncate @@ -41,6 +41,10 @@ "sygnal_apns_request_time", "Time taken to send HTTP request to APNS" ) +ACTIVE_REQUESTS_GAUGE = Gauge( + "sygnal_active_apns_requests", "Number of APNS requests in flight" +) + RESPONSE_STATUS_CODES_COUNTER = Counter( "sygnal_apns_status_codes", "Number of HTTP response status codes received from APNS", @@ -148,8 +152,10 @@ async def _dispatch_request(self, log, span, device, shaved_payload, prio): ) try: - with SEND_TIME_HISTOGRAM.time(): - response = await self._send_notification(request) + + with ACTIVE_REQUESTS_GAUGE.track_inprogress(): + with SEND_TIME_HISTOGRAM.time(): + response = await self._send_notification(request) except aioapns.ConnectionError: raise TemporaryNotificationDispatchException("aioapns Connection Failure") diff --git a/sygnal/gcmpushkin.py b/sygnal/gcmpushkin.py index a605fb18..26eb618d 100644 --- a/sygnal/gcmpushkin.py +++ b/sygnal/gcmpushkin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2014 Leon Handreke # Copyright 2017 New Vector Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2019-2020 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,24 +21,37 @@ from json import JSONDecodeError from opentracing import logs, tags -from prometheus_client import Histogram, Counter +from prometheus_client import Counter, Gauge, Histogram from twisted.internet.defer import DeferredSemaphore -from twisted.web.client import HTTPConnectionPool, Agent, FileBodyProducer, readBody +from twisted.web.client import Agent, FileBodyProducer, HTTPConnectionPool, readBody from twisted.web.http_headers import Headers from sygnal.exceptions import ( - TemporaryNotificationDispatchException, NotificationDispatchException, + TemporaryNotificationDispatchException, ) -from sygnal.utils import twisted_sleep, NotificationLoggerAdapter from sygnal.helper.context_factory import ClientTLSOptionsFactory +from sygnal.utils import NotificationLoggerAdapter, twisted_sleep + from .exceptions import PushkinSetupException from .notifications import Pushkin +QUEUE_TIME_HISTOGRAM = Histogram( + "sygnal_gcm_queue_time", "Time taken waiting for a connection to GCM" +) + SEND_TIME_HISTOGRAM = Histogram( "sygnal_gcm_request_time", "Time taken to send HTTP request to GCM" ) +PENDING_REQUESTS_GAUGE = Gauge( + "sygnal_pending_gcm_requests", "Number of GCM requests waiting for a connection" +) + +ACTIVE_REQUESTS_GAUGE = Gauge( + "sygnal_active_gcm_requests", "Number of GCM requests in flight" +) + RESPONSE_STATUS_CODES_COUNTER = Counter( "sygnal_gcm_status_codes", "Number of HTTP response status codes received from GCM", @@ -145,12 +158,20 @@ async def _perform_http_request(self, body, headers): # we use the semaphore to actually limit the number of concurrent # requests, since the HTTPConnectionPool will actually just lead to more # requests being created but not pooled – it does not perform limiting. - await self.connection_semaphore.acquire() + with QUEUE_TIME_HISTOGRAM.time(): + with PENDING_REQUESTS_GAUGE.track_inprogress(): + await self.connection_semaphore.acquire() + try: - response = await self.http_agent.request( - b"POST", GCM_URL, headers=Headers(headers), bodyProducer=body_producer - ) - response_text = (await readBody(response)).decode() + with SEND_TIME_HISTOGRAM.time(): + with ACTIVE_REQUESTS_GAUGE.track_inprogress(): + response = await self.http_agent.request( + b"POST", + GCM_URL, + headers=Headers(headers), + bodyProducer=body_producer, + ) + response_text = (await readBody(response)).decode() except Exception as exception: raise TemporaryNotificationDispatchException( "GCM request failure" @@ -164,8 +185,7 @@ async def _request_dispatch(self, n, log, body, headers, pushkeys, span): failed = [] - with SEND_TIME_HISTOGRAM.time(): - response, response_text = await self._perform_http_request(body, headers) + response, response_text = await self._perform_http_request(body, headers) RESPONSE_STATUS_CODES_COUNTER.labels( pushkin=self.name, code=response.code