Skip to content

Commit

Permalink
Add prometheus metrics to track pushkin things (#88)
Browse files Browse the repository at this point in the history
* time waiting for locks
 * number of requests in flight
  • Loading branch information
richvdh committed Mar 24, 2020
1 parent 9b1a1dd commit 699aff8
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 16 deletions.
1 change: 1 addition & 0 deletions changelog.d/88.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add prometheus metrics to track pushkin things.
14 changes: 10 additions & 4 deletions sygnal/apnspushkin.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2014 OpenMarket Ltd
# Copyright 2017 Vector Creations Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
# Copyright 2019-2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -23,7 +23,7 @@
import aioapns
from aioapns import APNs, NotificationRequest
from opentracing import logs, tags
from prometheus_client import Histogram, Counter
from prometheus_client import Histogram, Counter, Gauge
from twisted.internet.defer import Deferred

from sygnal import apnstruncate
Expand All @@ -41,6 +41,10 @@
"sygnal_apns_request_time", "Time taken to send HTTP request to APNS"
)

ACTIVE_REQUESTS_GAUGE = Gauge(
"sygnal_active_apns_requests", "Number of APNS requests in flight"
)

RESPONSE_STATUS_CODES_COUNTER = Counter(
"sygnal_apns_status_codes",
"Number of HTTP response status codes received from APNS",
Expand Down Expand Up @@ -148,8 +152,10 @@ async def _dispatch_request(self, log, span, device, shaved_payload, prio):
)

try:
with SEND_TIME_HISTOGRAM.time():
response = await self._send_notification(request)

with ACTIVE_REQUESTS_GAUGE.track_inprogress():
with SEND_TIME_HISTOGRAM.time():
response = await self._send_notification(request)
except aioapns.ConnectionError:
raise TemporaryNotificationDispatchException("aioapns Connection Failure")

Expand Down
44 changes: 32 additions & 12 deletions sygnal/gcmpushkin.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2014 Leon Handreke
# Copyright 2017 New Vector Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
# Copyright 2019-2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -21,24 +21,37 @@
from json import JSONDecodeError

from opentracing import logs, tags
from prometheus_client import Histogram, Counter
from prometheus_client import Counter, Gauge, Histogram
from twisted.internet.defer import DeferredSemaphore
from twisted.web.client import HTTPConnectionPool, Agent, FileBodyProducer, readBody
from twisted.web.client import Agent, FileBodyProducer, HTTPConnectionPool, readBody
from twisted.web.http_headers import Headers

from sygnal.exceptions import (
TemporaryNotificationDispatchException,
NotificationDispatchException,
TemporaryNotificationDispatchException,
)
from sygnal.utils import twisted_sleep, NotificationLoggerAdapter
from sygnal.helper.context_factory import ClientTLSOptionsFactory
from sygnal.utils import NotificationLoggerAdapter, twisted_sleep

from .exceptions import PushkinSetupException
from .notifications import Pushkin

QUEUE_TIME_HISTOGRAM = Histogram(
"sygnal_gcm_queue_time", "Time taken waiting for a connection to GCM"
)

SEND_TIME_HISTOGRAM = Histogram(
"sygnal_gcm_request_time", "Time taken to send HTTP request to GCM"
)

PENDING_REQUESTS_GAUGE = Gauge(
"sygnal_pending_gcm_requests", "Number of GCM requests waiting for a connection"
)

ACTIVE_REQUESTS_GAUGE = Gauge(
"sygnal_active_gcm_requests", "Number of GCM requests in flight"
)

RESPONSE_STATUS_CODES_COUNTER = Counter(
"sygnal_gcm_status_codes",
"Number of HTTP response status codes received from GCM",
Expand Down Expand Up @@ -145,12 +158,20 @@ async def _perform_http_request(self, body, headers):
# we use the semaphore to actually limit the number of concurrent
# requests, since the HTTPConnectionPool will actually just lead to more
# requests being created but not pooled – it does not perform limiting.
await self.connection_semaphore.acquire()
with QUEUE_TIME_HISTOGRAM.time():
with PENDING_REQUESTS_GAUGE.track_inprogress():
await self.connection_semaphore.acquire()

try:
response = await self.http_agent.request(
b"POST", GCM_URL, headers=Headers(headers), bodyProducer=body_producer
)
response_text = (await readBody(response)).decode()
with SEND_TIME_HISTOGRAM.time():
with ACTIVE_REQUESTS_GAUGE.track_inprogress():
response = await self.http_agent.request(
b"POST",
GCM_URL,
headers=Headers(headers),
bodyProducer=body_producer,
)
response_text = (await readBody(response)).decode()
except Exception as exception:
raise TemporaryNotificationDispatchException(
"GCM request failure"
Expand All @@ -164,8 +185,7 @@ async def _request_dispatch(self, n, log, body, headers, pushkeys, span):

failed = []

with SEND_TIME_HISTOGRAM.time():
response, response_text = await self._perform_http_request(body, headers)
response, response_text = await self._perform_http_request(body, headers)

RESPONSE_STATUS_CODES_COUNTER.labels(
pushkin=self.name, code=response.code
Expand Down

0 comments on commit 699aff8

Please sign in to comment.