Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add prometheus metrics to track pushkin things #88

Merged
merged 1 commit into from
Mar 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/88.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add prometheus metrics to track pushkin things.
14 changes: 10 additions & 4 deletions sygnal/apnspushkin.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2014 OpenMarket Ltd
# Copyright 2017 Vector Creations Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
# Copyright 2019-2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -23,7 +23,7 @@
import aioapns
from aioapns import APNs, NotificationRequest
from opentracing import logs, tags
from prometheus_client import Histogram, Counter
from prometheus_client import Histogram, Counter, Gauge
from twisted.internet.defer import Deferred

from sygnal import apnstruncate
Expand All @@ -41,6 +41,10 @@
"sygnal_apns_request_time", "Time taken to send HTTP request to APNS"
)

ACTIVE_REQUESTS_GAUGE = Gauge(
"sygnal_active_apns_requests", "Number of APNS requests in flight"
)

RESPONSE_STATUS_CODES_COUNTER = Counter(
"sygnal_apns_status_codes",
"Number of HTTP response status codes received from APNS",
Expand Down Expand Up @@ -148,8 +152,10 @@ async def _dispatch_request(self, log, span, device, shaved_payload, prio):
)

try:
with SEND_TIME_HISTOGRAM.time():
response = await self._send_notification(request)

with ACTIVE_REQUESTS_GAUGE.track_inprogress():
with SEND_TIME_HISTOGRAM.time():
response = await self._send_notification(request)
except aioapns.ConnectionError:
raise TemporaryNotificationDispatchException("aioapns Connection Failure")

Expand Down
44 changes: 32 additions & 12 deletions sygnal/gcmpushkin.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2014 Leon Handreke
# Copyright 2017 New Vector Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
# Copyright 2019-2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -21,24 +21,37 @@
from json import JSONDecodeError

from opentracing import logs, tags
from prometheus_client import Histogram, Counter
from prometheus_client import Counter, Gauge, Histogram
from twisted.internet.defer import DeferredSemaphore
from twisted.web.client import HTTPConnectionPool, Agent, FileBodyProducer, readBody
from twisted.web.client import Agent, FileBodyProducer, HTTPConnectionPool, readBody
from twisted.web.http_headers import Headers

from sygnal.exceptions import (
TemporaryNotificationDispatchException,
NotificationDispatchException,
TemporaryNotificationDispatchException,
)
from sygnal.utils import twisted_sleep, NotificationLoggerAdapter
from sygnal.helper.context_factory import ClientTLSOptionsFactory
from sygnal.utils import NotificationLoggerAdapter, twisted_sleep

from .exceptions import PushkinSetupException
from .notifications import Pushkin

QUEUE_TIME_HISTOGRAM = Histogram(
"sygnal_gcm_queue_time", "Time taken waiting for a connection to GCM"
)

SEND_TIME_HISTOGRAM = Histogram(
"sygnal_gcm_request_time", "Time taken to send HTTP request to GCM"
)

PENDING_REQUESTS_GAUGE = Gauge(
"sygnal_pending_gcm_requests", "Number of GCM requests waiting for a connection"
)

ACTIVE_REQUESTS_GAUGE = Gauge(
"sygnal_active_gcm_requests", "Number of GCM requests in flight"
)

RESPONSE_STATUS_CODES_COUNTER = Counter(
"sygnal_gcm_status_codes",
"Number of HTTP response status codes received from GCM",
Expand Down Expand Up @@ -145,12 +158,20 @@ async def _perform_http_request(self, body, headers):
# we use the semaphore to actually limit the number of concurrent
# requests, since the HTTPConnectionPool will actually just lead to more
# requests being created but not pooled – it does not perform limiting.
await self.connection_semaphore.acquire()
with QUEUE_TIME_HISTOGRAM.time():
with PENDING_REQUESTS_GAUGE.track_inprogress():
await self.connection_semaphore.acquire()

try:
response = await self.http_agent.request(
b"POST", GCM_URL, headers=Headers(headers), bodyProducer=body_producer
)
response_text = (await readBody(response)).decode()
with SEND_TIME_HISTOGRAM.time():
with ACTIVE_REQUESTS_GAUGE.track_inprogress():
response = await self.http_agent.request(
b"POST",
GCM_URL,
headers=Headers(headers),
bodyProducer=body_producer,
)
response_text = (await readBody(response)).decode()
except Exception as exception:
raise TemporaryNotificationDispatchException(
"GCM request failure"
Expand All @@ -164,8 +185,7 @@ async def _request_dispatch(self, n, log, body, headers, pushkeys, span):

failed = []

with SEND_TIME_HISTOGRAM.time():
response, response_text = await self._perform_http_request(body, headers)
response, response_text = await self._perform_http_request(body, headers)

RESPONSE_STATUS_CODES_COUNTER.labels(
pushkin=self.name, code=response.code
Expand Down