Fix double-counting exception with Gunicorn

Fixes #113
rycus86 · Nov 21, 2021 · c9ff2b1 · c9ff2b1
1 parent 17ba4f7
commit c9ff2b1
Show file tree

Hide file tree

Showing 13 changed files with 171 additions and 14 deletions.
diff --git a/README.md b/README.md
@@ -24,7 +24,7 @@ or paste it into requirements.txt:
 prometheus-flask-exporter
 
 # or with specific version number
-prometheus-flask-exporter==0.18.5
+prometheus-flask-exporter==0.18.6
 ```
 and then install dependencies from requirements.txt file as usual:
 ```

diff --git a/examples/gunicorn-exceptions-113/Dockerfile b/examples/gunicorn-exceptions-113/Dockerfile
@@ -0,0 +1,14 @@
+FROM python:3.8-alpine
+
+ADD examples/gunicorn-exceptions-113/requirements.txt /tmp/requirements.txt
+RUN pip install -r /tmp/requirements.txt
+
+ADD . /tmp/latest
+RUN pip install -e /tmp/latest --upgrade
+
+ADD examples/gunicorn-exceptions-113/server.py examples/gunicorn-exceptions-113/config.py /var/flask/
+WORKDIR /var/flask
+
+ENV PROMETHEUS_MULTIPROC_DIR /tmp
+
+CMD gunicorn -c config.py -w 4 -b 0.0.0.0:4000 server
diff --git a/examples/gunicorn-exceptions-113/README.md b/examples/gunicorn-exceptions-113/README.md
@@ -0,0 +1,12 @@
+# Gunicorn example
+
+This [Gunicorn](https://gunicorn.org/) example has a sample app in [server.py](server.py) with
+multiprocessing enabled for metrics collection.
+
+This example checks that exception metrics are not counted twice due to both the Flask `after_request`
+and `teardown_request` callbacks seeing that request.
+
+## Thanks
+
+Huge thanks for [@idlefella](https://github.com/idlefella) for
+bringing this to my attention in [prometheus_flask_exporter#113](https://github.com/rycus86/prometheus_flask_exporter/issues/113) !
diff --git a/examples/gunicorn-exceptions-113/config.py b/examples/gunicorn-exceptions-113/config.py
@@ -0,0 +1,5 @@
+from prometheus_flask_exporter.multiprocess import GunicornInternalPrometheusMetrics
+
+
+def child_exit(server, worker):
+    GunicornInternalPrometheusMetrics.mark_process_dead_on_child_exit(worker.pid)
diff --git a/examples/gunicorn-exceptions-113/requirements.txt b/examples/gunicorn-exceptions-113/requirements.txt
@@ -0,0 +1,4 @@
+flask
+gunicorn
+prometheus_client
+prometheus_flask_exporter
diff --git a/examples/gunicorn-exceptions-113/run_tests.sh b/examples/gunicorn-exceptions-113/run_tests.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+cd "$(dirname "$0")"
+
+_fail() {
+    docker rm -f gunicorn-sample > /dev/null 2>&1
+    exit 1
+}
+
+docker build -f Dockerfile -t gunicorn-sample ../../. > /dev/null || _fail
+docker run -d --name gunicorn-sample -p 4000:4000 -p 9200:9200 gunicorn-sample > /dev/null || _fail
+
+echo 'Waiting for Gunicorn to start...'
+
+for _ in $(seq 1 10); do
+    PROCESS_COUNT=$(docker exec gunicorn-sample sh -c 'pgrep -a gunicorn | wc -l')
+    if [ "$PROCESS_COUNT" -ge 5 ]; then
+        break
+    fi
+done
+
+echo 'Starting the tests...'
+
+for _ in $(seq 1 10); do
+    curl -s http://localhost:4000/test > /dev/null
+done
+
+curl -s http://localhost:4000/metrics \
+  | grep 'flask_http_request_total{method="GET",status="500"} 10.0' \
+  > /dev/null
+
+if [ "$?" != "0" ]; then
+    echo 'The expected metrics are not found'
+    _fail
+fi
+
+curl -s http://localhost:4000/metrics \
+  | grep 'flask_http_request_duration_seconds_count{method="GET",path="/test",status="500"} 10.0' \
+  > /dev/null
+
+if [ "$?" != "0" ]; then
+    echo 'The expected metrics are not found'
+    _fail
+fi
+
+docker rm -f gunicorn-sample > /dev/null
+echo 'OK, all done'
diff --git a/examples/gunicorn-exceptions-113/server.py b/examples/gunicorn-exceptions-113/server.py
@@ -0,0 +1,18 @@
+from flask import Flask
+from prometheus_flask_exporter.multiprocess import GunicornInternalPrometheusMetrics
+
+application = Flask(__name__)
+metrics = GunicornInternalPrometheusMetrics(application)
+
+# static information as metric
+metrics.info('app_info', 'Application info', version='1.0.3')
+
+
+@application.route('/test')
+def main():
+    raise Exception("Crashing")
+    pass  # requests tracked by default
+
+
+if __name__ == '__main__':
+    application.run(debug=False, port=5000)
diff --git a/examples/gunicorn-internal/run_tests.sh b/examples/gunicorn-internal/run_tests.sh
@@ -38,5 +38,18 @@ if [ "$?" != "0" ]; then
     _fail
 fi
 
+for _ in $(seq 1 10); do
+    curl -s http://localhost:4000/error > /dev/null
+done
+
+curl -s http://localhost:4000/metrics \
+  | grep 'flask_http_request_total{method="GET",status="500"} 10.0' \
+  > /dev/null
+
+if [ "$?" != "0" ]; then
+    echo 'The expected error metrics are not found'
+    _fail
+fi
+
 docker rm -f gunicorn-internal-sample > /dev/null
 echo 'OK, all done'
diff --git a/examples/gunicorn-internal/server.py b/examples/gunicorn-internal/server.py
@@ -11,5 +11,10 @@ def index():
     return 'Hello world'
 
 
+@app.route('/error')
+def error():
+    raise Exception('Fail')
+
+
 if __name__ == '__main__':
     app.run(debug=False, port=5000)
diff --git a/examples/gunicorn/run_tests.sh b/examples/gunicorn/run_tests.sh
@@ -38,5 +38,18 @@ if [ "$?" != "0" ]; then
     _fail
 fi
 
+for _ in $(seq 1 10); do
+    curl -s http://localhost:4000/error > /dev/null
+done
+
+curl -s http://localhost:9200/metrics \
+  | grep 'flask_http_request_total{method="GET",status="500"} 10.0' \
+  > /dev/null
+
+if [ "$?" != "0" ]; then
+    echo 'The expected error metrics are not found'
+    _fail
+fi
+
 docker rm -f gunicorn-sample > /dev/null
 echo 'OK, all done'
diff --git a/examples/gunicorn/server.py b/examples/gunicorn/server.py
@@ -11,5 +11,10 @@ def index():
     return 'Hello world'
 
 
+@app.route('/error')
+def error():
+    raise Exception('Fail')
+
+
 if __name__ == '__main__':
     app.run(debug=False, port=5000)
diff --git a/prometheus_flask_exporter/__init__.py b/prometheus_flask_exporter/__init__.py
@@ -428,7 +428,7 @@ def after_request(response):
                 if any(pattern.match(request.path) for pattern in self.excluded_paths):
                     return response
 
-            if hasattr(request, 'prom_start_time'):
+            if hasattr(request, 'prom_start_time') and self._not_yet_handled('duration_reported'):
                 total_time = max(default_timer() - request.prom_start_time, 0)
 
                 if callable(duration_group):
@@ -445,10 +445,11 @@ def after_request(response):
 
                 request_duration_metric.labels(**request_duration_labels).observe(total_time)
 
-            request_total_metric.labels(
-                method=request.method, status=_to_status_code(response.status_code),
-                **labels.values_for(response)
-            ).inc()
+            if self._not_yet_handled('total_reported'):
+                request_total_metric.labels(
+                    method=request.method, status=_to_status_code(response.status_code),
+                    **labels.values_for(response)
+                ).inc()
 
             return response
 
@@ -472,7 +473,7 @@ def teardown_request(exception=None):
                 **labels.values_for(response)
             ).inc()
 
-            if hasattr(request, 'prom_start_time'):
+            if hasattr(request, 'prom_start_time') and self._not_yet_handled('duration_reported'):
                 total_time = max(default_timer() - request.prom_start_time, 0)
 
                 request_duration_labels = {
@@ -484,10 +485,11 @@ def teardown_request(exception=None):
 
                 request_duration_metric.labels(**request_duration_labels).observe(total_time)
 
-            request_total_metric.labels(
-                method=request.method, status=500,
-                **labels.values_for(response)
-            ).inc()
+            if self._not_yet_handled('total_reported'):
+                request_total_metric.labels(
+                    method=request.method, status=500,
+                    **labels.values_for(response)
+                ).inc()
 
             return
 
@@ -850,6 +852,25 @@ def _is_string(value):
         except NameError:
             return isinstance(value, str)  # python3
 
+    @staticmethod
+    def _not_yet_handled(tracking_key):
+        """
+        Check if the request has not handled some tracking yet,
+        and mark the request if this is the first time.
+        This is to avoid follow-up actions.
+
+        :param tracking_key: a key identifying a processing step
+        :return: True if this is the first time the request is
+          trying to handle this processing step
+        """
+
+        key = 'prom_' + tracking_key
+        if hasattr(request, key):
+            return False
+        else:
+            setattr(request, key, True)
+            return True
+
 
 class ConnexionPrometheusMetrics(PrometheusMetrics):
     """
@@ -931,4 +952,4 @@ def _make_response(response):
         return _make_response
 
 
-__version__ = '0.18.5'
+__version__ = '0.18.6'
diff --git a/setup.py b/setup.py
@@ -6,15 +6,15 @@
 setup(
     name='prometheus_flask_exporter',
     packages=['prometheus_flask_exporter'],
-    version='0.18.5',
+    version='0.18.6',
     description='Prometheus metrics exporter for Flask',
     long_description=long_description,
     long_description_content_type='text/markdown',
     license='MIT',
     author='Viktor Adam',
     author_email='rycus86@gmail.com',
     url='https://github.com/rycus86/prometheus_flask_exporter',
-    download_url='https://github.com/rycus86/prometheus_flask_exporter/archive/0.18.5.tar.gz',
+    download_url='https://github.com/rycus86/prometheus_flask_exporter/archive/0.18.6.tar.gz',
     keywords=['prometheus', 'flask', 'monitoring', 'exporter'],
     classifiers=[
         'Development Status :: 4 - Beta',