Skip to content

Commit

Permalink
Merge pull request #86 from man-group/add-prometheus-stats
Browse files Browse the repository at this point in the history
Adding prometheus metrics to webapp for fails/successes
  • Loading branch information
jonbannister authored Apr 27, 2022
2 parents 0f823cc + 1437b82 commit 4db0f46
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 12 deletions.
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ jobs:
PYTHON_VERSION: "3_6"
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_6
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_6
VERSION: 0.4.1
VERSION: 0.4.2
PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases
YARN_STATIC_DIR: notebooker/web/static/
IMAGE_NAME: mangroup/notebooker
Expand All @@ -189,7 +189,7 @@ jobs:
environment:
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_7
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_7
VERSION: 0.4.1
VERSION: 0.4.2
PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases
YARN_STATIC_DIR: notebooker/web/static/
IMAGE_NAME: mangroup/notebooker
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
0.4.2 (2022-04-27)
------------------

* Improvement: Prometheus metrics now record number of successes/failures which have been captured by the webapp.
* Improvement: Unpinned nbconvert and added ipython_genutils dependency


0.4.1 (2022-03-09)
------------------

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
author = "Man Group Quant Tech"

# The full version, including alpha/beta/rc tags
release = "0.4.1"
release = "0.4.2"


# -- General configuration ---------------------------------------------------
Expand Down
1 change: 0 additions & 1 deletion notebooker/execute_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def _run_checks(
generate_pdf_output: Optional[bool] = True,
hide_code: Optional[bool] = False,
mailto: Optional[str] = "",
error_mailto: Optional[str] = "",
email_subject: Optional[str] = "",
prepare_only: Optional[bool] = False,
notebooker_disable_git: bool = False,
Expand Down
2 changes: 1 addition & 1 deletion notebooker/utils/notebook_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import shutil
import tempfile
from logging import getLogger
from typing import AnyStr, Union
from typing import Union

from notebooker.constants import TEMPLATE_DIR_SEPARATOR, NotebookResultComplete, NotebookResultError
from notebooker.utils.mail import mail
Expand Down
2 changes: 1 addition & 1 deletion notebooker/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.4.1"
__version__ = "0.4.2"
31 changes: 26 additions & 5 deletions notebooker/web/report_hunter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,23 @@
logger = getLogger(__name__)


def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout: int = 5):
def try_register_success_prometheus(report_name: str, report_title: str):
    """
    Best-effort recording of a successful report run in the prometheus metrics.

    Both the import of the prometheus routes module and the call into it happen inside the try block,
    so an ImportError raised by either one is logged at INFO level rather than propagated.

    :param report_name:
        Name of the report which completed successfully.
    :param report_title:
        Title of the report which completed successfully.
    """
    try:
        from notebooker.web.routes.prometheus import record_successful_report as _record_success

        _record_success(report_name, report_title)
    except ImportError as e:
        logger.info(f"Attempted to log success to prometheus but failed with ImportError({e}).")


def try_register_fail_prometheus(report_name: str, report_title: str):
    """
    Best-effort recording of a failed report run in the prometheus metrics.

    Both the import of the prometheus routes module and the call into it happen inside the try block,
    so an ImportError raised by either one is logged at INFO level rather than propagated.

    :param report_name:
        Name of the report which failed.
    :param report_title:
        Title of the report which failed.
    """
    try:
        from notebooker.web.routes.prometheus import record_failed_report as _record_failure

        _record_failure(report_name, report_title)
    except ImportError as e:
        logger.info(f"Attempted to log failure to prometheus but failed with ImportError({e}).")


def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout: int = 120):
"""
This is a function designed to run in a thread alongside the webapp. It updates the cache which the
web app reads from and performs some admin on pending/running jobs. The function terminates either when
Expand All @@ -21,12 +37,13 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
:param run_once:
Whether to infinitely run this function or not.
:param timeout:
The time in seconds that we cache results.
The time in seconds that we cache results. Defaults to 120s.
:param serializer_kwargs:
Any kwargs which are required for a Serializer to be initialised successfully.
"""
serializer = initialize_serializer_from_config(webapp_config)
last_query = None
refresh_period_seconds = 10
while not os.getenv("NOTEBOOKER_APP_STOPPING"):
try:
ct = 0
Expand All @@ -51,8 +68,8 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
"Please try again! Timed out after {:.0f} minutes "
"{:.0f} seconds.".format(delta_seconds / 60, delta_seconds % 60),
)
# Finally, check we have the latest updates
_last_query = datetime.datetime.now() - datetime.timedelta(minutes=1)
# Finally, check we have the latest updates with a small buffer
_last_query = datetime.datetime.now() - datetime.timedelta(seconds=refresh_period_seconds)
query_results = serializer.get_all_results(since=last_query)
for result in query_results:
ct += 1
Expand All @@ -61,6 +78,10 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
set_report_cache(
result.report_name, result.job_id, result, timeout=timeout, cache_dir=webapp_config.CACHE_DIR
)
if result.status == JobStatus.DONE:
try_register_success_prometheus(result.report_name, result.report_title)
if result.status == JobStatus.ERROR:
try_register_fail_prometheus(result.report_name, result.report_title)
logger.info(
"Report-hunter found a change for {} (status: {}->{})".format(
result.job_id, existing.status if existing else None, result.status
Expand All @@ -74,5 +95,5 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
logger.exception(str(e))
if run_once:
break
time.sleep(10)
time.sleep(refresh_period_seconds)
logger.info("Report-hunting thread successfully killed.")
20 changes: 20 additions & 0 deletions notebooker/web/routes/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@
registry=REGISTRY,
labelnames=["env", "method", "path", "http_status", "hostname"],
)
# Per-report counter of successful runs; incremented via record_successful_report().
# Label order matters: callers pass (report_name, report_title) positionally.
N_SUCCESSFUL_REPORTS = Counter(
    "notebooker_n_successful_reports",
    "Number of successful runs in the current session for the report",
    labelnames=["report_name", "report_title"],
    registry=REGISTRY,
)
# Per-report counter of failed runs; incremented via record_failed_report().
# Label order matters: callers pass (report_name, report_title) positionally.
N_FAILED_REPORTS = Counter(
    "notebooker_n_failed_reports",
    "Number of failed runs in the current session for the report",
    labelnames=["report_name", "report_title"],
    registry=REGISTRY,
)

prometheus_bp = Blueprint("prometheus", __name__)

Expand All @@ -39,6 +51,14 @@ def record_request_data(response):
return response


def record_successful_report(report_name, report_title):
    """
    Increment the N_SUCCESSFUL_REPORTS counter for the given report.

    :param report_name:
        Value for the counter's "report_name" label.
    :param report_title:
        Value for the counter's "report_title" label.
    """
    labelled_counter = N_SUCCESSFUL_REPORTS.labels(report_name, report_title)
    labelled_counter.inc()


def record_failed_report(report_name, report_title):
    """
    Increment the N_FAILED_REPORTS counter for the given report.

    :param report_name:
        Value for the counter's "report_name" label.
    :param report_title:
        Value for the counter's "report_title" label.
    """
    labelled_counter = N_FAILED_REPORTS.labels(report_name, report_title)
    labelled_counter.inc()


def setup_metrics(app):
app.before_request(start_timer)
# The order here matters since we want stop_timer
Expand Down
2 changes: 1 addition & 1 deletion notebooker/web/static/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "notebooker",
"version": "0.4.1",
"version": "0.4.2",
"description": "Notebooker - Turn notebooks into reports",
"dependencies": {
"bootstrap-table": "1.15.3",
Expand Down
54 changes: 54 additions & 0 deletions tests/integration/test_report_hunter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import uuid

import freezegun
import mock.mock
import pytest

from notebooker.constants import JobStatus, NotebookResultComplete, NotebookResultError, NotebookResultPending
Expand Down Expand Up @@ -173,3 +174,56 @@ def test_report_hunter_pending_to_done(bson_library, webapp_config):
serializer.save_check_result(expected)
_report_hunter(webapp_config=webapp_config, run_once=True)
assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected


@mock.patch("notebooker.web.routes.prometheus.record_failed_report")
def test_prometheus_logging_in_report_hunter_no_prometheus_fail(record_failed_report, bson_library, webapp_config):
    """
    With record_failed_report patched to raise ImportError, a single report-hunter pass must still
    cache the errored result, and the recorder must have been called exactly once.
    """
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    result_store = initialize_serializer_from_config(webapp_config)
    # Every call to the patched recorder raises ImportError.
    record_failed_report.side_effect = ImportError("wah")

    # NOTE(review): everything below is assumed to run with time frozen - confirm against the repository.
    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 37)):
        errored_result = NotebookResultError(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.ERROR,
            update_time=datetime.datetime(2018, 1, 12, 2, 37),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
            error_info="This was cancelled!",
        )
        result_store.save_check_result(errored_result)
        _report_hunter(webapp_config=webapp_config, run_once=True)
        cached_result = get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR)
        assert cached_result == errored_result
        # The report title was set to the report name above, hence the repeated argument.
        record_failed_report.assert_called_once_with(report_name, report_name)


@mock.patch("notebooker.web.routes.prometheus.record_successful_report")
def test_prometheus_logging_in_report_hunter_no_prometheus_success(
    record_successful_report, bson_library, webapp_config
):
    """
    With record_successful_report patched to raise ImportError, a single report-hunter pass must still
    cache the completed result, and the recorder must have been called exactly once.
    """
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    result_store = initialize_serializer_from_config(webapp_config)
    # Every call to the patched recorder raises ImportError.
    record_successful_report.side_effect = ImportError("wah")

    # NOTE(review): everything below is assumed to run with time frozen - confirm against the repository.
    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 37)):
        completed_result = NotebookResultComplete(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.DONE,
            update_time=datetime.datetime(2018, 1, 12, 2, 37),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
            job_finish_time=datetime.datetime(2018, 1, 12, 2, 37),
            pdf=b"abc",
            raw_html="rawstuff",
            email_html="emailstuff",
            raw_html_resources={"outputs": {}, "inlining": []},
            raw_ipynb_json="[]",
        )
        result_store.save_check_result(completed_result)
        _report_hunter(webapp_config=webapp_config, run_once=True)
        cached_result = get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR)
        assert cached_result == completed_result
        # The report title was set to the report name above, hence the repeated argument.
        record_successful_report.assert_called_once_with(report_name, report_name)

0 comments on commit 4db0f46

Please sign in to comment.