Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding prometheus metrics to webapp for fails/successes #86

Merged
merged 2 commits into from
Apr 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ jobs:
PYTHON_VERSION: "3_6"
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_6
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_6
VERSION: 0.4.1
VERSION: 0.4.2
PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases
YARN_STATIC_DIR: notebooker/web/static/
IMAGE_NAME: mangroup/notebooker
Expand All @@ -189,7 +189,7 @@ jobs:
environment:
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_7
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_7
VERSION: 0.4.1
VERSION: 0.4.2
PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases
YARN_STATIC_DIR: notebooker/web/static/
IMAGE_NAME: mangroup/notebooker
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
0.4.2 (2022-04-27)
------------------

* Improvement: Prometheus metrics now record the number of report successes/failures which have been captured by the webapp.
* Improvement: Unpinned nbconvert and added the ipython_genutils dependency.


0.4.1 (2022-03-09)
------------------

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
author = "Man Group Quant Tech"

# The full version, including alpha/beta/rc tags
release = "0.4.1"
release = "0.4.2"


# -- General configuration ---------------------------------------------------
Expand Down
1 change: 0 additions & 1 deletion notebooker/execute_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def _run_checks(
generate_pdf_output: Optional[bool] = True,
hide_code: Optional[bool] = False,
mailto: Optional[str] = "",
error_mailto: Optional[str] = "",
email_subject: Optional[str] = "",
prepare_only: Optional[bool] = False,
notebooker_disable_git: bool = False,
Expand Down
2 changes: 1 addition & 1 deletion notebooker/utils/notebook_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import shutil
import tempfile
from logging import getLogger
from typing import AnyStr, Union
from typing import Union

from notebooker.constants import TEMPLATE_DIR_SEPARATOR, NotebookResultComplete, NotebookResultError
from notebooker.utils.mail import mail
Expand Down
2 changes: 1 addition & 1 deletion notebooker/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.4.1"
__version__ = "0.4.2"
31 changes: 26 additions & 5 deletions notebooker/web/report_hunter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,23 @@
logger = getLogger(__name__)


def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout: int = 5):
def try_register_success_prometheus(report_name: str, report_title: str):
    """
    Best-effort recording of a successful report run in Prometheus.

    The prometheus routes module is imported lazily inside the ``try`` so that an ImportError
    raised either by the import or by the recording call is logged at INFO level and swallowed
    instead of being propagated to the caller.

    :param report_name:
        Forwarded as the first argument to ``record_successful_report``.
    :param report_title:
        Forwarded as the second argument to ``record_successful_report``.
    """
    try:
        from notebooker.web.routes.prometheus import record_successful_report

        record_successful_report(report_name, report_title)
    except ImportError as import_error:
        failure_note = f"Attempted to log success to prometheus but failed with ImportError({import_error})."
        logger.info(failure_note)


def try_register_fail_prometheus(report_name: str, report_title: str):
    """
    Best-effort recording of a failed report run in Prometheus.

    The prometheus routes module is imported lazily inside the ``try`` so that an ImportError
    raised either by the import or by the recording call is logged at INFO level and swallowed
    instead of being propagated to the caller.

    :param report_name:
        Forwarded as the first argument to ``record_failed_report``.
    :param report_title:
        Forwarded as the second argument to ``record_failed_report``.
    """
    try:
        from notebooker.web.routes.prometheus import record_failed_report

        record_failed_report(report_name, report_title)
    except ImportError as import_error:
        failure_note = f"Attempted to log failure to prometheus but failed with ImportError({import_error})."
        logger.info(failure_note)


def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout: int = 120):
"""
This is a function designed to run in a thread alongside the webapp. It updates the cache which the
web app reads from and performs some admin on pending/running jobs. The function terminates either when
Expand All @@ -21,12 +37,13 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
:param run_once:
Whether to infinitely run this function or not.
:param timeout:
The time in seconds that we cache results.
The time in seconds that we cache results. Defaults to 120s.
:param serializer_kwargs:
Any kwargs which are required for a Serializer to be initialised successfully.
"""
serializer = initialize_serializer_from_config(webapp_config)
last_query = None
refresh_period_seconds = 10
while not os.getenv("NOTEBOOKER_APP_STOPPING"):
try:
ct = 0
Expand All @@ -51,8 +68,8 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
"Please try again! Timed out after {:.0f} minutes "
"{:.0f} seconds.".format(delta_seconds / 60, delta_seconds % 60),
)
# Finally, check we have the latest updates
_last_query = datetime.datetime.now() - datetime.timedelta(minutes=1)
# Finally, check we have the latest updates with a small buffer
_last_query = datetime.datetime.now() - datetime.timedelta(seconds=refresh_period_seconds)
query_results = serializer.get_all_results(since=last_query)
for result in query_results:
ct += 1
Expand All @@ -61,6 +78,10 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
set_report_cache(
result.report_name, result.job_id, result, timeout=timeout, cache_dir=webapp_config.CACHE_DIR
)
if result.status == JobStatus.DONE:
try_register_success_prometheus(result.report_name, result.report_title)
if result.status == JobStatus.ERROR:
try_register_fail_prometheus(result.report_name, result.report_title)
logger.info(
"Report-hunter found a change for {} (status: {}->{})".format(
result.job_id, existing.status if existing else None, result.status
Expand All @@ -74,5 +95,5 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
logger.exception(str(e))
if run_once:
break
time.sleep(10)
time.sleep(refresh_period_seconds)
logger.info("Report-hunting thread successfully killed.")
20 changes: 20 additions & 0 deletions notebooker/web/routes/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@
registry=REGISTRY,
labelnames=["env", "method", "path", "http_status", "hostname"],
)
# Counter of successful report runs, registered on REGISTRY and labelled by
# (report_name, report_title); incremented by record_successful_report.
N_SUCCESSFUL_REPORTS = Counter(
"notebooker_n_successful_reports",
"Number of successful runs in the current session for the report",
registry=REGISTRY,
labelnames=["report_name", "report_title"],
)
# Counter of failed report runs, registered on REGISTRY and labelled by
# (report_name, report_title); incremented by record_failed_report.
N_FAILED_REPORTS = Counter(
"notebooker_n_failed_reports",
"Number of failed runs in the current session for the report",
registry=REGISTRY,
labelnames=["report_name", "report_title"],
)

prometheus_bp = Blueprint("prometheus", __name__)

Expand All @@ -39,6 +51,14 @@ def record_request_data(response):
return response


def record_successful_report(report_name, report_title):
    """
    Increment the successful-reports counter for one report.

    :param report_name:
        Value for the counter's first label.
    :param report_title:
        Value for the counter's second label.
    """
    labelled_counter = N_SUCCESSFUL_REPORTS.labels(report_name, report_title)
    labelled_counter.inc()


def record_failed_report(report_name, report_title):
    """
    Increment the failed-reports counter for one report.

    :param report_name:
        Value for the counter's first label.
    :param report_title:
        Value for the counter's second label.
    """
    labelled_counter = N_FAILED_REPORTS.labels(report_name, report_title)
    labelled_counter.inc()


def setup_metrics(app):
app.before_request(start_timer)
# The order here matters since we want stop_timer
Expand Down
2 changes: 1 addition & 1 deletion notebooker/web/static/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "notebooker",
"version": "0.4.1",
"version": "0.4.2",
"description": "Notebooker - Turn notebooks into reports",
"dependencies": {
"bootstrap-table": "1.15.3",
Expand Down
54 changes: 54 additions & 0 deletions tests/integration/test_report_hunter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import uuid

import freezegun
import mock.mock
import pytest

from notebooker.constants import JobStatus, NotebookResultComplete, NotebookResultError, NotebookResultPending
Expand Down Expand Up @@ -173,3 +174,56 @@ def test_report_hunter_pending_to_done(bson_library, webapp_config):
serializer.save_check_result(expected)
_report_hunter(webapp_config=webapp_config, run_once=True)
assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected


# Checks that an errored report is still cached by the report hunter when the Prometheus
# failure-recording call raises ImportError, and that the recording call was attempted.
# record_failed_report is patched in notebooker.web.routes.prometheus; the resulting mock is
# injected as the first test argument.
@mock.patch("notebooker.web.routes.prometheus.record_failed_report")
def test_prometheus_logging_in_report_hunter_no_prometheus_fail(record_failed_report, bson_library, webapp_config):
job_id = str(uuid.uuid4())
report_name = str(uuid.uuid4())
serializer = initialize_serializer_from_config(webapp_config)
# Make the recording call itself raise ImportError when it is invoked.
record_failed_report.side_effect = ImportError("wah")

# Freeze the clock at the same instant used for the result's update_time.
with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 37)):
expected = NotebookResultError(
job_id=job_id,
report_name=report_name,
# The title is deliberately the same value as the name (see the final assertion).
report_title=report_name,
status=JobStatus.ERROR,
update_time=datetime.datetime(2018, 1, 12, 2, 37),
job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
error_info="This was cancelled!",
)
# Persist the errored result, then run a single pass of the report hunter.
serializer.save_check_result(expected)
_report_hunter(webapp_config=webapp_config, run_once=True)
# The result must have reached the cache even though the mock raised ImportError.
assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected
# Both arguments are report_name because report_title was set to report_name above.
record_failed_report.assert_called_once_with(report_name, report_name)


# Checks that a completed report is still cached by the report hunter when the Prometheus
# success-recording call raises ImportError, and that the recording call was attempted.
# record_successful_report is patched in notebooker.web.routes.prometheus; the resulting mock
# is injected as the first test argument.
@mock.patch("notebooker.web.routes.prometheus.record_successful_report")
def test_prometheus_logging_in_report_hunter_no_prometheus_success(
record_successful_report, bson_library, webapp_config
):
job_id = str(uuid.uuid4())
report_name = str(uuid.uuid4())
serializer = initialize_serializer_from_config(webapp_config)
# Make the recording call itself raise ImportError when it is invoked.
record_successful_report.side_effect = ImportError("wah")

# Freeze the clock at the same instant used for the result's update/finish times.
with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 37)):
expected = NotebookResultComplete(
job_id=job_id,
report_name=report_name,
# The title is deliberately the same value as the name (see the final assertion).
report_title=report_name,
status=JobStatus.DONE,
update_time=datetime.datetime(2018, 1, 12, 2, 37),
job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
job_finish_time=datetime.datetime(2018, 1, 12, 2, 37),
pdf=b"abc",
raw_html="rawstuff",
email_html="emailstuff",
raw_html_resources={"outputs": {}, "inlining": []},
raw_ipynb_json="[]",
)
# Persist the completed result, then run a single pass of the report hunter.
serializer.save_check_result(expected)
_report_hunter(webapp_config=webapp_config, run_once=True)
# The result must have reached the cache even though the mock raised ImportError.
assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected
# Both arguments are report_name because report_title was set to report_name above.
record_successful_report.assert_called_once_with(report_name, report_name)