From aeb42dc063bb8a558e7b76fc4275987dd69ecd79 Mon Sep 17 00:00:00 2001
From: grahamalama
Date: Tue, 10 Jan 2023 11:04:25 -0500
Subject: [PATCH] Always run container with single Uvicorn process (#503)

Previously, we ran our application container with Uvicorn in
development but with Gunicorn and multiple workers in production. The
multi-worker setup made metrics collection more complex, and we already
have Kubernetes to manage scaling. So, with this commit, we remove
Gunicorn and always run the application as a single Uvicorn process.

In removing Gunicorn, we also had to tweak our logging and application
settings.
---
 asgi.py                     | 22 +++++++++++
 ctms/app.py                 | 16 +++-----
 ctms/config.py              | 11 +++---
 ctms/log.py                 | 66 +++++++++-----------------
 ctms/metrics.py             | 34 ----------------
 docker/Dockerfile           | 22 +++-------
 docker/docker-entrypoint.sh | 19 ----------
 docker/gunicorn_conf.py     | 76 -------------------------------------
 docs/configuration.md       |  5 ++-
 poetry.lock                 | 25 +-----------
 pyproject.toml              |  1 -
 tests/unit/test_log.py      |  2 +-
 tests/unit/test_metrics.py  | 42 +-------------------
 13 files changed, 64 insertions(+), 277 deletions(-)
 create mode 100644 asgi.py
 delete mode 100644 docker/docker-entrypoint.sh
 delete mode 100644 docker/gunicorn_conf.py

diff --git a/asgi.py b/asgi.py
new file mode 100644
index 00000000..1125bde9
--- /dev/null
+++ b/asgi.py
@@ -0,0 +1,22 @@
+import uvicorn
+
+from ctms.app import get_settings
+from ctms.log import configure_logging
+
+settings = get_settings()
+
+
+if __name__ == "__main__":
+    logging_config = configure_logging(
+        settings.use_mozlog, settings.logging_level.name, settings.log_sqlalchemy
+    )
+
+    server = uvicorn.Server(
+        uvicorn.Config(
+            "ctms.app:app",
+            host=settings.host,
+            port=settings.port,
+            log_config=logging_config,
+        )
+    )
+    server.run()
diff --git a/ctms/app.py b/ctms/app.py
index 0bb15b3f..26010f2f 100644
--- a/ctms/app.py
+++ b/ctms/app.py
@@ -57,13 +57,8 @@
     StripeIngestUnknownObjectError,
     ingest_stripe_object,
 )
-from .log import configure_logging, context_from_request, get_log_line
-from .metrics import (
-    emit_response_metrics,
-    get_metrics_reporting_registry,
-    init_metrics,
-    init_metrics_labels,
-)
+from .log import context_from_request, get_log_line
+from .metrics import emit_response_metrics, init_metrics, init_metrics_labels
 from .models import Email, StripeCustomer
 from .monitor import check_database, get_version
 from .schemas import (
@@ -99,6 +94,9 @@
 SessionLocal = None
 METRICS_REGISTRY = CollectorRegistry()
 METRICS = None
+
+# We could use the default prometheus_client.REGISTRY, but it makes tests
+# easier to write if it is possible to replace the registry with a fresh one.
get_metrics_registry = lambda: METRICS_REGISTRY get_metrics = lambda: METRICS oauth2_scheme = OAuth2ClientCredentials(tokenUrl="token") @@ -119,8 +117,6 @@ def get_settings() -> config.Settings: @app.on_event("startup") def startup_event(): # pragma: no cover global SessionLocal, METRICS # pylint:disable = W0603 - settings = get_settings() - configure_logging(settings.use_mozlog, settings.logging_level.name) _, SessionLocal = get_db_engine(get_settings()) METRICS = init_metrics(METRICS_REGISTRY) init_metrics_labels(SessionLocal(), app, METRICS) @@ -915,7 +911,7 @@ def metrics(request: Request): if agent.startswith("Prometheus/"): request.state.log_context["trivial_code"] = 200 headers = {"Content-Type": CONTENT_TYPE_LATEST} - registry = get_metrics_reporting_registry(get_metrics_registry()) + registry = get_metrics_registry() return Response(generate_latest(registry), status_code=200, headers=headers) diff --git a/ctms/config.py b/ctms/config.py index 6028a509..d9dc52f7 100644 --- a/ctms/config.py +++ b/ctms/config.py @@ -3,7 +3,7 @@ from enum import Enum from typing import Optional -from pydantic import BaseSettings, DirectoryPath, PostgresDsn +from pydantic import BaseSettings, PostgresDsn # If primary email matches, then add trace to logs re_trace_email = re.compile(r".*\+trace-me-mozilla-.*@.*") @@ -27,12 +27,13 @@ class Settings(BaseSettings): token_expiration: timedelta = timedelta(minutes=60) server_prefix: str = "http://localhost:8000" use_mozlog: bool = True + log_sqlalchemy: bool = False logging_level: LogLevel = LogLevel.INFO sentry_debug: bool = False fastapi_env: Optional[str] = None - is_gunicorn: bool = False - prometheus_multiproc_dir: Optional[DirectoryPath] = None + host: str = "0.0.0.0" + port: int = 8000 pubsub_audience: Optional[str] = None pubsub_email: Optional[str] = None @@ -68,8 +69,8 @@ class Config: fields = { "fastapi_env": {"env": "fastapi_env"}, - "is_gunicorn": {"env": "is_gunicorn"}, - "prometheus_multiproc_dir": {"env": "prometheus_multiproc_dir"}, + "host": {"env": "host"}, + "port": {"env": "port"}, } diff --git a/ctms/log.py b/ctms/log.py index b4ce5bdc..bc43ecb3 100644 --- a/ctms/log.py +++ b/ctms/log.py @@ -5,7 +5,6 @@ from typing import Any, Dict, List, Optional import structlog -import uvicorn from fastapi import Request from starlette.routing import Match from structlog.types import Processor @@ -13,7 +12,7 @@ def configure_logging( use_mozlog: bool = True, logging_level: str = "INFO", log_sqlalchemy: bool = False -) -> None: +) -> dict: """Configure Python logging. :param use_mozlog: If True, use MozLog format, appropriate for deployments. @@ -21,8 +20,6 @@ def configure_logging( :param logging_level: The logging level, such as DEBUG or INFO. 
:param log_sqlalchemy: Include SQLAlchemy engine logs, such as SQL statements """ - # Processors used for logs generated by stdlib's logging - uvicorn_formatters = uvicorn.config.LOGGING_CONFIG["formatters"] if use_mozlog: structlog_fmt_prep: Processor = structlog.stdlib.render_to_log_kwargs @@ -47,67 +44,37 @@ def configure_logging( "()": "dockerflow.logging.JsonLogFormatter", "logger_name": "ctms", }, - "uvicorn_access": uvicorn_formatters["access"], - "uvicorn_default": uvicorn_formatters["default"], }, "handlers": { "humans": { "class": "logging.StreamHandler", "formatter": "dev_console", - "level": "DEBUG", + "level": logging.DEBUG, + }, + "null": { + "class": "logging.NullHandler", }, "mozlog": { "class": "logging.StreamHandler", "formatter": "mozlog_json", - "level": "DEBUG", - }, - "uvicorn.access": { - "class": "logging.StreamHandler", - "formatter": "uvicorn_access", - "level": "INFO", - }, - "uvicorn.default": { - "class": "logging.StreamHandler", - "formatter": "uvicorn_default", - "level": "INFO", + "level": logging.DEBUG, }, }, + "root": { + "handlers": ["mozlog" if use_mozlog else "humans"], + "level": logging_level, + }, "loggers": { - "alembic": { - "propagate": False, - "handlers": ["mozlog" if use_mozlog else "humans"], - "level": logging_level, - }, - "ctms": { - "propagate": False, - "handlers": ["mozlog" if use_mozlog else "humans"], - "level": logging_level, - }, - "uvicorn": { - "handlers": ["mozlog" if use_mozlog else "uvicorn.default"], - "level": logging_level, - "propagate": False, - }, - "uvicorn.error": { - "handlers": ["mozlog" if use_mozlog else "uvicorn.default"], - "level": logging_level, - "propagate": False, - }, - "uvicorn.access": { - "handlers": ["mozlog" if use_mozlog else "uvicorn.access"], - "level": "WARNING", # was INFO, but duplicates ctms.web - "propagate": False, - }, + "alembic": {"level": logging_level}, + "ctms": {"level": logging_level}, + "uvicorn": {"level": logging_level}, + "uvicorn.access": {"handlers": ["null"], "propagate": False}, "sqlalchemy.engine": { - "handlers": ["mozlog" if use_mozlog else "uvicorn.default"], - "level": logging_level if log_sqlalchemy else "WARNING", + "handlers": ["mozlog" if use_mozlog else "humans"], + "level": logging_level if log_sqlalchemy else logging.WARNING, "propagate": False, }, }, - "root": { - "handlers": ["mozlog" if use_mozlog else "humans"], - "level": "WARNING", - }, } logging.config.dictConfig(logging_config) @@ -129,6 +96,7 @@ def configure_logging( wrapper_class=structlog.stdlib.BoundLogger, cache_logger_on_first_use=True, ) + return logging_config def context_from_request(request: Request) -> Dict: diff --git a/ctms/metrics.py b/ctms/metrics.py index 1728f12b..2f1de087 100644 --- a/ctms/metrics.py +++ b/ctms/metrics.py @@ -5,13 +5,10 @@ from fastapi import FastAPI from prometheus_client import CollectorRegistry, Counter, Histogram -from prometheus_client.multiprocess import MultiProcessCollector from prometheus_client.utils import INF -from pydantic import ValidationError from sqlalchemy.orm import Session from starlette.routing import Route -from ctms import config from ctms.crud import get_active_api_client_ids METRICS_PARAMS: dict[str, tuple[Type[Counter] | Type[Histogram], dict]] = { @@ -60,37 +57,6 @@ } -def get_metrics_reporting_registry( - process_registry: CollectorRegistry, -) -> CollectorRegistry: - """ - Get the metrics registry for reporting metrics. - - If we're running under gunicorn, then each worker has its own process and - its own process collector. 
For reporting, we need a fresh registry with a - multiprocess collector that points to the metrics folder (created empty at - startup, with a database file for each process and metric type). It will - use the databases in this folder to generate combined metrics across the - processes, and will not double-count the reporting process's metrics. - - If we're not running under gunicorn, then return the passed per-process - registry, which is the only metrics registry. In the single-process case, - We could use the default prometheus_client.REGISTRY, but it makes tests - easier to write if it is possible to replace the registry with a fresh one. - """ - try: - settings = config.Settings() - prometheus_multiproc_dir = settings.prometheus_multiproc_dir - except ValidationError: - prometheus_multiproc_dir = None - - if prometheus_multiproc_dir: - registry = CollectorRegistry() - MultiProcessCollector(registry, path=prometheus_multiproc_dir) - return registry - return process_registry - - def init_metrics(registry: CollectorRegistry) -> dict[str, Counter | Histogram]: """Initialize the metrics with the registry.""" metrics = {} diff --git a/docker/Dockerfile b/docker/Dockerfile index 267479a2..05035337 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,7 +17,7 @@ ENV PYTHONPATH=/app \ VENV_PATH="/opt/pysetup/.venv" \ PORT=8000 -ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH" +ENV PATH="$VENV_PATH/bin:$POETRY_HOME/bin:$PATH" # Set up user and group ARG userid=10001 @@ -68,10 +68,6 @@ USER app COPY --from=builder-base $POETRY_HOME $POETRY_HOME COPY --from=builder-base $PYSETUP_PATH $PYSETUP_PATH -# Copying in our entrypoint -COPY --chown=app:app ./docker/docker-entrypoint.sh /docker-entrypoint.sh -RUN chmod +x /docker-entrypoint.sh - # venv already has runtime deps installed we get a quicker install WORKDIR $PYSETUP_PATH RUN poetry install --no-root @@ -80,25 +76,17 @@ WORKDIR /app COPY --chown=app:app . . EXPOSE $PORT -ENTRYPOINT ["/docker-entrypoint.sh"] -CMD uvicorn ctms.app:app --reload --host=0.0.0.0 --port=$PORT +CMD ["python", "asgi.py"] # 'production' stage uses the clean 'python-base' stage and copyies # in only our runtime deps that were installed in the 'builder-base' FROM python-base as production -ENV FASTAPI_ENV=production \ - IS_GUNICORN=1 \ - PROMETHEUS_MULTIPROC=1 +ENV FASTAPI_ENV=production COPY --from=builder-base $VENV_PATH $VENV_PATH -COPY ./docker/gunicorn_conf.py /gunicorn_conf.py - -COPY ./docker/docker-entrypoint.sh /docker-entrypoint.sh -RUN chmod +x /docker-entrypoint.sh -COPY . /app WORKDIR /app +COPY --chown=app:app . . EXPOSE $PORT -ENTRYPOINT ["/docker-entrypoint.sh"] -CMD gunicorn -k uvicorn.workers.UvicornWorker -c /gunicorn_conf.py ctms.app:app +CMD ["python", "asgi.py"] diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh deleted file mode 100644 index 7cb52c34..00000000 --- a/docker/docker-entrypoint.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -# activate our virtual environment here -. 
/opt/pysetup/.venv/bin/activate
-
-# setup an empty directory for Prometheus metrics
-if [ ${PROMETHEUS_MULTIPROC:-0} -eq 1 ]; then
-    prometheus_multiproc_dir=`mktemp -td prometheus.XXXXXXXXXX` || exit 1
-    export prometheus_multiproc_dir
-fi
-
-# Evaluating passed command:
-exec "$@"
-
-if [ -z ${prometheus_multiproc_dir+x} ]; then
-    rm -rf ${prometheus_multiproc_dir}
-fi
diff --git a/docker/gunicorn_conf.py b/docker/gunicorn_conf.py
deleted file mode 100644
index 871973c8..00000000
--- a/docker/gunicorn_conf.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# From: https://github.com/tiangolo/uvicorn-gunicorn-docker/blob/2daa3e3873c837d5781feb4ff6a40a89f791f81b/docker-images/gunicorn_conf.py
-# pylint: disable=invalid-name
-
-import json
-import multiprocessing
-import os
-
-from prometheus_client import multiprocess
-
-workers_per_core_str = os.getenv("WORKERS_PER_CORE", "1")
-max_workers_str = os.getenv("MAX_WORKERS")
-use_max_workers = None
-if max_workers_str:
-    use_max_workers = int(max_workers_str)
-web_concurrency_str = os.getenv("WEB_CONCURRENCY", None)
-
-host = os.getenv("HOST", "0.0.0.0")
-port = os.getenv("PORT", "80")
-bind_env = os.getenv("BIND", None)
-use_loglevel = os.getenv("LOG_LEVEL", "info")
-if bind_env:
-    use_bind = bind_env
-else:
-    use_bind = f"{host}:{port}"
-
-cores = multiprocessing.cpu_count()
-workers_per_core = float(workers_per_core_str)
-default_web_concurrency = workers_per_core * cores
-if web_concurrency_str:
-    web_concurrency = int(web_concurrency_str)
-    assert web_concurrency > 0
-else:
-    web_concurrency = max(int(default_web_concurrency), 2)
-    if use_max_workers:
-        web_concurrency = min(web_concurrency, use_max_workers)
-accesslog_var = os.getenv("ACCESS_LOG", "-")
-use_accesslog = accesslog_var or None
-errorlog_var = os.getenv("ERROR_LOG", "-")
-use_errorlog = errorlog_var or None
-graceful_timeout_str = os.getenv("GRACEFUL_TIMEOUT", "120")
-timeout_str = os.getenv("TIMEOUT", "120")
-keepalive_str = os.getenv("KEEP_ALIVE", "5")
-
-# Gunicorn config variables
-loglevel = use_loglevel
-workers = web_concurrency
-bind = use_bind
-errorlog = use_errorlog
-worker_tmp_dir = "/dev/shm"
-accesslog = use_accesslog
-graceful_timeout = int(graceful_timeout_str)
-timeout = int(timeout_str)
-keepalive = int(keepalive_str)
-
-
-# For debugging and testing
-log_data = {
-    "loglevel": loglevel,
-    "workers": workers,
-    "bind": bind,
-    "graceful_timeout": graceful_timeout,
-    "timeout": timeout,
-    "keepalive": keepalive,
-    "errorlog": errorlog,
-    "accesslog": accesslog,
-    # Additional, non-gunicorn variables
-    "workers_per_core": workers_per_core,
-    "use_max_workers": use_max_workers,
-    "host": host,
-    "port": port,
-}
-print(json.dumps(log_data))
-
-
-def child_exit(server, worker):
-    multiprocess.mark_process_dead(worker.pid)
diff --git a/docs/configuration.md b/docs/configuration.md
index 6f6ebc8d..8174e315 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -30,12 +30,13 @@ production, they are set as part of the runtime environment.
   MozLog format.
 * ``CTMS_LOGGING_LEVEL`` - The minimum level for logs. Defaults to ``INFO``
   if unset. Unset in production, and set to ``INFO`` in development.
+* ``CTMS_LOG_SQLALCHEMY`` - Determines whether SQLAlchemy logs should be
+  emitted. Defaults to ``False`` if unset. Unset in production, and set to
+  ``False`` in development.
 * ``CTMS_SENTRY_DEBUG`` - If set to True, then sentry initialization and
   capture is logged as well. This may be useful for development, but is not
   recommended for production.
* ``CTMS_FASTAPI_ENV`` - To determine which environment is being run; defaults to `None`. -* ``CTMS_IS_GUNICORN`` - Is Gunicorn being used to run FastAPI app; defaults to `False` -* ``CTMS_PROMETHEUS_MULTIPROC_DIR`` - For collecting and pushing App metrics to Promethesus for Monitoring; defaults to `None`. * ``CTMS_PUBSUB_AUDIENCE`` - Audience (or Server) shared between FxA and CTMS; part of claims analysis to ensure request payload is trustworthy. * ``CTMS_PUBSUB_EMAIL`` - Email (or Service Account) shared between FxA and CTMS; part of claims analysis to ensure request payload is trustworthy. * ``CTMS_PUBSUB_CLIENT`` - Client (or Token) shared between FxA and CTMS; part of claims analysis to ensure request payload is trustworthy. diff --git a/poetry.lock b/poetry.lock index c73f39c2..76842481 100644 --- a/poetry.lock +++ b/poetry.lock @@ -724,27 +724,6 @@ enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] -[[package]] -name = "gunicorn" -version = "20.1.0" -description = "WSGI HTTP Server for UNIX" -category = "main" -optional = false -python-versions = ">=3.5" -files = [ - {file = "gunicorn-20.1.0-py3-none-any.whl", hash = "sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e"}, - {file = "gunicorn-20.1.0.tar.gz", hash = "sha256:e0a968b5ba15f8a328fdfd7ab1fcb5af4470c28aaf7e55df02a99bc13138e6e8"}, -] - -[package.dependencies] -setuptools = ">=3.0" - -[package.extras] -eventlet = ["eventlet (>=0.24.1)"] -gevent = ["gevent (>=1.4.0)"] -setproctitle = ["setproctitle"] -tornado = ["tornado (>=0.2)"] - [[package]] name = "h11" version = "0.13.0" @@ -1824,7 +1803,7 @@ tornado = ["tornado (>=5)"] name = "setuptools" version = "65.3.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2575,4 +2554,4 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools" [metadata] lock-version = "2.0" python-versions = ">=3.7, <3.11" -content-hash = "b2e261e79cb9dd206c2562c2f16bd6066b2f4a6392c6f190b7dcd7fc95fb0d2e" +content-hash = "d01e132108ab7012c9418c092e4e61049c5a798dec221454a0032c6a66c77362" diff --git a/pyproject.toml b/pyproject.toml index 39c7007f..f9b98eea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ fastapi = "^0.84.0" starlette = "^0.19.1" requests = "^2.28.1" uvicorn = {extras = ["standard"], version = "^0.20.0"} -gunicorn = "^20.0.4" pydantic = {extras = ["email"], version = "^1.10.2"} SQLAlchemy = ">1.3.23, <1.4" # 1.4 is big update, requires SQLAlchemy-Utils support alembic = "^1.8.1" diff --git a/tests/unit/test_log.py b/tests/unit/test_log.py index d685e39f..9296d623 100644 --- a/tests/unit/test_log.py +++ b/tests/unit/test_log.py @@ -110,5 +110,5 @@ def test_configure_logging(use_mozlog, logging_level): handlers = ["mozlog"] else: handlers = ["humans"] - assert args[0]["root"] == {"handlers": handlers, "level": "WARNING"} + assert args[0]["root"] == {"handlers": handlers, "level": logging_level} assert args[0]["loggers"]["ctms"]["level"] == logging_level diff --git a/tests/unit/test_metrics.py b/tests/unit/test_metrics.py index ee2296f5..a96afb99 100644 --- a/tests/unit/test_metrics.py +++ b/tests/unit/test_metrics.py @@ -1,19 +1,13 @@ # Test for metrics -from unittest.mock import Mock, patch +from unittest.mock import patch import pytest from prometheus_client import CollectorRegistry, 
generate_latest -from prometheus_client.multiprocess import MultiProcessCollector from prometheus_client.parser import text_string_to_metric_families -from pydantic import ValidationError from structlog.testing import capture_logs from ctms.app import app -from ctms.metrics import ( - get_metrics_reporting_registry, - init_metrics, - init_metrics_labels, -) +from ctms.metrics import init_metrics, init_metrics_labels # Metric cardinatility numbers # These numbers change as routes are added or changed @@ -60,38 +54,6 @@ def metrics(setup_metrics): return test_metrics -def test_get_metrics_reporting_registry_standard(): - """get_metrics_reporting_registry() returns the passed registry.""" - passed_registry = CollectorRegistry() - with patch("ctms.metrics.config.Settings") as settings: - settings.return_value.prometheus_multiproc_dir = None - the_registry = get_metrics_reporting_registry(passed_registry) - assert the_registry is passed_registry - - -def test_get_metrics_reporting_registry_multiprocess(tmp_path): - """get_metrics_reporting_registry() can register a multiprocessing collector.""" - passed_registry = CollectorRegistry() - with patch("ctms.metrics.config.Settings") as settings: - settings.return_value.prometheus_multiproc_dir = tmp_path - the_registry = get_metrics_reporting_registry(passed_registry) - assert the_registry is not passed_registry - # pylint: disable=protected-access - collectors = list(the_registry._collector_to_names.keys()) - assert len(collectors) == 1 - assert isinstance(collectors[0], MultiProcessCollector) - - -def test_get_metrics_reporting_registry_settings_error(): - """get_metrics_reporting_registry() handles invalid settings.""" - passed_registry = CollectorRegistry() - with patch("ctms.metrics.config.Settings") as settings: - settings.side_effect = ValidationError(errors=[], model=Mock) - the_registry = get_metrics_reporting_registry(passed_registry) - assert the_registry is passed_registry - assert not the_registry._collector_to_names # pylint: disable=protected-access - - def test_init_metrics_labels(dbsession, client_id_and_secret, registry, metrics): """Test that init_metric_labels populates variants""" init_metrics_labels(dbsession, app, metrics)
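
Note (illustrative, not part of the patch): with Gunicorn gone there is
exactly one process and therefore one CollectorRegistry, which is why the
/metrics endpoint above can serve generate_latest() from the per-process
registry directly instead of going through a MultiProcessCollector. Below
is a minimal sketch of that pattern under those assumptions; the names
(app, registry, requests) are local to the example, not CTMS code:

    import uvicorn
    from fastapi import FastAPI, Response
    from prometheus_client import (
        CONTENT_TYPE_LATEST,
        CollectorRegistry,
        Counter,
        generate_latest,
    )

    # One process, one registry. A fresh registry (rather than the default
    # prometheus_client.REGISTRY) lets tests swap in their own, as noted in
    # the app.py comment above.
    registry = CollectorRegistry()
    requests = Counter("requests", "Total requests", registry=registry)

    app = FastAPI()

    @app.get("/metrics")
    def metrics() -> Response:
        requests.inc()
        # No MultiProcessCollector needed: this single registry already
        # sees every metric the process records.
        return Response(generate_latest(registry), media_type=CONTENT_TYPE_LATEST)

    if __name__ == "__main__":
        # Mirrors asgi.py above: one uvicorn.Server, one worker process.
        uvicorn.Server(uvicorn.Config(app, host="0.0.0.0", port=8000)).run()

Scaled out, each Kubernetes pod runs one such process and Prometheus
scrapes each pod separately, which is the scaling model this patch
leans on.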