From 70c394570950cee266c78f175f014f28ce77ddcb Mon Sep 17 00:00:00 2001
From: leftmove <100anonyo@gmail.com>
Date: Sat, 21 Dec 2024 00:35:32 -0500
Subject: [PATCH] uv migration and docker refactor

---
 backend/.env.example                       |  2 +-
 backend/Dockerfile                         |  4 +-
 backend/docker-compose.dev.yaml            | 15 ++++
 backend/docker-compose.prod.yaml           | 73 ++++++++++++-------
 backend/routers/filer.py                   |  7 +-
 backend/routers/general.py                 |  4 +-
 backend/routers/lib/analysis.py            | 10 +--
 backend/routers/lib/api.py                 | 22 ++----
 backend/routers/lib/cache.py               |  6 +-
 backend/routers/lib/database.py            |  2 +-
 backend/routers/lib/search.py              |  8 +-
 backend/routers/lib/web.py                 |  2 +-
 backend/routers/utils.py                   |  8 +-
 backend/static/statistics.json             | 12 +--
 backend/worker/Dockerfile                  | 15 ++++
 .../{routers/worker.py => worker/tasks.py} | 25 +++++--
 16 files changed, 136 insertions(+), 79 deletions(-)
 create mode 100644 backend/worker/Dockerfile
 rename backend/{routers/worker.py => worker/tasks.py} (76%)

diff --git a/backend/.env.example b/backend/.env.example
index 05d5c7a7..48da0f8e 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -23,7 +23,7 @@ REDIS_PORT = 6379
 REDIS_PASSWORD = "***********"
 
 MEILI_SERVER_URL = "http://${SERVER}:7700"
-MEILI_MASTER_KEY = "***********"
+MEILI_MASTER_KEY = "qq80RvopBK1kjvdlSVG_8VaxsRZICP0uniq5F2v0nlM"
 
 SENTRY_DSN = ""
 TELEMETRY = False
diff --git a/backend/Dockerfile b/backend/Dockerfile
index 7d180100..cd6a05d3 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -7,9 +7,11 @@ COPY uv.lock pyproject.toml /app/
 # Install requirements
 RUN uv sync
 
+# Copy the applications
 COPY main.py /app/
 COPY routers /app/routers/
+COPY worker /app/worker/
 COPY static /app/static/
 
 # Command to run the application
-CMD ["uv", "run", "python", "main.py"]
+CMD ["uv", "run", "python", "-m", "main"]
diff --git a/backend/docker-compose.dev.yaml b/backend/docker-compose.dev.yaml
index b616ab1b..e9fda3c3 100644
--- a/backend/docker-compose.dev.yaml
+++ b/backend/docker-compose.dev.yaml
@@ -1,4 +1,5 @@
 services:
+  # Redis
   cache:
     container_name: cache
     build:
@@ -7,6 +8,8 @@ services:
     restart: always
     ports:
       - 6379:6379
+
+  # MongoDB
   database:
     container_name: database
     build:
@@ -17,6 +20,8 @@ services:
     restart: always
     ports:
       - 27017:27017
+
+  # Meilisearch
   search:
     container_name: search
     build:
@@ -27,3 +32,13 @@ services:
     restart: always
     ports:
       - 7700:7700
+
+  # Celery
+  worker:
+    container_name: worker
+    build:
+      context: ./
+      dockerfile: ./worker/Dockerfile
+    depends_on:
+      - cache
+    restart: always
diff --git a/backend/docker-compose.prod.yaml b/backend/docker-compose.prod.yaml
index ae1360b8..782b7c97 100644
--- a/backend/docker-compose.prod.yaml
+++ b/backend/docker-compose.prod.yaml
@@ -1,43 +1,65 @@
 version: "3.4"
 
+x-common-variables: &common-variables # General
+  APP_NAME: "backend"
+  ENVIRONMENT: "production"
+  ADMIN_PASSWORD: "***********"
+
+  # Server Config
+  WORKERS: 9
+  HOST: "0.0.0.0"
+  EXPOSE_PORT: 8000
+  FORWARDED_ALLOW_IPS: "*"
+
+  # Stock APIs
+  FINN_HUB_API_KEY: "***********"
+  ALPHA_VANTAGE_API_KEY: "***********"
+  OPEN_FIGI_API_KEY: "***********"
+
+  # Database APIs
+  MONGO_SERVER_URL: "database"
+  MONGO_BACKUP_URL: "1LT4xiFJkh6YlAPQDcov8YIKqcvevFlEE"
+  REDIS_SERVER_URL: "cache"
+  REDIS_PORT: 6379
+  MEILI_SERVER_URL: "search"
+  MEILI_MASTER_KEY: "***********"
+
+  # Telemetry
+  TELEMETRY: True
+  SENTRY_DSN: "***********"
+
 services:
+  # FastAPI
   backend:
     container_name: backend
     build:
       dockerfile: Dockerfile
-    restart: always
     depends_on:
      - database
      - cache
      - search
+     - worker
    volumes:
      - ./public:/app/public
    networks:
      - proxy-network
-    environment:
-      APP_NAME: "backend"
-      ENVIRONMENT: "production"
-      ADMIN_PASSWORD: "***********"
-
-      WORKERS: 9
-      HOST: "0.0.0.0"
-      EXPOSE_PORT: 8000
-      FORWARDED_ALLOW_IPS: "*"
-
-      FINN_HUB_API_KEY: "***********"
-      ALPHA_VANTAGE_API_KEY: "***********"
-      OPEN_FIGI_API_KEY: "***********"
-
-      MONGO_SERVER_URL: "database"
-      MONGO_BACKUP_URL: "1LT4xiFJkh6YlAPQDcov8YIKqcvevFlEE"
-      REDIS_SERVER_URL: "cache"
-      REDIS_PORT: 6379
-      MEILI_SERVER_URL: "search"
-      MEILI_MASTER_KEY: "***********"
+    environment: *common-variables
+    restart: always
 
-      TELEMETRY: True
-      SENTRY_DSN: "***********"
+  # Celery
+  worker:
+    container_name: worker
+    build:
+      context: ./
+      dockerfile: ./worker/Dockerfile
+    depends_on:
+      - cache
+    networks:
+      - proxy-network
+    environment: *common-variables
+    restart: always
 
+  # Redis
   cache:
     container_name: cache
     build:
@@ -47,6 +69,7 @@ services:
       - proxy-network
     restart: always
 
+  # MongoDB
   database:
     container_name: database
     build:
@@ -58,6 +81,7 @@ services:
       - ./database/main_db:/data/db
     restart: always
 
+  # Meilisearch
   search:
     container_name: search
     build:
@@ -67,9 +91,8 @@ services:
       - ./search/search_db:/meili_data
     networks:
       - proxy-network
+    environment: *common-variables # Only MEILI_SERVER_URL and MEILI_MASTER_KEY are needed.
     restart: always
-    environment:
-      MEILI_MASTER_KEY: "***********"
 
 networks:
   proxy-network:
diff --git a/backend/routers/filer.py b/backend/routers/filer.py
index b7890da2..bc26d2cc 100644
--- a/backend/routers/filer.py
+++ b/backend/routers/filer.py
@@ -8,8 +8,7 @@
 from urllib import parse
 from datetime import datetime
 
-from . import worker
-from .worker import production_environment
+from worker import tasks as worker
 
 from .lib import web
 from .lib import database
@@ -20,6 +19,8 @@
 from .lib.api import sec_filer_search
 from .lib.cache import cache
 
+production_environment = getattr(worker, "production_environment", False)
+
 
 class Filer(BaseModel):
     cik: str
@@ -198,7 +199,7 @@ def create_historical(cik, company, stamp):
 
 
 def create_filer(cik, sec_data):
-    company, stamp = web.initalize_filer(cik, sec_data)
+    company, stamp = web.initialize_filer(cik, sec_data)
     create_recent(cik, company, stamp)
     create_historical(cik, company, stamp)
diff --git a/backend/routers/general.py b/backend/routers/general.py
index e6370c1f..0c94944a 100644
--- a/backend/routers/general.py
+++ b/backend/routers/general.py
@@ -5,12 +5,13 @@
 import os
 import logging
 
+from worker.tasks import try_filer, replace_filer, delay_error, production_environment
+
 from .lib import database
 from .lib import cache as cm
 from .lib.backup import save_collections
 
 from .filer import popular_cik_list, top_cik_list
-from .worker import try_filer, replace_filer, delay_error, production_environment
 
 cache = cm.cache
 router = APIRouter(
@@ -32,6 +33,7 @@ async def info():
 async def info_undefined():
     return {"message": "Hello World!"}
 
+
 @cache(4)
 @router.get("/health", status_code=200)
 async def health():
diff --git a/backend/routers/lib/analysis.py b/backend/routers/lib/analysis.py
index 4c0f83a7..7b6674a0 100644
--- a/backend/routers/lib/analysis.py
+++ b/backend/routers/lib/analysis.py
@@ -789,12 +789,8 @@ def sort_and_format(filer_ciks):
         "updated": 1,
         "_id": 0,
     }
-
-    for cik in filer_ciks:
-        filers = []
-        filer = database.find_filer(cik, project)
-        if filer:
-            filers.append(filer)
+    filers = [filer for filer in database.find_filers({"cik": {"$in": filer_ciks}}, project)]
+
 
     try:
         filers_sorted = [
@@ -825,7 +821,7 @@
             )
             filer.pop("_id", None)
         except Exception as e:
-            errors.report_error(cik, e)
+            errors.report_error(filer.get("cik", "NA"), e)
             filer["date"] = "NA"
             filer["market_value"] = "NA"
     return filers_sorted
diff --git a/backend/routers/lib/api.py b/backend/routers/lib/api.py
index acae5e2d..80137b97 100644
--- a/backend/routers/lib/api.py
+++ b/backend/routers/lib/api.py
@@ -3,11 +3,14 @@
 import requests
 import logging
 
+from dotenv import load_dotenv
 from datetime import datetime
 
 from . import database
 from . import analysis
 
+load_dotenv()
+
 logging.info("[ APIs Initializing ] ...")
 
 # Requests
@@ -16,19 +19,10 @@
     "User-Agent": "wallstreetlocal admin@wallstreetlocal.com ",
 }
 
-ENVIRONMENT = os.environ.get("ENVIRONMENT", "development")
-production_environment = True if ENVIRONMENT == "production" else False
-if not production_environment:
-    from dotenv import load_dotenv
-
-    load_dotenv(".env.development")
-
-# Environment Variables
-FINN_HUB_API_KEY = os.environ["FINN_HUB_API_KEY"]
-ALPHA_VANTAGE_API_KEY = os.environ["ALPHA_VANTAGE_API_KEY"]
-OPEN_FIGI_API_KEY = os.environ["OPEN_FIGI_API_KEY"]
-
-# pyright: reportUnboundVariable=false
+# API Variables
+FINN_HUB_API_KEY = os.environ.get("FINN_HUB_API_KEY", "")
+ALPHA_VANTAGE_API_KEY = os.environ.get("ALPHA_VANTAGE_API_KEY", "")
+OPEN_FIGI_API_KEY = os.environ.get("OPEN_FIGI_API_KEY", "")
 
 
 def rate_limit(cik, wait=60):
@@ -140,7 +134,7 @@ def sec_filer_search(cik):
         cik,
         custom_wait=600,
     )
-    
+
     if res.ok:
         data = res.json()
     else:
diff --git a/backend/routers/lib/cache.py b/backend/routers/lib/cache.py
index cde07d93..7e50b964 100644
--- a/backend/routers/lib/cache.py
+++ b/backend/routers/lib/cache.py
@@ -14,10 +14,10 @@
 ENVIRONMENT = os.environ.get("ENVIRONMENT", "development")
 production_environment = True if ENVIRONMENT == "production" else False
 
-REDIS_SERVER_URL = os.environ["REDIS_SERVER_URL"]
-REDIS_PORT = int(os.environ.get("REDIS_PORT", 14640))
+REDIS_SERVER_URL = os.environ.get("REDIS_SERVER_URL", "cache")
+REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
 REDIS_USERNAME = os.environ.get("REDIS_USERNAME", "default")
-REDIS_PASSWORD = os.environ["REDIS_PASSWORD"]
+REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "")
 
 store = redis.Redis(
     host=REDIS_SERVER_URL,
diff --git a/backend/routers/lib/database.py b/backend/routers/lib/database.py
index 83785c40..8f2d9a7e 100644
--- a/backend/routers/lib/database.py
+++ b/backend/routers/lib/database.py
@@ -10,7 +10,7 @@
 
 load_dotenv()
 
-MONGO_SERVER_URL = os.environ["MONGO_SERVER_URL"]
+MONGO_SERVER_URL = os.environ.get("MONGO_SERVER_URL", "mongodb://database:27017")
 ENVIRONMENT = os.environ.get("ENVIRONMENT", "development")
 production_environment = True if ENVIRONMENT == "production" else False
diff --git a/backend/routers/lib/search.py b/backend/routers/lib/search.py
index e129ab0d..2d63ba09 100644
--- a/backend/routers/lib/search.py
+++ b/backend/routers/lib/search.py
@@ -7,8 +7,10 @@
 ENVIRONMENT = os.environ.get("ENVIRONMENT", "development")
 production_environment = True if ENVIRONMENT == "production" else False
 
-MEILI_SERVER_URL = os.environ["MEILI_SERVER_URL"]
-MEILI_MASTER_KEY = os.environ["MEILI_MASTER_KEY"]
+MEILI_SERVER_URL = os.environ.get("MEILI_SERVER_URL", "http://search:7700")
+MEILI_MASTER_KEY = os.environ.get(
+    "MEILI_MASTER_KEY", "qq80RvopBK1kjvdlSVG_8VaxsRZICP0uniq5F2v0nlM"
+)
 
 
 def _prepare_meilisearch():
@@ -16,7 +18,7 @@ def _prepare_meilisearch():
     companies_index = client.index("companies")
     indexes = client.get_indexes()
     if not indexes or "companies" not in [index.uid for index in indexes]:
-        client.create_index("companies", {"primaryKey": "cik"})
+        client.create_index("companies", "cik")
     try:
         companies_index.update(primary_key="cik")
         companies_index.update_displayed_attributes(
diff --git a/backend/routers/lib/web.py b/backend/routers/lib/web.py
index 3db7be75..2aee22d0 100644
--- a/backend/routers/lib/web.py
+++ b/backend/routers/lib/web.py
@@ -179,7 +179,7 @@ def process_filings(cik, data):
     return filings, last_report, first_report
 
 
-def initalize_filer(cik, sec_data):
+def initialize_filer(cik, sec_data):
     company = {
         "name": sec_data["name"],
         "cik": cik,
diff --git a/backend/routers/utils.py b/backend/routers/utils.py
index 0b838d8a..f7a91a88 100644
--- a/backend/routers/utils.py
+++ b/backend/routers/utils.py
@@ -14,7 +14,8 @@
 from sentry_sdk.integrations.pymongo import PyMongoIntegration
 from sentry_sdk.integrations.logging import LoggingIntegration
 
-from .worker import queue
+from worker import tasks as queue
+
 from .lib import errors
 from .lib import database
 from .lib import search
@@ -313,9 +314,4 @@ def insert_search(document_list: list):
     with open(f"{cwd}/static/statistics.json", "w") as s:
         json.dump(statistic, s, indent=6)
 
-    print("Starting Worker ...")
-    if production_environment:
-        worker = threading.Thread(target=start_worker)
-        worker.start()
-
     print("Done!")
diff --git a/backend/static/statistics.json b/backend/static/statistics.json
index 375fbac6..1ffef702 100644
--- a/backend/static/statistics.json
+++ b/backend/static/statistics.json
@@ -1,12 +1,12 @@
 {
     "latest": {
-        "count": 783,
-        "total": 1062215.2685668468,
-        "average": 1356.5967670074672
+        "count": 784,
+        "total": 1062224.7630047798,
+        "average": 1354.8785242407905
     },
     "historical": {
-        "count": 815,
-        "total": 3688009.7609632015,
-        "average": 4525.165350875094
+        "count": 816,
+        "total": 3688239.704072237,
+        "average": 4519.901598127742
     }
 }
\ No newline at end of file
diff --git a/backend/worker/Dockerfile b/backend/worker/Dockerfile
new file mode 100644
index 00000000..d50b22a1
--- /dev/null
+++ b/backend/worker/Dockerfile
@@ -0,0 +1,15 @@
+FROM ghcr.io/astral-sh/uv:python3.9-bookworm-slim AS base
+
+# Copy only requirements to cache them in docker layer
+WORKDIR /app
+COPY uv.lock pyproject.toml /app/
+
+# Install requirements
+RUN uv sync
+
+COPY worker/tasks.py /app/worker/tasks.py
+COPY static /app/static/
+COPY routers /app/routers/
+
+# Command to run the application
+CMD ["uv", "run", "python", "-m", "celery", "-A", "worker.tasks.queue", "worker", "--loglevel=info"]
diff --git a/backend/routers/worker.py b/backend/worker/tasks.py
similarity index 76%
rename from backend/routers/worker.py
rename to backend/worker/tasks.py
index 7fbbfd8e..e4a50a47 100644
--- a/backend/routers/worker.py
+++ b/backend/worker/tasks.py
@@ -7,14 +7,17 @@
 import sentry_sdk
 from sentry_sdk.integrations.celery import CeleryIntegration
 
-from . import filer
+
+from routers import filer
+from routers.lib.cache import (
+    REDIS_SERVER_URL,
+    REDIS_PORT,
+    REDIS_USERNAME,
+    REDIS_PASSWORD,
+)
 
 load_dotenv()
 
-REDIS_SERVER_URL = os.environ.get("REDIS_SERVER_URL", "cache")
-REDIS_PORT = os.environ.get("REDIS_PORT", 6379)
-REDIS_USERNAME = os.environ.get("REDIS_USERNAME", "default")
-REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "")
 BROKER = f"redis://{REDIS_USERNAME}:{REDIS_PASSWORD}@{REDIS_SERVER_URL}:{REDIS_PORT}/0"
 
 SENTRY_DSN = os.environ.get("SENTRY_DSN", "")
@@ -25,12 +28,13 @@
 production_environment = True if ENVIRONMENT == "production" else False
 run_telemetry = True if TELEMETRY else False
 
+
 class Config:
     worker_concurrency = WORKERS
-    conccurrency = 1
+    concurrency = 1
     broker_connection_retry_on_startup = True
     celery_task_always_eager = False if production_environment else True
-    
+
 
 queue = Celery("worker", broker=BROKER)
 queue.config_from_object(Config)
@@ -42,6 +46,13 @@ def init_worker(*args, **kwargs):
     sentry_sdk.init(
         dsn=SENTRY_DSN,
         enable_tracing=True,
+        # Set traces_sample_rate to 1.0 to capture 100%
+        # of transactions for tracing.
+        traces_sample_rate=1.0,
+        # Set profiles_sample_rate to 1.0 to profile 100%
+        # of sampled transactions.
+        # We recommend adjusting this value in production.
+        profiles_sample_rate=1.0,
         integrations=[CeleryIntegration()],
     )
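
Usage note: after this rename, callers import the Celery machinery from
worker.tasks instead of routers.worker. Below is a minimal sketch of an
API-side caller, assuming try_filer remains a registered Celery task that
accepts a CIK string (its body is unchanged by this patch and not shown here):

    # Hypothetical caller, for illustration only.
    from worker.tasks import try_filer

    def schedule_filer_build(cik: str) -> str:
        # .delay() serializes the arguments and pushes the task onto the
        # Redis broker; the worker container picks it up asynchronously.
        result = try_filer.delay(cik)
        return result.id  # AsyncResult id, usable for later status checks

Outside production, the Config class's celery_task_always_eager flag is meant
to run the same call inline in the API process, so the worker container only
matters in production.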