diff --git a/.github/workflows/airflow-publish.yml b/.github/workflows/airflow-publish.yml
new file mode 100644
index 00000000..3fa19f85
--- /dev/null
+++ b/.github/workflows/airflow-publish.yml
@@ -0,0 +1,44 @@
+name: Create and publish a Docker image
+
+on:
+  push:
+    branches: [ "main" ]
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Build and push Docker image
+        id: push
+        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+        with:
+          context: .
+          file: ./burrito/airflow/Dockerfile
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/.gitignore b/.gitignore
index e48eceba..6aab1dac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,5 @@ shadow
 .poetry.lock
 
 *.ipynb
+
+*/airflow/logs/*
diff --git a/burrito/airflow/Dockerfile b/burrito/airflow/Dockerfile
new file mode 100644
index 00000000..fb202adf
--- /dev/null
+++ b/burrito/airflow/Dockerfile
@@ -0,0 +1,32 @@
+FROM python:3.10-slim-buster as build-base
+
+ENV POETRY_NO_INTERACTION=1 \
+    POETRY_VIRTUALENVS_IN_PROJECT=true \
+    PATH="/opt/pysetup/.venv/bin:$PATH"
+
+RUN apt-get update
+RUN apt-get install --no-install-recommends -y build-essential
+
+WORKDIR /opt/pysetup
+
+RUN pip3 install poetry
+
+COPY pyproject.toml ./
+
+RUN poetry install --only main
+
+RUN apt-get install --no-install-recommends -y netcat iputils-ping
+
+FROM apache/airflow:2.9.1-python3.10
+
+ENV PATH="/opt/pysetup/.venv/bin:$PATH"
+ENV PYTHONPATH="${PYTHONPATH}:/opt/burrito_project"
+
+COPY --from=build-base /opt/pysetup/ /opt/pysetup/
+COPY ./preprocessor_config.json /opt/burrito_project/burrito/preprocessor_config.json
+COPY ./CONTRIBUTORS.md /opt/burrito_project/burrito/CONTRIBUTORS.md
+COPY ./CHANGELOG.md /opt/burrito_project/burrito/CHANGELOG.md
+COPY ./burrito /opt/burrito_project/burrito
+COPY ./event_init.sql /opt/burrito_project/burrito/event_init.sql
+
+CMD []
diff --git a/burrito/utils/tasks/new_tickets.py b/burrito/airflow/dags/new_tickets_dag.py
similarity index 76%
rename from burrito/utils/tasks/new_tickets.py
rename to burrito/airflow/dags/new_tickets_dag.py
index deac3b22..b09c0444 100644
--- a/burrito/utils/tasks/new_tickets.py
+++ b/burrito/airflow/dags/new_tickets_dag.py
@@ -1,5 +1,8 @@
 import datetime
 
+from airflow import DAG
+from airflow.operators.python import PythonOperator
+
 from burrito.utils.email_util import publish_email
 from burrito.utils.email_templates import TEMPLATE__EMAIL_NOTIFICATION_FOR_ADMIN
 from burrito.utils.query_util import STATUS_NEW
@@ -44,3 +47,17 @@ def check_for_new_tickets():
         )
     )
     get_logger().info(f"Found {len(tickets_list)} tickets with status NEW")
+
+
+with DAG(
+    dag_id="new_tickets_dag",
+    description="Check if there are tickets with NEW status and send an email if any",
+    start_date=datetime.datetime.now() - datetime.timedelta(days=1),
+    schedule_interval=datetime.timedelta(hours=3),
+    catchup=False,
+    is_paused_upon_creation=False
+) as dag:
+    PythonOperator(
+        task_id="check_for_new_tickets",
+        python_callable=check_for_new_tickets
+    )
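Since the DAG above is created via "with DAG(...) as dag:", the module exposes a dag object that can be exercised locally. A minimal smoke-test sketch, assuming an environment where Airflow and the burrito package are importable (DAG.test() has been available since Airflow 2.5; this script is illustrative, not part of the PR):

    # Hypothetical smoke test: runs the DAG's tasks in-process,
    # without a scheduler or executor.
    from burrito.airflow.dags.new_tickets_dag import dag

    if __name__ == "__main__":
        dag.test()  # executes check_for_new_tickets once, logging to stdout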
diff --git a/burrito/airflow/dags/ping_dag.py b/burrito/airflow/dags/ping_dag.py
new file mode 100644
index 00000000..b8d278ca
--- /dev/null
+++ b/burrito/airflow/dags/ping_dag.py
@@ -0,0 +1,52 @@
+import socket
+import datetime
+
+from airflow import DAG
+from airflow.operators.python import PythonOperator
+from airflow.models.baseoperator import chain
+
+from burrito.utils.logger import get_logger
+from burrito.utils.config_reader import get_config
+
+
+HOSTS_TO_PING = (
+    (get_config().BURRITO_DB_HOST, get_config().BURRITO_DB_PORT),
+    (get_config().BURRITO_REDIS_HOST, get_config().BURRITO_REDIS_PORT),
+    (get_config().BURRITO_MONGO_HOST, get_config().BURRITO_MONGO_PORT),
+    ("iis.sumdu.edu.ua", 80),
+    (get_config().BURRITO_SMTP_SERVER, 25),
+    (get_config().BURRITO_SMTP_SERVER, 465),
+)
+
+
+def burrito_ping(host, port):
+    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+
+    try:
+        sock.connect((host, int(port)))
+    except socket.error:
+        get_logger().critical(f"({host}, {port}) is unreachable")
+    except Exception as exc:
+        get_logger().error(exc)
+
+
+with DAG(
+    dag_id="ping_dag",
+    description="Pinging important hosts",
+    start_date=datetime.datetime.now() - datetime.timedelta(days=1),
+    schedule_interval=datetime.timedelta(hours=1),
+    catchup=False,
+    is_paused_upon_creation=False
+) as dag:
+    chain(
+        *[
+            PythonOperator(
+                task_id=f"pinging__{host}_{port}",
+                python_callable=burrito_ping,
+                op_kwargs={
+                    "host": host,
+                    "port": port
+                }
+            ) for host, port in HOSTS_TO_PING
+        ]
+    )
diff --git a/burrito/airflow/dags/preinstall_dag.py b/burrito/airflow/dags/preinstall_dag.py
new file mode 100644
index 00000000..6fa50d80
--- /dev/null
+++ b/burrito/airflow/dags/preinstall_dag.py
@@ -0,0 +1,66 @@
+import datetime
+
+from airflow import DAG
+from airflow.operators.python import PythonOperator
+from airflow.models.baseoperator import chain
+from peewee import MySQLDatabase
+
+from burrito.utils.db_utils import create_tables
+from burrito.utils.db_cursor_object import get_database_cursor
+from burrito.utils.mongo_util import mongo_init_ttl_indexes
+from burrito.utils.logger import get_logger
+from burrito.airflow.utils import preprocessor_task
+
+from burrito.models.m_email_code import EmailVerificationCode
+from burrito.models.m_password_rest_model import AccessRenewMetaData
+
+
+def init_db_events():
+    with open(
+        "/opt/burrito_project/burrito/event_init.sql",
+        "r",
+        encoding="utf-8"
+    ) as file:
+        db: MySQLDatabase = get_database_cursor()
+
+        for query in file.read().split(";"):
+            query = query.replace('\t', "").replace("\n", "")
+            query = ' '.join(query.split())
+
+            if not query:
+                continue
+
+            try:
+                db.execute_sql(query)
+            except Exception as e:
+                get_logger().error(e)
+
+
+with DAG(
+    dag_id="preinstall_dag",
+    description="Preparing DB",
+    start_date=datetime.datetime.now() - datetime.timedelta(days=1),
+    schedule_interval=None,
+    is_paused_upon_creation=False
+) as dag:
+    chain(
+        PythonOperator(
+            task_id="create_db_tables",
+            python_callable=create_tables
+        ),
+        PythonOperator(
+            task_id="first_preprocessor_run",
+            python_callable=preprocessor_task
+        ),
+        PythonOperator(
+            task_id="create_mongo_ttl_indexes",
+            python_callable=mongo_init_ttl_indexes,
+            op_kwargs={
+                "models": [EmailVerificationCode, AccessRenewMetaData]
+            }
+        ),
+        PythonOperator(
+            task_id="create_db_events",
+            python_callable=init_db_events
+        )
+    )
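In ping_dag.py and preinstall_dag.py above, chain() is shorthand for wiring tasks with the >> dependency operator. A toy equivalence sketch (the DAG id and task ids here are invented for illustration):

    import datetime

    from airflow import DAG
    from airflow.models.baseoperator import chain
    from airflow.operators.empty import EmptyOperator

    with DAG(dag_id="chain_demo", start_date=datetime.datetime(2024, 1, 1)) as dag:
        t1, t2, t3 = [EmptyOperator(task_id=f"t{i}") for i in range(1, 4)]
        chain(t1, t2, t3)  # sets the same dependencies as: t1 >> t2 >> t3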
diff --git a/burrito/airflow/dags/preprocessor_dag.py b/burrito/airflow/dags/preprocessor_dag.py
new file mode 100644
index 00000000..b69502dc
--- /dev/null
+++ b/burrito/airflow/dags/preprocessor_dag.py
@@ -0,0 +1,48 @@
+import datetime
+import orjson as json
+
+from airflow import DAG
+from airflow.operators.python import PythonOperator
+
+from burrito.models.group_model import Groups
+from burrito.models.statuses_model import Statuses
+from burrito.models.faculty_model import Faculties
+from burrito.models.queues_model import Queues
+from burrito.models.permissions_model import Permissions
+from burrito.models.roles_model import Roles
+from burrito.models.role_permissions_model import RolePermissions
+from burrito.airflow.utils import preprocessor_task
+
+
+MODEL_KEYS = {
+    "groups": Groups,
+    "faculties": Faculties,
+    "statuses": Statuses,
+    "queues": Queues,
+    "permissions": Permissions,
+    "roles": Roles,
+    "role_permissions": RolePermissions
+}
+
+DEFAULT_CONFIG = ""
+
+with open(
+    "/opt/burrito_project/burrito/preprocessor_config.json",
+    "r",
+    encoding="utf-8"
+) as file:
+    DEFAULT_CONFIG = json.loads(file.read())
+
+
+with DAG(
+    dag_id="preprocessor_dag",
+    description="Synchronize DB with data fetched from the API",
+    start_date=datetime.datetime.now() - datetime.timedelta(days=1),
+    schedule_interval=datetime.timedelta(minutes=30),
+    catchup=False,
+    is_paused_upon_creation=False
+) as dag:
+    PythonOperator(
+        task_id="preprocessor_task",
+        python_callable=preprocessor_task
+    )
diff --git a/burrito/utils/tasks/preprocessor.py b/burrito/airflow/utils.py
similarity index 96%
rename from burrito/utils/tasks/preprocessor.py
rename to burrito/airflow/utils.py
index 7bac5741..f8aa9e2c 100644
--- a/burrito/utils/tasks/preprocessor.py
+++ b/burrito/airflow/utils.py
@@ -31,11 +31,17 @@
 DEFAULT_CONFIG = ""
 
 
-with open("preprocessor_config.json", "r", encoding="utf-8") as file:
+with open(
+    "/opt/burrito_project/burrito/preprocessor_config.json",
+    "r",
+    encoding="utf-8"
+) as file:
     DEFAULT_CONFIG = json.loads(file.read())
 
 
 def preprocessor_task():
+    PluginLoader.load()
+
     get_logger().info("Preprocessor is started")
 
     conn = None
diff --git a/burrito/utils/tasks/ping.py b/burrito/utils/tasks/ping.py
deleted file mode 100644
index 7a252214..00000000
--- a/burrito/utils/tasks/ping.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import socket
-
-from burrito.utils.logger import get_logger
-
-
-def burrito_ping(host, port):
-    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-
-    try:
-        sock.connect((host, int(port)))
-    except socket.error:
-        get_logger().critical(f"({host}, {port}) is unreachable")
-    except Exception as exc:
-        get_logger().error(exc)
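The probe deleted here moved into ping_dag.py unchanged; note that it neither closes the socket nor sets a timeout, so an unresponsive host can stall the task for the OS connect timeout. A standalone variant (the timeout and context-managed cleanup are additions for illustration, not part of this PR):

    import socket

    def tcp_reachable(host: str, port: int, timeout: float = 5.0) -> bool:
        """Like burrito_ping(), but fails fast and always closes the socket."""
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(timeout)  # bound the wait instead of the OS default
                sock.connect((host, int(port)))
                return True
        except OSError:  # socket.error is an alias of OSError since Python 3.3
            return False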
diff --git a/docker-compose-airflow.yml b/docker-compose-airflow.yml
new file mode 100644
index 00000000..1e243324
--- /dev/null
+++ b/docker-compose-airflow.yml
@@ -0,0 +1,158 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
+#
+# WARNING: This configuration is for local development. Do not use it in a production deployment.
+#
+# This configuration supports basic configuration using environment variables or an .env file
+# The following variables are supported:
+#
+# AIRFLOW_IMAGE_NAME         - Docker image name used to run Airflow.
+#                              Default: apache/airflow:master-python3.8
+# AIRFLOW_UID                - User ID in Airflow containers
+#                              Default: 50000
+# AIRFLOW_GID                - Group ID in Airflow containers
+#                              Default: 50000
+# _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account.
+#                              Default: airflow
+# _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account.
+#                              Default: airflow
+#
+# Feel free to modify this file to suit your needs.
+---
+version: '3'
+x-airflow-common:
+  &airflow-common
+  build:
+    context: .
+    dockerfile: ./burrito/airflow/Dockerfile
+  environment:
+    &airflow-common-env
+    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
+    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
+    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
+    AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
+    AIRFLOW__CORE__FERNET_KEY: ''
+    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
+    AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
+    AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth'
+  env_file:
+    - .env
+  volumes:
+    - ./burrito/airflow/dags:/opt/airflow/dags
+    - ./burrito/airflow/logs:/opt/airflow/logs
+    - ./burrito/airflow/plugins:/opt/airflow/plugins
+  user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-50000}"
+  depends_on:
+    redis:
+      condition: service_healthy
+    postgres:
+      condition: service_healthy
+
+services:
+  postgres:
+    image: postgres:13
+    environment:
+      POSTGRES_USER: airflow
+      POSTGRES_PASSWORD: airflow
+      POSTGRES_DB: airflow
+    volumes:
+      - postgres-db-volume:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD", "pg_isready", "-U", "airflow"]
+      interval: 5s
+      retries: 5
+    restart: always
+    networks:
+      - burrito_party
+
+  redis:
+    image: redis:latest
+    ports:
+      - 6379:6379
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 5s
+      timeout: 30s
+      retries: 50
+    restart: always
+    networks:
+      - burrito_party
+
+  airflow-webserver:
+    <<: *airflow-common
+    command: webserver
+    ports:
+      - 8080:8080
+    healthcheck:
+      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
+      interval: 10s
+      timeout: 10s
+      retries: 5
+    restart: always
+    networks:
+      - burrito_party
+
+  airflow-scheduler:
+    <<: *airflow-common
+    command: scheduler
+    restart: always
+    networks:
+      - burrito_party
+
+  airflow-worker:
+    <<: *airflow-common
+    command: celery worker
+    restart: always
+    networks:
+      - burrito_party
+
+  airflow-init:
+    <<: *airflow-common
+    command: version
+    environment:
+      <<: *airflow-common-env
+      _AIRFLOW_DB_UPGRADE: 'true'
+      _AIRFLOW_WWW_USER_CREATE: 'true'
+      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
+      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}
+    networks:
+      - burrito_party
+
+  flower:
+    <<: *airflow-common
+    command: celery flower
+    ports:
+      - 5555:5555
+    healthcheck:
+      test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
+      interval: 10s
+      timeout: 10s
+      retries: 5
+    restart: always
+    networks:
+      - burrito_party
+
+volumes:
+  postgres-db-volume:
+
+networks:
+  burrito_party:
+    name: burrito_party
+    external: true
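Because the burrito_party network is declared external: true, it must exist before the stack starts (for example via docker network create burrito_party). Bring-up then follows the usual Airflow compose pattern: run docker compose -f docker-compose-airflow.yml up airflow-init once, so the entrypoint applies _AIRFLOW_DB_UPGRADE and creates the admin user, then docker compose -f docker-compose-airflow.yml up -d for the remaining services.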
diff --git a/pyproject.toml b/pyproject.toml
index 4146cba0..274e9cd4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ authors = ["DimonBor", "m-o-d-e-r"]
 readme = "README.md"
 
 [tool.poetry.dependencies]
-python = "^3.10"
+python = ">=3.10,<3.13"
 anyio = "^3.6.2"
 distro = "^1.8.0"
 fastapi = "^0.111.0"
@@ -36,6 +36,8 @@ funcy = "^2.0"
 deptry = "^0.16.1"
 pyjwt = "^2.8.0"
 httpx = "^0.27.0"
+apache-airflow = {version = "2.9.1", extras = ["celery"]}
+psycopg2-binary = "^2.9.9"
 
 [tool.poetry.group.dev.dependencies]
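The exact 2.9.1 pin keeps the Poetry-installed Airflow in the builder venv in step with the apache/airflow:2.9.1-python3.10 runtime image, and the tightened python constraint (>=3.10,<3.13) matches the Python range Airflow 2.9 supports, which likely lets Poetry resolve the dependency. A one-line sanity check that could be run inside the built image (illustrative only):

    # Confirm the venv on PATH carries the same Airflow as the base image.
    import airflow

    assert airflow.__version__ == "2.9.1", airflow.__version__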