Merge pull request #231 from OP-TED/feature/TED-663
Feature/ted 663
CaptainOfHacks authored Sep 1, 2022
2 parents a927253 + e22063b commit a9143ef
Showing 4 changed files with 199 additions and 54 deletions.
47 changes: 28 additions & 19 deletions Makefile
@@ -14,6 +14,7 @@ PROJECT_PATH = $(shell pwd)
AIRFLOW_INFRA_FOLDER ?= ${PROJECT_PATH}/.airflow
RML_MAPPER_PATH = ${PROJECT_PATH}/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH = ${PROJECT_PATH}/.saxon/saxon-he-10.6.jar
HOSTNAME = $(shell hostname)


#-----------------------------------------------------------------------------
@@ -91,6 +92,14 @@ create-env-airflow:
@ chmod 777 ${AIRFLOW_INFRA_FOLDER}/logs ${AIRFLOW_INFRA_FOLDER}/plugins ${AIRFLOW_INFRA_FOLDER}/.env
@ cp requirements.txt ./infra/airflow/


build-airflow: guard-ENVIRONMENT create-env-airflow build-externals
@ echo -e "$(BUILD_PRINT) Build Airflow services $(END_BUILD_PRINT)"
@ docker build -t meaningfy/airflow ./infra/airflow/
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate

#--------------------------------------AIRFLOW_CLUSTER----BEGIN----TARGETS----------------------------------------------

create-env-airflow-cluster:
@ echo -e "$(BUILD_PRINT) Create Airflow env $(END_BUILD_PRINT)"
@ echo -e "$(BUILD_PRINT) ${AIRFLOW_INFRA_FOLDER} ${ENVIRONMENT} $(END_BUILD_PRINT)"
@@ -100,31 +109,28 @@ create-env-airflow-cluster:
@ chmod 777 ${AIRFLOW_INFRA_FOLDER}/logs ${AIRFLOW_INFRA_FOLDER}/plugins ${AIRFLOW_INFRA_FOLDER}/.env
@ cp requirements.txt ./infra/airflow-cluster/

build-airflow: guard-ENVIRONMENT create-env-airflow build-externals
@ echo -e "$(BUILD_PRINT) Build Airflow services $(END_BUILD_PRINT)"
@ docker build -t meaningfy/airflow ./infra/airflow/
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate

build-airflow-cluster: guard-ENVIRONMENT create-env-airflow-cluster build-externals
@ echo -e "$(BUILD_PRINT) Build Airflow services $(END_BUILD_PRINT)"
@ echo -e "$(BUILD_PRINT) Build Airflow Common Image $(END_BUILD_PRINT)"
@ docker build -t meaningfy/airflow ./infra/airflow-cluster/
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d airflow-init

start-airflow-cluster: build-externals
@ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)"
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate airflow-webserver airflow-scheduler airflow-triggerer flower
start-airflow-master: build-externals
@ echo -e "$(BUILD_PRINT)Starting Airflow Master $(END_BUILD_PRINT)"
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate

start-airflow-cluster-worker: build-externals
@ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)"
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} up -d airflow-worker
start-airflow-worker: build-externals
@ echo -e "$(BUILD_PRINT)Starting Airflow Worker $(END_BUILD_PRINT)"
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose-worker.yaml --env-file ${ENV_FILE} up -d

stop-airflow-master:
@ echo -e "$(BUILD_PRINT)Stopping Airflow Master $(END_BUILD_PRINT)"
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down

stop-airflow-worker:
@ echo -e "$(BUILD_PRINT)Stopping Airflow Worker $(END_BUILD_PRINT)"
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose-worker.yaml --env-file ${ENV_FILE} down

stop-airflow-cluster:
@ echo -e "$(BUILD_PRINT)Stopping Airflow Cluster $(END_BUILD_PRINT)"
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-webserver airflow-scheduler airflow-triggerer flower

stop-airflow-cluster-worker:
@ echo -e "$(BUILD_PRINT)Stopping Airflow Cluster Worker $(END_BUILD_PRINT)"
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow-cluster/docker-compose.yaml --env-file ${ENV_FILE} down airflow-worker
#---------------------------------------AIRFLOW_CLUSTER----END----TARGETS-----------------------------------------------

start-airflow: build-externals
@ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)"
@@ -242,6 +248,7 @@ staging-dotenv-file: guard-VAULT_ADDR guard-VAULT_TOKEN vault-installed
@ echo RML_MAPPER_PATH=${RML_MAPPER_PATH} >> .env
@ echo XML_PROCESSOR_PATH=${XML_PROCESSOR_PATH} >> .env
@ echo AIRFLOW_INFRA_FOLDER=~/airflow-infra/staging >> .env
@ echo AIRFLOW_WORKER_HOSTNAME=${HOSTNAME} >> .env
@ vault kv get -format="json" ted-staging/airflow | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
@ vault kv get -format="json" ted-staging/mongo-db | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
@ vault kv get -format="json" ted-staging/metabase | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
Expand All @@ -260,6 +267,7 @@ dev-dotenv-file: guard-VAULT_ADDR guard-VAULT_TOKEN vault-installed
@ echo RML_MAPPER_PATH=${RML_MAPPER_PATH} >> .env
@ echo XML_PROCESSOR_PATH=${XML_PROCESSOR_PATH} >> .env
@ echo AIRFLOW_INFRA_FOLDER=${AIRFLOW_INFRA_FOLDER} >> .env
@ echo AIRFLOW_WORKER_HOSTNAME=${HOSTNAME} >> .env
@ vault kv get -format="json" ted-dev/airflow | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
@ vault kv get -format="json" ted-dev/mongo-db | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
@ vault kv get -format="json" ted-dev/metabase | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
Expand All @@ -278,6 +286,7 @@ prod-dotenv-file: guard-VAULT_ADDR guard-VAULT_TOKEN vault-installed
@ echo RML_MAPPER_PATH=${RML_MAPPER_PATH} >> .env
@ echo XML_PROCESSOR_PATH=${XML_PROCESSOR_PATH} >> .env
@ echo AIRFLOW_INFRA_FOLDER=~/airflow-infra/prod >> .env
@ echo AIRFLOW_WORKER_HOSTNAME=${HOSTNAME} >> .env
@ vault kv get -format="json" ted-prod/airflow | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
@ vault kv get -format="json" ted-prod/mongo-db | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
@ vault kv get -format="json" ted-prod/metabase | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
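
For reference, a rough invocation sketch of the new cluster targets (a sketch only; it assumes ENVIRONMENT and ENV_FILE are set the way the Makefile expects, that VAULT_ADDR/VAULT_TOKEN are exported for the dotenv targets, and "staging" is just a placeholder environment):

# On the master host: generate .env, build the common image, start the core services
make staging-dotenv-file
make build-airflow-cluster ENVIRONMENT=staging
make start-airflow-master ENVIRONMENT=staging

# On each remote worker host: reuse the same .env, then start only the worker stack
make staging-dotenv-file
make start-airflow-worker ENVIRONMENT=staging

# Tear-down mirrors the start targets
make stop-airflow-worker ENVIRONMENT=staging    # on the worker host
make stop-airflow-master ENVIRONMENT=staging    # on the master host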
116 changes: 116 additions & 0 deletions infra/airflow-cluster/docker-compose-worker.yaml
@@ -0,0 +1,116 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
#
# WARNING: This configuration is for local development. Do not use it in a production deployment.
#
# This configuration supports basic configuration using environment variables or an .env file
# The following variables are supported:
#
# AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow.
# Default: apache/airflow:|version|
# AIRFLOW_UID - User ID in Airflow containers
# Default: 50000
# Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode
#
# _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account (if requested).
# Default: airflow
# _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account (if requested).
# Default: airflow
# _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers.
# Default: ''
#
# Feel free to modify this file to suit your needs.
---
version: '3'
x-airflow-common:
&airflow-common
# In order to add custom dependencies or upgrade provider packages you can use your extended image.
# Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml
# and uncomment the "build" line below, Then run `docker-compose build` to build the images.
#image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.2.2-python3.8}
#build: .
image: meaningfy/airflow:latest
env_file:
- ../../.env
environment:
&airflow-common-env
AIRFLOW__CORE__PARALLELISM: 128
AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 128
AIRFLOW__SCHEDULER__PARSING_PROCESSES: 4
AIRFLOW__CELERY__WORKER_CONCURRENCY: 128
AIRFLOW__CORE__SQL_ALCHEMY_POOL_SIZE: 256
AIRFLOW__CORE__SQL_ALCHEMY_MAX_OVERFLOW: 512
AIRFLOW__CORE__SQL_ALCHEMY_CONN: "postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_USER}@postgres.${SUBDOMAIN}${DOMAIN}/airflow"
AIRFLOW__CELERY__RESULT_BACKEND: "db+postgresql://${POSTGRES_USER}:${POSTGRES_USER}@postgres.${SUBDOMAIN}${DOMAIN}/airflow"
AIRFLOW__CELERY__BROKER_URL: "redis://:${REDIS_PASSWORD}@redis.${SUBDOMAIN}${DOMAIN}:6379/0"
AIRFLOW__WEBSERVER__SECRET_KEY: "zqOVjqVrMstjDbKEPpYiSA=="
IS_PRIME_ENV: 'true'
AIRFLOW__CORE__EXECUTOR: CeleryExecutor
AIRFLOW__CORE__FERNET_KEY: ''
AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
AIRFLOW__CORE__ENABLE_XCOM_PICKLING: "true"
AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth'
_PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
VAULT_TOKEN: ${VAULT_TOKEN}
VAULT_ADDR: ${VAULT_ADDR}
ENVIRONMENT: ${ENVIRONMENT}
PYTHONPATH: /opt/airflow/
AIRFLOW_HOME: /opt/airflow
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
DAG_LOGGER_CONFIG_HANDLERS: ${DAG_LOGGER_CONFIG_HANDLERS}
volumes:
# - ./config/airflow.cfg:/opt/airflow/airflow.cfg
- ${AIRFLOW_INFRA_FOLDER}/.env:/opt/airflow/.env
- ${AIRFLOW_INFRA_FOLDER}/dags:/opt/airflow/dags
- ${AIRFLOW_INFRA_FOLDER}/logs:/opt/airflow/logs
- ${AIRFLOW_INFRA_FOLDER}/plugins:/opt/airflow/plugins
- ${AIRFLOW_INFRA_FOLDER}/ted_sws:/opt/airflow/ted_sws
- ${AIRFLOW_INFRA_FOLDER}/tests:/opt/airflow/tests
user: "${AIRFLOW_UID:-50000}:0"
command: bash -c "export PYTHONPATH='/opt/airflow/'"

services:
airflow-worker:
<<: *airflow-common
container_name: airflow-worker-${ENVIRONMENT}
command: celery worker
hostname: ${AIRFLOW_WORKER_HOSTNAME}.${SUBDOMAIN}remote_worker
ports:
- "8793:8793"
healthcheck:
test: ["CMD-SHELL",'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"']
interval: 10s
timeout: 10s
retries: 5
environment:
<<: *airflow-common-env
# Required to handle warm shutdown of the celery workers properly
# See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
DUMB_INIT_SETSID: "0"
restart: unless-stopped
networks:
- proxy-net

networks:
proxy-net:
external:
name: proxy-net
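
The worker file above is normally driven through the start-airflow-worker / stop-airflow-worker targets; run by hand it would amount to something like the sketch below, assuming the worker host has the shared .env next to the compose file exposing ENVIRONMENT, POSTGRES_USER, REDIS_PASSWORD, SUBDOMAIN, DOMAIN, AIRFLOW_WORKER_HOSTNAME, AIRFLOW_INFRA_FOLDER and the Vault variables referenced in the x-airflow-common block:

# Hypothetical direct invocation on a remote worker host (values come from .env)
docker-compose -p ${ENVIRONMENT} \
  --file ./infra/airflow-cluster/docker-compose-worker.yaml \
  --env-file .env up -d

# The container hostname becomes <AIRFLOW_WORKER_HOSTNAME>.<SUBDOMAIN>remote_worker,
# which is the name the Celery worker advertises, and port 8793 is published so the
# webserver can fetch task logs from the worker.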
84 changes: 50 additions & 34 deletions infra/airflow-cluster/docker-compose.yaml
@@ -51,17 +51,19 @@ x-airflow-common:
- ../../.env
environment:
&airflow-common-env
AIRFLOW__CORE__PARALLELISM: 64
AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 32
AIRFLOW__CORE__PARALLELISM: 128
AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 128
AIRFLOW__SCHEDULER__PARSING_PROCESSES: 4
AIRFLOW__CELERY__WORKER_CONCURRENCY: 8
AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@ted-data.eu/airflow
AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@ted-data.eu/airflow
AIRFLOW__CELERY__BROKER_URL: redis://:@ted-data.eu:6379/0
AIRFLOW__CELERY__WORKER_CONCURRENCY: 128
AIRFLOW__CORE__SQL_ALCHEMY_POOL_SIZE: 256
AIRFLOW__CORE__SQL_ALCHEMY_MAX_OVERFLOW: 512
AIRFLOW__CORE__SQL_ALCHEMY_CONN: "postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_USER}@postgres/airflow"
AIRFLOW__CELERY__RESULT_BACKEND: "db+postgresql://${POSTGRES_USER}:${POSTGRES_USER}@postgres/airflow"
AIRFLOW__CELERY__BROKER_URL: "redis://:${REDIS_PASSWORD}@redis:6379/0"
AIRFLOW__WEBSERVER__SECRET_KEY: "zqOVjqVrMstjDbKEPpYiSA=="
IS_PRIME_ENV: 'true'
AIRFLOW__CORE__EXECUTOR: CeleryExecutor
AIRFLOW__CORE__FERNET_KEY: 'M9NmXPlfIszmYCVjp3nJNQEocpmNQtKQdG-Kxdvfgm8='
AIRFLOW__CORE__FERNET_KEY: ''
AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
AIRFLOW__CORE__ENABLE_XCOM_PICKLING: "true"
AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
Expand All @@ -75,9 +77,6 @@ x-airflow-common:
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
DAG_LOGGER_CONFIG_HANDLERS: ${DAG_LOGGER_CONFIG_HANDLERS}
extra_hosts:
- "hermes-worker:${HERMES_IP_ADDRESS}"
- "srv-worker:${SRV_IP_ADDRESS}"
volumes:
# - ./config/airflow.cfg:/opt/airflow/airflow.cfg
- ${AIRFLOW_INFRA_FOLDER}/.env:/opt/airflow/.env
Expand All @@ -99,9 +98,10 @@ services:
postgres:
image: postgres:13
container_name: postgres-airflow-${ENVIRONMENT}
command: postgres -c 'max_connections=1000'
environment:
POSTGRES_USER: airflow
POSTGRES_PASSWORD: airflow
POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_PASSWORD: ${POSTGRES_USER}
POSTGRES_DB: airflow
ports:
- "5432:5432"
Expand All @@ -114,15 +114,18 @@ services:
restart: unless-stopped
networks:
- airflow
- proxy-net
- common-ext

redis:
image: redis:latest
image: redis:5.0.14
container_name: redis-airflow-${ENVIRONMENT}
command: >
--requirepass ${REDIS_PASSWORD}
environment:
- REDIS_PASSWORD=${REDIS_PASSWORD}
ports:
- "6379:6379"
expose:
- 6379
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 5s
Expand All @@ -131,6 +134,7 @@ services:
restart: unless-stopped
networks:
- airflow
- proxy-net
- common-ext

airflow-webserver:
@@ -161,9 +165,6 @@ services:
airflow-init:
condition: service_completed_successfully




airflow-scheduler:
<<: *airflow-common
container_name: airflow-scheduler-${ENVIRONMENT}
Expand All @@ -182,23 +183,15 @@ services:
airflow-init:
condition: service_completed_successfully

airflow-worker:
airflow-triggerer:
<<: *airflow-common
container_name: airflow-worker-${ENVIRONMENT}
command: celery worker
hostname: ${WORKER_HOSTNAME}
ports:
- "8793:8793"
container_name: airflow-triggerer-${ENVIRONMENT}
command: triggerer
healthcheck:
test: ["CMD-SHELL",'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"']
test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
interval: 10s
timeout: 10s
retries: 5
environment:
<<: *airflow-common-env
# Required to handle warm shutdown of the celery workers properly
# See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
DUMB_INIT_SETSID: "0"
restart: unless-stopped
networks:
- airflow
Expand All @@ -208,15 +201,21 @@ services:
airflow-init:
condition: service_completed_successfully

airflow-triggerer:
airflow-worker:
<<: *airflow-common
container_name: airflow-triggerer-${ENVIRONMENT}
command: triggerer
container_name: airflow-worker-${ENVIRONMENT}
command: celery worker
hostname: ${AIRFLOW_WORKER_HOSTNAME}.${SUBDOMAIN}local_worker
healthcheck:
test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
test: ["CMD-SHELL",'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"']
interval: 10s
timeout: 10s
retries: 5
environment:
<<: *airflow-common-env
# Required to handle warm shutdown of the celery workers properly
# See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
DUMB_INIT_SETSID: "0"
restart: unless-stopped
networks:
- airflow
Expand All @@ -226,6 +225,7 @@ services:
airflow-init:
condition: service_completed_successfully


airflow-init:
<<: *airflow-common
entrypoint: /bin/bash
@@ -302,6 +302,8 @@ services:
- .:/sources
networks:
- airflow
depends_on:
<<: *airflow-common-depends-on

airflow-cli:
<<: *airflow-common
Expand All @@ -324,10 +326,24 @@ services:
container_name: airflow-flower-${ENVIRONMENT}
command: celery flower
restart: unless-stopped
ports:
- "5555:5555"
expose:
- 5555
networks:
- airflow
- proxy-net
- common-ext
labels:
#### Labels define the behavior and rules of the traefik proxy for this container ####
- "traefik.enable=true" # <== Enable traefik to proxy this container
- "traefik.http.routers.${ENVIRONMENT}-flower.rule=Host(`flower.${SUBDOMAIN}${DOMAIN}`)" # <== Your Domain Name goes here for the http rule
- "traefik.http.routers.${ENVIRONMENT}-flower.entrypoints=web" # <== Defining the entrypoint for http, **ref: line 30
- "traefik.http.routers.${ENVIRONMENT}-flower.middlewares=redirect@file" # <== This is a middleware to redirect to https
- "traefik.http.routers.${ENVIRONMENT}-flower-secured.rule=Host(`flower.${SUBDOMAIN}${DOMAIN}`)" # <== Your Domain Name for the https rule
- "traefik.http.routers.${ENVIRONMENT}-flower-secured.entrypoints=web-secured" # <== Defining entrypoint for https, **ref: line 31
- "traefik.http.routers.${ENVIRONMENT}-flower-secured.tls.certresolver=mytlschallenge" # <== Defining certsresolvers for https
- "traefik.http.services.${ENVIRONMENT}-flower-secured.loadbalancer.server.port=5555"
depends_on:
<<: *airflow-common-depends-on
airflow-init:
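
With the new labels, Flower is no longer published on host port 5555; it is only exposed inside the compose networks and reached through the Traefik proxy on proxy-net. A rough smoke test from a machine that resolves the domain (placeholder host name, and only once airflow-init has completed):

# Expect an HTTP response from the Flower UI routed through Traefik
curl -I https://flower.<subdomain><domain>/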