From 8bf0d58cf05804c9c4839254bf37c99715dfa707 Mon Sep 17 00:00:00 2001 From: lelouvincx Date: Mon, 18 Sep 2023 14:35:56 +0700 Subject: [PATCH 1/7] Initital branch From 592b0a1651f9420a522a97140b4f97fbcf80717c Mon Sep 17 00:00:00 2001 From: lelouvincx Date: Tue, 19 Sep 2023 17:15:51 +0700 Subject: [PATCH 2/7] #1: Create 3 kafka nodes with KRaft --- .../.docker/connect-distributed.properties | 89 ++++++++++++ database-replication/docker-compose.yml | 128 +++++++++++++++--- 2 files changed, 197 insertions(+), 20 deletions(-) create mode 100644 database-replication/.docker/connect-distributed.properties diff --git a/database-replication/.docker/connect-distributed.properties b/database-replication/.docker/connect-distributed.properties new file mode 100644 index 0000000..d42c818 --- /dev/null +++ b/database-replication/.docker/connect-distributed.properties @@ -0,0 +1,89 @@ +## +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## + +# This file contains some of the configurations for the Kafka Connect distributed worker. This file is intended +# to be used with the examples, and some settings may differ from those used in a production system, especially +# the `bootstrap.servers` and those specifying replication factors. + +# A list of host/port pairs to use for establishing the initial connection to the Kafka cluster. +bootstrap.servers=kafka-0:9092 + +# unique name for the cluster, used in forming the Connect cluster group. Note that this must not conflict with consumer group IDs +group.id=connect-cluster + +# The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will +# need to configure these based on the format they want their data in when loaded from or stored into Kafka +key.converter=org.apache.kafka.connect.json.JsonConverter +value.converter=org.apache.kafka.connect.json.JsonConverter +# Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply +# it to +key.converter.schemas.enable=true +value.converter.schemas.enable=true + +# Topic to use for storing offsets. This topic should have many partitions and be replicated and compacted. +# Kafka Connect will attempt to create the topic automatically when needed, but you can always manually create +# the topic before starting Kafka Connect if a specific topic configuration is needed. +# Most users will want to use the built-in default replication factor of 3 or in some cases even specify a larger value. +# Since this means there must be at least as many brokers as the maximum replication factor used, we'd like to be able +# to run this example on a single-broker cluster and so here we instead set the replication factor to 1. +offset.storage.topic=connect-offsets +offset.storage.replication.factor=1 +#offset.storage.partitions=25 + +# Topic to use for storing connector and task configurations; note that this should be a single partition, highly replicated, +# and compacted topic. Kafka Connect will attempt to create the topic automatically when needed, but you can always manually create +# the topic before starting Kafka Connect if a specific topic configuration is needed. +# Most users will want to use the built-in default replication factor of 3 or in some cases even specify a larger value. +# Since this means there must be at least as many brokers as the maximum replication factor used, we'd like to be able +# to run this example on a single-broker cluster and so here we instead set the replication factor to 1. +config.storage.topic=connect-configs +config.storage.replication.factor=1 + +# Topic to use for storing statuses. This topic can have multiple partitions and should be replicated and compacted. +# Kafka Connect will attempt to create the topic automatically when needed, but you can always manually create +# the topic before starting Kafka Connect if a specific topic configuration is needed. +# Most users will want to use the built-in default replication factor of 3 or in some cases even specify a larger value. +# Since this means there must be at least as many brokers as the maximum replication factor used, we'd like to be able +# to run this example on a single-broker cluster and so here we instead set the replication factor to 1. +status.storage.topic=connect-status +status.storage.replication.factor=1 +#status.storage.partitions=5 + +# Flush much faster than normal, which is useful for testing/debugging +offset.flush.interval.ms=10000 + +# List of comma-separated URIs the REST API will listen on. The supported protocols are HTTP and HTTPS. +# Specify hostname as 0.0.0.0 to bind to all interfaces. +# Leave hostname empty to bind to default interface. +# Examples of legal listener lists: HTTP://myhost:8083,HTTPS://myhost:8084" +#listeners=HTTP://:8083 + +# The Hostname & Port that will be given out to other workers to connect to i.e. URLs that are routable from other servers. +# If not set, it uses the value for "listeners" if configured. +#rest.advertised.host.name= +#rest.advertised.port= +#rest.advertised.listener= + +# Set to a list of filesystem paths separated by commas (,) to enable class loading isolation for plugins +# (connectors, converters, transformations). The list should consist of top level directories that include +# any combination of: +# a) directories immediately containing jars with plugins and their dependencies +# b) uber-jars with plugins and their dependencies +# c) directories immediately containing the package directory structure of classes of plugins and their dependencies +# Examples: +# plugin.path=/usr/local/share/java,/usr/local/share/kafka/plugins,/opt/connectors, +#plugin.path= diff --git a/database-replication/docker-compose.yml b/database-replication/docker-compose.yml index 2f77f5b..4c2a2bc 100644 --- a/database-replication/docker-compose.yml +++ b/database-replication/docker-compose.yml @@ -52,7 +52,7 @@ services: start_period: 30s zookeeper: - image: docker.io/bitnami/zookeeper:3.8 + image: bitnami/zookeeper:3.8 container_name: zookeeper ports: - "2181:2181" @@ -60,6 +60,9 @@ services: - .docker/data/zookeeper:/bitnami/zookeeper environment: - ALLOW_ANONYMOUS_LOGIN=yes + - ZOO_SYNC_LIMIT=7 # How far out of date a server can be from a leader + # - ZOO_ENABLE_PROMETHEUS_METRICS=yes + - ZOO_LOG_LEVEL=DEBUG restart: unless-stopped healthcheck: test: nc -z localhost 2181 | exit 1 @@ -67,33 +70,118 @@ services: timeout: 30s retries: 5 start_period: 30s + networks: + - kafka_networks kafka-0: - image: docker.io/bitnami/kafka:3.4 - container_name: kafka-broker-0 + image: bitnami/kafka:3.5 + container_name: kafka-server ports: - "9092:9092" + - ":9093" volumes: - - .docker/data/kafka:/bitnami/kafka + - .docker/data/kafka-0:/bitnami/kafka + - .docker/log/kafka-0:/tmp/kafka_mounts/logs environment: - - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 - depends_on: - - zookeeper - restart: unless-stopped - healthcheck: - test: nc -z localhost 9092 | exit 1 - interval: 1m30s - timeout: 30s - retries: 5 - start_period: 30s + # - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 + - KAFKA_CFG_NODE_ID=0 + - KAFKA_CFG_PROCESS_ROLES=controller,broker + - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka-0:9093,1@kafka-1:9093,2@kafka-2:9093 + - KAFKA_KRAFT_CLUSTER_ID=abcdefghijklmnopqrstuv + - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093 + - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://:9092 + - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT + - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER + - KAFKA_CFG_INTER_BROKER_LISTENER_NAME=PLAINTEXT + - ALLOW_PLAINTEXT_LISTENER=yes + # depends_on: + # - zookeeper + networks: + - kafka_networks - kafka-manager: - image: docker.io/sheepkiller/kafka-manager - container_name: kafka-manager + kafka-1: + image: bitnami/kafka:3.5 + container_name: kafka-1 ports: - - "9000:9000" + - ":9092" + - ":9093" + volumes: + - .docker/data/kafka-1:/bitnami/kafka + - .docker/log/kafka-1:/tmp/kafka_mounts/logs environment: - - ZK_HOSTS=zookeeper:2181 - - APPLICATION_SECRET=letmein + # - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 + - KAFKA_CFG_NODE_ID=1 + - KAFKA_CFG_PROCESS_ROLES=controller,broker + - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka-0:9093,1@kafka-1:9093,2@kafka-2:9093 + - KAFKA_KRAFT_CLUSTER_ID=abcdefghijklmnopqrstuv + - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093 + - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://:9092 + - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT + - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER + - KAFKA_CFG_INTER_BROKER_LISTENER_NAME=PLAINTEXT + - ALLOW_PLAINTEXT_LISTENER=yes + # depends_on: + # - zookeeper + networks: + - kafka_networks + + kafka-2: + image: bitnami/kafka:3.5 + container_name: kafka-2 + ports: + - ":9092" + - ":9093" + volumes: + - .docker/data/kafka-2:/bitnami/kafka + - .docker/log/kafka-2:/tmp/kafka_mounts/logs + environment: + # - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 + - KAFKA_CFG_NODE_ID=2 + - KAFKA_CFG_PROCESS_ROLES=controller,broker + - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka-0:9093,1@kafka-1:9093,2@kafka-2:9093 + - KAFKA_KRAFT_CLUSTER_ID=abcdefghijklmnopqrstuv + - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093 + - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://:9092 + - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT + - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER + - KAFKA_CFG_INTER_BROKER_LISTENER_NAME=PLAINTEXT + - ALLOW_PLAINTEXT_LISTENER=yes + # depends_on: + # - zookeeper + networks: + - kafka_networks + + kafka-connect: + image: bitnami/kafka:3.4 + container_name: kafka-connect + ports: + - "8083:8083" + volumes: + - .docker/data/kafka-connect:/bitnami/kafka + - ./.docker/connect-distributed.properties:/opt/bitnami/kafka/config/connect-distributed.properties + command: /opt/bitnami/kafka/bin/connect-distributed.sh /opt/bitnami/kafka/config/connect-distributed.properties depends_on: - zookeeper + - kafka-0 + networks: + - kafka_networks + + kafka-ui: + image: provectuslabs/kafka-ui:latest + container_name: kafka-manager-ui + ports: + - "8000:8080" + environment: + - KAFKA_CLUSTERS_0_NAME=${KAFKA_CLUSTERS_0_NAME} + - KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS=${KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS} + - KAFKA_CLUSTERS_0_ZOOKEEPER=${KAFKA_CLUSTERS_0_ZOOKEEPER} + - KAFKA_CLUSTERS_0_PROPERTIES_SECURITY_PROTOCOL=${KAFKA_CLUSTERS_0_PROPERTIES_SECURITY_PROTOCOL} + - KAFKA_CLUSTERS_0_PROPERTIES_SASL_MECHANISM=${KAFKA_CLUSTERS_0_PROPERTIES_SASL_MECHANISM} + - KAFKA_CLUSTERS_0_PROPERTIES_SASL_JAAS_CONFIG=${KAFKA_CLUSTERS_0_PROPERTIES_SASL_JAAS_CONFIG} + networks: + - kafka_networks + + +networks: + kafka_networks: + driver: bridge From 6affd3eca950e4784762d7cf36a56ff610d37379 Mon Sep 17 00:00:00 2001 From: lelouvincx Date: Wed, 20 Sep 2023 06:36:25 +0700 Subject: [PATCH 3/7] #1: Add healthcheck to kafka cluster (and fix upstream-app healthcheck) --- .../.docker/images/app/Dockerfile | 4 +- database-replication/Makefile | 4 + database-replication/docker-compose.yml | 87 +++++++------------ 3 files changed, 38 insertions(+), 57 deletions(-) diff --git a/database-replication/.docker/images/app/Dockerfile b/database-replication/.docker/images/app/Dockerfile index 71b5491..438ab5d 100644 --- a/database-replication/.docker/images/app/Dockerfile +++ b/database-replication/.docker/images/app/Dockerfile @@ -4,10 +4,12 @@ WORKDIR /app RUN python3 -m venv .venv RUN pip install pip --upgrade - COPY app/requirements.txt /app/requirements.txt RUN pip install --no-cache-dir -r /app/requirements.txt +RUN apt-get update && apt-get install -y curl && \ + apt-get autoremove -y + COPY app/ . CMD [ "streamlit", "run", "ui.py", "--server.address=0.0.0.0" ] diff --git a/database-replication/Makefile b/database-replication/Makefile index 3f4ea2e..b896f04 100644 --- a/database-replication/Makefile +++ b/database-replication/Makefile @@ -28,6 +28,10 @@ restart-build-d: down up-build-d sleep: sleep 20 +# ============ Build images ============ +build-upstream-app: + docker build -t upstream-app:dev -f .docker/images/app/Dockerfile . + # ============ Testing, formatting, type checks, link checks ============ app-requirements: rm app/requirements.txt && \ diff --git a/database-replication/docker-compose.yml b/database-replication/docker-compose.yml index 4c2a2bc..9d93500 100644 --- a/database-replication/docker-compose.yml +++ b/database-replication/docker-compose.yml @@ -22,7 +22,7 @@ services: interval: 1m30s timeout: 30s retries: 5 - start_period: 30s + start_period: 10s adminer: image: adminer:standalone @@ -33,9 +33,9 @@ services: - source_db restart: unless-stopped - data_gen: - image: data-generator:localdev - container_name: "data_gen" + upstream-app: + image: upstream-app:dev + container_name: "upstream-app" ports: - "8501:8501" volumes: @@ -49,7 +49,7 @@ services: interval: 1m30s timeout: 30s retries: 5 - start_period: 30s + start_period: 10s zookeeper: image: bitnami/zookeeper:3.8 @@ -64,12 +64,6 @@ services: # - ZOO_ENABLE_PROMETHEUS_METRICS=yes - ZOO_LOG_LEVEL=DEBUG restart: unless-stopped - healthcheck: - test: nc -z localhost 2181 | exit 1 - interval: 1m30s - timeout: 30s - retries: 5 - start_period: 30s networks: - kafka_networks @@ -82,22 +76,20 @@ services: volumes: - .docker/data/kafka-0:/bitnami/kafka - .docker/log/kafka-0:/tmp/kafka_mounts/logs + env_file: .env environment: - # - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 - KAFKA_CFG_NODE_ID=0 - - KAFKA_CFG_PROCESS_ROLES=controller,broker - - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka-0:9093,1@kafka-1:9093,2@kafka-2:9093 - - KAFKA_KRAFT_CLUSTER_ID=abcdefghijklmnopqrstuv - - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093 - - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://:9092 - - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT - - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER - - KAFKA_CFG_INTER_BROKER_LISTENER_NAME=PLAINTEXT - - ALLOW_PLAINTEXT_LISTENER=yes + # - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 # depends_on: # - zookeeper networks: - kafka_networks + healthcheck: + test: kafka-topics.sh --list --bootstrap-server kafka-server:9092 || exit -1 + interval: 5s + timeout: 10s + retries: 10 + start_period: 15s kafka-1: image: bitnami/kafka:3.5 @@ -108,22 +100,17 @@ services: volumes: - .docker/data/kafka-1:/bitnami/kafka - .docker/log/kafka-1:/tmp/kafka_mounts/logs + env_file: .env environment: - # - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 - KAFKA_CFG_NODE_ID=1 - - KAFKA_CFG_PROCESS_ROLES=controller,broker - - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka-0:9093,1@kafka-1:9093,2@kafka-2:9093 - - KAFKA_KRAFT_CLUSTER_ID=abcdefghijklmnopqrstuv - - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093 - - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://:9092 - - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT - - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER - - KAFKA_CFG_INTER_BROKER_LISTENER_NAME=PLAINTEXT - - ALLOW_PLAINTEXT_LISTENER=yes - # depends_on: - # - zookeeper networks: - kafka_networks + healthcheck: + test: kafka-topics.sh --list --bootstrap-server kafka-server:9092 || exit -1 + interval: 5s + timeout: 10s + retries: 10 + start_period: 15s kafka-2: image: bitnami/kafka:3.5 @@ -134,25 +121,20 @@ services: volumes: - .docker/data/kafka-2:/bitnami/kafka - .docker/log/kafka-2:/tmp/kafka_mounts/logs + env_file: .env environment: - # - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 - KAFKA_CFG_NODE_ID=2 - - KAFKA_CFG_PROCESS_ROLES=controller,broker - - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka-0:9093,1@kafka-1:9093,2@kafka-2:9093 - - KAFKA_KRAFT_CLUSTER_ID=abcdefghijklmnopqrstuv - - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093 - - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://:9092 - - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT - - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER - - KAFKA_CFG_INTER_BROKER_LISTENER_NAME=PLAINTEXT - - ALLOW_PLAINTEXT_LISTENER=yes - # depends_on: - # - zookeeper networks: - kafka_networks + healthcheck: + test: kafka-topics.sh --list --bootstrap-server kafka-server:9092 || exit -1 + interval: 5s + timeout: 10s + retries: 10 + start_period: 15s kafka-connect: - image: bitnami/kafka:3.4 + image: bitnami/kafka:3.5 container_name: kafka-connect ports: - "8083:8083" @@ -160,9 +142,6 @@ services: - .docker/data/kafka-connect:/bitnami/kafka - ./.docker/connect-distributed.properties:/opt/bitnami/kafka/config/connect-distributed.properties command: /opt/bitnami/kafka/bin/connect-distributed.sh /opt/bitnami/kafka/config/connect-distributed.properties - depends_on: - - zookeeper - - kafka-0 networks: - kafka_networks @@ -171,15 +150,11 @@ services: container_name: kafka-manager-ui ports: - "8000:8080" - environment: - - KAFKA_CLUSTERS_0_NAME=${KAFKA_CLUSTERS_0_NAME} - - KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS=${KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS} - - KAFKA_CLUSTERS_0_ZOOKEEPER=${KAFKA_CLUSTERS_0_ZOOKEEPER} - - KAFKA_CLUSTERS_0_PROPERTIES_SECURITY_PROTOCOL=${KAFKA_CLUSTERS_0_PROPERTIES_SECURITY_PROTOCOL} - - KAFKA_CLUSTERS_0_PROPERTIES_SASL_MECHANISM=${KAFKA_CLUSTERS_0_PROPERTIES_SASL_MECHANISM} - - KAFKA_CLUSTERS_0_PROPERTIES_SASL_JAAS_CONFIG=${KAFKA_CLUSTERS_0_PROPERTIES_SASL_JAAS_CONFIG} + env_file: .env networks: - kafka_networks + depends_on: # WARN: Not working if kafka-0 down, but if depends on the env configs + - kafka-0 networks: From d79f212ab839771a111763000b8e864bc1ba3782 Mon Sep 17 00:00:00 2001 From: lelouvincx Date: Wed, 20 Sep 2023 16:30:52 +0700 Subject: [PATCH 4/7] #4: Restructure upstream-app --- database-replication/.docker/images/app/Dockerfile | 4 ++-- database-replication/app/__init__.py | 0 database-replication/app/{ => app}/.streamlit/config.toml | 0 database-replication/app/app/__init__.py | 6 ++++++ database-replication/app/{ => app}/experiment.ipynb | 0 database-replication/app/{ => app}/gen_data.py | 6 ++++-- database-replication/app/{ => app}/psql_connector.py | 0 database-replication/app/{ui.py => app/streamlit_app.py} | 4 ++-- database-replication/docker-compose.yml | 2 +- 9 files changed, 15 insertions(+), 7 deletions(-) delete mode 100644 database-replication/app/__init__.py rename database-replication/app/{ => app}/.streamlit/config.toml (100%) create mode 100644 database-replication/app/app/__init__.py rename database-replication/app/{ => app}/experiment.ipynb (100%) rename database-replication/app/{ => app}/gen_data.py (95%) rename database-replication/app/{ => app}/psql_connector.py (100%) rename database-replication/app/{ui.py => app/streamlit_app.py} (98%) diff --git a/database-replication/.docker/images/app/Dockerfile b/database-replication/.docker/images/app/Dockerfile index 438ab5d..efc6d3d 100644 --- a/database-replication/.docker/images/app/Dockerfile +++ b/database-replication/.docker/images/app/Dockerfile @@ -10,6 +10,6 @@ RUN pip install --no-cache-dir -r /app/requirements.txt RUN apt-get update && apt-get install -y curl && \ apt-get autoremove -y -COPY app/ . +COPY app/app . -CMD [ "streamlit", "run", "ui.py", "--server.address=0.0.0.0" ] +CMD [ "streamlit", "run", "streamlit_app.py", "--server.address=0.0.0.0" ] diff --git a/database-replication/app/__init__.py b/database-replication/app/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/database-replication/app/.streamlit/config.toml b/database-replication/app/app/.streamlit/config.toml similarity index 100% rename from database-replication/app/.streamlit/config.toml rename to database-replication/app/app/.streamlit/config.toml diff --git a/database-replication/app/app/__init__.py b/database-replication/app/app/__init__.py new file mode 100644 index 0000000..101dedb --- /dev/null +++ b/database-replication/app/app/__init__.py @@ -0,0 +1,6 @@ +import sys +import os + + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) diff --git a/database-replication/app/experiment.ipynb b/database-replication/app/app/experiment.ipynb similarity index 100% rename from database-replication/app/experiment.ipynb rename to database-replication/app/app/experiment.ipynb diff --git a/database-replication/app/gen_data.py b/database-replication/app/app/gen_data.py similarity index 95% rename from database-replication/app/gen_data.py rename to database-replication/app/app/gen_data.py index 2db021a..4bd2876 100644 --- a/database-replication/app/gen_data.py +++ b/database-replication/app/app/gen_data.py @@ -1,6 +1,6 @@ from sqlalchemy import text from faker import Faker -from psql_connector import PsqlConnector +from app.psql_connector import PsqlConnector import os import logging @@ -60,7 +60,7 @@ def set_attributes(self, attributes: list) -> None: self._attributes = attributes # Methods - def update_attributes(self, connector: PsqlConnector) -> None: + def update_attributes(self, connector: PsqlConnector) -> bool: with connector.connect() as engine: with engine.connect() as cursor: sql_script = f""" @@ -100,9 +100,11 @@ def update_attributes(self, connector: PsqlConnector) -> None: if new_attributes == self._attributes: logger.info("There's nothing to change") + return False else: self.set_attributes(new_attributes) logger.info("Table attributes are updated") + return True def gen_public_test(connector: PsqlConnector, num_records: int = 1) -> None: diff --git a/database-replication/app/psql_connector.py b/database-replication/app/app/psql_connector.py similarity index 100% rename from database-replication/app/psql_connector.py rename to database-replication/app/app/psql_connector.py diff --git a/database-replication/app/ui.py b/database-replication/app/app/streamlit_app.py similarity index 98% rename from database-replication/app/ui.py rename to database-replication/app/app/streamlit_app.py index c81c671..ca81121 100644 --- a/database-replication/app/ui.py +++ b/database-replication/app/app/streamlit_app.py @@ -2,7 +2,7 @@ import streamlit as st import pandas as pd from sqlalchemy import text -from psql_connector import PsqlConnector +from app.psql_connector import PsqlConnector from gen_data import gen_public_test @@ -11,7 +11,7 @@ import logging import time -load_dotenv(dotenv_path=".env") +load_dotenv(dotenv_path="../../.env") # Init logging logging.basicConfig( diff --git a/database-replication/docker-compose.yml b/database-replication/docker-compose.yml index 9d93500..ef75a88 100644 --- a/database-replication/docker-compose.yml +++ b/database-replication/docker-compose.yml @@ -39,7 +39,7 @@ services: ports: - "8501:8501" volumes: - - ./app:/app + - ./app/app:/app env_file: .env restart: on-failure depends_on: From ea954a2035a5fcf99257003bb5db05f7f9c3f241 Mon Sep 17 00:00:00 2001 From: lelouvincx Date: Wed, 20 Sep 2023 16:31:09 +0700 Subject: [PATCH 5/7] #4: Update requirements --- database-replication/.gitignore | 1 + database-replication/Makefile | 2 +- database-replication/app/requirements.txt | 3 +++ database-replication/pyproject.toml | 6 ++++-- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/database-replication/.gitignore b/database-replication/.gitignore index c762576..9fc3a0e 100644 --- a/database-replication/.gitignore +++ b/database-replication/.gitignore @@ -2,3 +2,4 @@ .docker/backups/* restore.log database-replication.code-workspace +htmlcov/ diff --git a/database-replication/Makefile b/database-replication/Makefile index b896f04..e3d90a0 100644 --- a/database-replication/Makefile +++ b/database-replication/Makefile @@ -49,7 +49,7 @@ lint: test: python -m pytest --log-cli-level info -p no:warnings -v ./app -ci: docs app-requirements format lint +ci: docs app-requirements lint format # ============ Postgres ============ to-psql-default: diff --git a/database-replication/app/requirements.txt b/database-replication/app/requirements.txt index ba64d2d..099ac02 100644 --- a/database-replication/app/requirements.txt +++ b/database-replication/app/requirements.txt @@ -19,6 +19,7 @@ charset-normalizer==3.2.0 click==8.1.7 comm==0.1.4 confluent-kafka==2.2.0 +coverage==7.3.1 debugpy==1.8.0 decorator==5.1.1 defusedxml==0.7.1 @@ -87,6 +88,8 @@ pydeck==0.8.0 Pygments==2.16.1 Pympler==1.0.1 pytest==7.4.2 +pytest-dependency==0.5.1 +pytest-ordering==0.6 python-dateutil==2.8.2 python-dotenv==1.0.0 python-json-logger==2.0.7 diff --git a/database-replication/pyproject.toml b/database-replication/pyproject.toml index 4803780..401cf0f 100644 --- a/database-replication/pyproject.toml +++ b/database-replication/pyproject.toml @@ -12,11 +12,13 @@ confluent-kafka = "^2.2.0" ruff = "^0.0.287" black = "^23.9.1" pytest = "^7.4.2" +pytest-dependency = "^0.5.1" +pytest-ordering = "^0.6" [tool.ruff] # Enable rule pycodestyle select = ["E"] -ignore = ["E501", "E101"] +ignore = ["E501", "E101", "E402"] # Allow autofix for all enabled rules (when `--fix`) is provided. fixable = ["ALL"] @@ -24,7 +26,7 @@ unfixable = [] # Maximum line length is same as black line-length = 88 -src = ["app"] +src = ["app/app"] # Exclude a variety of commonly ignored directories. exclude = [ From b538e9c1c552509924d2cdeeefc750689c1e30be Mon Sep 17 00:00:00 2001 From: lelouvincx Date: Wed, 20 Sep 2023 16:31:42 +0700 Subject: [PATCH 6/7] #4: Add unit tests for upstream-app --- database-replication/app/tests/conftest.py | 55 +++++++++++++++++++ .../app/tests/test_gen_data.py | 55 +++++++++++++++++++ .../app/tests/test_psql_connector.py | 32 +++++++++++ 3 files changed, 142 insertions(+) create mode 100644 database-replication/app/tests/conftest.py create mode 100644 database-replication/app/tests/test_gen_data.py create mode 100644 database-replication/app/tests/test_psql_connector.py diff --git a/database-replication/app/tests/conftest.py b/database-replication/app/tests/conftest.py new file mode 100644 index 0000000..8f5c465 --- /dev/null +++ b/database-replication/app/tests/conftest.py @@ -0,0 +1,55 @@ +# Config relative path to module app +from os.path import dirname, abspath +import sys + +parent_dir = dirname(dirname(abspath(__file__))) +sys.path.append(parent_dir) + + +from app.psql_connector import PsqlConnector +from sqlalchemy import text +from dotenv import load_dotenv +from os import environ as env +import pytest + +load_dotenv() + +psql_params = { + "host": env["POSTGRES_HOST"], + "port": env["POSTGRES_PORT"], + "user": env["POSTGRES_USER"], + "password": env["POSTGRES_PASSWORD"], + "database": env["POSTGRES_DB"], +} + + +@pytest.fixture +def create_temp_table(): + psql_connector = PsqlConnector(psql_params) + + # Create temp_table + with psql_connector.connect() as engine: + with engine.connect() as cursor: + sql_script = text(""" + CREATE TEMP TABLE temp_table ( + id serial PRIMARY KEY, + name VARCHAR(50), + age INT + ) + """) + cursor.execute(sql_script) + cursor.commit() + + # Find schema of temp_table + with psql_connector.connect() as engine: + with engine.connect() as cursor: + sql_script = text(""" + SELECT schemaname + FROM pg_tables + WHERE tablename = 'temp_table'; + """) + temp_table_schema = cursor.execute(sql_script).fetchone() or [] + temp_table_schema = temp_table_schema[0] or str + + # Create Table object for temp_table + return temp_table_schema diff --git a/database-replication/app/tests/test_gen_data.py b/database-replication/app/tests/test_gen_data.py new file mode 100644 index 0000000..1677a3d --- /dev/null +++ b/database-replication/app/tests/test_gen_data.py @@ -0,0 +1,55 @@ +from app.gen_data import Table, PsqlConnector +from conftest import psql_params +from sqlalchemy import text +import pytest + + +class TestTable: + @pytest.mark.first + @pytest.mark.dependency(name="TEST_CONNECTING") + def test_connecting(self): + psql_connector = PsqlConnector(psql_params) + is_connected = False + with psql_connector.connect() as engine: + with engine.connect() as cursor: + is_connected = True + cursor.commit() + assert is_connected is True, "Not connected to database" + + @pytest.mark.dependency(depends=["TEST_CONNECTING"]) + def test_update_attributes(self): + psql_connector = PsqlConnector(psql_params) + + # Create temp_table + with psql_connector.connect() as engine: + with engine.connect() as cursor: + sql_script = text(""" + CREATE TEMP TABLE temp_table ( + id serial PRIMARY KEY, + name VARCHAR(50), + age INT + ) + """) + cursor.execute(sql_script) + cursor.commit() + + # Find schema of temp_table + with psql_connector.connect() as engine: + with engine.connect() as cursor: + sql_script = text(""" + SELECT schemaname + FROM pg_tables + WHERE tablename = 'temp_table'; + """) + temp_table_schema = cursor.execute(sql_script).fetchone() or [] + temp_table_schema = temp_table_schema[0] + + temp_table = Table(schema=temp_table_schema, name="temp_table") + is_changed = temp_table.update_attributes(psql_connector) + + assert (is_changed is True), "Attributes not changed." + + @pytest.mark.skip(reason="Not implemented due to WIP") + @pytest.mark.dependency(depends=["TEST_CONNECTING"]) + def test_generate(self): + pass diff --git a/database-replication/app/tests/test_psql_connector.py b/database-replication/app/tests/test_psql_connector.py new file mode 100644 index 0000000..0434ac7 --- /dev/null +++ b/database-replication/app/tests/test_psql_connector.py @@ -0,0 +1,32 @@ +from app.psql_connector import PsqlConnector +from conftest import psql_params +from sqlalchemy import text +import pytest + + +class TestPsqlConnector: + @pytest.mark.first + @pytest.mark.dependency(name="TEST_CONNECTING") + def test_connecting(self): + psql_connector = PsqlConnector(psql_params) + is_connected = False + with psql_connector.connect() as engine: + with engine.connect() as cursor: + is_connected = True + cursor.commit() + assert is_connected is True, "Not connected to database." + + @pytest.mark.dependency(depends=["TEST_CONNECTING"]) + def test_getting_data(self): + psql_connector = PsqlConnector(psql_params) + with psql_connector.connect() as engine: + with engine.connect() as cursor: + sql_script = "SELECT 1;" + fetched_data = 0 + try: + fetched_data = cursor.execute(text(sql_script)).fetchone() or [] + fetched_data = fetched_data[0] or int + except Exception as e: + print(f"Error when retrieving results from database: {e}") + assert False, "Error when retrieving results." + assert fetched_data == 1 From 51c60848588d21ccf7fc7b6ada97908d0123e1be Mon Sep 17 00:00:00 2001 From: lelouvincx Date: Wed, 20 Sep 2023 17:07:06 +0700 Subject: [PATCH 7/7] Local CI --- database-replication/app/requirements.txt | 2 +- database-replication/app/tests/test_gen_data.py | 2 +- database-replication/pyproject.toml | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/database-replication/app/requirements.txt b/database-replication/app/requirements.txt index 099ac02..d4c1889 100644 --- a/database-replication/app/requirements.txt +++ b/database-replication/app/requirements.txt @@ -19,7 +19,6 @@ charset-normalizer==3.2.0 click==8.1.7 comm==0.1.4 confluent-kafka==2.2.0 -coverage==7.3.1 debugpy==1.8.0 decorator==5.1.1 defusedxml==0.7.1 @@ -88,6 +87,7 @@ pydeck==0.8.0 Pygments==2.16.1 Pympler==1.0.1 pytest==7.4.2 +pytest-cov==4.1.0 pytest-dependency==0.5.1 pytest-ordering==0.6 python-dateutil==2.8.2 diff --git a/database-replication/app/tests/test_gen_data.py b/database-replication/app/tests/test_gen_data.py index 1677a3d..48ab2a4 100644 --- a/database-replication/app/tests/test_gen_data.py +++ b/database-replication/app/tests/test_gen_data.py @@ -47,7 +47,7 @@ def test_update_attributes(self): temp_table = Table(schema=temp_table_schema, name="temp_table") is_changed = temp_table.update_attributes(psql_connector) - assert (is_changed is True), "Attributes not changed." + assert is_changed is True, "Attributes not changed." @pytest.mark.skip(reason="Not implemented due to WIP") @pytest.mark.dependency(depends=["TEST_CONNECTING"]) diff --git a/database-replication/pyproject.toml b/database-replication/pyproject.toml index 401cf0f..ee9f51a 100644 --- a/database-replication/pyproject.toml +++ b/database-replication/pyproject.toml @@ -14,6 +14,7 @@ black = "^23.9.1" pytest = "^7.4.2" pytest-dependency = "^0.5.1" pytest-ordering = "^0.6" +pytest-cov = "^4.1.0" [tool.ruff] # Enable rule pycodestyle