Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SDK server prod #4243

Merged
merged 11 commits into from
Jun 2, 2023
6 changes: 5 additions & 1 deletion .github/workflows/build-test-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -925,7 +925,7 @@ jobs:
env:
AWS_PROFILE: aws-deployer-connext
TF_VAR_cartographer_image_tag: ${{ github.sha }}
TF_VAR_sdk_server_image_tag: ${{ github.sha }}
TF_VAR_full_image_name_sdk_server: ${{ fromJSON(needs.smoke-tests.outputs.sdk-server-tags).tags[0] }}

runs-on: ubuntu-latest
permissions:
Expand Down Expand Up @@ -1122,13 +1122,15 @@ jobs:
router-executor-tags: ${{ needs.e2e-tests.outputs.router-executor-tags }}
relayer-tags: ${{ needs.e2e-tests.outputs.relayer-tags }}
watcher-tags: ${{ needs.e2e-tests.outputs.watcher-tags }}
sdk-server-tags: ${{ needs.e2e-tests.outputs.sdk-server-tags }}

terraform-services-backend-prod-testnet:
if: github.ref == 'refs/heads/testnet-prod' || github.ref == 'refs/heads/prod'
needs: [e2e-tests]
env:
AWS_PROFILE: aws-deployer-connext
TF_VAR_cartographer_image_tag: ${{ github.sha }}
TF_VAR_full_image_name_sdk_server: ${{ fromJSON(needs.e2e-tests.outputs.sdk-server-tags).tags[0] }}

runs-on: ubuntu-latest
permissions:
Expand Down Expand Up @@ -1188,13 +1190,15 @@ jobs:
router-executor-tags: ${{ needs.e2e-tests.outputs.router-executor-tags }}
relayer-tags: ${{ needs.e2e-tests.outputs.relayer-tags }}
watcher-tags: ${{ needs.e2e-tests.outputs.watcher-tags }}
sdk-server-tags: ${{ needs.e2e-tests.outputs.sdk-server-tags }}

terraform-services-backend-prod-mainnet:
if: github.ref == 'refs/heads/prod'
needs: [terraform-services-core-prod-testnet, terraform-services-backend-prod-testnet]
env:
AWS_PROFILE: aws-deployer-connext
TF_VAR_cartographer_image_tag: ${{ github.sha }}
TF_VAR_full_image_name_sdk_server: ${{ fromJSON(needs.terraform-services-backend-prod-testnet.outputs.sdk-server-tags).tags[0] }}

runs-on: ubuntu-latest
permissions:
Expand Down
21 changes: 13 additions & 8 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,20 @@ Config.json located in the root of each project's docker ie (repo-root)/docker/<

- router (pretty much just runs the router as configured).

- integration
- integration

Run everything from the root directory.

Building:
(from the repo root directory):
ex.
```[sudo] bash docker/router/build-router.sh```
Build:

Running:
ex.
```<ENV variables> [sudo] docker run -d --name router nxtp-router```
```
docker build -f docker/sdk-server/Dockerfile -t sdk-server .
```

> NOTE: If you are using an M1/M2 Mac, you must provide the flag `--platform linux/amd64`. If you are using Docker Desktop, also make sure you turn ON the "Use Virtualization framework" option in Settings > General and turn OFF the "Use Rosetta for x86/amd64 emulation on Apple Silicon" option in Settings > Features in development.

Run:

```
docker run -it sdk-server
```
6 changes: 4 additions & 2 deletions docker/sdk-server/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM node:18-alpine as node

# ----------------------------------------
# Builds Docker container for nxtp-watcher package.
# Builds Docker container for sdk-server package.

# ----------------------------------------

Expand All @@ -23,6 +23,7 @@ ARG TEMP_DEPS_DIR
COPY .yarn /tmp/build/.yarn/
COPY .yarnrc.yml /tmp/build/
COPY package.json /tmp/build/
COPY packages/examples/sdk-server/package.json /tmp/build/packages/examples/sdk-server/
COPY packages/agents/sdk/package.json /tmp/build/packages/agents/sdk/
COPY packages/adapters/cache/package.json /tmp/build/packages/adapters/cache/
COPY packages/adapters/subgraph/package.json /tmp/build/packages/adapters/subgraph/
Expand All @@ -38,6 +39,7 @@ RUN yarn install

# ----- Copy source and all other files that affect lint, test, build -----
COPY config config/
COPY packages/examples/sdk-server /tmp/build/packages/examples/sdk-server
COPY packages/agents/sdk /tmp/build/packages/agents/sdk
COPY packages/adapters/cache /tmp/build/packages/adapters/cache
COPY packages/adapters/subgraph /tmp/build/packages/adapters/subgraph
Expand Down Expand Up @@ -68,7 +70,7 @@ COPY --from=build --chown=node:node /tmp/build /home/node
# This user is created in the base image with uid and gid = 1000.
USER node

WORKDIR /home/node/packages/agents/sdk
WORKDIR /home/node/packages/examples/sdk-server

EXPOSE 8080

Expand Down
17 changes: 0 additions & 17 deletions docker/sdk-server/conifg.local.json

This file was deleted.

64 changes: 64 additions & 0 deletions ops/modules/db-alarms/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
locals {
  # Threshold values for the two CloudWatch alarms declared in this file,
  # looked up by key from each alarm's `threshold` argument.
  thresholds = {
    CPUUtilizationThreshold   = var.cpu_utilization_threshold
    FreeStorageSpaceThreshold = var.free_storage_space_threshold
  }

  # Names of the alarm resources declared in this file. Entries mirror the
  # resource names (`cpu_utilization_too_high`, `free_storage_space_too_low`).
  # Not referenced by any resource in this file.
  alarm_names = toset([
    "cpu_utilization_too_high",
    "free_storage_space_too_low",
  ])
}


# One e-mail subscription per address in var.sns_topic_subscription_emails.
# Subscriptions exist only when at least one alarm is enabled, which is the
# same condition under which aws_sns_topic.topic is created.
resource "aws_sns_topic_subscription" "target" {
  count = (var.enable_cpu_utilization_alarm || var.enable_free_storage_space_too_low_alarm) ? length(var.sns_topic_subscription_emails) : 0

  protocol  = "email"
  endpoint  = var.sns_topic_subscription_emails[count.index]
  topic_arn = aws_sns_topic.topic[0].arn
}

# SNS topic that receives alarm and OK notifications for this database.
# Created only when at least one of the two alarms is enabled; the name gets
# a "replica-" infix when var.is_replica is true.
resource "aws_sns_topic" "topic" {
  count = (var.enable_cpu_utilization_alarm || var.enable_free_storage_space_too_low_alarm) ? 1 : 0

  name = "${var.environment}-${var.stage}-${var.is_replica ? "replica-" : ""}db-sns-topic"
}


# Alarm on the AWS/RDS CPUUtilization metric for var.db_instance_id.
# Fires when the 600-second average exceeds the configured threshold.
resource "aws_cloudwatch_metric_alarm" "cpu_utilization_too_high" {
  count = var.enable_cpu_utilization_alarm ? 1 : 0

  alarm_name        = "${var.db_instance_name}-cpu-utilization-too-high"
  alarm_description = "Average database CPU utilization over last 10 minutes too high"

  # Metric selection.
  namespace   = "AWS/RDS"
  metric_name = "CPUUtilization"
  dimensions = {
    DBInstanceIdentifier = var.db_instance_id
  }

  # Evaluation: one 600-second period, averaged, compared against the threshold.
  statistic           = "Average"
  period              = 600
  evaluation_periods  = 1
  comparison_operator = "GreaterThanThreshold"
  threshold           = local.thresholds.CPUUtilizationThreshold

  # Both the ALARM and OK transitions notify the module's SNS topic.
  alarm_actions = aws_sns_topic.topic[*].arn
  ok_actions    = aws_sns_topic.topic[*].arn
}


# Alarm on the AWS/RDS FreeStorageSpace metric for var.db_instance_id.
# Fires when the 600-second average drops below the configured threshold.
resource "aws_cloudwatch_metric_alarm" "free_storage_space_too_low" {
  count = var.enable_free_storage_space_too_low_alarm ? 1 : 0

  alarm_name        = "${var.db_instance_name}-free-storage-space-too-low"
  alarm_description = "Average database free storage space over last 10 minutes too low"

  # Metric selection.
  namespace   = "AWS/RDS"
  metric_name = "FreeStorageSpace"
  dimensions = {
    DBInstanceIdentifier = var.db_instance_id
  }

  # Evaluation: one 600-second period, averaged, compared against the threshold.
  statistic           = "Average"
  period              = 600
  evaluation_periods  = 1
  comparison_operator = "LessThanThreshold"
  threshold           = local.thresholds.FreeStorageSpaceThreshold

  # Both the ALARM and OK transitions notify the module's SNS topic.
  alarm_actions = aws_sns_topic.topic[*].arn
  ok_actions    = aws_sns_topic.topic[*].arn
}
7 changes: 7 additions & 0 deletions ops/modules/db-alarms/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Name of the free-storage-space alarm.
# The alarm resource uses `count`, so it has zero instances when
# var.enable_free_storage_space_too_low_alarm is false; try() yields null in
# that case instead of failing on the [0] index.
output "free_storage_space_threshold_alarm_id" {
  value = try(aws_cloudwatch_metric_alarm.free_storage_space_too_low[0].alarm_name, null)
}

# Name of the CPU-utilization alarm.
# The alarm resource uses `count`, so it has zero instances when
# var.enable_cpu_utilization_alarm is false; try() yields null in that case
# instead of failing on the [0] index.
output "cpu_utilization_threshold_id" {
  value = try(aws_cloudwatch_metric_alarm.cpu_utilization_too_high[0].alarm_name, null)
}
52 changes: 52 additions & 0 deletions ops/modules/db-alarms/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# ----- Target database -----

# Used as the prefix of both alarm names.
variable "db_instance_name" {
  description = "The name of the database instance"
  type        = string
}

# Used as the DBInstanceIdentifier dimension of both alarms.
variable "db_instance_id" {
  description = "The id of the database instance"
  type        = string
}

# When true, the SNS topic name gets a "replica-" infix.
variable "is_replica" {
  description = "Whether the database instance is a replica"
  type        = bool
}

# ----- Alarm toggles -----

variable "enable_cpu_utilization_alarm" {
  description = "Whether to enable the CPU utilization alarm"
  type        = bool
}

variable "enable_free_storage_space_too_low_alarm" {
  description = "Whether to enable the free storage space too low alarm"
  type        = bool
}

# ----- Naming and notification -----

# First component of the SNS topic name.
variable "environment" {
  description = "The environment of the database instance"
  type        = string
}

# Second component of the SNS topic name.
variable "stage" {
  description = "The stage of the database instance"
  type        = string
}

# One e-mail subscription is created per entry when any alarm is enabled.
variable "sns_topic_subscription_emails" {
  description = "The emails to subscribe to the SNS topic"
  type        = list(string)
}

# Threshold for the free-storage-space alarm (comparison: LessThanThreshold).
# CloudWatch reports the AWS/RDS FreeStorageSpace metric in bytes, so the
# value must be given in bytes. The default is 5 GiB (5 * 1024^3).
# NOTE(review): the previous default was the bare number 5, i.e. 5 bytes —
# confirm 5 GiB is the intended floor for these instances.
variable "free_storage_space_threshold" {
  type        = number
  description = "Free storage space threshold, in bytes, below which the alarm fires"
  default     = 5368709120
}


# Threshold for the CPU-utilization alarm (comparison: GreaterThanThreshold).
# CloudWatch reports the AWS/RDS CPUUtilization metric as a percentage.
variable "cpu_utilization_threshold" {
  type        = number
  description = "The CPU utilization threshold, in percent, above which the alarm fires"
  default     = 90
}
24 changes: 22 additions & 2 deletions ops/testnet/prod/backend/config.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@ locals {
DD_LAMBDA_HANDLER = "packages/agents/cartographer/poller/dist/index.handler"
}


# Environment variables passed to the sdk-server service as container_env_vars.
sdk_server_env_vars = [
  {
    # JSON document built in local.local_sdk_server_config.
    name  = "NXTP_CONFIG"
    value = local.local_sdk_server_config
  },
  {
    name  = "ENVIRONMENT"
    value = var.environment
  },
  {
    name  = "STAGE"
    value = var.stage
  },
  {
    name  = "DD_PROFILING_ENABLED"
    value = "true"
  },
  {
    name  = "DD_ENV"
    value = "${var.environment}-${var.stage}"
  },
]
postgrest_env_vars = [
{ name = "PGRST_ADMIN_SERVER_PORT", value = "3001" },
{ name = "PGRST_DB_URI", value = "postgres://${var.postgres_user}:${var.postgres_password}@${module.cartographer_db_replica.db_instance_endpoint}/connext" },
Expand All @@ -20,7 +26,21 @@ locals {
{ name = "ENVIRONMENT", value = var.environment },
{ name = "STAGE", value = var.stage }
]

# JSON configuration handed to the sdk-server through the NXTP_CONFIG
# environment variable. `chains` maps a domain id to its RPC provider URLs.
# Note that the config's `environment` field is filled from var.stage, not
# var.environment.
# NOTE(review): the Infura URL below embeds what looks like a project key in
# plain text; consider sourcing it from a variable or secret store instead of
# committing it.
local_sdk_server_config = jsonencode({
  logLevel    = "debug"
  environment = var.stage

  chains = {
    "1735356532" = {
      providers = [
        "https://goerli.optimism.io/",
      ]
    }
    "1735353714" = {
      providers = [
        "https://goerli.infura.io/v3/9aa3d95b3bc440fa88ea12eaa4456161",
      ]
    }
    "9991" = {
      providers = [
        "https://rpc.ankr.com/polygon_mumbai",
      ]
    }
  }
})
local_cartographer_config = jsonencode({
logLevel = "debug"
chains = {
Expand Down
57 changes: 57 additions & 0 deletions ops/testnet/prod/backend/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ data "aws_route53_zone" "primary" {
zone_id = "Z03634792TWUEHHQ5L0YX"
}

locals {
  # Recipients subscribed to the database alarm SNS topics; shared by the
  # db-alarms module instances in this file.
  db_alarm_emails = [
    "carlo@connext.network",
    "rahul@connext.network",
    "preetham@connext.network",
    "sanchay@connext.network",
  ]
}

module "cartographer_db" {
domain = "cartographer"
source = "../../../modules/db"
Expand Down Expand Up @@ -52,6 +56,18 @@ module "cartographer_db" {
publicly_accessible = true
}

# CloudWatch CPU and free-storage alarms for the primary cartographer database.
module "cartographer-db-alarms" {
  source = "../../../modules/db-alarms"

  # Database being watched.
  db_instance_id   = module.cartographer_db.db_instance_id
  db_instance_name = module.cartographer_db.db_instance_name
  is_replica       = false

  # Which alarms to create.
  enable_cpu_utilization_alarm            = true
  enable_free_storage_space_too_low_alarm = true

  # SNS topic naming and recipients.
  environment                   = var.environment
  stage                         = var.stage
  sns_topic_subscription_emails = local.db_alarm_emails
}

module "cartographer_db_replica" {
domain = "cartographer"
source = "../../../modules/db-replica"
Expand Down Expand Up @@ -87,6 +103,17 @@ module "cartographer_db_replica" {
publicly_accessible = module.cartographer_db.db_publicly_accessible
}

# CloudWatch CPU and free-storage alarms for the cartographer read replica.
# The instance name and id are taken from the replica module so that the
# alarms watch the replica itself and their alarm names (which are prefixed
# with db_instance_name) do not collide with the primary database's alarms.
# NOTE(review): assumes ../../../modules/db-replica exports db_instance_name
# and db_instance_id the same way the db module does — confirm.
module "cartographer-db-replica-alarms" {
  source                                  = "../../../modules/db-alarms"
  db_instance_name                        = module.cartographer_db_replica.db_instance_name
  db_instance_id                          = module.cartographer_db_replica.db_instance_id
  is_replica                              = true
  enable_cpu_utilization_alarm            = true
  enable_free_storage_space_too_low_alarm = true
  stage                                   = var.stage
  environment                             = var.environment
  sns_topic_subscription_emails           = local.db_alarm_emails
}

module "postgrest" {
source = "../../../modules/service"
Expand Down Expand Up @@ -117,6 +144,36 @@ module "postgrest" {
domain = var.domain
}

module "sdk-server" {
source = "../../../modules/service"
region = var.region
dd_api_key = var.dd_api_key
zone_id = data.aws_route53_zone.primary.zone_id
execution_role_arn = data.aws_iam_role.ecr_admin_role.arn
cluster_id = module.ecs.ecs_cluster_id
vpc_id = module.network.vpc_id
private_subnets = module.network.private_subnets
lb_subnets = module.network.public_subnets
internal_lb = false
docker_image = var.full_image_name_sdk_server
container_family = "sdk-server"
container_port = 8080
loadbalancer_port = 80
cpu = 256
memory = 512
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor: Is this tier enough for SDK Server in prod ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question, I am not sure. Will defer to @rhlsthrm

instance_count = 2
timeout = 180
environment = var.environment
stage = var.stage
ingress_cdir_blocks = ["0.0.0.0/0"]
ingress_ipv6_cdir_blocks = []
service_security_groups = flatten([module.network.allow_all_sg, module.network.ecs_task_sg])
cert_arn = var.certificate_arn_testnet
container_env_vars = local.sdk_server_env_vars
domain = var.domain
}


module "cartographer-routers-lambda-cron" {
source = "../../../modules/lambda"
ecr_repository_name = "nxtp-cartographer"
Expand Down
5 changes: 5 additions & 0 deletions ops/testnet/prod/backend/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ output "postgrest-service-endpoint" {
value = module.postgrest.service_endpoint
}

# Exposes the service_endpoint output of the sdk-server service module.
output "sdk-server-service-endpoint" {
  value = module.sdk-server.service_endpoint
}


output "db-instance-endpoint" {
value = module.cartographer_db.db_instance_endpoint
}
Expand Down
6 changes: 6 additions & 0 deletions ops/testnet/prod/backend/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ variable "cartographer_image_tag" {
default = "latest"
}

# Full image reference (registry/repository:tag) used as the docker_image of
# the sdk-server service. CI overrides the default through
# TF_VAR_full_image_name_sdk_server.
variable "full_image_name_sdk_server" {
  type        = string
  description = "sdk-server image name"
  default     = "ghcr.io/connext/sdk-server:sha-e33b4fd"
}

variable "certificate_arn_testnet" {
default = "arn:aws:acm:us-west-1:679752396206:certificate/0ebbf095-681a-4a0a-9dc9-fa70cb80166a"
}
Expand Down
Loading