Skip to content

Commit

Permalink
chore: Added metrics to liteprotocoltester (#3002)
Browse files Browse the repository at this point in the history
* Added metrics to tests
Fix liteprotocoltester docker files with libnegentropy COPY
docker compose with waku-sim simulation now has a test performance dashboard at localhost:3033

Mention dashboard in Readme

* Update apps/liteprotocoltester/statistics.nim

Co-authored-by: Ivan FB <128452529+Ivansete-status@users.noreply.github.com>

* indent fix, more stable finding of service/bootstrap nodes, pre-set for TWN

---------

Co-authored-by: Ivan FB <128452529+Ivansete-status@users.noreply.github.com>
  • Loading branch information
NagyZoltanPeter and Ivansete-status authored Sep 4, 2024
1 parent 19feb6b commit 8baf627
Show file tree
Hide file tree
Showing 20 changed files with 1,240 additions and 6,169 deletions.
14 changes: 7 additions & 7 deletions apps/liteprotocoltester/.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
START_PUBLISHING_AFTER=10
START_PUBLISHING_AFTER=45
# optional delay (in seconds) before SENDER starts publishing

NUM_MESSAGES=0
Expand All @@ -12,16 +12,16 @@ MIN_MESSAGE_SIZE=15Kb
MAX_MESSAGE_SIZE=145Kb

## for wakusim
PUBSUB=/waku/2/rs/66/0
CONTENT_TOPIC=/tester/2/light-pubsub-test/wakusim
CLUSTER_ID=66
#PUBSUB=/waku/2/rs/66/0
#CONTENT_TOPIC=/tester/2/light-pubsub-test/wakusim
#CLUSTER_ID=66

## for status.prod
#PUBSUB=/waku/2/rs/16/32
#CONTENT_TOPIC=/tester/2/light-pubsub-test/fleet
#CLUSTER_ID=16

## for TWN
#PUBSUB=/waku/2/rs/1/4
#CONTENT_TOPIC=/tester/2/light-pubsub-test/twn
#CLUSTER_ID=1
PUBSUB=/waku/2/rs/1/4
CONTENT_TOPIC=/tester/2/light-pubsub-test/twn
CLUSTER_ID=1
2 changes: 2 additions & 0 deletions apps/liteprotocoltester/Dockerfile.liteprotocoltester
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

COPY build/liteprotocoltester /usr/bin/
COPY apps/liteprotocoltester/run_tester_node.sh /usr/bin/
COPY ./libnegentropy.so /usr/lib/


ENTRYPOINT ["/usr/bin/run_tester_node.sh", "/usr/bin/liteprotocoltester"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

# Copy migration scripts for DB upgrades
COPY --from=nim-build /app/migrations/ /app/migrations/
COPY --from=nim-build /app/libnegentropy.so /usr/lib/

ENTRYPOINT ["/usr/bin/liteprotocoltester"]

Expand Down
35 changes: 0 additions & 35 deletions apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy

This file was deleted.

4 changes: 4 additions & 0 deletions apps/liteprotocoltester/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ docker compose -f docker-compose-on-simularor.yml logs -f receivernode
- Note that there is a configurable wait before publishing starts, because the service nodes need time to get connected to the full nodes of the simulator.
- Light clients print a report on their own and the connected service node's connectivity to the network every 20 seconds.

#### Test monitoring

Navigate to http://localhost:3033 to see the lite-protocol-tester dashboard.

### Phase 3

> Run independently on a chosen waku fleet
Expand Down
74 changes: 41 additions & 33 deletions apps/liteprotocoltester/docker-compose-on-simularor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
deploy:
replicas: ${NUM_PUBLISHER_NODES:-3}
# ports:
Expand All @@ -84,13 +84,12 @@ services:
- *rln_env
- *test_running_conditions
volumes:
- ./run_tester_node.sh:/opt/run_tester_node.sh:Z
- ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z
- ./rln_tree:/etc/rln_tree/:Z
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- SENDER
- waku-sim
Expand Down Expand Up @@ -139,7 +138,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
deploy:
replicas: ${NUM_RECEIVER_NODES:-1}
# ports:
Expand All @@ -161,13 +160,12 @@ services:
- *rln_env
- *test_running_conditions
volumes:
- ./run_tester_node.sh:/opt/run_tester_node.sh:Z
- ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z
- ./rln_tree:/etc/rln_tree/:Z
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- RECEIVER
- waku-sim
Expand All @@ -180,34 +178,44 @@ services:
networks:
- waku-simulator_simulation

## We have prometheus and grafana defined in waku-simulator already
# prometheus:
# image: docker.io/prom/prometheus:latest
# volumes:
# - ./monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:Z
# command:
# - --config.file=/etc/prometheus/prometheus.yml
# ports:
# - 127.0.0.1:9090:9090
# depends_on:
# - servicenode
# waku-simulator already defines its own prometheus and grafana; the instances below are separate ones used for the tester dashboard
prometheus:
image: docker.io/prom/prometheus:latest
volumes:
- ./monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:Z
command:
- --config.file=/etc/prometheus/prometheus.yml
- --web.listen-address=:9099
# ports:
# - 127.0.0.1:9090:9090
restart: on-failure:5
depends_on:
- filter-service
- lightpush-service
- publishernode
- receivernode
networks:
- waku-simulator_simulation

# grafana:
# image: docker.io/grafana/grafana:latest
# env_file:
# - ./monitoring/configuration/grafana-plugins.env
# volumes:
# - ./monitoring/configuration/grafana.ini:/etc/grafana/grafana.ini:Z
# - ./monitoring/configuration/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z
# - ./monitoring/configuration/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z
# - ./monitoring/configuration/dashboards:/var/lib/grafana/dashboards/:Z
# - ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_icon.svg:Z
# - ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_typelogo.svg:Z
# - ./monitoring/configuration/customizations/custom-logo.png:/usr/share/grafana/public/img/fav32.png:Z
# ports:
# - 0.0.0.0:3000:3000
# depends_on:
# - prometheus
grafana:
image: docker.io/grafana/grafana:latest
env_file:
- ./monitoring/configuration/grafana-plugins.env
volumes:
- ./monitoring/configuration/grafana.ini:/etc/grafana/grafana.ini:Z
- ./monitoring/configuration/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z
- ./monitoring/configuration/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z
- ./monitoring/configuration/dashboards:/var/lib/grafana/dashboards/:Z
- ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_icon.svg:Z
- ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_typelogo.svg:Z
- ./monitoring/configuration/customizations/custom-logo.png:/usr/share/grafana/public/img/fav32.png:Z
ports:
- 0.0.0.0:3033:3033
restart: on-failure:5
depends_on:
- prometheus
networks:
- waku-simulator_simulation

configs:
cfg_tester_node.toml:
Expand Down
9 changes: 4 additions & 5 deletions apps/liteprotocoltester/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
ports:
# - 30304:30304/tcp
# - 30304:30304/udp
Expand All @@ -79,13 +79,12 @@ services:
- *rln_env
- *test_running_conditions
volumes:
- ./run_tester_node.sh:/opt/run_tester_node.sh:Z
- ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z
- ./rln_tree:/etc/rln_tree/:Z
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- SENDER
- servicenode
Expand All @@ -99,7 +98,7 @@ services:
image: waku.liteprotocoltester:latest
build:
context: ../..
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester.copy
dockerfile: ./apps/liteprotocoltester/Dockerfile.liteprotocoltester
ports:
# - 30304:30304/tcp
# - 30304:30304/udp
Expand All @@ -125,7 +124,7 @@ services:
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_tester_node.sh
- /usr/bin/run_tester_node.sh
- /usr/bin/liteprotocoltester
- RECEIVER
- servicenode
Expand Down
6 changes: 5 additions & 1 deletion apps/liteprotocoltester/lightpush_publisher.nim
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ import
common/utils/parse_size_units,
],
./tester_config,
./tester_message
./tester_message,
./lpt_metrics

randomize()

Expand Down Expand Up @@ -141,12 +142,15 @@ proc publishMessages(
pubsubTopic = lightpushPubsubTopic,
hash = msgHash
inc(messagesSent)
lpt_publisher_sent_messages_count.inc()
lpt_publisher_sent_bytes.inc(amount = msgSize.int64)
else:
sentMessages[messagesSent] = (hash: msgHash, relayed: false)
failedToSendCause.mgetOrPut(wlpRes.error, 1).inc()
error "failed to publish message using lightpush",
err = wlpRes.error, hash = msgHash
inc(failedToSendCount)
lpt_publisher_failed_messages_count.inc(labelValues = [wlpRes.error])

await sleepAsync(delayMessages)

Expand Down
7 changes: 4 additions & 3 deletions apps/liteprotocoltester/liteprotocoltester.nim
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ when isMainModule:
wakuConf.clusterId = conf.clusterId
## TODO: Depending on the tester needs we might extend here with shards, clusterId, etc...

wakuConf.metricsServer = true
wakuConf.metricsServerAddress = parseIpAddress("0.0.0.0")
wakuConf.metricsServerPort = 8003

if conf.testFunc == TesterFunctionality.SENDER:
wakuConf.lightpushnode = conf.serviceNode
else:
Expand All @@ -108,9 +112,6 @@ when isMainModule:

wakuConf.rest = false

wakuConf.metricsServer = true
wakuConf.metricsServerAddress = parseIpAddress("0.0.0.0")

# NOTE: {.threadvar.} is used to make the global variable GC safe for the closure that uses it
# It will always be called from main thread anyway.
# Ref: https://nim-lang.org/docs/manual.html#threads-gc-safety
Expand Down
30 changes: 30 additions & 0 deletions apps/liteprotocoltester/lpt_metrics.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
## Metrics declared for the lite protocol tester.
## Receiver-side metrics use the `lpt_receiver_` prefix,
## publisher-side metrics use the `lpt_publisher_` prefix.

import metrics

# Re-export so that modules importing this one can use the metrics API
# (e.g. `inc`) on the declared metrics without importing `metrics` themselves.
export metrics

# --- Receiver side ---

# Unlabeled gauge.
declarePublicGauge lpt_receiver_sender_peer_count, "count of sender peers"

# The remaining receiver metrics are all labeled by "peer".
declarePublicCounter lpt_receiver_received_messages_count,
"number of messages received per peer", ["peer"]

declarePublicCounter lpt_receiver_received_bytes,
"number of received bytes per peer", ["peer"]

# Declared as a gauge rather than a counter.
declarePublicGauge lpt_receiver_missing_messages_count,
"number of missing messages per peer", ["peer"]

declarePublicCounter lpt_receiver_duplicate_messages_count,
"number of duplicate messages per peer", ["peer"]

# Declared as a gauge rather than a counter.
declarePublicGauge lpt_receiver_distinct_duplicate_messages_count,
"number of distinct duplicate messages per peer", ["peer"]

# --- Publisher side ---

# Unlabeled counter.
declarePublicCounter lpt_publisher_sent_messages_count, "number of messages published"

# Labeled by "cause"; one series per distinct failure cause.
declarePublicCounter lpt_publisher_failed_messages_count,
"number of messages failed to publish per failure cause", ["cause"]

# Unlabeled counter.
declarePublicCounter lpt_publisher_sent_bytes, "number of total bytes sent"
Loading

0 comments on commit 8baf627

Please sign in to comment.