Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: local docker telemetry infra #2296

Merged
merged 10 commits into from
Sep 4, 2023
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
SHELL=/usr/bin/env bash
PROJECTNAME=$(shell basename "$(PWD)")
DIR_FULLPATH=$(shell pwd)
versioningPath := "github.com/celestiaorg/celestia-node/nodebuilder/node"
LDFLAGS=-ldflags="-X '$(versioningPath).buildTime=$(shell date)' -X '$(versioningPath).lastCommit=$(shell git rev-parse HEAD)' -X '$(versioningPath).semanticVersion=$(shell git describe --tags --dirty=-dev 2>/dev/null || git rev-parse --abbrev-ref HEAD)'"
ifeq (${PREFIX},)
Expand Down Expand Up @@ -176,3 +177,14 @@ adr-gen:
@echo "--> Generating ADR"
@curl -sSL https://raw.githubusercontent.com/celestiaorg/.github/main/adr-template.md > docs/architecture/adr-$(NUM)-$(TITLE).md
.PHONY: adr-gen

## telemetry-infra-up: launches local telemetry infrastructure. This includes grafana, jaeger, loki, promtail, prometheus, pyroscope, and an otel-collector.
## you can access the grafana instance at localhost:3001 (host port published by docker/telemetry/docker-compose.yml) and login with admin:admin.
# PWD is overridden for the docker-compose invocation because the compose file
# resolves its bind mounts through ${PWD} (e.g. ${PWD}/prometheus).
telemetry-infra-up:
	PWD="${DIR_FULLPATH}/docker/telemetry" docker-compose -f ./docker/telemetry/docker-compose.yml up
.PHONY: telemetry-infra-up

## telemetry-infra-down: tears the telemetry infrastructure down. The stores for grafana, prometheus, and loki will persist.
# Same PWD override as telemetry-infra-up so the compose file's ${PWD}
# references resolve identically.
telemetry-infra-down:
	PWD="${DIR_FULLPATH}/docker/telemetry" docker-compose -f ./docker/telemetry/docker-compose.yml down
.PHONY: telemetry-infra-down
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Continue reading [here](https://blog.celestia.org/celestia-mvp-release-data-avai
- [API docs](#api-docs)
- [Node types](#node-types)
- [Run a node](#run-a-node)
- [Environment variables](#environment-variables)
- [Environment variables](#environment-variables)
- [Package-specific documentation](#package-specific-documentation)
- [Code of Conduct](#code-of-conduct)

Expand Down Expand Up @@ -55,7 +55,7 @@ For more information on setting up a node and the hardware requirements needed,

## API docs

Celestia-node public API is documented [here](https://docs.celestia.org/category/node-api/).
The celestia-node public API is documented [here](https://docs.celestia.org/category/node-api/).

## Node types

Expand All @@ -67,7 +67,7 @@ More information can be found [here](https://github.com/celestiaorg/celestia-nod

## Run a node

`<node_type>` can be `bridge`, `full` or `light`.
`<node_type>` can be: `bridge`, `full` or `light`.

```sh
celestia <node_type> init
Expand All @@ -77,7 +77,9 @@ celestia <node_type> init
celestia <node_type> start
```

### Environment variables
Please refer to [this guide](https://docs.celestia.org/nodes/celestia-node/) for more information on running a node.

## Environment variables

| Variable | Explanation | Default value | Required |
| ----------------------- | ----------------------------------- | ------------- | -------- |
Expand Down
89 changes: 89 additions & 0 deletions docker/telemetry/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
---
# Local telemetry stack: loki + promtail (logs), prometheus (metrics),
# otel-collector (OTLP ingest), jaeger (traces), grafana (dashboards) and
# pyroscope (profiling).
# Bind mounts are resolved through ${PWD}, so PWD must point at the directory
# that contains the promtail/, prometheus/, otel-collector/ and grafana/
# config folders.
version: '3.8'

services:
  # Log store; data is kept in the named volume "loki-data".
  loki:
    image: grafana/loki:2.6.1
    container_name: loki
    restart: unless-stopped
    expose:
      - "3100"
    ports:
      - "3100:3100"
    volumes:
      - loki-data:/loki

  # Log shipper; started only after loki.
  promtail:
    image: grafana/promtail:latest
    container_name: promtail
    depends_on:
      - loki
    volumes:
      # custom config will read logs from the containers of
      # this project
      - ${PWD}/promtail:/etc/promtail
      # to read container labels and logs
      - /var/run/docker.sock:/var/run/docker.sock
      - /var/lib/docker/containers:/var/lib/docker/containers:ro

  # Metrics store; container port 9090 is published on host port 9000.
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    command:
      - "--web.enable-lifecycle"
      - "--config.file=/etc/prometheus/prometheus.yml"
    ports:
      - "9000:9090"
    volumes:
      - ${PWD}/prometheus:/etc/prometheus
      - prometheus-data:/prometheus
    # maps host.docker.internal to the docker host gateway
    extra_hosts:
      - "host.docker.internal:host-gateway"

  # OpenTelemetry collector; its config is read from the mounted directory.
  otel-collector:
    image: otel/opentelemetry-collector
    container_name: otel-collector
    command:
      - "--config=/root/otel-collector/config.yml"
    volumes:
      - ${PWD}/otel-collector:/root/otel-collector/
    ports:
      - "8888:8888"  # Prometheus metrics exposed by the collector
      - "8889:8889"  # Prometheus exporter metrics
      - "55681:55681"
      - "13133:13133"  # health_check extension
      - "4317:4317"  # OTLP gRPC receiver
      - "4318:4318"  # OTLP http receiver

  # Trace backend with its OTLP collector endpoint switched on.
  jaeger:
    image: jaegertracing/all-in-one:latest
    container_name: jaeger
    environment:
      - COLLECTOR_OTLP_ENABLED=true
      - LOG_LEVEL=debug
    ports:
      - "16686:16686"
      - "14268:14268"
      - "14250:14250"

  # Dashboards; container port 3000 is published on host port 3001.
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    user: "0"
    restart: unless-stopped
    ports:
      - "3001:3000"
    volumes:
      # the same host directory backs both the provisioning tree and the
      # dashboards directory
      - ${PWD}/grafana/:/etc/grafana/provisioning/
      - ${PWD}/grafana/:/var/lib/grafana/dashboards/
      - grafana-data:/var/lib/grafana

  # Continuous profiling server.
  pyroscope:
    image: pyroscope/pyroscope:latest
    command: ["server"]
    ports:
      - "4040:4040"

# Named volumes that hold prometheus, loki and grafana state.
volumes:
  prometheus-data:
  loki-data:
  grafana-data:
14 changes: 14 additions & 0 deletions docker/telemetry/grafana/datasources/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
# Grafana datasource provisioning: registers the Prometheus and Loki
# backends, both queried through the Grafana server (access: proxy).
apiVersion: 1

datasources:
  # Metrics backend.
  - type: prometheus
    name: Prometheus
    url: "http://prometheus:9090"
    access: proxy

  # Logs backend; queries are capped at 1000 returned lines.
  - type: loki
    name: Loki
    url: "http://loki:3100"
    access: proxy
    jsonData:
      maxLines: 1000
11 changes: 11 additions & 0 deletions docker/telemetry/loki/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# Loki server settings for the local telemetry stack.
#
# Multi-tenancy is disabled: with auth_enabled set to true Loki requires a
# tenant ID (X-Scope-OrgID header) on every request, and neither the promtail
# client config nor the Grafana Loki datasource in this stack supplies one.
auth_enabled: false

# Left without a value on purpose, so Loki keeps its built-in default.
http_prefix:

server:
  # Listen on all interfaces so other containers can reach Loki.
  http_listen_address: "0.0.0.0"
  grpc_listen_address: "0.0.0.0"
  http_listen_port: 3100
  grpc_listen_port: 9095
  log_level: info
32 changes: 32 additions & 0 deletions docker/telemetry/otel-collector/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---
# OpenTelemetry Collector pipeline for the local telemetry stack:
#   OTLP in -> metrics exposed for Prometheus scraping, traces sent to Jaeger.
extensions:
  health_check:
    # Bound explicitly to all interfaces: recent collector releases default to
    # localhost, which would make the published 13133 port unreachable.
    endpoint: "0.0.0.0:13133"

receivers:
  otlp:
    protocols:
      # Endpoints are set explicitly for the same reason as health_check:
      # a localhost default is not reachable through published ports.
      grpc:
        endpoint: "0.0.0.0:4317"
      http:
        endpoint: "0.0.0.0:4318"

exporters:
  # Serves received metrics in Prometheus format on port 8889.
  prometheus:
    endpoint: "otel-collector:8889"
    send_timestamps: true
    metric_expiration: 1800m
  # Traces go to Jaeger over OTLP/gRPC. The dedicated `jaeger` exporter has
  # been removed from current collector releases, so it is not used here.
  # Requires Jaeger's OTLP receiver (COLLECTOR_OTLP_ENABLED=true).
  otlp/jaeger:
    endpoint: "jaeger:4317"
    tls:
      insecure: true

service:
  extensions: [health_check]
  pipelines:
    metrics:
      receivers: [otlp]
      exporters: [prometheus]
    traces:
      receivers: [otlp]
      processors: []
      exporters: [otlp/jaeger]
25 changes: 25 additions & 0 deletions docker/telemetry/prometheus/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
---
# Prometheus scrape configuration for the local telemetry stack.
global:
  scrape_interval: 15s
  scrape_timeout: 10s
  evaluation_interval: 15s

scrape_configs:
  # Metrics re-exported by the OpenTelemetry collector's prometheus exporter.
  - job_name: 'collector'
    metrics_path: /metrics
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    scheme: http
    static_configs:
      - targets:
          - 'otel-collector:8889'
  # Metrics served on port 8890 of the docker host.
  - job_name: 'p2p-metrics'
    metrics_path: /metrics
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    scheme: http
    static_configs:
      - targets:
          # Indented to match the 'collector' job above (10 spaces); the
          # previous 8-space indent failed yamllint's indentation rule.
          - 'host.docker.internal:8890'

Check failure on line 25 in docker/telemetry/prometheus/prometheus.yml

View workflow job for this annotation

GitHub Actions / yamllint

25:9 [indentation] wrong indentation: expected 10 but found 8
29 changes: 29 additions & 0 deletions docker/telemetry/promtail/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# https://grafana.com/docs/loki/latest/clients/promtail/configuration/
# https://docs.docker.com/engine/api/v1.41/#operation/ContainerList
---
# Promtail: discovers docker containers through the docker socket and pushes
# their logs to Loki.
server:
  http_listen_port: 9080
  grpc_listen_port: 0

# File in which promtail records its read positions.
positions:
  filename: /tmp/positions.yaml

# Loki push endpoint.
clients:
  - url: http://loki:3100/loki/api/v1/push

scrape_configs:
  - job_name: flog_scrape
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 5s
        # Only containers carrying the label logging=promtail are selected.
        filters:
          - name: label
            values:
              - "logging=promtail"
    relabel_configs:
      # Container name with its leading "/" stripped -> "container".
      - source_labels:
          - '__meta_docker_container_name'
        regex: '/(.*)'
        target_label: 'container'
      # Log stream of the container -> "logstream".
      - source_labels:
          - '__meta_docker_container_log_stream'
        target_label: 'logstream'
      # Value of the container's logging_jobname label -> "job".
      - source_labels:
          - '__meta_docker_container_label_logging_jobname'
        target_label: 'job'
Loading