diff --git a/jforwarder-monitoring.drawio.svg b/jforwarder-monitoring.drawio.svg index 081915e9..9cf57b56 100644 --- a/jforwarder-monitoring.drawio.svg +++ b/jforwarder-monitoring.drawio.svg @@ -1,4 +1,275 @@ - - - -
Prometheus
Prometheus
Prometheus node exporter
Prometheus node expo...
Prometheus postgres exporter
Prometheus postgres...
postgres
postgres
grafana
grafana
Microservices
Microservices
Loki
(local volume storage)
Loki...
Microservices
(logback appender)
Microservices...
Prometheus alertmanager
Prometheus alertmana...
Telegram chat
Telegram chat
Text is not SVG - cannot display
\ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+ Prometheus +
+
+
+
+
+ + Prometheus + +
+
+ + + + + + +
+
+
+ Prometheus node exporter +
+
+
+
+ + Prometheus node expo... + +
+
+ + + + + + +
+
+
+ Prometheus postgres exporter +
+
+
+
+ + Prometheus postgres... + +
+
+ + + + + +
+
+
+
+ postgres +
+
+
+
+
+
+ + postgres + +
+
+ + + + + + + + + + +
+
+
+ grafana +
+
+
+
+ + grafana + +
+
+ + + + + + + + +
+
+
+
+ Microservices +
+
+
+
+
+ + Microservices + +
+
+ + + + +
+
+
+
+ Loki +
+
+ (local volume storage) +
+
+
+
+
+
+ + Loki... + +
+
+ + + + + + +
+
+
+
+ Microservices +
+
+ (logback appender) +
+
+
+
+
+
+ + Microservices... + +
+
+ + + + + + +
+
+
+
+ Prometheus alertmanager +
+
+
+
+
+ + Prometheus alertmana... + +
+
+ + + + +
+
+
+ Telegram chat +
+
+
+
+ + Telegram chat + +
+
+ + + + + + +
+
+
+ Grafana Mimir +
+
+
+
+ + Grafana Mimir + +
+
+ + + + + +
+
+
+ Minio +
+
+
+
+ + Minio + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/monitoring/README.md b/monitoring/README.md index 54b9b4d9..7116a15b 100644 --- a/monitoring/README.md +++ b/monitoring/README.md @@ -19,9 +19,9 @@ ``` ```yml - # for some unknown reason exporter tries to connect to localhost even if you define hostPort in config.yml. - # broken stuff. - jmx-exporter: + # for some unknown reason exporter tries to connect to localhost even if you define hostPort in config.yml. + # broken stuff. + jmx-exporter: image: 'docker.io/bitnami/jmx-exporter:1.0.1' container_name: jmx-exporter ports: @@ -47,4 +47,7 @@ Then proceed to update to schema v13 or newer before re-enabling this config, search for 'Storage Schema' in the docs for the schema update procedure\n - CONFIG ERROR: `tsdb` index type is required to store Structured Metadata and use native OTLP ingestion, your index type is `boltdb-shipper` (defined in the `store` parameter of the schema_config). Set `allow_structured_metadata: false` in the `limits_config` section or set the command line argument `-validation.allow-structured-metadata=false` and restart Loki. Then proceed to update the schema to use index type `tsdb` before re-enabling this config, search for 'Storage Schema' in the docs for the schema update procedure" \ No newline at end of file + CONFIG ERROR: `tsdb` index type is required to store Structured Metadata and use native OTLP ingestion, your index type is `boltdb-shipper` (defined in the `store` parameter of the schema_config). Set `allow_structured_metadata: false` in the `limits_config` section or set the command line argument `-validation.allow-structured-metadata=false` and restart Loki. Then proceed to update the schema to use index type `tsdb` before re-enabling this config, search for 'Storage Schema' in the docs for the schema update procedure" + +8) Mimir runs as one container. Should run as microservices in production. 
+ To read more about storage blocks, see this [GitHub discussion](https://github.com/grafana/mimir/discussions/4187) \ No newline at end of file diff --git a/monitoring/docker-compose.yaml b/monitoring/docker-compose.yaml index f8ccf8b5..81e15f19 100644 --- a/monitoring/docker-compose.yaml +++ b/monitoring/docker-compose.yaml @@ -109,6 +109,71 @@ services: networks: - jforwarder-network + grafana-mimir: + image: 'docker.io/grafana/mimir:2.14.0' + container_name: grafana-mimir + ports: + - "9009:9009" + command: ["-config.file=/etc/mimir.yaml"] + depends_on: + - minio + volumes: + - ./mimir/mimir.yml:/etc/mimir.yaml + - mimir_volume:/data + networks: + - jforwarder-network + + minio: + image: 'docker.io/minio/minio:RELEASE.2024-10-13T13-34-11Z' + container_name: minio + command: server --quiet /data + entrypoint: > + /bin/sh -c ' + isAlive() { curl -sf http://127.0.0.1:9000/minio/health/live; } # check if Minio is alive + minio $0 "$@" --quiet & echo $! > /tmp/minio.pid # start Minio in the background + while ! 
isAlive; do sleep 0.1; done # wait until Minio is alive + mc alias set minio http://127.0.0.1:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD # setup Minio client + mc mb minio/mimir || true # create a test bucket + mc anonymous set public minio/mimir # make the test bucket public + kill -s INT $(cat /tmp/minio.pid) && rm /tmp/minio.pid # stop Minio + while isAlive; do sleep 0.1; done # wait until Minio is stopped + exec minio $0 "$@" # start Minio in the foreground + ' + environment: + - MINIO_ROOT_USER=${MINIO_ROOT_USER} + - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD} + volumes: + - minio_volume:/data:rw + networks: + - jforwarder-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + + # createbuckets: + # image: 'docker.io/minio/mc:RELEASE.2024-10-08T09-37-26Z' + # container_name: minio-mc + # environment: + # - MINIO_ROOT_USER=${MINIO_ROOT_USER} + # - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD} + # depends_on: + # - minio + # entrypoint: > + # /bin/sh -c " + # /usr/bin/mc alias set myminio http://minio:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD; + # /usr/bin/mc mb myminio/mimir-blocks; + # /usr/bin/mc mb myminio/mimir-ruler; + # /usr/bin/mc mb myminio/mimir; + # /usr/bin/mc policy set public myminio/mimir-blocks; + # /usr/bin/mc policy set public myminio/mimir-ruler; + # /usr/bin/mc policy set public myminio/mimir; + # exit 0; + # " + # networks: + # - jforwarder-network + volumes: prometheus_volume: driver: local @@ -118,6 +183,9 @@ volumes: # driver: local loki_volume: driver: local + minio_volume: + mimir_volume: + driver: local networks: jforwarder-network: diff --git a/monitoring/grafana/provisioning/datasources/Datasource.yml b/monitoring/grafana/provisioning/datasources/Datasource.yml index a6f73cd6..f60517b7 100644 --- a/monitoring/grafana/provisioning/datasources/Datasource.yml +++ b/monitoring/grafana/provisioning/datasources/Datasource.yml @@ -6,6 +6,20 @@ 
deleteDatasources: datasources: +# Deprecated. Metrics are now served from Mimir. + # - id: prometheus + # uid: prometheus + # type: prometheus + # name: Prometheus + # editable: true # whether it should be editable + # isDefault: false # whether this should be the default DS + # org_id: 1 # id of the organization to tie this datasource to + # access: proxy + # url: "http://prometheus:9090" + # version: 2 # well, versioning + +# The X-Scope-OrgID header identifies the tenant. Prometheus scrapes all data and pushes it +# to Mimir under tenant "jforwarder", so all "jforwarder" data must be queried with that header. - id: prometheus uid: prometheus type: prometheus @@ -14,7 +28,11 @@ datasources: isDefault: true # whether this should be the default DS org_id: 1 # id of the organization to tie this datasource to access: proxy - url: "http://prometheus:9090" + url: "http://grafana-mimir:9009/prometheus" + jsonData: + httpHeaderName1: "X-Scope-OrgID" + secureJsonData: + httpHeaderValue1: "jforwarder" version: 1 # well, versioning # - id: Tempo diff --git a/monitoring/mimir/mimir.yml b/monitoring/mimir/mimir.yml new file mode 100644 index 00000000..49962e6d --- /dev/null +++ b/monitoring/mimir/mimir.yml @@ -0,0 +1,44 @@ +# Run Mimir in single process mode, with all components running in 1 process. +target: all + +# Configure Mimir to use Minio as object storage backend. The ${...} placeholders below are expanded only when Mimir is started with -config.expand-env=true. +common: + storage: + backend: s3 + s3: + endpoint: minio:9000 + access_key_id: ${access_key_id} + secret_access_key: ${secret_access_key} + insecure: true + bucket_name: mimir + +# Blocks storage requires a prefix when using a common object storage bucket. +# Blocks storage is used for long-term data. +blocks_storage: + backend: s3 + storage_prefix: blocks + bucket_store: + sync_dir: /data/tsdb-sync + # Ingesters write here and flush this data to blocks storage in the s3 backend every 2 hours.
+ tsdb: + dir: /data/tsdb + +ruler_storage: + backend: s3 + storage_prefix: ruler + +ingester: + ring: + # Default is 3. Set to 1 because only one Mimir instance runs here; otherwise Mimir should run as 3 replicas. + replication_factor: 1 + +limits: + # Delete metrics data older than 1 year from storage. + compactor_blocks_retention_period: 1y + +usage_stats: + enabled: false + +server: + http_listen_port: 9009 # must match the port 9009 used by docker-compose, Grafana and Prometheus + log_level: warn \ No newline at end of file diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml index f37abd58..abd1c986 100644 --- a/monitoring/prometheus/prometheus.yml +++ b/monitoring/prometheus/prometheus.yml @@ -17,11 +17,26 @@ alerting: # Alertmanager's default port is 9093 - prometheus-alertmanager:9093 +remote_write: + - url: http://grafana-mimir:9009/api/v1/push + # Add the X-Scope-OrgID header so that Mimir knows which tenant the remote-write data should be stored under. + headers: + X-Scope-OrgID: jforwarder + scrape_configs: + - job_name: 'postgres' static_configs: - targets: [ 'prometheus-postgres-exporter:9187' ] + - job_name: 'node' + static_configs: + - targets: [ 'prometheus-node-exporter:9100' ] + + - job_name: 'kafka' + static_configs: + - targets: ['kafka:12345'] + - job_name: 'jvmMetrics' metrics_path: '/actuator/prometheus' static_configs: @@ -48,10 +63,4 @@ scrape_configs: # static_configs: # - targets: [ 'host.docker.internal:9323' ] - - job_name: 'node' - static_configs: - - targets: [ 'prometheus-node-exporter:9100' ] - - job_name: 'kafka' - static_configs: - - targets: ['kafka:12345']