diff --git a/examples/opentelemetry-collector/.test.sh b/examples/opentelemetry-collector/.test.sh
new file mode 100755
index 000000000..1569a1e00
--- /dev/null
+++ b/examples/opentelemetry-collector/.test.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+# Smoke test: wait for the collector's health check endpoint to respond, then
+# verify that it reports the server as available.
+set -ex
+
+endpoint="http://localhost:13133/"
+
+# -f makes curl exit non-zero on HTTP error statuses, so the loop keeps polling
+# until the endpoint answers successfully rather than stopping at any response.
+timeout 20 bash -c "until curl -f $endpoint 2>/dev/null; do sleep 0.5; done"
+curl -s "$endpoint" | grep "Server available"
diff --git a/examples/opentelemetry-collector/devenv.nix b/examples/opentelemetry-collector/devenv.nix
new file mode 100644
index 000000000..75c18b95a
--- /dev/null
+++ b/examples/opentelemetry-collector/devenv.nix
@@ -0,0 +1,65 @@
+{ ... }:
+
+{
+  # ClickHouse backs the `clickhouse` exporter configured below.
+  services.clickhouse.enable = true;
+
+  # Wait for clickhouse to come up
+  processes.opentelemetry-collector.process-compose = {
+    depends_on.clickhouse-server.condition = "process_healthy";
+  };
+
+  services.opentelemetry-collector = {
+    enable = true;
+
+    # Or use a raw YAML file:
+    # `services.opentelemetry-collector.configFile = pkgs.writeText "otel-config.yaml" "...";`
+    settings = {
+      # Accept OTLP over both gRPC and HTTP, with no protocol-specific overrides.
+      receivers = {
+        otlp = {
+          protocols = {
+            grpc = { };
+            http = { };
+          };
+        };
+      };
+
+      processors = {
+        batch = {
+          timeout = "5s";
+          send_batch_size = 100000;
+        };
+      };
+
+      exporters = {
+        clickhouse = {
+          endpoint = "tcp://127.0.0.1:9000?dial_timeout=10s&compress=lz4";
+          database = "otel";
+          ttl_days = 3;
+          logs_table_name = "otel_logs";
+          traces_table_name = "otel_traces";
+          metrics_table_name = "otel_metrics";
+          timeout = "5s";
+          retry_on_failure = {
+            enabled = true;
+            initial_interval = "5s";
+            max_interval = "30s";
+            max_elapsed_time = "300s";
+          };
+        };
+      };
+
+      # Only a traces pipeline is wired up: otlp -> batch -> clickhouse.
+      service = {
+        pipelines = {
+          traces = {
+            receivers = [ "otlp" ];
+            processors = [ "batch" ];
+            exporters = [ "clickhouse" ];
+          };
+        };
+      };
+    };
+  };
+}
diff --git a/src/modules/services/opentelemetry-collector.nix b/src/modules/services/opentelemetry-collector.nix
new file mode 100644
index 000000000..c65498033
--- /dev/null
+++ 
b/src/modules/services/opentelemetry-collector.nix
@@ -0,0 +1,89 @@
+{ pkgs, config, lib, ... }:
+
+let
+  inherit (lib) types;
+
+  cfg = config.services.opentelemetry-collector;
+
+  # Settings are rendered to YAML with the nixpkgs YAML format helper.
+  settingsFormat = pkgs.formats.yaml { };
+
+  # Baseline settings combined into the `settings` submodule: a health_check
+  # extension on localhost:13133, the same address the readiness probe polls.
+  defaultSettings = {
+    extensions.health_check.endpoint = "localhost:13133";
+    service.extensions = [ "health_check" ];
+  };
+
+  # An explicit configFile takes precedence; otherwise generate one from settings.
+  otelConfig =
+    if cfg.configFile != null
+    then cfg.configFile
+    else settingsFormat.generate "otel-config.yaml" cfg.settings;
+
+  collectorExe = lib.getExe cfg.package;
+
+  # HTTP probe against the address the default health_check extension uses.
+  readinessProbe = {
+    http_get = {
+      scheme = "http";
+      host = "localhost";
+      port = 13133;
+      path = "/";
+    };
+    initial_delay_seconds = 2;
+    period_seconds = 10;
+    timeout_seconds = 5;
+    success_threshold = 1;
+    failure_threshold = 3;
+  };
+in
+{
+  options.services.opentelemetry-collector = {
+    enable = lib.mkEnableOption "opentelemetry-collector";
+
+    package = lib.mkOption {
+      type = types.package;
+      default = pkgs.opentelemetry-collector-contrib;
+      defaultText = lib.literalExpression "pkgs.opentelemetry-collector-contrib";
+      description = "The OpenTelemetry Collector package to use";
+    };
+
+    configFile = lib.mkOption {
+      type = types.nullOr types.path;
+      default = null;
+      example = lib.literalExpression ''
+        pkgs.writeTextFile { name = "otel-config.yaml"; text = "..."; }
+      '';
+      description = ''
+        Override the configuration file used by OpenTelemetry Collector.
+        By default, a configuration is generated from `services.opentelemetry-collector.settings`.
+
+        If overriding, enable the `health_check` extension to allow process-compose to check whether the Collector is ready.
+        Otherwise, disable the readiness probe by setting `processes.opentelemetry-collector.process-compose.readiness_probe = {};`.
+      '';
+    };
+
+    settings = lib.mkOption {
+      type = types.submodule (defaultSettings // { freeformType = settingsFormat.type; });
+      defaultText = defaultSettings;
+      description = ''
+        OpenTelemetry Collector configuration.
+        Refer to https://opentelemetry.io/docs/collector/configuration/
+        for more information on how to configure the Collector.
+      '';
+    };
+  };
+
+  # Declare the collector process only when the service is enabled.
+  config = lib.mkIf cfg.enable {
+    processes.opentelemetry-collector = {
+      exec = "${collectorExe} --config ${otelConfig}";
+
+      process-compose = {
+        availability.restart = "on_failure";
+        readiness_probe = readinessProbe;
+      };
+    };
+  };
+}