From c9ba731ebdd570fbddf55b91b47ccb7d8726915c Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Fri, 27 May 2022 19:20:17 +0300 Subject: [PATCH 01/16] Add Loki monolith example --- production/nomad/README.md | 232 ++++++++++++++++++++++++++++ production/nomad/loki/README.md | 31 ++++ production/nomad/loki/config.yml | 62 ++++++++ production/nomad/loki/job.nomad.hcl | 87 +++++++++++ 4 files changed, 412 insertions(+) create mode 100644 production/nomad/README.md create mode 100644 production/nomad/loki/README.md create mode 100644 production/nomad/loki/config.yml create mode 100644 production/nomad/loki/job.nomad.hcl diff --git a/production/nomad/README.md b/production/nomad/README.md new file mode 100644 index 000000000000..fcc915b20b04 --- /dev/null +++ b/production/nomad/README.md @@ -0,0 +1,232 @@ +# Loki Nomad examples + +## Requirements + +### Hard requirements + +- recent version of Nomad with healthy Docker driver +- [Consul integration](https://www.nomadproject.io/docs/integrations/consul-integration) + is enabled in Nomad +- access to S3 storage + +### Optional requirements + +- [Vault integration](https://www.nomadproject.io/docs/integrations/vault-integration) + for providing S3 credentials securely +- Consul connect to secure Loki HTTP endpoints +- Traefik configured to use + [Consul provider](https://doc.traefik.io/traefik/providers/consul-catalog/) to + loadbalance between Loki instances + +### Production use + +For use in production it is recommended to: + +- use authentication - can be achieved with + [Traefik](https://doc.traefik.io/traefik/middlewares/http/basicauth/) +- secure HTTP endpoints with + [Consul Connect](https://www.nomadproject.io/docs/integrations/consul-connect) +- secure GRPC communication with TLS when running multiple instances - can be + achived with Vault's + [PKI secret engine](https://www.vaultproject.io/docs/secrets/pki) + +Use the advanced Loki setup as an example. 
+ +## Service discovery when scaling + +When using multiple Loki instances memberlist advertises wrong address (see this +[issue](https://github.com/grafana/loki/issues/5610)), that is why this example +is using Consul ring for service discovery. + +If you are using Nomad then you are probably also using Consul, so this +shouldn't be an issue. + +## Run Loki behind ingress + +When running multiple instances of Loki incoming request should be loadbalanced. + +Register Loki in Traefik: + +```hcl +tags = [ + "traefik.enable=true", + "traefik.http.routers.loki.entrypoints=https", + "traefik.http.routers.loki.rule=Host(`loki.service.consul`)", +] +``` + +## Setip basicauth + +Generate basicauth credentials: + +```shell +> docker run --rm httpd:alpine htpasswd -nb promtail password123 +promtail:$apr1$Lr55BanK$BV/rE2POaOolkFz8kIfY4/ +``` + +Register Loki in Traefik: + +```hcl +tags = [ + "traefik.enable=true", + "traefik.http.routers.loki.entrypoints=https", + "traefik.http.routers.loki.rule=Host(`loki.service.consul`)", + "traefik.http.middlewares.loki.basicauth.users=promtail:$apr1$Lr55BanK$BV/rE2POaOolkFz8kIfY4/", + "traefik.http.routers.loki.middlewares=loki@consulcatalog", +] +``` + +Update Promtail config: + +```yaml +clients: + - url: https://loki.service.consul/loki/api/v1/push + basic_auth: + username: promtail + password_file: password123 +``` + +## Use HashiCorp Vault to provider S3 credentials + +To provide static credentials: + +```hcl +template { + data = <<-EOH + {{ with secret "secret/minio/loki" }} + S3_ACCESS_KEY_ID={{ .Data.data.access_key }} + S3_SECRET_ACCESS_KEY={{ .Data.data.secret_key }} + {{- end }} + EOH + + destination = "secrets/s3.env" + env = true +} +``` + +It is better to provide dynamic credentials using +[AWS secret engine](https://www.vaultproject.io/docs/secrets/aws) when using AWS +S3: + +```hcl +template { + data = <<-EOH + {{ with secret "aws/creds/loki" -}} + S3_ACCESS_KEY_ID={{ .Data.access_key }} + S3_SECRET_ACCESS_KEY={{ 
.Data.secret_key }} + {{- end }} + EOH + + destination = "secrets/s3.env" + env = true +} +``` + +## Supplying alerting rules to Loki ruler + +### Using [`artifact` stanza](https://www.nomadproject.io/docs/job-specification/artifact) + +Alert rules can be downloaded using artifact stanza. It supports: + +- Git +- Mercurial +- HTTP +- Amazon S3 +- Google GCP + +Example with git: + +```hcl +artifact { + source = "git::github.com/someorg/observability//loki-rules" + destination = "local/rules/" +} +``` + +### Using local files + +Alert rules can be stored locally (beside job definition) and provided to Loki +ruler container with +[`template`](https://www.nomadproject.io/docs/job-specification/template) and +some HCL magic, namely: + +- [fileset](https://www.nomadproject.io/docs/job-specification/hcl2/functions/file/fileset) - + to generate a list of files +- [file](https://www.nomadproject.io/docs/job-specification/hcl2/functions/file/file) - + to get the content of a file +- [dynamic](https://www.nomadproject.io/docs/job-specification/hcl2/expressions#dynamic-blocks) - + to dynamically generate `template` stanza for each file found + +Example: + +```shell +> tree rules/ +rules/ +├── other-rule.yml +└── some-rule.yml + +0 directories, 2 files +``` + +```hcl +dynamic "template" { + for_each = fileset(".", "rules/**") + + content { + data = file(template.value) + destination = "local/${template.value}" + left_delimiter = "[[" + right_delimiter = "]]" + } +} +``` + +Each file will end up in `/local/rules/` inside ruler container. 
+ +### Using Consul K/V and Terraform + +```shell +> tree loki-rules/ +loki-rules/ +├── other-rule.yml +└── some-rule.yml + +0 directories, 2 files +``` + +Using Terraform +[Consul provider](https://registry.terraform.io/providers/hashicorp/consul/latest/docs/resources/keys) +put all files in `loki-rules/` to Consul K/V + +```hcl +resource "consul_keys" "loki-rules" { + dynamic "key" { + for_each = fileset("${path.module}/loki-rules", "**") + content { + path = "configs/loki-rules/${trimsuffix(key.value, ".yml")}" + value = file("${path.module}/loki-rules/${key.value}") + delete = true + } + } +} +``` + +Provide rules from K/V to Loki ruler container inside Nomad with +[`safeTree`](https://github.com/hashicorp/consul-template/blob/main/docs/templating-language.md#safetree) + +```hcl +template { + data = <<-EOF + {{- range safeTree "configs/loki-rules" }} + --- + {{ .Value | indent 2 }} + {{ end -}} + EOF + + destination = "local/rules/rules.yml" + change_mode = "signal" + change_signal = "SIGINT" +} +``` + +When updated in Consul K/V rules will be automatically updated in Loki ruler. diff --git a/production/nomad/loki/README.md b/production/nomad/loki/README.md new file mode 100644 index 000000000000..7df405d4cc76 --- /dev/null +++ b/production/nomad/loki/README.md @@ -0,0 +1,31 @@ +# Monolithic mode + +This Nomad job will deploy a Loki in monolithic mode with minimum dependencies, +using boltdb-shipper and S3 backend and with the ability to scale. + +## Usage + +Have a look at the job file and Loki configuration file and change it according +to suite your environment. 
+ +### Run job + +Inside directory with job run: + +```shell +nomad run job.nomad.hcl +``` + +### Scale Loki + +Change `count` in job file in `group "loki"` and run: + +```shell +nomad run job.nomad.hcl +``` + +or use Nomad CLI + +```shell +nomad job scale loki loki +``` diff --git a/production/nomad/loki/config.yml b/production/nomad/loki/config.yml new file mode 100644 index 000000000000..f51207ebc85b --- /dev/null +++ b/production/nomad/loki/config.yml @@ -0,0 +1,62 @@ +auth_enabled: false + +server: + log_level: info + http_listen_port: {{ env "NOMAD_PORT_http" }} + grpc_listen_port: {{ env "NOMAD_PORT_grpc" }} + +common: + replication_factor: 1 + # Tell Loki which address to advertise + instance_addr: {{ env "NOMAD_IP_grpc" }} + ring: + # Tell Loki which address to advertise in ring + instance_addr: {{ env "NOMAD_IP_grpc" }} + kvstore: + store: consul + prefix: loki/ + consul: + host: {{ env "attr.unique.network.ip-address" }}:8500 + +ingester: + wal: + dir: {{ env "NOMAD_ALLOC_DIR" }}/data/wal + flush_on_shutdown: true + replay_memory_ceiling: "1G" + +schema_config: + configs: + - from: 2022-05-15 + store: boltdb-shipper + object_store: s3 + schema: v12 + index: + prefix: index_ + period: 24h + +storage_config: + boltdb_shipper: + # Nomad ephemeral disk is used to store index and cache + # it will try to preserve /alloc/data between job updates + active_index_directory: {{ env "NOMAD_ALLOC_DIR" }}/data/index + cache_location: {{ env "NOMAD_ALLOC_DIR" }}/data/index-cache + shared_store: s3 + + aws: + endpoint: https://s3.endpoint.com + bucketnames: loki + region: us-west-1 + access_key_id: ${S3_ACCESS_KEY_ID} + secret_access_key: ${S3_SECRET_ACCESS_KEY} + s3forcepathstyle: true + +limits_config: + enforce_metric_name: false + reject_old_samples: true + reject_old_samples_max_age: 168h + +compactor: + working_directory: {{ env "NOMAD_ALLOC_DIR" }}/compactor + shared_store: s3 + compaction_interval: 5m + retention_enabled: true diff --git 
a/production/nomad/loki/job.nomad.hcl b/production/nomad/loki/job.nomad.hcl new file mode 100644 index 000000000000..f42e9675abba --- /dev/null +++ b/production/nomad/loki/job.nomad.hcl @@ -0,0 +1,87 @@ +job "loki" { + datacenters = ["dc1"] + + group "loki" { + count = 1 + + ephemeral_disk { + # Used to store index, chache, WAL + # Nomad will try to preserve the disk between job updates + size = 1000 + sticky = true + } + + network { + port "http" { + to = 3100 + static = 3100 + } + port "grpc" {} + } + + task "loki" { + driver = "docker" + user = "nobody" + + config { + image = "grafana/loki:2.5.0" + ports = [ + "http", + "grpc", + ] + args = [ + "-target=all", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + S3_ACCESS_KEY_ID= + S3_SECRET_ACCESS_KEY= + EOH + + destination = "secrets/s3.env" + env = true + } + + service { + name = "loki" + port = "http" + + # use Traefik to loadbalance between Loki instances + tags = [ + "traefik.enable=true", + "traefik.http.routers.loki.entrypoints=https", + "traefik.http.routers.loki.rule=Host(`loki.service.consul`)", + ] + + check { + name = "Loki" + port = "http" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + + initial_status = "passing" + } + } + + resources { + # adjust to suite your load + cpu = 500 + memory = 256 + # requiers memory_oversubscription + # https://www.nomadproject.io/api-docs/operator/scheduler#update-scheduler-configuration + # memory_max = 512 + } + } + } +} From 8014746b890dd25957c0c24da79af2dd6a9ea79e Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Fri, 27 May 2022 19:24:08 +0300 Subject: [PATCH 02/16] Commentout traefik in monolithic setup --- production/nomad/loki/job.nomad.hcl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/production/nomad/loki/job.nomad.hcl b/production/nomad/loki/job.nomad.hcl index 
f42e9675abba..9ed5bfa6ddf3 100644 --- a/production/nomad/loki/job.nomad.hcl +++ b/production/nomad/loki/job.nomad.hcl @@ -56,11 +56,11 @@ job "loki" { port = "http" # use Traefik to loadbalance between Loki instances - tags = [ - "traefik.enable=true", - "traefik.http.routers.loki.entrypoints=https", - "traefik.http.routers.loki.rule=Host(`loki.service.consul`)", - ] + # tags = [ + # "traefik.enable=true", + # "traefik.http.routers.loki.entrypoints=https", + # "traefik.http.routers.loki.rule=Host(`loki.service.consul`)", + # ] check { name = "Loki" From d61645e495d0b94e010cca8e840709a08f2c2ee1 Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Fri, 27 May 2022 19:26:51 +0300 Subject: [PATCH 03/16] Typos --- production/nomad/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/production/nomad/README.md b/production/nomad/README.md index fcc915b20b04..16a2fbb2424a 100644 --- a/production/nomad/README.md +++ b/production/nomad/README.md @@ -43,7 +43,7 @@ shouldn't be an issue. ## Run Loki behind ingress -When running multiple instances of Loki incoming request should be loadbalanced. +When running multiple instances of Loki incoming requests should be loadbalanced. 
Register Loki in Traefik: @@ -86,7 +86,7 @@ clients: password_file: password123 ``` -## Use HashiCorp Vault to provider S3 credentials +## Use HashiCorp Vault to provide S3 credentials To provide static credentials: From c8fc25025c869b48534ed5232c515794c2466f3c Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Fri, 27 May 2022 20:12:24 +0300 Subject: [PATCH 04/16] Add ruler configuration --- production/nomad/README.md | 14 +++++++------- production/nomad/loki/config.yml | 13 +++++++++++++ production/nomad/loki/job.nomad.hcl | 11 +++++++++++ production/nomad/loki/rules/fake/alerts.yml | 11 +++++++++++ 4 files changed, 42 insertions(+), 7 deletions(-) create mode 100644 production/nomad/loki/rules/fake/alerts.yml diff --git a/production/nomad/README.md b/production/nomad/README.md index 16a2fbb2424a..3e29ce3978a9 100644 --- a/production/nomad/README.md +++ b/production/nomad/README.md @@ -162,10 +162,10 @@ Example: ```shell > tree rules/ rules/ -├── other-rule.yml -└── some-rule.yml +└── fake + └── some-alerts.yml -0 directories, 2 files +1 directory, 1 file ``` ```hcl @@ -188,10 +188,10 @@ Each file will end up in `/local/rules/` inside ruler container. 
```shell > tree loki-rules/ loki-rules/ -├── other-rule.yml -└── some-rule.yml +└── fake + └── some-alerts.yml -0 directories, 2 files +1 directory, 1 file ``` Using Terraform @@ -223,7 +223,7 @@ template { {{ end -}} EOF - destination = "local/rules/rules.yml" + destination = "local/rules/fake/rules.yml" change_mode = "signal" change_signal = "SIGINT" } diff --git a/production/nomad/loki/config.yml b/production/nomad/loki/config.yml index f51207ebc85b..535da397854d 100644 --- a/production/nomad/loki/config.yml +++ b/production/nomad/loki/config.yml @@ -60,3 +60,16 @@ compactor: shared_store: s3 compaction_interval: 5m retention_enabled: true + +ruler: + alertmanager_url: https://alertmanager.service.consul + enable_alertmanager_v2: true + enable_api: true + external_url: https://loki.service.consul + rule_path: {{ env "NOMAD_ALLOC_DIR" }}/tmp/rules + storage: + type: local + local: + directory: {{ env "NOMAD_TASK_DIR" }}/rules + wal: + dir: {{ env "NOMAD_ALLOC_DIR" }}/data/ruler diff --git a/production/nomad/loki/job.nomad.hcl b/production/nomad/loki/job.nomad.hcl index 9ed5bfa6ddf3..a2d868539d15 100644 --- a/production/nomad/loki/job.nomad.hcl +++ b/production/nomad/loki/job.nomad.hcl @@ -51,6 +51,17 @@ job "loki" { env = true } + dynamic "template" { + for_each = fileset(".", "rules/**") + + content { + data = file(template.value) + destination = "local/${template.value}" + left_delimiter = "[[" + right_delimiter = "]]" + } + } + service { name = "loki" port = "http" diff --git a/production/nomad/loki/rules/fake/alerts.yml b/production/nomad/loki/rules/fake/alerts.yml new file mode 100644 index 000000000000..2c9f20ff79ef --- /dev/null +++ b/production/nomad/loki/rules/fake/alerts.yml @@ -0,0 +1,11 @@ +groups: + - name: always-firing + rules: + - alert: fire + expr: | + 1 > 0 + for: 0m + labels: + severity: warning + annotations: + summary: test From 2264290711463031410bedb1a8223a84d34e5fd0 Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Fri, 27 May 2022 
20:19:02 +0300 Subject: [PATCH 05/16] Add loki-simple --- production/nomad/README.md | 2 +- production/nomad/loki-simple/README.md | 33 +++++ production/nomad/loki-simple/config.yml | 75 ++++++++++ production/nomad/loki-simple/job.nomad.hcl | 159 +++++++++++++++++++++ production/nomad/loki/README.md | 6 +- 5 files changed, 272 insertions(+), 3 deletions(-) create mode 100644 production/nomad/loki-simple/README.md create mode 100644 production/nomad/loki-simple/config.yml create mode 100644 production/nomad/loki-simple/job.nomad.hcl diff --git a/production/nomad/README.md b/production/nomad/README.md index 3e29ce3978a9..377251585cf2 100644 --- a/production/nomad/README.md +++ b/production/nomad/README.md @@ -122,7 +122,7 @@ template { } ``` -## Supplying alerting rules to Loki ruler +## Supplying alerting rules to Loki ruler with `local` ruler storage ### Using [`artifact` stanza](https://www.nomadproject.io/docs/job-specification/artifact) diff --git a/production/nomad/loki-simple/README.md b/production/nomad/loki-simple/README.md new file mode 100644 index 000000000000..2b2bff4dd7a3 --- /dev/null +++ b/production/nomad/loki-simple/README.md @@ -0,0 +1,33 @@ +# Simple scalable deployment mode + +This Nomad job will deploy a Loki in +[simple scalable deployment mode](https://grafana.com/docs/loki/latest/fundamentals/architecture/deployment-modes/#simple-scalable-deployment-mode) +with minimum dependencies, using boltdb-shipper and S3 backend and with the +ability to scale. + +## Usage + +Have a look at the job file and Loki configuration file and change it according +to suite your environment. 
+ +### Run job + +Inside directory with job run: + +```shell +nomad run job.nomad.hcl +``` + +### Scale Loki + +Change `count` in job file in `group "loki"` and run: + +```shell +nomad run job.nomad.hcl +``` + +or use Nomad CLI + +```shell +nomad job scale loki write +``` diff --git a/production/nomad/loki-simple/config.yml b/production/nomad/loki-simple/config.yml new file mode 100644 index 000000000000..909be2fe49a0 --- /dev/null +++ b/production/nomad/loki-simple/config.yml @@ -0,0 +1,75 @@ +auth_enabled: false + +server: + log_level: info + http_listen_port: {{ env "NOMAD_PORT_http" }} + grpc_listen_port: {{ env "NOMAD_PORT_grpc" }} + +common: + replication_factor: 1 + # Tell Loki which address to advertise + instance_addr: {{ env "NOMAD_IP_grpc" }} + ring: + # Tell Loki which address to advertise in ring + instance_addr: {{ env "NOMAD_IP_grpc" }} + kvstore: + store: consul + prefix: loki-test/ + consul: + host: {{ env "attr.unique.network.ip-address" }}:8500 + +ingester: + wal: + dir: {{ env "NOMAD_ALLOC_DIR" }}/data/wal + flush_on_shutdown: true + replay_memory_ceiling: "1G" + +schema_config: + configs: + - from: 2022-05-15 + store: boltdb-shipper + object_store: s3 + schema: v12 + index: + prefix: index_ + period: 24h + +storage_config: + boltdb_shipper: + # Nomad ephemeral disk is used to store index and cache + # it will try to preserve /alloc/data between job updates + active_index_directory: {{ env "NOMAD_ALLOC_DIR" }}/data/index + cache_location: {{ env "NOMAD_ALLOC_DIR" }}/data/index-cache + shared_store: s3 + + aws: + endpoint: https://s3.endpoint.com + bucketnames: loki + region: us-west-1 + access_key_id: ${S3_ACCESS_KEY_ID} + secret_access_key: ${S3_SECRET_ACCESS_KEY} + s3forcepathstyle: true + +limits_config: + enforce_metric_name: false + reject_old_samples: true + reject_old_samples_max_age: 168h + +compactor: + working_directory: {{ env "NOMAD_ALLOC_DIR" }}/compactor + shared_store: s3 + compaction_interval: 5m + retention_enabled: true + 
+ruler: + alertmanager_url: https://alertmanager.service.consul + enable_alertmanager_v2: true + enable_api: true + external_url: https://loki.service.consul + rule_path: {{ env "NOMAD_ALLOC_DIR" }}/tmp/rules + storage: + type: local + local: + directory: {{ env "NOMAD_TASK_DIR" }}/rules + wal: + dir: {{ env "NOMAD_ALLOC_DIR" }}/data/ruler diff --git a/production/nomad/loki-simple/job.nomad.hcl b/production/nomad/loki-simple/job.nomad.hcl new file mode 100644 index 000000000000..6c705aa0343c --- /dev/null +++ b/production/nomad/loki-simple/job.nomad.hcl @@ -0,0 +1,159 @@ +locals { + version = "2.5.0" +} + +job "loki" { + datacenters = ["dc1"] + + group "read" { + count = 1 + + ephemeral_disk { + size = 1000 + sticky = true + } + + network { + port "http" {} + port "grpc" {} + } + + task "read" { + driver = "docker" + user = "nobody" + + config { + image = "grafana/loki:${local.version}" + + ports = [ + "http", + "grpc", + ] + + args = [ + "-target=read", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + S3_ACCESS_KEY_ID= + S3_SECRET_ACCESS_KEY= + EOH + + destination = "secrets/s3.env" + env = true + } + + service { + name = "loki-read" + port = "http" + + tags = [ + "traefik.enable=true", + "traefik.http.routers.loki-read.entrypoints=https", + "traefik.http.routers.loki-read.rule=Host(`loki-read.service.consul`)", + ] + + check { + name = "Loki read" + port = "http" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + + initial_status = "passing" + } + } + + resources { + cpu = 500 + memory = 256 + } + } + } + + group "write" { + count = 2 + + ephemeral_disk { + size = 1000 + sticky = true + } + + network { + port "http" {} + port "grpc" {} + } + + task "write" { + driver = "docker" + user = "nobody" + + config { + image = "grafana/loki:${local.version}" + + ports = [ + "http", + "grpc", + ] + + args = [ + 
"-target=write", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + S3_ACCESS_KEY_ID= + S3_SECRET_ACCESS_KEY= + EOH + + destination = "secrets/s3.env" + env = true + } + + service { + name = "loki-write" + port = "http" + + tags = [ + "traefik.enable=true", + "traefik.http.routers.loki-write.entrypoints=https", + "traefik.http.routers.loki-write.rule=Host(`loki-write.service.consul`)", + ] + + check { + name = "Loki write" + port = "http" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + + initial_status = "passing" + } + } + + resources { + cpu = 500 + memory = 256 + } + } + } +} diff --git a/production/nomad/loki/README.md b/production/nomad/loki/README.md index 7df405d4cc76..0830f02d42dd 100644 --- a/production/nomad/loki/README.md +++ b/production/nomad/loki/README.md @@ -1,7 +1,9 @@ # Monolithic mode -This Nomad job will deploy a Loki in monolithic mode with minimum dependencies, -using boltdb-shipper and S3 backend and with the ability to scale. +This Nomad job will deploy a Loki in +[monolithic mode](https://grafana.com/docs/loki/latest/fundamentals/architecture/deployment-modes/#monolithic-mode) +with minimum dependencies, using boltdb-shipper and S3 backend and with the +ability to scale. 
## Usage From 5771bc6688274fcb8bbc55310dce82adac11042f Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Fri, 27 May 2022 20:19:52 +0300 Subject: [PATCH 06/16] Fix consul prefix --- production/nomad/loki-simple/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/production/nomad/loki-simple/config.yml b/production/nomad/loki-simple/config.yml index 909be2fe49a0..535da397854d 100644 --- a/production/nomad/loki-simple/config.yml +++ b/production/nomad/loki-simple/config.yml @@ -14,7 +14,7 @@ common: instance_addr: {{ env "NOMAD_IP_grpc" }} kvstore: store: consul - prefix: loki-test/ + prefix: loki/ consul: host: {{ env "attr.unique.network.ip-address" }}:8500 From 3156bda000217d498dd18174d26912deb914358b Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Fri, 27 May 2022 21:15:36 +0300 Subject: [PATCH 07/16] Add loki-distributed example --- production/nomad/README.md | 7 +- production/nomad/loki-distributed/README.md | 82 ++ production/nomad/loki-distributed/config.yml | 130 +++ .../nomad/loki-distributed/job.nomad.hcl | 987 ++++++++++++++++++ production/nomad/loki/README.md | 33 - 5 files changed, 1203 insertions(+), 36 deletions(-) create mode 100644 production/nomad/loki-distributed/README.md create mode 100644 production/nomad/loki-distributed/config.yml create mode 100644 production/nomad/loki-distributed/job.nomad.hcl delete mode 100644 production/nomad/loki/README.md diff --git a/production/nomad/README.md b/production/nomad/README.md index 377251585cf2..3208f7eb3122 100644 --- a/production/nomad/README.md +++ b/production/nomad/README.md @@ -30,7 +30,7 @@ For use in production it is recommended to: achived with Vault's [PKI secret engine](https://www.vaultproject.io/docs/secrets/pki) -Use the advanced Loki setup as an example. +See [loki-distributed](./loki-distributed) setup for an inspiration. ## Service discovery when scaling @@ -43,7 +43,8 @@ shouldn't be an issue. 
## Run Loki behind ingress -When running multiple instances of Loki incoming requests should be loadbalanced. +When running multiple instances of Loki incoming requests should be +loadbalanced. Register Loki in Traefik: @@ -55,7 +56,7 @@ tags = [ ] ``` -## Setip basicauth +## Setup basicauth Generate basicauth credentials: diff --git a/production/nomad/loki-distributed/README.md b/production/nomad/loki-distributed/README.md new file mode 100644 index 000000000000..caa5fac76155 --- /dev/null +++ b/production/nomad/loki-distributed/README.md @@ -0,0 +1,82 @@ +# Microservices mode + +This Nomad job will deploy a Loki in +[microservices mode](https://grafana.com/docs/loki/latest/fundamentals/architecture/deployment-modes/#microservices-mode) +using boltdb-shipper and S3 backend GRPC protected with mTLS and with HTTP +endpoinds accessible only through Traefik+Consul Connect. + +Make sure to go over the job file and adjust it to your needs. + +## Secure endpoints + +### HTTP + +HTTP endpoints are hidden behind Consul Connect. + +Ring endpoints are exposed through Traefik for operator and must be protected by +basicauth. Example: + +```hcl +tags = [ + "traefik.enable=true", + "traefik.consulcatalog.connect=true", + + "traefik.http.routers.loki-ingester-ring.entrypoints=https", + "traefik.http.routers.loki-ingester-ring.rule=Host(`loki-ingester.service.consul`) && Path(`/ring`)", + "traefik.http.middlewares.loki-ingester-ring.basicauth.users=devops:$apr11bMKZL02A$QrOgT3NAOx.koXWnqfXbo0", + "traefik.http.routers.loki-ingester-ring.middlewares=loki-ingester-ring@consulcatalog", +] +``` + +Read and write path (including ruler api) are also exposed and must be protected +by basicauth. 
Example: + +```hcl +tags = [ + "traefik.enable=true", + "traefik.consulcatalog.connect=true", + + "traefik.http.routers.loki-distributor.entrypoints=https", + "traefik.http.routers.loki-distributor.rule=Host(`loki-distributor.service.consul`)", + "traefik.http.middlewares.loki-distributor.basicauth.users=promtail:$apr1$y5tgAEDC$8SHWJq7aBoDd0qnOTQkok0", + "traefik.http.routers.loki-distributor.middlewares=loki-distributor@consulcatalog", + + "traefik.http.routers.loki-distributor-ring.entrypoints=https", + "traefik.http.routers.loki-distributor-ring.rule=Host(`loki-distributor.service.consul`) && Path(`/distributor/ring`)", + "traefik.http.middlewares.loki-distributor-ring.basicauth.users=devops:$apr11bMKZL02A$QrOgT3NAOx.koXWnqfXbo0", + "traefik.http.routers.loki-distributor-ring.middlewares=loki-distributor-ring@consulcatalog", +] +``` + +### GRPC + +GRPC endpoints are using self-signed certificates generated by +[Vault](https://www.vaultproject.io/docs/secrets/pki). + +### `/metrics` and `/ready` endpoints + +Exposed with +[`expose` stanza](https://www.nomadproject.io/docs/job-specification/expose) + +## Gather metrics + +To collect metrics from all components use this config: + +```yaml +- job_name: "loki" + consul_sd_configs: + - services: + - "loki-compactor" + - "loki-ruler" + - "loki-distributor" + - "loki-ingestor" + - "loki-querier" + - "loki-index-gateway" + - "loki-query-frontend" + - "loki-query-scheduler" + relabel_configs: + - source_labels: ["__meta_consul_service_metadata_alloc_id"] + target_label: "instance" + - source_labels: ["__meta_consul_service_metadata_component"] + target_label: "component" +``` diff --git a/production/nomad/loki-distributed/config.yml b/production/nomad/loki-distributed/config.yml new file mode 100644 index 000000000000..80d5609fbdbc --- /dev/null +++ b/production/nomad/loki-distributed/config.yml @@ -0,0 +1,130 @@ +auth_enabled: false + +server: + log_level: info + # HTTP is listening on localhost to make it accessible 
only through Consul Connect + http_listen_port: 80 + http_listen_address: 127.0.0.1 + grpc_listen_port: {{ env "NOMAD_PORT_grpc" }} + grpc_tls_config: + client_auth_type: "RequireAndVerifyClientCert" + client_ca_file: "/secrets/certs/CA.pem" + cert_file: "/secrets/certs/cert.pem" + key_file: "/secrets/certs/key.pem" + +common: + replication_factor: 2 + # Tell Loki which address to advertise + instance_addr: {{ env "NOMAD_IP_grpc" }} + # Failure domain + # Must be the same as specified in job constraints + instance_availability_zone: {{ env "node.unique.name" }} + zone_awareness_enabled: true + ring: + # Tell Loki which address to advertise in ring + instance_addr: {{ env "NOMAD_IP_grpc" }} + kvstore: + store: consul + prefix: loki/ + consul: + host: {{ env "attr.unique.network.ip-address" }}:8500 + +ingester_client: + grpc_client_config: + grpc_compression: snappy + tls_enabled: true + tls_ca_path: "/secrets/certs/CA.pem" + tls_cert_path: "/secrets/certs/cert.pem" + tls_key_path: "/secrets/certs/key.pem" + +ingester: + wal: + dir: {{ env "NOMAD_ALLOC_DIR" }}/data/wal + flush_on_shutdown: true + replay_memory_ceiling: "1G" + +query_scheduler: + grpc_client_config: + grpc_compression: snappy + tls_enabled: true + tls_ca_path: "/secrets/certs/CA.pem" + tls_cert_path: "/secrets/certs/cert.pem" + tls_key_path: "/secrets/certs/key.pem" + +frontend: + scheduler_address: loki-query-scheduler.service.consul:9096 + compress_responses: true + log_queries_longer_than: 5s + grpc_client_config: + grpc_compression: snappy + tls_enabled: true + tls_ca_path: "/secrets/certs/CA.pem" + tls_cert_path: "/secrets/certs/cert.pem" + tls_key_path: "/secrets/certs/key.pem" + +frontend_worker: + scheduler_address: loki-query-scheduler.service.consul:9096 + grpc_client_config: + grpc_compression: snappy + tls_enabled: true + tls_ca_path: "/secrets/certs/CA.pem" + tls_cert_path: "/secrets/certs/cert.pem" + tls_key_path: "/secrets/certs/key.pem" + +schema_config: + configs: + - from: 2022-05-15 
+ store: boltdb-shipper + object_store: s3 + schema: v12 + index: + prefix: index_ + period: 24h + +storage_config: + boltdb_shipper: + # Nomad ephemeral disk is used to store index and cache + # it will try to preserve /alloc/data between job updates + active_index_directory: {{ env "NOMAD_ALLOC_DIR" }}/data/index + cache_location: {{ env "NOMAD_ALLOC_DIR" }}/data/index-cache + shared_store: s3 + index_gateway_client: + server_address: loki-index-gateway.service.consul:9097 + grpc_client_config: + grpc_compression: snappy + tls_enabled: true + tls_ca_path: "/secrets/certs/CA.pem" + tls_cert_path: "/secrets/certs/cert.pem" + tls_key_path: "/secrets/certs/key.pem" + + aws: + endpoint: https://minio.service.consul + bucketnames: loki + region: us-west-1 + access_key_id: ${S3_ACCESS_KEY_ID} + secret_access_key: ${S3_SECRET_ACCESS_KEY} + s3forcepathstyle: true + +compactor: + working_directory: {{ env "NOMAD_ALLOC_DIR" }}/compactor + shared_store: s3 + compaction_interval: 24h + retention_enabled: true + +ruler: + alertmanager_url: https://alertmanager.service.consul + enable_alertmanager_v2: true + enable_api: true + external_url: https://loki-ruler.service.consul + rule_path: {{ env "NOMAD_ALLOC_DIR" }}/tmp/rules + storage: + type: local + local: + directory: {{ env "NOMAD_TASK_DIR" }}/rules + wal: + dir: {{ env "NOMAD_ALLOC_DIR" }}/data/ruler + +limits_config: + enforce_metric_name: false + reject_old_samples: true + reject_old_samples_max_age: 168h diff --git a/production/nomad/loki-distributed/job.nomad.hcl b/production/nomad/loki-distributed/job.nomad.hcl new file mode 100644 index 000000000000..0dc1d3b377a9 --- /dev/null +++ b/production/nomad/loki-distributed/job.nomad.hcl @@ -0,0 +1,987 @@ +locals { + version = "2.5.0" + certs = { + "CA" = "issuing_ca", + "cert" = "certificate", + "key" = "private_key", + } +} + +job "loki" { + datacenters = ["dc1"] + + vault { + policies = ["loki"] + } + + group "compactor" { + count = 1 + + ephemeral_disk { + size = 1000 + 
migrate = true + sticky = true + } + + network { + mode = "bridge" + + port "http" {} + port "health" {} + port "grpc" {} + } + + service { + name = "loki-compactor" + port = "http" + + meta { + alloc_id = NOMAD_ALLOC_ID + component = "compactor" + } + + tags = [ + "traefik.enable=true", + "traefik.consulcatalog.connect=true", + + "traefik.http.routers.loki-compactor-ring.entrypoints=https", + "traefik.http.routers.loki-compactor-ring.rule=Host(`loki-compactor.service.consul`) && Path(`/compactor/ring`)", + ] + + check { + name = "Loki compactor" + port = "health" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + + connect { + sidecar_service { + proxy { + local_service_port = 80 + + expose { + path { + path = "/metrics" + protocol = "http" + local_path_port = 80 + listener_port = "http" + } + + path { + path = "/ready" + protocol = "http" + local_path_port = 80 + listener_port = "health" + } + } + } + } + } + } + + task "compactor" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/loki:${local.version}" + ports = [ + "http", + "health", + "grpc", + ] + + args = [ + "-target=compactor", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + {{ with secret "secret/minio/loki" }} + S3_ACCESS_KEY_ID={{ .Data.data.access_key }} + S3_SECRET_ACCESS_KEY={{ .Data.data.secret_key }} + {{- end }} + EOH + + destination = "secrets/s3.env" + env = true + } + + dynamic "template" { + for_each = local.certs + content { + data = <<-EOH + {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-compactor.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} + {{ .Data.${template.value} }} + {{- end -}} + EOH + + destination = "secrets/certs/${template.key}.pem" + change_mode = "restart" + splay = "1m" + } + } + + resources { + cpu = 3000 + memory 
= 256 + memory_max = 1024 + } + } + } + + group "ruler" { + count = 1 + + ephemeral_disk { + size = 1000 + migrate = true + sticky = true + } + + network { + mode = "bridge" + + port "http" {} + port "health" {} + port "grpc" {} + } + + service { + name = "loki-ruler" + port = "http" + + meta { + alloc_id = NOMAD_ALLOC_ID + component = "ruler" + } + + tags = [ + "traefik.enable=true", + "traefik.consulcatalog.connect=true", + + "traefik.http.routers.loki-ruler.entrypoints=https", + "traefik.http.routers.loki-ruler.rule=Host(`loki-query-frontend.service.consul`) && (PathPrefix(`/loki/api/v1/rules`) || PathPrefix(`/api/prom/rules`) || PathPrefix (`/prometheus/api/v1`))", + + "traefik.http.routers.loki-ruler-ring.entrypoints=https", + "traefik.http.routers.loki-ruler-ring.rule=Host(`loki-ruler.service.consul`) && Path(`/ruler/ring`)", + ] + + check { + name = "Loki ruler" + port = "health" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + + connect { + sidecar_service { + proxy { + local_service_port = 80 + + expose { + path { + path = "/metrics" + protocol = "http" + local_path_port = 80 + listener_port = "http" + } + + path { + path = "/ready" + protocol = "http" + local_path_port = 80 + listener_port = "health" + } + } + } + } + } + } + + task "ruler" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/loki:${local.version}" + ports = [ + "http", + "health", + "grpc", + ] + + args = [ + "-target=ruler", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + dynamic "template" { + for_each = fileset(".", "rules/**") + + content { + data = file(template.value) + destination = "local/${template.value}" + left_delimiter = "[[" + right_delimiter = "]]" + } + } + + template { + data = <<-EOH + {{ with secret "secret/minio/loki" }} + S3_ACCESS_KEY_ID={{ .Data.data.access_key }} + 
S3_SECRET_ACCESS_KEY={{ .Data.data.secret_key }} + {{- end }} + EOH + + destination = "secrets/s3.env" + env = true + } + + dynamic "template" { + for_each = local.certs + content { + data = <<-EOH + {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-ruler.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} + {{ .Data.${template.value} }} + {{- end -}} + EOH + + destination = "secrets/certs/${template.key}.pem" + change_mode = "restart" + splay = "5m" + } + } + + resources { + cpu = 1000 + memory = 256 + memory_max = 512 + } + } + } + + group "distibutor" { + count = 2 + + network { + mode = "bridge" + + port "http" {} + port "health" {} + port "grpc" {} + } + + service { + name = "loki-distributor" + port = "http" + + meta { + alloc_id = NOMAD_ALLOC_ID + component = "distributor" + } + + tags = [ + "traefik.enable=true", + "traefik.consulcatalog.connect=true", + + "traefik.http.routers.loki-distributor.entrypoints=https", + "traefik.http.routers.loki-distributor.rule=Host(`loki-distributor.service.consul`)", + + "traefik.http.routers.loki-distributor-ring.entrypoints=https", + "traefik.http.routers.loki-distributor-ring.rule=Host(`loki-distributor.cinarra.com`) && Path(`/distributor/ring`)", + ] + + check { + name = "Loki distibutor" + port = "health" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + + connect { + sidecar_service { + proxy { + local_service_port = 80 + + expose { + path { + path = "/metrics" + protocol = "http" + local_path_port = 80 + listener_port = "http" + } + + path { + path = "/ready" + protocol = "http" + local_path_port = 80 + listener_port = "health" + } + } + } + } + } + } + + task "distibutor" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/loki:${local.version}" + ports = [ + "http", + "health", + "grpc", + ] + + args = [ + "-target=distributor", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + 
template { + data = file("config.yml") + destination = "local/config.yml" + } + + dynamic "template" { + for_each = local.certs + content { + data = <<-EOH + {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-distributer.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} + {{ .Data.${template.value} }} + {{- end -}} + EOH + + destination = "secrets/certs/${template.key}.pem" + change_mode = "restart" + splay = "5m" + } + } + + resources { + cpu = 200 + memory = 128 + memory_max = 1024 + } + } + } + + group "ingester" { + count = 2 + + constraint { + # choose your failure domain + # must be the same as `instance_availability_zone` in config file + distinct_property = node.unique.name + # distinct_property = node.datacenter + # distinct_property = attr.platform.aws.placement.availability-zone + } + + ephemeral_disk { + size = 4000 + migrate = true + sticky = true + } + + network { + mode = "bridge" + + port "http" {} + port "health" {} + port "grpc" {} + } + + service { + name = "loki-ingester" + port = "http" + + meta { + alloc_id = NOMAD_ALLOC_ID + component = "ingester" + } + + tags = [ + "traefik.enable=true", + "traefik.consulcatalog.connect=true", + + "traefik.http.routers.loki-ingester-ring.entrypoints=https", + "traefik.http.routers.loki-ingester-ring.rule=Host(`loki-ingester.service.consul`) && Path(`/ring`)", + ] + + check { + name = "Loki ingester" + port = "health" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + + connect { + sidecar_service { + proxy { + local_service_port = 80 + + expose { + path { + path = "/metrics" + protocol = "http" + local_path_port = 80 + listener_port = "http" + } + + path { + path = "/ready" + protocol = "http" + local_path_port = 80 + listener_port = "health" + } + } + } + } + } + } + + task "ingester" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/loki:${local.version}" + ports = [ + "http", + "health", + 
"grpc", + ] + + args = [ + "-target=ingester", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + {{ with secret "secret/minio/loki" }} + S3_ACCESS_KEY_ID={{ .Data.data.access_key }} + S3_SECRET_ACCESS_KEY={{ .Data.data.secret_key }} + {{- end }} + EOH + + destination = "secrets/s3.env" + env = true + } + + dynamic "template" { + for_each = local.certs + content { + data = <<-EOH + {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-ingestor.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} + {{ .Data.${template.value} }} + {{- end -}} + EOH + + destination = "secrets/certs/${template.key}.pem" + change_mode = "restart" + splay = "5m" + } + } + + resources { + cpu = 300 + memory = 128 + memory_max = 2048 + } + } + } + + group "querier" { + count = 2 + + network { + mode = "bridge" + + port "http" {} + port "health" {} + port "grpc" {} + } + + service { + name = "loki-querier" + port = "http" + + meta { + alloc_id = NOMAD_ALLOC_ID + component = "querier" + } + + check { + name = "Loki querier" + port = "health" + type = "http" + path = "/ready" + interval = "50s" + timeout = "1s" + } + + connect { + sidecar_service { + proxy { + local_service_port = 80 + + expose { + path { + path = "/metrics" + protocol = "http" + local_path_port = 80 + listener_port = "http" + } + + path { + path = "/ready" + protocol = "http" + local_path_port = 80 + listener_port = "health" + } + } + } + } + } + } + + task "querier" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/loki:${local.version}" + ports = [ + "http", + "health", + "grpc", + ] + + args = [ + "-target=querier", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + {{ 
with secret "secret/minio/loki" }} + S3_ACCESS_KEY_ID={{ .Data.data.access_key }} + S3_SECRET_ACCESS_KEY={{ .Data.data.secret_key }} + {{- end }} + EOH + + destination = "secrets/s3.env" + env = true + } + + dynamic "template" { + for_each = local.certs + content { + data = <<-EOH + {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-querier.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} + {{ .Data.${template.value} }} + {{- end -}} + EOH + + destination = "secrets/certs/${template.key}.pem" + change_mode = "restart" + splay = "5m" + } + } + + resources { + cpu = 200 + memory = 128 + memory_max = 2048 + } + } + } + + group "query-scheduler" { + count = 2 + + network { + mode = "bridge" + + port "http" {} + port "health" {} + port "grpc" { + to = 9096 + static = 9096 + } + } + + service { + name = "loki-query-scheduler" + port = "http" + + meta { + alloc_id = NOMAD_ALLOC_ID + component = "query-scheduler" + } + + check { + name = "Loki query-scheduler" + port = "health" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + + connect { + sidecar_service { + proxy { + local_service_port = 80 + + expose { + path { + path = "/metrics" + protocol = "http" + local_path_port = 80 + listener_port = "http" + } + + path { + path = "/ready" + protocol = "http" + local_path_port = 80 + listener_port = "health" + } + } + } + } + } + } + + task "query-scheduler" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/loki:${local.version}" + ports = [ + "http", + "health", + "grpc", + ] + + args = [ + "-target=query-scheduler", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + dynamic "template" { + for_each = local.certs + content { + data = <<-EOH + {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-query-scheduler.service.consul" (env 
"attr.unique.network.ip-address" | printf "ip_sans=%s") -}} + {{ .Data.${template.value} }} + {{- end -}} + EOH + + destination = "secrets/certs/${template.key}.pem" + change_mode = "restart" + splay = "5m" + } + } + + resources { + cpu = 100 + memory = 64 + memory_max = 128 + } + } + } + + group "query-frontend" { + count = 2 + + network { + mode = "bridge" + + port "http" {} + port "health" {} + port "grpc" {} + } + + service { + name = "loki-query-frontend" + port = "http" + + meta { + alloc_id = NOMAD_ALLOC_ID + component = "query-frontend" + } + + tags = [ + "traefik.enable=true", + "traefik.consulcatalog.connect=true", + + "traefik.http.routers.loki-query-frontend.entrypoints=https", + "traefik.http.routers.loki-query-frontend.rule=Host(`loki-query-frontend.service.consul`)", + ] + + check { + name = "Loki query-frontend" + port = "health" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + + connect { + sidecar_service { + proxy { + local_service_port = 80 + + expose { + path { + path = "/metrics" + protocol = "http" + local_path_port = 80 + listener_port = "http" + } + + path { + path = "/ready" + protocol = "http" + local_path_port = 80 + listener_port = "health" + } + } + } + } + } + } + + task "query-frontend" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/loki:${local.version}" + ports = [ + "http", + "health", + "grpc", + ] + + args = [ + "-target=query-frontend", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + dynamic "template" { + for_each = local.certs + content { + data = <<-EOH + {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-query-frontend.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} + {{ .Data.${template.value} }} + {{- end -}} + EOH + + destination = "secrets/certs/${template.key}.pem" + change_mode = "restart" 
+ splay = "5m" + } + } + + resources { + cpu = 100 + memory = 64 + memory_max = 128 + } + } + } + + group "index-gateway" { + count = 1 + + ephemeral_disk { + size = 1000 + migrate = true + sticky = true + } + + network { + mode = "bridge" + + port "http" {} + port "health" {} + port "grpc" { + to = 9097 + static = 9097 + } + } + + service { + name = "loki-index-gateway" + port = "http" + + meta { + alloc_id = NOMAD_ALLOC_ID + component = "index-gateway" + } + + check { + name = "Loki index-gateway" + port = "health" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + + connect { + sidecar_service { + proxy { + local_service_port = 80 + + expose { + path { + path = "/metrics" + protocol = "http" + local_path_port = 80 + listener_port = "http" + } + + path { + path = "/ready" + protocol = "http" + local_path_port = 80 + listener_port = "health" + } + } + } + } + } + } + + task "index-gateway" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/loki:${local.version}" + ports = [ + "http", + "health", + "grpc", + ] + + args = [ + "-target=index-gateway", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + {{ with secret "secret/minio/loki" }} + S3_ACCESS_KEY_ID={{ .Data.data.access_key }} + S3_SECRET_ACCESS_KEY={{ .Data.data.secret_key }} + {{- end }} + EOH + + destination = "secrets/s3.env" + env = true + } + + dynamic "template" { + for_each = local.certs + content { + data = <<-EOH + {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-index-gateway.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} + {{ .Data.${template.value} }} + {{- end -}} + EOH + + destination = "secrets/certs/${template.key}.pem" + change_mode = "restart" + splay = "5m" + } + } + + resources { + cpu = 200 + memory = 128 + memory_max = 1024 + } + } + } 
+} diff --git a/production/nomad/loki/README.md b/production/nomad/loki/README.md deleted file mode 100644 index 0830f02d42dd..000000000000 --- a/production/nomad/loki/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# Monolithic mode - -This Nomad job will deploy a Loki in -[monolithic mode](https://grafana.com/docs/loki/latest/fundamentals/architecture/deployment-modes/#monolithic-mode) -with minimum dependencies, using boltdb-shipper and S3 backend and with the -ability to scale. - -## Usage - -Have a look at the job file and Loki configuration file and change it according -to suite your environment. - -### Run job - -Inside directory with job run: - -```shell -nomad run job.nomad.hcl -``` - -### Scale Loki - -Change `count` in job file in `group "loki"` and run: - -```shell -nomad run job.nomad.hcl -``` - -or use Nomad CLI - -```shell -nomad job scale loki loki -``` From 977169cbff36075f9499f7838f5eccd386e098d1 Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Fri, 27 May 2022 21:38:06 +0300 Subject: [PATCH 08/16] Add missing README --- production/nomad/loki/README.md | 33 +++++++++++++++++++++++++++++ production/nomad/loki/job.nomad.hcl | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 production/nomad/loki/README.md diff --git a/production/nomad/loki/README.md b/production/nomad/loki/README.md new file mode 100644 index 000000000000..0830f02d42dd --- /dev/null +++ b/production/nomad/loki/README.md @@ -0,0 +1,33 @@ +# Monolithic mode + +This Nomad job will deploy a Loki in +[monolithic mode](https://grafana.com/docs/loki/latest/fundamentals/architecture/deployment-modes/#monolithic-mode) +with minimum dependencies, using boltdb-shipper and S3 backend and with the +ability to scale. + +## Usage + +Have a look at the job file and Loki configuration file and change it according +to suite your environment. 
+ +### Run job + +Inside directory with job run: + +```shell +nomad run job.nomad.hcl +``` + +### Scale Loki + +Change `count` in job file in `group "loki"` and run: + +```shell +nomad run job.nomad.hcl +``` + +or use Nomad CLI + +```shell +nomad job scale loki loki +``` diff --git a/production/nomad/loki/job.nomad.hcl b/production/nomad/loki/job.nomad.hcl index a2d868539d15..6956496e614c 100644 --- a/production/nomad/loki/job.nomad.hcl +++ b/production/nomad/loki/job.nomad.hcl @@ -5,7 +5,7 @@ job "loki" { count = 1 ephemeral_disk { - # Used to store index, chache, WAL + # Used to store index, cache, WAL # Nomad will try to preserve the disk between job updates size = 1000 sticky = true From 0b986a93631f890238a252c59dcaaaddc4485c12 Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Fri, 27 May 2022 22:10:39 +0300 Subject: [PATCH 09/16] Add explanation why use Consul Connect and Vault PKI at the same time --- production/nomad/loki-distributed/README.md | 7 +++++++ production/nomad/loki-simple/README.md | 4 ++-- production/nomad/loki/README.md | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/production/nomad/loki-distributed/README.md b/production/nomad/loki-distributed/README.md index caa5fac76155..42e7e931484e 100644 --- a/production/nomad/loki-distributed/README.md +++ b/production/nomad/loki-distributed/README.md @@ -9,6 +9,13 @@ Make sure to go over the job file and adjust it to your needs. ## Secure endpoints +Unfortenately Consul Connect cannot be used to secure GRPC communication between +Loki components, since some components should be able to connect to all +instances of other components. That is why internal Loki components +communication happening over GRPC are secured with mTLS with certificates +provisioned with Vault PKI. HTTP (user and external facing endpoints) are +protected with Consul Connect. + ### HTTP HTTP endpoints are hidden behind Consul Connect. 
diff --git a/production/nomad/loki-simple/README.md b/production/nomad/loki-simple/README.md index 2b2bff4dd7a3..deaf7dcb1e5b 100644 --- a/production/nomad/loki-simple/README.md +++ b/production/nomad/loki-simple/README.md @@ -7,8 +7,8 @@ ability to scale. ## Usage -Have a look at the job file and Loki configuration file and change it according -to suite your environment. +Have a look at the job file and Loki configuration file and change it to suite +your environment. ### Run job diff --git a/production/nomad/loki/README.md b/production/nomad/loki/README.md index 0830f02d42dd..1f24249861bf 100644 --- a/production/nomad/loki/README.md +++ b/production/nomad/loki/README.md @@ -7,8 +7,8 @@ ability to scale. ## Usage -Have a look at the job file and Loki configuration file and change it according -to suite your environment. +Have a look at the job file and Loki configuration file and change to suite your +environment. ### Run job From 42bfe63168f9fdd904050ea9e295cefebc90c863 Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Sun, 29 May 2022 15:26:03 +0300 Subject: [PATCH 10/16] Simplify distributed setup --- production/nomad/README.md | 16 +- production/nomad/loki-distributed/README.md | 208 ++++++--- production/nomad/loki-distributed/config.yml | 78 ++-- .../nomad/loki-distributed/job.nomad.hcl | 410 +----------------- production/nomad/loki-simple/config.yml | 2 +- production/nomad/loki/job.nomad.hcl | 10 +- 6 files changed, 217 insertions(+), 507 deletions(-) diff --git a/production/nomad/README.md b/production/nomad/README.md index 3208f7eb3122..70506f3f17a3 100644 --- a/production/nomad/README.md +++ b/production/nomad/README.md @@ -13,7 +13,6 @@ - [Vault integration](https://www.nomadproject.io/docs/integrations/vault-integration) for providing S3 credentials securely -- Consul connect to secure Loki HTTP endpoints - Traefik configured to use [Consul provider](https://doc.traefik.io/traefik/providers/consul-catalog/) to loadbalance between Loki instances @@ 
-22,21 +21,20 @@ For use in production it is recommended to: -- use authentication - can be achieved with - [Traefik](https://doc.traefik.io/traefik/middlewares/http/basicauth/) - secure HTTP endpoints with [Consul Connect](https://www.nomadproject.io/docs/integrations/consul-connect) -- secure GRPC communication with TLS when running multiple instances - can be - achived with Vault's +- setup authentication - can be achieved with + [Traefik](https://doc.traefik.io/traefik/middlewares/http/basicauth/) +- secure GRPC communication with mTLS - can be achived with Vault's [PKI secret engine](https://www.vaultproject.io/docs/secrets/pki) -See [loki-distributed](./loki-distributed) setup for an inspiration. +See [loki-distributed](./loki-distributed) README for more info. ## Service discovery when scaling When using multiple Loki instances memberlist advertises wrong address (see this -[issue](https://github.com/grafana/loki/issues/5610)), that is why this example -is using Consul ring for service discovery. +[issue](https://github.com/grafana/loki/issues/5610)), that is why these +examples are using Consul ring for service discovery. Is you are using Nomad then you are probably also using Consul, so this shouldn't be an issue. 
@@ -123,7 +121,7 @@ template { } ``` -## Supplying alerting rules to Loki ruler with `local` ruler storage +## Supply alerting rules to Loki ruler with `local` ruler storage ### Using [`artifact` stanza](https://www.nomadproject.io/docs/job-specification/artifact) diff --git a/production/nomad/loki-distributed/README.md b/production/nomad/loki-distributed/README.md index 42e7e931484e..409f31bf0ff2 100644 --- a/production/nomad/loki-distributed/README.md +++ b/production/nomad/loki-distributed/README.md @@ -2,70 +2,13 @@ This Nomad job will deploy a Loki in [microservices mode](https://grafana.com/docs/loki/latest/fundamentals/architecture/deployment-modes/#microservices-mode) -using boltdb-shipper and S3 backend GRPC protected with mTLS and with HTTP -endpoinds accessible only through Traefik+Consul Connect. +using boltdb-shipper and S3 backend. Make sure to go over the job file and adjust it to your needs. -## Secure endpoints +## Recommendations for running in production -Unfortenately Consul Connect cannot be used to secure GRPC communication between -Loki components, since some components should be able to connect to all -instances of other components. That is why internal Loki components -communication happening over GRPC are secured with mTLS with certificates -provisioned with Vault PKI. HTTP (user and external facing endpoints) are -protected with Consul Connect. - -### HTTP - -HTTP endpoints are hidden behind Consul Connect. - -Ring endpoints are exposed through Traefik for operator and must be protected by -basicauth. 
Example: - -```hcl -tags = [ - "traefik.enable=true", - "traefik.consulcatalog.connect=true", - - "traefik.http.routers.loki-ingester-ring.entrypoints=https", - "traefik.http.routers.loki-ingester-ring.rule=Host(`loki-ingester.service.consul`) && Path(`/ring`)", - "traefik.http.middlewares.loki-ingester-ring.basicauth.users=devops:$apr11bMKZL02A$QrOgT3NAOx.koXWnqfXbo0", - "traefik.http.routers.loki-ingester-ring.middlewares=loki-ingester-ring@consulcatalog", -] -``` - -Read and write path (including ruler api) are also exposed and must be protected -by basicauth. Example: - -```hcl -tags = [ - "traefik.enable=true", - "traefik.consulcatalog.connect=true", - - "traefik.http.routers.loki-distributor.entrypoints=https", - "traefik.http.routers.loki-distributor.rule=Host(`loki-distributor.service.consul`)", - "traefik.http.middlewares.loki-distributor.basicauth.users=promtail:$apr1$y5tgAEDC$8SHWJq7aBoDd0qnOTQkok0", - "traefik.http.routers.loki-distributor.middlewares=loki-distributor@consulcatalog", - - "traefik.http.routers.loki-distributor-ring.entrypoints=https", - "traefik.http.routers.loki-distributor-ring.rule=Host(`loki-distributor.service.consul`) && Path(`/distributor/ring`)", - "traefik.http.middlewares.loki-distributor-ring.basicauth.users=devops:$apr11bMKZL02A$QrOgT3NAOx.koXWnqfXbo0", - "traefik.http.routers.loki-distributor-ring.middlewares=loki-distributor-ring@consulcatalog", -] -``` - -### GRPC - -GRPC endpoints are using self-signed certificates generated by -[Vault](https://www.vaultproject.io/docs/secrets/pki). 
- -### `/metrics` and `/ready` endpoints - -Exposed with -[`expose` stanza](https://www.nomadproject.io/docs/job-specification/expose) - -## Gather metrics +### Gather metrics To collect metrics from all components use this config: @@ -87,3 +30,148 @@ To collect metrics from all components use this config: - source_labels: ["__meta_consul_service_metadata_component"] target_label: "component" ``` + +### Secure HTTP endpoints with Consul Connect + +Set network to `bridge` mode and add `health` port, that will be used by Consul +healthcheck: + +```hcl + network { + mode = "bridge" + + port "http" {} + port "health" {} + port "grpc" {} + } +``` + +```hcl + task "distibutor" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/loki:${var.versions.loki}" + ports = [ + "http", + "health", # do not forget to publish health port + "grpc", + ] +``` + +Bind HTTP endpoint to `127.0.0.1:80` so it is not accessible from outside: + +```yml +server: + http_listen_address: 127.0.0.1 + http_listen_port: 80 +``` + +Add service registration with Consul Connect enabled, `/metrics` and `/ready` +endpoint [exposed](https://www.nomadproject.io/docs/job-specification/expose) +and API accessible with basicauth through Traefik with Consul Connect +integration: + +```hcl + service { + name = "loki-distributor" + port = "http" + + meta { + alloc_id = NOMAD_ALLOC_ID + component = "distributor" + } + + tags = [ + "traefik.enable=true", + "traefik.consulcatalog.connect=true", + + "traefik.http.routers.loki-distributor.entrypoints=https", + "traefik.http.routers.loki-distributor.rule=Host(`loki-distributor.service.consul`)", + "traefik.http.middlewares.loki-distributor.basicauth.users=promtail:$$apr1$$wnih40yf$$vcxJYiqcEQLknQAZcpy/I1", + "traefik.http.routers.loki-distirbutor.middlewares=loki-distributor@consulcatalog", + + "traefik.http.routers.loki-distributor-ring.entrypoints=https", + 
"traefik.http.routers.loki-distributor-ring.rule=Host(`loki-distributor.service.consul`) && Path(`/distributor/ring`)", + "traefik.http.middlewares.loki-distributor-ring.basicauth.users=devops:$apr1$bNIZL02A$QrOgT3NAOx.koXWnqfXbo0", + "traefik.http.routers.loki-distributor-ring.middlewares=loki-distributor-ring@consulcatalog", + ] + + check { + name = "Loki distibutor" + port = "health" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + + connect { + sidecar_service { + proxy { + local_service_port = 80 + + expose { + path { + path = "/metrics" + protocol = "http" + local_path_port = 80 + listener_port = "http" + } + + path { + path = "/ready" + protocol = "http" + local_path_port = 80 + listener_port = "health" + } + } + } + } + } + } +``` + +## Secure GRPC endpoints with mTLS + +Unfortenately Consul Connect cannot be used to secure GRPC communication between +Loki components, since some components should be able to connect to all +instances of other components. We can secure components GRPC communication +with Vault [PKI engine](https://www.vaultproject.io/docs/secrets/pki). + +Certificate generation can be made less verbose with the following HCL trick: + +1. Add the following to `locals`: + +```hcl +locals { + certs = { + "CA" = "issuing_ca", + "cert" = "certificate", + "key" = "private_key", + } +} +``` + +2. Add dynamic template per service: + +```hcl + dynamic "template" { + for_each = local.certs + content { + data = <<-EOH + {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} + {{ .Data.${template.value} }} + {{- end -}} + EOH + + destination = "secrets/certs/${template.key}.pem" + change_mode = "restart" + splay = "5m" + } + } +``` + +3. 
Update config to use generated certificates diff --git a/production/nomad/loki-distributed/config.yml b/production/nomad/loki-distributed/config.yml index 80d5609fbdbc..d4b7cf87efdb 100644 --- a/production/nomad/loki-distributed/config.yml +++ b/production/nomad/loki-distributed/config.yml @@ -2,15 +2,13 @@ auth_enabled: false server: log_level: info - # HTTP is listening on localhost to make it accessible only through Consul Connect - http_listen_port: 80 - http_listen_address: 127.0.0.1 + http_listen_port: {{ env "NOMAD_PORT_http" }} grpc_listen_port: {{ env "NOMAD_PORT_grpc" }} - grpc_tls_config: - client_auth_type: "RequireAndVerifyClientCert" - client_ca_file: "/secrets/certs/CA.pem" - cert_file: "/secrets/certs/cert.pem" - key_file: "/secrets/certs/key.pem" + # grpc_tls_config: + # client_auth_type: "RequireAndVerifyClientCert" + # client_ca_file: "/secrets/certs/CA.pem" + # cert_file: "/secrets/certs/cert.pem" + # key_file: "/secrets/certs/key.pem" common: replication_factor: 2 @@ -29,13 +27,13 @@ common: consul: host: {{ env "attr.unique.network.ip-address" }}:8500 -ingester_client: - grpc_client_config: - grpc_compression: snappy - tls_enabled: true - tls_ca_path: "/secrets/certs/CA.pem" - tls_cert_path: "/secrets/certs/cert.pem" - tls_key_path: "/secrets/certs/key.pem" +# ingester_client: +# grpc_client_config: +# grpc_compression: snappy +# tls_enabled: true +# tls_ca_path: "/secrets/certs/CA.pem" +# tls_cert_path: "/secrets/certs/cert.pem" +# tls_key_path: "/secrets/certs/key.pem" ingester: wal: @@ -43,33 +41,33 @@ ingester: flush_on_shutdown: true replay_memory_ceiling: "1G" -query_scheduler: - grpc_client_config: - grpc_compression: snappy - tls_enabled: true - tls_ca_path: "/secrets/certs/CA.pem" - tls_cert_path: "/secrets/certs/cert.pem" - tls_key_path: "/secrets/certs/key.pem" +# query_scheduler: +# grpc_client_config: +# grpc_compression: snappy +# tls_enabled: true +# tls_ca_path: "/secrets/certs/CA.pem" +# tls_cert_path: 
"/secrets/certs/cert.pem" +# tls_key_path: "/secrets/certs/key.pem" frontend: scheduler_address: loki-query-scheduler.service.consul:9096 compress_responses: true log_queries_longer_than: 5s - grpc_client_config: - grpc_compression: snappy - tls_enabled: true - tls_ca_path: "/secrets/certs/CA.pem" - tls_cert_path: "/secrets/certs/cert.pem" - tls_key_path: "/secrets/certs/key.pem" + # grpc_client_config: + # grpc_compression: snappy + # tls_enabled: true + # tls_ca_path: "/secrets/certs/CA.pem" + # tls_cert_path: "/secrets/certs/cert.pem" + # tls_key_path: "/secrets/certs/key.pem" frontend_worker: scheduler_address: loki-query-scheduler.service.consul:9096 - grpc_client_config: - grpc_compression: snappy - tls_enabled: true - tls_ca_path: "/secrets/certs/CA.pem" - tls_cert_path: "/secrets/certs/cert.pem" - tls_key_path: "/secrets/certs/key.pem" + # grpc_client_config: + # grpc_compression: snappy + # tls_enabled: true + # tls_ca_path: "/secrets/certs/CA.pem" + # tls_cert_path: "/secrets/certs/cert.pem" + # tls_key_path: "/secrets/certs/key.pem" schema_config: configs: @@ -90,12 +88,12 @@ storage_config: shared_store: s3 index_gateway_client: server_address: loki-index-gateway.service.consul:9097 - grpc_client_config: - grpc_compression: snappy - tls_enabled: true - tls_ca_path: "/secrets/certs/CA.pem" - tls_cert_path: "/secrets/certs/cert.pem" - tls_key_path: "/secrets/certs/key.pem" + # grpc_client_config: + # grpc_compression: snappy + # tls_enabled: true + # tls_ca_path: "/secrets/certs/CA.pem" + # tls_cert_path: "/secrets/certs/cert.pem" + # tls_key_path: "/secrets/certs/key.pem" aws: endpoint: https://minio.service.consul diff --git a/production/nomad/loki-distributed/job.nomad.hcl b/production/nomad/loki-distributed/job.nomad.hcl index 0dc1d3b377a9..2cd4d40da35b 100644 --- a/production/nomad/loki-distributed/job.nomad.hcl +++ b/production/nomad/loki-distributed/job.nomad.hcl @@ -1,33 +1,20 @@ locals { version = "2.5.0" - certs = { - "CA" = "issuing_ca", - 
"cert" = "certificate", - "key" = "private_key", - } } job "loki" { datacenters = ["dc1"] - vault { - policies = ["loki"] - } - group "compactor" { count = 1 ephemeral_disk { size = 1000 - migrate = true sticky = true } network { - mode = "bridge" - port "http" {} - port "health" {} port "grpc" {} } @@ -42,7 +29,6 @@ job "loki" { tags = [ "traefik.enable=true", - "traefik.consulcatalog.connect=true", "traefik.http.routers.loki-compactor-ring.entrypoints=https", "traefik.http.routers.loki-compactor-ring.rule=Host(`loki-compactor.service.consul`) && Path(`/compactor/ring`)", @@ -50,36 +36,12 @@ job "loki" { check { name = "Loki compactor" - port = "health" + port = "http" type = "http" path = "/ready" interval = "20s" timeout = "1s" } - - connect { - sidecar_service { - proxy { - local_service_port = 80 - - expose { - path { - path = "/metrics" - protocol = "http" - local_path_port = 80 - listener_port = "http" - } - - path { - path = "/ready" - protocol = "http" - local_path_port = 80 - listener_port = "health" - } - } - } - } - } } task "compactor" { @@ -91,7 +53,6 @@ job "loki" { image = "grafana/loki:${local.version}" ports = [ "http", - "health", "grpc", ] @@ -109,31 +70,14 @@ job "loki" { template { data = <<-EOH - {{ with secret "secret/minio/loki" }} - S3_ACCESS_KEY_ID={{ .Data.data.access_key }} - S3_SECRET_ACCESS_KEY={{ .Data.data.secret_key }} - {{- end }} + S3_ACCESS_KEY_ID= + S3_SECRET_ACCESS_KEY= EOH destination = "secrets/s3.env" env = true } - dynamic "template" { - for_each = local.certs - content { - data = <<-EOH - {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-compactor.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} - {{ .Data.${template.value} }} - {{- end -}} - EOH - - destination = "secrets/certs/${template.key}.pem" - change_mode = "restart" - splay = "1m" - } - } - resources { cpu = 3000 memory = 256 @@ -147,15 +91,11 @@ job "loki" { ephemeral_disk { size = 1000 - migrate = true sticky = 
true } network { - mode = "bridge" - port "http" {} - port "health" {} port "grpc" {} } @@ -170,7 +110,6 @@ job "loki" { tags = [ "traefik.enable=true", - "traefik.consulcatalog.connect=true", "traefik.http.routers.loki-ruler.entrypoints=https", "traefik.http.routers.loki-ruler.rule=Host(`loki-query-frontend.service.consul`) && (PathPrefix(`/loki/api/v1/rules`) || PathPrefix(`/api/prom/rules`) || PathPrefix (`/prometheus/api/v1`))", @@ -181,38 +120,13 @@ job "loki" { check { name = "Loki ruler" - port = "health" + port = "http" type = "http" path = "/ready" interval = "20s" timeout = "1s" } - connect { - sidecar_service { - proxy { - local_service_port = 80 - - expose { - path { - path = "/metrics" - protocol = "http" - local_path_port = 80 - listener_port = "http" - } - - path { - path = "/ready" - protocol = "http" - local_path_port = 80 - listener_port = "health" - } - } - } - } - } - } - task "ruler" { driver = "docker" user = "nobody" @@ -222,7 +136,6 @@ job "loki" { image = "grafana/loki:${local.version}" ports = [ "http", - "health", "grpc", ] @@ -251,31 +164,14 @@ job "loki" { template { data = <<-EOH - {{ with secret "secret/minio/loki" }} - S3_ACCESS_KEY_ID={{ .Data.data.access_key }} - S3_SECRET_ACCESS_KEY={{ .Data.data.secret_key }} - {{- end }} + S3_ACCESS_KEY_ID= + S3_SECRET_ACCESS_KEY= EOH destination = "secrets/s3.env" env = true } - dynamic "template" { - for_each = local.certs - content { - data = <<-EOH - {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-ruler.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} - {{ .Data.${template.value} }} - {{- end -}} - EOH - - destination = "secrets/certs/${template.key}.pem" - change_mode = "restart" - splay = "5m" - } - } - resources { cpu = 1000 memory = 256 @@ -288,10 +184,7 @@ job "loki" { count = 2 network { - mode = "bridge" - port "http" {} - port "health" {} port "grpc" {} } @@ -306,7 +199,6 @@ job "loki" { tags = [ "traefik.enable=true", - 
"traefik.consulcatalog.connect=true", "traefik.http.routers.loki-distributor.entrypoints=https", "traefik.http.routers.loki-distributor.rule=Host(`loki-distributor.service.consul`)", @@ -317,36 +209,12 @@ job "loki" { check { name = "Loki distibutor" - port = "health" + port = "http" type = "http" path = "/ready" interval = "20s" timeout = "1s" } - - connect { - sidecar_service { - proxy { - local_service_port = 80 - - expose { - path { - path = "/metrics" - protocol = "http" - local_path_port = 80 - listener_port = "http" - } - - path { - path = "/ready" - protocol = "http" - local_path_port = 80 - listener_port = "health" - } - } - } - } - } } task "distibutor" { @@ -358,7 +226,6 @@ job "loki" { image = "grafana/loki:${local.version}" ports = [ "http", - "health", "grpc", ] @@ -374,21 +241,6 @@ job "loki" { destination = "local/config.yml" } - dynamic "template" { - for_each = local.certs - content { - data = <<-EOH - {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-distributer.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} - {{ .Data.${template.value} }} - {{- end -}} - EOH - - destination = "secrets/certs/${template.key}.pem" - change_mode = "restart" - splay = "5m" - } - } - resources { cpu = 200 memory = 128 @@ -410,15 +262,11 @@ job "loki" { ephemeral_disk { size = 4000 - migrate = true sticky = true } network { - mode = "bridge" - port "http" {} - port "health" {} port "grpc" {} } @@ -433,7 +281,6 @@ job "loki" { tags = [ "traefik.enable=true", - "traefik.consulcatalog.connect=true", "traefik.http.routers.loki-ingester-ring.entrypoints=https", "traefik.http.routers.loki-ingester-ring.rule=Host(`loki-ingester.service.consul`) && Path(`/ring`)", @@ -441,36 +288,12 @@ job "loki" { check { name = "Loki ingester" - port = "health" + port = "http" type = "http" path = "/ready" interval = "20s" timeout = "1s" } - - connect { - sidecar_service { - proxy { - local_service_port = 80 - - expose { - path { - path = 
"/metrics" - protocol = "http" - local_path_port = 80 - listener_port = "http" - } - - path { - path = "/ready" - protocol = "http" - local_path_port = 80 - listener_port = "health" - } - } - } - } - } } task "ingester" { @@ -482,7 +305,6 @@ job "loki" { image = "grafana/loki:${local.version}" ports = [ "http", - "health", "grpc", ] @@ -500,31 +322,14 @@ job "loki" { template { data = <<-EOH - {{ with secret "secret/minio/loki" }} - S3_ACCESS_KEY_ID={{ .Data.data.access_key }} - S3_SECRET_ACCESS_KEY={{ .Data.data.secret_key }} - {{- end }} + S3_ACCESS_KEY_ID= + S3_SECRET_ACCESS_KEY= EOH destination = "secrets/s3.env" env = true } - dynamic "template" { - for_each = local.certs - content { - data = <<-EOH - {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-ingestor.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} - {{ .Data.${template.value} }} - {{- end -}} - EOH - - destination = "secrets/certs/${template.key}.pem" - change_mode = "restart" - splay = "5m" - } - } - resources { cpu = 300 memory = 128 @@ -537,10 +342,7 @@ job "loki" { count = 2 network { - mode = "bridge" - port "http" {} - port "health" {} port "grpc" {} } @@ -555,36 +357,12 @@ job "loki" { check { name = "Loki querier" - port = "health" + port = "http" type = "http" path = "/ready" interval = "50s" timeout = "1s" } - - connect { - sidecar_service { - proxy { - local_service_port = 80 - - expose { - path { - path = "/metrics" - protocol = "http" - local_path_port = 80 - listener_port = "http" - } - - path { - path = "/ready" - protocol = "http" - local_path_port = 80 - listener_port = "health" - } - } - } - } - } } task "querier" { @@ -596,7 +374,6 @@ job "loki" { image = "grafana/loki:${local.version}" ports = [ "http", - "health", "grpc", ] @@ -614,31 +391,14 @@ job "loki" { template { data = <<-EOH - {{ with secret "secret/minio/loki" }} - S3_ACCESS_KEY_ID={{ .Data.data.access_key }} - S3_SECRET_ACCESS_KEY={{ .Data.data.secret_key }} - {{- end }} + 
S3_ACCESS_KEY_ID= + S3_SECRET_ACCESS_KEY= EOH destination = "secrets/s3.env" env = true } - dynamic "template" { - for_each = local.certs - content { - data = <<-EOH - {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-querier.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} - {{ .Data.${template.value} }} - {{- end -}} - EOH - - destination = "secrets/certs/${template.key}.pem" - change_mode = "restart" - splay = "5m" - } - } - resources { cpu = 200 memory = 128 @@ -651,10 +411,7 @@ job "loki" { count = 2 network { - mode = "bridge" - port "http" {} - port "health" {} port "grpc" { to = 9096 static = 9096 @@ -672,36 +429,12 @@ job "loki" { check { name = "Loki query-scheduler" - port = "health" + port = "http" type = "http" path = "/ready" interval = "20s" timeout = "1s" } - - connect { - sidecar_service { - proxy { - local_service_port = 80 - - expose { - path { - path = "/metrics" - protocol = "http" - local_path_port = 80 - listener_port = "http" - } - - path { - path = "/ready" - protocol = "http" - local_path_port = 80 - listener_port = "health" - } - } - } - } - } } task "query-scheduler" { @@ -713,7 +446,6 @@ job "loki" { image = "grafana/loki:${local.version}" ports = [ "http", - "health", "grpc", ] @@ -729,21 +461,6 @@ job "loki" { destination = "local/config.yml" } - dynamic "template" { - for_each = local.certs - content { - data = <<-EOH - {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-query-scheduler.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} - {{ .Data.${template.value} }} - {{- end -}} - EOH - - destination = "secrets/certs/${template.key}.pem" - change_mode = "restart" - splay = "5m" - } - } - resources { cpu = 100 memory = 64 @@ -756,10 +473,7 @@ job "loki" { count = 2 network { - mode = "bridge" - port "http" {} - port "health" {} port "grpc" {} } @@ -774,7 +488,6 @@ job "loki" { tags = [ "traefik.enable=true", - 
"traefik.consulcatalog.connect=true", "traefik.http.routers.loki-query-frontend.entrypoints=https", "traefik.http.routers.loki-query-frontend.rule=Host(`loki-query-frontend.service.consul`)", @@ -782,38 +495,13 @@ job "loki" { check { name = "Loki query-frontend" - port = "health" + port = "http" type = "http" path = "/ready" interval = "20s" timeout = "1s" } - connect { - sidecar_service { - proxy { - local_service_port = 80 - - expose { - path { - path = "/metrics" - protocol = "http" - local_path_port = 80 - listener_port = "http" - } - - path { - path = "/ready" - protocol = "http" - local_path_port = 80 - listener_port = "health" - } - } - } - } - } - } - task "query-frontend" { driver = "docker" user = "nobody" @@ -823,7 +511,6 @@ job "loki" { image = "grafana/loki:${local.version}" ports = [ "http", - "health", "grpc", ] @@ -839,21 +526,6 @@ job "loki" { destination = "local/config.yml" } - dynamic "template" { - for_each = local.certs - content { - data = <<-EOH - {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-query-frontend.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} - {{ .Data.${template.value} }} - {{- end -}} - EOH - - destination = "secrets/certs/${template.key}.pem" - change_mode = "restart" - splay = "5m" - } - } - resources { cpu = 100 memory = 64 @@ -867,15 +539,11 @@ job "loki" { ephemeral_disk { size = 1000 - migrate = true sticky = true } network { - mode = "bridge" - port "http" {} - port "health" {} port "grpc" { to = 9097 static = 9097 @@ -893,36 +561,12 @@ job "loki" { check { name = "Loki index-gateway" - port = "health" + port = "http" type = "http" path = "/ready" interval = "20s" timeout = "1s" } - - connect { - sidecar_service { - proxy { - local_service_port = 80 - - expose { - path { - path = "/metrics" - protocol = "http" - local_path_port = 80 - listener_port = "http" - } - - path { - path = "/ready" - protocol = "http" - local_path_port = 80 - listener_port = "health" - } - } - 
} - } - } } task "index-gateway" { @@ -934,7 +578,6 @@ job "loki" { image = "grafana/loki:${local.version}" ports = [ "http", - "health", "grpc", ] @@ -952,31 +595,14 @@ job "loki" { template { data = <<-EOH - {{ with secret "secret/minio/loki" }} - S3_ACCESS_KEY_ID={{ .Data.data.access_key }} - S3_SECRET_ACCESS_KEY={{ .Data.data.secret_key }} - {{- end }} + S3_ACCESS_KEY_ID= + S3_SECRET_ACCESS_KEY= EOH destination = "secrets/s3.env" env = true } - dynamic "template" { - for_each = local.certs - content { - data = <<-EOH - {{- with secret "pki/issue/internal" "ttl=10d" "common_name=loki-index-gateway.service.consul" (env "attr.unique.network.ip-address" | printf "ip_sans=%s") -}} - {{ .Data.${template.value} }} - {{- end -}} - EOH - - destination = "secrets/certs/${template.key}.pem" - change_mode = "restart" - splay = "5m" - } - } - resources { cpu = 200 memory = 128 diff --git a/production/nomad/loki-simple/config.yml b/production/nomad/loki-simple/config.yml index 535da397854d..dc57f2e80b0e 100644 --- a/production/nomad/loki-simple/config.yml +++ b/production/nomad/loki-simple/config.yml @@ -43,7 +43,7 @@ storage_config: shared_store: s3 aws: - endpoint: https://s3.endpoint.com + endpoint: https://minio.service.consul bucketnames: loki region: us-west-1 access_key_id: ${S3_ACCESS_KEY_ID} diff --git a/production/nomad/loki/job.nomad.hcl b/production/nomad/loki/job.nomad.hcl index 6956496e614c..091d5cbd13cd 100644 --- a/production/nomad/loki/job.nomad.hcl +++ b/production/nomad/loki/job.nomad.hcl @@ -67,11 +67,11 @@ job "loki" { port = "http" # use Traefik to loadbalance between Loki instances - # tags = [ - # "traefik.enable=true", - # "traefik.http.routers.loki.entrypoints=https", - # "traefik.http.routers.loki.rule=Host(`loki.service.consul`)", - # ] + tags = [ + "traefik.enable=true", + "traefik.http.routers.loki.entrypoints=https", + "traefik.http.routers.loki.rule=Host(`loki.service.consul`)", + ] check { name = "Loki" From 
7d549c5b0a01201c9262f59c4ece56bedcf74393 Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Sun, 29 May 2022 15:31:02 +0300 Subject: [PATCH 11/16] Readme small fixes --- production/nomad/README.md | 7 ++++--- production/nomad/loki-distributed/README.md | 2 +- production/nomad/loki-simple/README.md | 2 +- production/nomad/loki/README.md | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/production/nomad/README.md b/production/nomad/README.md index 70506f3f17a3..4128c4411998 100644 --- a/production/nomad/README.md +++ b/production/nomad/README.md @@ -125,7 +125,8 @@ template { ### Using [`artifact` stanza](https://www.nomadproject.io/docs/job-specification/artifact) -Alert rules can be download using artifact stanza. It supports: +Alert rules can be downloaded from remote storage using artifact stanza. It +supports: - Git - Mercurial @@ -146,8 +147,8 @@ artifact { Alert rules can be stored locally (beside job definition) and provided to Loki ruler container with -[`template`](https://www.nomadproject.io/docs/job-specification/template) and -some HCL magic, namely: +[`template`](https://www.nomadproject.io/docs/job-specification/template) stanza +and some HCL magic, namely: - [fileset](https://www.nomadproject.io/docs/job-specification/hcl2/functions/file/fileset) - to generate a list of files diff --git a/production/nomad/loki-distributed/README.md b/production/nomad/loki-distributed/README.md index 409f31bf0ff2..4b0102e077f0 100644 --- a/production/nomad/loki-distributed/README.md +++ b/production/nomad/loki-distributed/README.md @@ -1,6 +1,6 @@ # Microservices mode -This Nomad job will deploy a Loki in +This Nomad job will deploy Loki in [microservices mode](https://grafana.com/docs/loki/latest/fundamentals/architecture/deployment-modes/#microservices-mode) using boltdb-shipper and S3 backend. 
diff --git a/production/nomad/loki-simple/README.md b/production/nomad/loki-simple/README.md index deaf7dcb1e5b..43bb2ab6ba46 100644 --- a/production/nomad/loki-simple/README.md +++ b/production/nomad/loki-simple/README.md @@ -1,6 +1,6 @@ # Simple scalable deployment mode -This Nomad job will deploy a Loki in +This Nomad job will deploy Loki in [simple scalable deployment mode](https://grafana.com/docs/loki/latest/fundamentals/architecture/deployment-modes/#simple-scalable-deployment-mode) with minimum dependencies, using boltdb-shipper and S3 backend and with the ability to scale. diff --git a/production/nomad/loki/README.md b/production/nomad/loki/README.md index 1f24249861bf..6fc02b59ae73 100644 --- a/production/nomad/loki/README.md +++ b/production/nomad/loki/README.md @@ -1,6 +1,6 @@ # Monolithic mode -This Nomad job will deploy a Loki in +This Nomad job will deploy Loki in [monolithic mode](https://grafana.com/docs/loki/latest/fundamentals/architecture/deployment-modes/#monolithic-mode) with minimum dependencies, using boltdb-shipper and S3 backend and with the ability to scale. From dac76199fbe0b069affecccf36fade1a4f479f36 Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Sun, 29 May 2022 15:32:20 +0300 Subject: [PATCH 12/16] Update production/nomad/README.md --- production/nomad/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/production/nomad/README.md b/production/nomad/README.md index 4128c4411998..4efc631d9bf1 100644 --- a/production/nomad/README.md +++ b/production/nomad/README.md @@ -39,7 +39,7 @@ examples are using Consul ring for service discovery. Is you are using Nomad then you are probably also using Consul, so this shouldn't be an issue. -## Run Loki behind ingress +## Run Loki behind loadbalancer When running multiple instances of Loki incoming requests should be loadbalanced. 
From 32132ddd0dd639dedd33e1bb0ae6c205b6353388 Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Sun, 29 May 2022 15:45:17 +0300 Subject: [PATCH 13/16] Add Nomad mention --- production/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/production/README.md b/production/README.md index c90f83f5f103..030ae6b3ec49 100644 --- a/production/README.md +++ b/production/README.md @@ -1,9 +1,10 @@ # Running Loki -Currently there are five ways to try out Loki, in order from easier to hardest: +Currently there are six ways to try out Loki, in order from easier to hardest: - [Grafana Cloud: Hosted Logs](#grafana-cloud-logs) - [Run Loki locally with Docker](#run-locally-using-docker) +- [Run Loki with Nomad](#run-with-nomad) - [Use Helm to deploy on Kubernetes](#using-helm-to-deploy-on-kubernetes) - [Build Loki from source](#build-and-run-from-source) - [Get inspired by our production setup](#get-inspired-by-our-production-setup) @@ -43,6 +44,12 @@ For instructions on how to query Loki, see [our usage docs](https://grafana.com/ To deploy a cluster of loki locally, please refer to this [doc](./docker/) +## Run with Nomad + +There are example [Nomad jobs](./nomad) that can be used to deploy Loki with +[Nomad](https://www.nomadproject.io/) - simple and powerful workload +orchestrator from HashiCorp. + ## Using Helm to deploy on Kubernetes There is a [Helm chart](helm) to deploy Loki and Promtail to Kubernetes. 
From ed2dbfe3a92194bc1393f557f26c368355db73b3 Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Mon, 30 May 2022 12:58:33 +0300 Subject: [PATCH 14/16] Commit suggestions --- production/nomad/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/production/nomad/README.md b/production/nomad/README.md index 4efc631d9bf1..14c3d9db286c 100644 --- a/production/nomad/README.md +++ b/production/nomad/README.md @@ -4,7 +4,7 @@ ### Hard requirements -- recent version of Nomad with healthy Docker driver +- recent version of Nomad [installed](https://www.nomadproject.io/docs/install) with healthy Docker driver - [Consul integration](https://www.nomadproject.io/docs/integrations/consul-integration) is enabled in Nomad - access to S3 storage @@ -138,7 +138,7 @@ Example with git: ```hcl artifact { - source = "git::github.com/someorg/observability//loki-rules" + source = "git::github.com////" destination = "local/rules/" } ``` From ea44c2da69c9d33319ba9fd5bf033140bc90cc8f Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Mon, 30 May 2022 13:07:28 +0300 Subject: [PATCH 15/16] Add ability to specify Loki version from command line --- production/nomad/loki-distributed/README.md | 38 +++++++++++++++++-- .../nomad/loki-distributed/job.nomad.hcl | 22 ++++++----- production/nomad/loki-simple/README.md | 6 +++ production/nomad/loki-simple/job.nomad.hcl | 10 +++-- production/nomad/loki/README.md | 7 ++++ production/nomad/loki/job.nomad.hcl | 8 +++- 6 files changed, 73 insertions(+), 18 deletions(-) diff --git a/production/nomad/loki-distributed/README.md b/production/nomad/loki-distributed/README.md index 4b0102e077f0..7b94ec81242d 100644 --- a/production/nomad/loki-distributed/README.md +++ b/production/nomad/loki-distributed/README.md @@ -4,7 +4,39 @@ This Nomad job will deploy Loki in [microservices mode](https://grafana.com/docs/loki/latest/fundamentals/architecture/deployment-modes/#microservices-mode) using boltdb-shipper and S3 backend. 
-Make sure to go over the job file and adjust it to your needs. +## Usage + +Have a look at the job file and Loki configuration file and change it to suite +your environment. + +### Run job + +Inside directory with job run: + +```shell +nomad run job.nomad.hcl +``` + +To run deploy a different version change `variable.version` default value or +specify from command line: + +```shell +nomad job run -var="version=2.5.0" job.nomad.hcl +``` + +### Scale Loki + +Change `count` in job file in `group "loki"` and run: + +```shell +nomad run job.nomad.hcl +``` + +or use Nomad CLI + +```shell +nomad job scale loki distributor +``` ## Recommendations for running in production @@ -138,8 +170,8 @@ integration: Unfortenately Consul Connect cannot be used to secure GRPC communication between Loki components, since some components should be able to connect to all -instances of other components. We can secure components GRPC communication -with Vault [PKI engine](https://www.vaultproject.io/docs/secrets/pki). +instances of other components. We can secure components GRPC communication with +Vault [PKI engine](https://www.vaultproject.io/docs/secrets/pki). 
Certificate generation can be made less verbose with the following HCL trick: diff --git a/production/nomad/loki-distributed/job.nomad.hcl b/production/nomad/loki-distributed/job.nomad.hcl index 2cd4d40da35b..ef85828172fc 100644 --- a/production/nomad/loki-distributed/job.nomad.hcl +++ b/production/nomad/loki-distributed/job.nomad.hcl @@ -1,5 +1,7 @@ -locals { - version = "2.5.0" +variable "version" { + type = string + description = "Loki version" + default = "2.5.0" } job "loki" { @@ -50,7 +52,7 @@ job "loki" { kill_timeout = "90s" config { - image = "grafana/loki:${local.version}" + image = "grafana/loki:${var.version}" ports = [ "http", "grpc", @@ -133,7 +135,7 @@ job "loki" { kill_timeout = "90s" config { - image = "grafana/loki:${local.version}" + image = "grafana/loki:${var.version}" ports = [ "http", "grpc", @@ -223,7 +225,7 @@ job "loki" { kill_timeout = "90s" config { - image = "grafana/loki:${local.version}" + image = "grafana/loki:${var.version}" ports = [ "http", "grpc", @@ -302,7 +304,7 @@ job "loki" { kill_timeout = "90s" config { - image = "grafana/loki:${local.version}" + image = "grafana/loki:${var.version}" ports = [ "http", "grpc", @@ -371,7 +373,7 @@ job "loki" { kill_timeout = "90s" config { - image = "grafana/loki:${local.version}" + image = "grafana/loki:${var.version}" ports = [ "http", "grpc", @@ -443,7 +445,7 @@ job "loki" { kill_timeout = "90s" config { - image = "grafana/loki:${local.version}" + image = "grafana/loki:${var.version}" ports = [ "http", "grpc", @@ -508,7 +510,7 @@ job "loki" { kill_timeout = "90s" config { - image = "grafana/loki:${local.version}" + image = "grafana/loki:${var.version}" ports = [ "http", "grpc", @@ -575,7 +577,7 @@ job "loki" { kill_timeout = "90s" config { - image = "grafana/loki:${local.version}" + image = "grafana/loki:${var.version}" ports = [ "http", "grpc", diff --git a/production/nomad/loki-simple/README.md b/production/nomad/loki-simple/README.md index 43bb2ab6ba46..65decc4b3da0 100644 --- 
a/production/nomad/loki-simple/README.md +++ b/production/nomad/loki-simple/README.md @@ -18,6 +18,12 @@ Inside directory with job run: nomad run job.nomad.hcl ``` +To run deploy a different version change `variable.version` default value or specify from command line: +```shell +nomad job run -var="version=2.5.0" job.nomad.hcl + +``` + ### Scale Loki Change `count` in job file in `group "loki"` and run: diff --git a/production/nomad/loki-simple/job.nomad.hcl b/production/nomad/loki-simple/job.nomad.hcl index 6c705aa0343c..6e14b4db0f17 100644 --- a/production/nomad/loki-simple/job.nomad.hcl +++ b/production/nomad/loki-simple/job.nomad.hcl @@ -1,5 +1,7 @@ -locals { - version = "2.5.0" +variable "version" { + type = string + description = "Loki version" + default = "2.5.0" } job "loki" { @@ -23,7 +25,7 @@ job "loki" { user = "nobody" config { - image = "grafana/loki:${local.version}" + image = "grafana/loki:${var.version}" ports = [ "http", @@ -99,7 +101,7 @@ job "loki" { user = "nobody" config { - image = "grafana/loki:${local.version}" + image = "grafana/loki:${var.version}" ports = [ "http", diff --git a/production/nomad/loki/README.md b/production/nomad/loki/README.md index 6fc02b59ae73..8bea08e047b1 100644 --- a/production/nomad/loki/README.md +++ b/production/nomad/loki/README.md @@ -18,6 +18,13 @@ Inside directory with job run: nomad run job.nomad.hcl ``` +To run deploy a different version change `variable.version` default value or +specify from command line: + +```shell +nomad job run -var="version=2.5.0" job.nomad.hcl +``` + ### Scale Loki Change `count` in job file in `group "loki"` and run: diff --git a/production/nomad/loki/job.nomad.hcl b/production/nomad/loki/job.nomad.hcl index 091d5cbd13cd..ba721a634283 100644 --- a/production/nomad/loki/job.nomad.hcl +++ b/production/nomad/loki/job.nomad.hcl @@ -1,3 +1,9 @@ +variable "version" { + type = string + description = "Loki version" + default = "2.5.0" +} + job "loki" { datacenters = ["dc1"] @@ -24,7 +30,7 @@ 
job "loki" { user = "nobody" config { - image = "grafana/loki:2.5.0" + image = "grafana/loki:${var.version}" ports = [ "http", "grpc", From 65e79b535a310a4b1b25f8c59496cdb295a500e4 Mon Sep 17 00:00:00 2001 From: Anatoly Laskaris Date: Mon, 30 May 2022 13:11:14 +0300 Subject: [PATCH 16/16] Wording fix --- production/nomad/loki-distributed/README.md | 2 +- production/nomad/loki-simple/README.md | 5 +++-- production/nomad/loki/README.md | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/production/nomad/loki-distributed/README.md b/production/nomad/loki-distributed/README.md index 7b94ec81242d..c966bb091ba5 100644 --- a/production/nomad/loki-distributed/README.md +++ b/production/nomad/loki-distributed/README.md @@ -17,7 +17,7 @@ Inside directory with job run: nomad run job.nomad.hcl ``` -To run deploy a different version change `variable.version` default value or +To deploy a different version change `variable.version` default value or specify from command line: ```shell diff --git a/production/nomad/loki-simple/README.md b/production/nomad/loki-simple/README.md index 65decc4b3da0..4270bbde8b6b 100644 --- a/production/nomad/loki-simple/README.md +++ b/production/nomad/loki-simple/README.md @@ -18,10 +18,11 @@ Inside directory with job run: nomad run job.nomad.hcl ``` -To run deploy a different version change `variable.version` default value or specify from command line: +To deploy a different version change `variable.version` default value or specify +from command line: + ```shell nomad job run -var="version=2.5.0" job.nomad.hcl - ``` ### Scale Loki diff --git a/production/nomad/loki/README.md b/production/nomad/loki/README.md index 8bea08e047b1..1692cba4d569 100644 --- a/production/nomad/loki/README.md +++ b/production/nomad/loki/README.md @@ -18,7 +18,7 @@ Inside directory with job run: nomad run job.nomad.hcl ``` -To run deploy a different version change `variable.version` default value or +To deploy a different version change `variable.version` 
default value or specify from command line: ```shell