From c313ee8ecfa82e427af30e81fa5efbcc86d116bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90=E1=BB=97=20Tr=E1=BB=8Dng=20H=E1=BA=A3i?= <41283691+hainenber@users.noreply.github.com> Date: Fri, 13 Oct 2023 18:58:46 +0700 Subject: [PATCH] feat(lambda/promtail): support dropping labels (#10755) **What this PR does / why we need it**: **Which issue(s) this PR fixes**: Fixes #10669 **Special notes for your reviewer**: **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [x] Documentation added - [x] Tests updated - [x] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) --------- Signed-off-by: hainenber Co-authored-by: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> --- CHANGELOG.md | 1 + tools/lambda-promtail/README.md | 4 +- tools/lambda-promtail/lambda-promtail/cw.go | 2 +- .../lambda-promtail/kinesis.go | 2 +- tools/lambda-promtail/lambda-promtail/main.go | 50 +++++++++++++++---- .../lambda-promtail/main_test.go | 25 ++++++++++ .../lambda-promtail/promtail.go | 8 +-- tools/lambda-promtail/lambda-promtail/s3.go | 17 ++++--- tools/lambda-promtail/main.tf | 1 + tools/lambda-promtail/variables.tf | 6 +++ 10 files changed, 90 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b00c5884feb6d..cf63f63f9f4a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ * [10416](https://github.com/grafana/loki/pull/10416) **lpugoy**: Lambda-Promtail: Add support for WAF logs in S3 * [10301](https://github.com/grafana/loki/pull/10301) **wildum**: users can now define `additional_fields` in cloudflare configuration. +* [10755](https://github.com/grafana/loki/pull/10755) **hainenber**: Lambda-Promtail: Add support for dropping labels passed via env var ##### Changes diff --git a/tools/lambda-promtail/README.md b/tools/lambda-promtail/README.md index 2f4ab27fd1215..32b52db38c6b3 100644 --- a/tools/lambda-promtail/README.md +++ b/tools/lambda-promtail/README.md @@ -54,12 +54,12 @@ Then use Terraform to deploy: ```bash ## use cloudwatch log group -terraform apply -var ":" -var "write_address=https://your-loki-url/loki/api/v1/push" -var "password=" -var "username=" -var 'bearer_token=' -var 'log_group_names=["log-group-01", "log-group-02"]' -var 'extra_labels="name1,value1,name2,value2"' -var "tenant_id=" -var 'skip_tls_verify="false"' +terraform apply -var ":" -var "write_address=https://your-loki-url/loki/api/v1/push" -var "password=" -var "username=" -var 'bearer_token=' -var 'log_group_names=["log-group-01", "log-group-02"]' -var 'extra_labels="name1,value1,name2,value2"' -var 'drop_labels="name1,name2"' -var "tenant_id=" -var 'skip_tls_verify="false"' ``` ```bash ## use kinesis data stream -terraform apply -var ":" -var "write_address=https://your-loki-url/loki/api/v1/push" -var "password=" -var "username=" -var 'kinesis_stream_name=["kinesis-stream-01", "kinesis-stream-02"]' -var 'extra_labels="name1,value1,name2,value2"' -var "tenant_id=" -var 'skip_tls_verify="false"' +terraform apply -var ":" -var "write_address=https://your-loki-url/loki/api/v1/push" -var "password=" -var "username=" -var 'kinesis_stream_name=["kinesis-stream-01", "kinesis-stream-02"]' -var 'extra_labels="name1,value1,name2,value2"' -var 'drop_labels="name1,name2"' -var "tenant_id=" -var 'skip_tls_verify="false"' ``` or CloudFormation: diff --git a/tools/lambda-promtail/lambda-promtail/cw.go b/tools/lambda-promtail/lambda-promtail/cw.go index 4238e09051ee6..895cd66c8f450 100644 --- a/tools/lambda-promtail/lambda-promtail/cw.go +++ b/tools/lambda-promtail/lambda-promtail/cw.go @@ -26,7 +26,7 @@ func parseCWEvent(ctx context.Context, b *batch, ev *events.CloudwatchLogsEvent) labels[model.LabelName("__aws_cloudwatch_log_stream")] = model.LabelValue(data.LogStream) } - labels = applyExtraLabels(labels) + labels = applyLabels(labels) for _, event := range data.LogEvents { timestamp := time.UnixMilli(event.Timestamp) diff --git a/tools/lambda-promtail/lambda-promtail/kinesis.go b/tools/lambda-promtail/lambda-promtail/kinesis.go index dd465021c9760..31b619284b06d 100644 --- a/tools/lambda-promtail/lambda-promtail/kinesis.go +++ b/tools/lambda-promtail/lambda-promtail/kinesis.go @@ -26,7 +26,7 @@ func parseKinesisEvent(ctx context.Context, b batchIf, ev *events.KinesisEvent) model.LabelName("__aws_kinesis_event_source_arn"): model.LabelValue(record.EventSourceArn), } - labels = applyExtraLabels(labels) + labels = applyLabels(labels) // Check if the data is gzipped by inspecting the 'data' field if isGzipped(record.Kinesis.Data) { diff --git a/tools/lambda-promtail/lambda-promtail/main.go b/tools/lambda-promtail/lambda-promtail/main.go index 3d230ca1334fc..14cef6732737a 100644 --- a/tools/lambda-promtail/lambda-promtail/main.go +++ b/tools/lambda-promtail/lambda-promtail/main.go @@ -25,18 +25,19 @@ const ( maxErrMsgLen = 1024 - invalidExtraLabelsError = "Invalid value for environment variable EXTRA_LABELS. Expected a comma separated list with an even number of entries. " + invalidExtraLabelsError = "invalid value for environment variable EXTRA_LABELS. Expected a comma separated list with an even number of entries. " ) var ( - writeAddress *url.URL - username, password, extraLabelsRaw, tenantID, bearerToken string - keepStream bool - batchSize int - s3Clients map[string]*s3.Client - extraLabels model.LabelSet - skipTlsVerify bool - printLogLine bool + writeAddress *url.URL + username, password, extraLabelsRaw, dropLabelsRaw, tenantID, bearerToken string + keepStream bool + batchSize int + s3Clients map[string]*s3.Client + extraLabels model.LabelSet + dropLabels []model.LabelName + skipTlsVerify bool + printLogLine bool ) func setupArguments() { @@ -60,6 +61,11 @@ func setupArguments() { panic(err) } + dropLabels, err = getDropLabels() + if err != nil { + panic(err) + } + username = os.Getenv("USERNAME") password = os.Getenv("PASSWORD") // If either username or password is set then both must be. @@ -128,8 +134,30 @@ func parseExtraLabels(extraLabelsRaw string, omitPrefix bool) (model.LabelSet, e return extractedLabels, nil } -func applyExtraLabels(labels model.LabelSet) model.LabelSet { - return labels.Merge(extraLabels) +func getDropLabels() ([]model.LabelName, error) { + var result []model.LabelName + + dropLabelsRaw = os.Getenv("DROP_LABELS") + dropLabelsRawSplit := strings.Split(dropLabelsRaw, ",") + for _, dropLabelRaw := range dropLabelsRawSplit { + dropLabel := model.LabelName(dropLabelRaw) + if !dropLabel.IsValid() { + return []model.LabelName{}, fmt.Errorf("invalid label name %s", dropLabelRaw) + } + result = append(result, dropLabel) + } + + return result, nil +} + +func applyLabels(labels model.LabelSet) model.LabelSet { + finalLabels := labels.Merge(extraLabels) + + for _, dropLabel := range dropLabels { + delete(finalLabels, dropLabel) + } + + return finalLabels } func checkEventType(ev map[string]interface{}) (interface{}, error) { diff --git a/tools/lambda-promtail/lambda-promtail/main_test.go b/tools/lambda-promtail/lambda-promtail/main_test.go index 03cc81d48fb7b..21063b643c109 100644 --- a/tools/lambda-promtail/lambda-promtail/main_test.go +++ b/tools/lambda-promtail/lambda-promtail/main_test.go @@ -1,6 +1,7 @@ package main import ( + "os" "testing" "github.com/prometheus/common/model" @@ -34,3 +35,27 @@ func TestLambdaPromtail_TestParseLabelsNoneProvided(t *testing.T) { require.Len(t, extraLabels, 0) require.Nil(t, err) } + +func TestLambdaPromtail_TestDropLabels(t *testing.T) { + os.Setenv("DROP_LABELS", "A1,A2") + + // Reset the shared global variables + defer func() { + os.Unsetenv("DROP_LABELS") + dropLabels = []model.LabelName{} + }() + + var err error + dropLabels, err = getDropLabels() + require.Nil(t, err) + require.Contains(t, dropLabels, model.LabelName("A1")) + + defaultLabelSet := model.LabelSet{ + model.LabelName("default"): model.LabelValue("default"), + model.LabelName("A1"): model.LabelValue("A1"), + model.LabelName("B2"): model.LabelValue("B2"), + } + modifiedLabels := applyLabels(defaultLabelSet) + require.NotContains(t, modifiedLabels, model.LabelName("A1")) + require.Contains(t, modifiedLabels, model.LabelName("B2")) +} diff --git a/tools/lambda-promtail/lambda-promtail/promtail.go b/tools/lambda-promtail/lambda-promtail/promtail.go index 56020c30ea81f..c1d01c36b174b 100644 --- a/tools/lambda-promtail/lambda-promtail/promtail.go +++ b/tools/lambda-promtail/lambda-promtail/promtail.go @@ -126,9 +126,11 @@ func (b *batch) createPushRequest() (*logproto.PushRequest, int) { } func (b *batch) flushBatch(ctx context.Context) error { - err := b.client.sendToPromtail(ctx, b) - if err != nil { - return err + if b.client != nil { + err := b.client.sendToPromtail(ctx, b) + if err != nil { + return err + } } b.resetBatch() diff --git a/tools/lambda-promtail/lambda-promtail/s3.go b/tools/lambda-promtail/lambda-promtail/s3.go index dc9707634e2d3..5dca5cf7d6090 100644 --- a/tools/lambda-promtail/lambda-promtail/s3.go +++ b/tools/lambda-promtail/lambda-promtail/s3.go @@ -162,7 +162,7 @@ func parseS3Log(ctx context.Context, b *batch, labels map[string]string, obj io. model.LabelName(fmt.Sprintf("__aws_%s_owner", parser.logTypeLabel)): model.LabelValue(labels[parser.ownerLabelKey]), } - ls = applyExtraLabels(ls) + ls = applyLabels(ls) // extract the timestamp of the nested event and sends the rest as raw json if labels["type"] == CLOUDTRAIL_LOG_TYPE { @@ -341,13 +341,14 @@ func stringToRawEvent(body string) (map[string]interface{}, error) { // It also makes use of the fact that the log10 of a number in base 10 is its number of digits - 1. // It returns early if the fractional seconds is 0 because getting the log10 of 0 results in -Inf. // For example, given a string 1234567890123: -// iLog10 = 12 // the parsed int is 13 digits long -// multiplier = 0.001 // to get the seconds part it must be divided by 1000 -// sec = 1234567890123 * 0.001 = 1234567890 // this is the seconds part of the Unix time -// fractionalSec = 123 // the rest of the parsed int -// fractionalSecLog10 = 2 // it is 3 digits long -// multiplier = 1000000 // nano is 10^-9, so the nanoseconds part is 9 digits long -// nsec = 123000000 // this is the nanoseconds part of the Unix time +// +// iLog10 = 12 // the parsed int is 13 digits long +// multiplier = 0.001 // to get the seconds part it must be divided by 1000 +// sec = 1234567890123 * 0.001 = 1234567890 // this is the seconds part of the Unix time +// fractionalSec = 123 // the rest of the parsed int +// fractionalSecLog10 = 2 // it is 3 digits long +// multiplier = 1000000 // nano is 10^-9, so the nanoseconds part is 9 digits long +// nsec = 123000000 // this is the nanoseconds part of the Unix time func getUnixSecNsec(s string) (sec int64, nsec int64, err error) { const ( UNIX_SEC_LOG10 = 9 diff --git a/tools/lambda-promtail/main.tf b/tools/lambda-promtail/main.tf index ebc9925cc9d40..1b91fdc797c11 100644 --- a/tools/lambda-promtail/main.tf +++ b/tools/lambda-promtail/main.tf @@ -174,6 +174,7 @@ resource "aws_lambda_function" "this" { KEEP_STREAM = var.keep_stream BATCH_SIZE = var.batch_size EXTRA_LABELS = var.extra_labels + DROP_LABELS = var.drop_labels OMIT_EXTRA_LABELS_PREFIX = var.omit_extra_labels_prefix ? "true" : "false" TENANT_ID = var.tenant_id SKIP_TLS_VERIFY = var.skip_tls_verify diff --git a/tools/lambda-promtail/variables.tf b/tools/lambda-promtail/variables.tf index 040f1961a8210..bda956bc855b3 100644 --- a/tools/lambda-promtail/variables.tf +++ b/tools/lambda-promtail/variables.tf @@ -72,6 +72,12 @@ variable "extra_labels" { default = "" } +variable "drop_labels" { + type = string + description = "Comma separated list of labels to be drop, in the format 'name1,name2,...,nameN' to be omitted to entries forwarded by lambda-promtail." + default = "" +} + variable "omit_extra_labels_prefix" { type = bool description = "Whether or not to omit the prefix `__extra_` from extra labels defined in the variable `extra_labels`."