From 815e0f5e59e9165e42bfe7179f0bbc8ca939a164 Mon Sep 17 00:00:00 2001 From: "Grot (@grafanabot)" <43478413+grafanabot@users.noreply.github.com> Date: Mon, 8 Jan 2024 13:33:36 +0100 Subject: [PATCH] [release-2.8.x] Ruler: protect overrides map with mutex when accessing tenant configs (#11602) Backport cd3cf6291ffb169bf4798f932ff1927f0b45e244 from #11601 --- CHANGELOG.md | 1 + pkg/ruler/registry.go | 7 ++++++- pkg/ruler/registry_test.go | 27 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 760dfaed69f9..172b9a924ac6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ##### Fixes * [10375](https://github.com/grafana/loki/pull/10375) **trevorwhitney**: Fix ingester query when getting label values by passing matchers +* [11601](https://github.com/grafana/loki/pull/11601) **dannykopping** Ruler: Fixed a panic that can be caused by concurrent read-write access of tenant configs when there are a large amount of rules. ### All Changes diff --git a/pkg/ruler/registry.go b/pkg/ruler/registry.go index eab7005b76ff..7ef0d880db0b 100644 --- a/pkg/ruler/registry.go +++ b/pkg/ruler/registry.go @@ -5,6 +5,7 @@ import ( "fmt" "net/url" "strings" + "sync" "time" "github.com/go-kit/log" @@ -33,7 +34,8 @@ type walRegistry struct { logger log.Logger manager instance.Manager - metrics *storageRegistryMetrics + metrics *storageRegistryMetrics + overridesMu sync.Mutex config Config overrides RulesLimits @@ -223,6 +225,9 @@ func (r *walRegistry) getTenantConfig(tenant string) (instance.Config, error) { } func (r *walRegistry) getTenantRemoteWriteConfig(tenant string, base RemoteWriteConfig) (*RemoteWriteConfig, error) { + r.overridesMu.Lock() + defer r.overridesMu.Unlock() + overrides, err := base.Clone() if err != nil { return nil, fmt.Errorf("error generating tenant remote-write config: %w", err) diff --git a/pkg/ruler/registry_test.go b/pkg/ruler/registry_test.go index 614e10711f24..320cb54aea2e 100644 --- a/pkg/ruler/registry_test.go +++ b/pkg/ruler/registry_test.go @@ -5,6 +5,7 @@ import ( "fmt" "net/url" "strings" + "sync" "testing" "time" @@ -376,6 +377,32 @@ func TestTenantRemoteWriteConfigWithOverride(t *testing.T) { assert.ElementsMatch(t, actual, expected, "QueueConfig capacity do not match") } +func TestTenantRemoteWriteConfigWithOverrideConcurrentAccess(t *testing.T) { + require.NotPanics(t, func() { + reg := setupRegistry(t, cfg, newFakeLimits()) + var wg sync.WaitGroup + for i := 0; i < 1000; i++ { + wg.Add(1) + go func(reg *walRegistry) { + defer wg.Done() + + _, err := reg.getTenantConfig(enabledRWTenant) + require.NoError(t, err) + }(reg) + + wg.Add(1) + go func(reg *walRegistry) { + defer wg.Done() + + _, err := reg.getTenantConfig(additionalHeadersRWTenant) + require.NoError(t, err) + }(reg) + } + + wg.Wait() + }) +} + func TestTenantRemoteWriteConfigWithoutOverride(t *testing.T) { reg := setupRegistry(t, backCompatCfg, newFakeLimitsBackwardCompat())