Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

operator: Fix application tenant alertmanager configuration #9963

Merged
merged 5 commits into from
Jul 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions operator/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## Main

- [9963](https://github.com/grafana/loki/pull/9963) **xperimental**: Fix application tenant alertmanager configuration
- [9795](https://github.com/grafana/loki/pull/9795) **JoaoBraveCoding**: Add initContainer to zone aware components to gatekeep them from starting without the AZ annotation
- [9503](https://github.com/grafana/loki/pull/9503) **shwetaap**: Add Pod annotations with node topology labels to support zone aware scheduling
- [9930](https://github.com/grafana/loki/pull/9930) **periklis**: Use PodAntiAffinity for all components
Expand Down
2 changes: 1 addition & 1 deletion operator/controllers/loki/lokistack_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ func (r *LokiStackReconciler) enqueueForAlertManagerServices() handler.EventHand
var requests []reconcile.Request

if obj.GetName() == openshift.MonitoringSVCOperated &&
(obj.GetNamespace() == openshift.MonitoringUserwWrkloadNS ||
(obj.GetNamespace() == openshift.MonitoringUserWorkloadNS ||
obj.GetNamespace() == openshift.MonitoringNS) {

for _, stack := range lokiStacks.Items {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func UserWorkloadAlertManagerSVCExists(ctx context.Context, stack lokiv1.LokiSta
}

var svc corev1.Service
key := client.ObjectKey{Name: openshift.MonitoringSVCOperated, Namespace: openshift.MonitoringUserwWrkloadNS}
key := client.ObjectKey{Name: openshift.MonitoringSVCOperated, Namespace: openshift.MonitoringUserWorkloadNS}

err := k.Get(ctx, key, &svc)
if err != nil && !apierrors.IsNotFound(err) {
Expand Down
74 changes: 74 additions & 0 deletions operator/internal/manifests/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1047,6 +1047,80 @@ func TestConfigOptions_RulerOverrides_OCPUserWorkloadOnlyEnabled(t *testing.T) {
},
},
},
{
desc: "openshift-logging mode with application override",
opts: Options{
Stack: lokiv1.LokiStackSpec{
Rules: &lokiv1.RulesSpec{
Enabled: true,
},
Limits: &lokiv1.LimitsSpec{
Tenants: map[string]lokiv1.LimitsTemplateSpec{
"application": {
QueryLimits: &lokiv1.QueryLimitSpec{
QueryTimeout: "5m",
},
},
},
},
Tenants: &lokiv1.TenantsSpec{
Mode: lokiv1.OpenshiftLogging,
},
},
Timeouts: testTimeoutConfig(),
Ruler: Ruler{
Spec: &lokiv1.RulerConfigSpec{
AlertManagerSpec: &lokiv1.AlertManagerSpec{
EnableV2: false,
DiscoverySpec: &lokiv1.AlertManagerDiscoverySpec{
EnableSRV: false,
RefreshInterval: "2m",
},
Endpoints: []string{"http://my-alertmanager"},
},
},
},
OpenShiftOptions: openshift.Options{
BuildOpts: openshift.BuildOptions{
AlertManagerEnabled: false,
UserWorkloadAlertManagerEnabled: true,
},
},
},
wantOptions: &config.AlertManagerConfig{
EnableV2: false,
EnableDiscovery: false,
RefreshInterval: "2m",
Hosts: "http://my-alertmanager",
},
wantOverridesOptions: map[string]config.LokiOverrides{
"application": {
Limits: lokiv1.LimitsTemplateSpec{
QueryLimits: &lokiv1.QueryLimitSpec{
QueryTimeout: "5m",
},
},
Ruler: config.RulerOverrides{
AlertManager: &config.AlertManagerConfig{
Hosts: "https://_web._tcp.alertmanager-operated.openshift-user-workload-monitoring.svc",
EnableV2: true,
EnableDiscovery: true,
RefreshInterval: "1m",
Notifier: &config.NotifierConfig{
TLS: config.TLSConfig{
ServerName: pointer.String("alertmanager-user-workload.openshift-user-workload-monitoring.svc.cluster.local"),
CAPath: pointer.String("/var/run/ca/alertmanager/service-ca.crt"),
},
HeaderAuth: config.HeaderAuth{
Type: pointer.String("Bearer"),
CredentialsFile: pointer.String("/var/run/secrets/kubernetes.io/serviceaccount/token"),
},
},
},
},
},
},
},
{
desc: "openshift-network mode",
opts: Options{
Expand Down
2 changes: 1 addition & 1 deletion operator/internal/manifests/gateway_tenants.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ func ConfigureOptionsForMode(cfg *config.Options, opt Options) error {
case lokiv1.OpenshiftNetwork:
return openshift.ConfigureOptions(cfg, opt.OpenShiftOptions.BuildOpts.AlertManagerEnabled, false, "", "", "")
case lokiv1.OpenshiftLogging:
monitorServerName := fqdn(openshift.MonitoringSVCUserWorkload, openshift.MonitoringUserwWrkloadNS)
monitorServerName := fqdn(openshift.MonitoringSVCUserWorkload, openshift.MonitoringUserWorkloadNS)
return openshift.ConfigureOptions(
cfg,
opt.OpenShiftOptions.BuildOpts.AlertManagerEnabled,
Expand Down
23 changes: 6 additions & 17 deletions operator/internal/manifests/openshift/configure.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,24 +259,18 @@ func configureDefaultMonitoringAM(configOpt *config.Options) error {
}

func configureUserWorkloadAM(configOpt *config.Options, token, caPath, monitorServerName string) error {
if len(configOpt.Overrides) == 0 {
if configOpt.Overrides == nil {
configOpt.Overrides = map[string]config.LokiOverrides{}
}

lokiOverrides, ok := configOpt.Overrides[tenantApplication]
if ok {
return nil
}
lokiOverrides := configOpt.Overrides[tenantApplication]

lokiOverrides = config.LokiOverrides{
Ruler: config.RulerOverrides{
AlertManager: &config.AlertManagerConfig{},
},
if lokiOverrides.Ruler.AlertManager != nil {
return nil
}

configOpt.Overrides[tenantApplication] = lokiOverrides
amOverride := &config.AlertManagerConfig{
Hosts: fmt.Sprintf("https://_web._tcp.%s.%s.svc", MonitoringSVCOperated, MonitoringUserwWrkloadNS),
lokiOverrides.Ruler.AlertManager = &config.AlertManagerConfig{
Hosts: fmt.Sprintf("https://_web._tcp.%s.%s.svc", MonitoringSVCOperated, MonitoringUserWorkloadNS),
EnableV2: true,
EnableDiscovery: true,
RefreshInterval: "1m",
Expand All @@ -292,11 +286,6 @@ func configureUserWorkloadAM(configOpt *config.Options, token, caPath, monitorSe
},
}

if err := mergo.Merge(lokiOverrides.Ruler.AlertManager, amOverride); err != nil {
return kverrors.Wrap(err, "failed merging application tenant AlertManager config")
}

configOpt.Overrides[tenantApplication] = lokiOverrides

return nil
}
2 changes: 1 addition & 1 deletion operator/internal/manifests/openshift/var.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ var (
MonitoringSVCOperated = "alertmanager-operated"

MonitoringSVCUserWorkload = "alertmanager-user-workload"
MonitoringUserwWrkloadNS = "openshift-user-workload-monitoring"
MonitoringUserWorkloadNS = "openshift-user-workload-monitoring"
)

func authorizerRbacName(componentName string) string {
Expand Down