Skip to content

Commit

Permalink
operator: Use cluster monitoring alertmanager by default on openshift…
Browse files Browse the repository at this point in the history
… clusters (grafana#7272)
  • Loading branch information
Mohamed-Amine Bouqsimi authored and lxwzy committed Nov 7, 2022
1 parent 9133327 commit cef696b
Show file tree
Hide file tree
Showing 21 changed files with 550 additions and 37 deletions.
1 change: 1 addition & 0 deletions operator/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## Main

- [7272](https://github.com/grafana/loki/pull/7272) **aminesnow**: Use cluster monitoring alertmanager by default on OpenShift clusters
- [7295](https://github.com/grafana/loki/pull/7295) **xperimental**: Add extended-validation for rules on OpenShift
- [6951](https://github.com/grafana/loki/pull/6951) **Red-GV**: Adding operational Lokistack alerts
- [7254](https://github.com/grafana/loki/pull/7254) **periklis**: Expose Loki Ruler API via the lokistack-gateway
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,12 @@ spec:
- get
- patch
- update
- apiGroups:
- monitoring.coreos.com
resources:
- alertmanagers
verbs:
- patch
- apiGroups:
- monitoring.coreos.com
resources:
Expand Down
6 changes: 6 additions & 0 deletions operator/config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ rules:
- get
- patch
- update
- apiGroups:
- monitoring.coreos.com
resources:
- alertmanagers
verbs:
- patch
- apiGroups:
- monitoring.coreos.com
resources:
Expand Down
1 change: 1 addition & 0 deletions operator/controllers/loki/lokistack_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ type LokiStackReconciler struct {
// +kubebuilder:rbac:groups=apps,resources=deployments;statefulsets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings;clusterroles;roles;rolebindings,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors;prometheusrules,verbs=get;list;watch;create;update;delete
// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=alertmanagers,verbs=patch
// +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;create;update
// +kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses,verbs=get;list;watch;create;update
// +kubebuilder:rbac:groups=config.openshift.io,resources=dnses;apiservers,verbs=get;list;watch
Expand Down
30 changes: 30 additions & 0 deletions operator/internal/handlers/internal/openshift/alertmanager.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package openshift

import (
"context"

"github.com/ViaQ/logerr/v2/kverrors"
lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
"github.com/grafana/loki/operator/internal/external/k8s"
"github.com/grafana/loki/operator/internal/manifests/openshift"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// AlertManagerSVCExists reports whether the Service named
// openshift.MonitoringSVCOperated exists in the openshift.MonitoringNS
// namespace.
//
// The cluster is only queried when the stack has a tenants configuration whose
// mode is lokiv1.OpenshiftLogging or lokiv1.OpenshiftNetwork; for every other
// stack the function returns (false, nil) immediately. A NotFound response from
// the lookup yields (false, nil), while any other lookup error is wrapped and
// returned to the caller.
func AlertManagerSVCExists(ctx context.Context, stack lokiv1.LokiStackSpec, k k8s.Client) (bool, error) {
	if stack.Tenants == nil {
		return false, nil
	}

	switch stack.Tenants.Mode {
	case lokiv1.OpenshiftLogging, lokiv1.OpenshiftNetwork:
		// OpenShift tenancy modes: continue with the service lookup.
	default:
		return false, nil
	}

	key := client.ObjectKey{
		Name:      openshift.MonitoringSVCOperated,
		Namespace: openshift.MonitoringNS,
	}
	svc := &corev1.Service{}

	err := k.Get(ctx, key, svc)
	switch {
	case err == nil:
		return true, nil
	case apierrors.IsNotFound(err):
		// A missing service is an expected outcome, not a failure.
		return false, nil
	default:
		return false, kverrors.Wrap(err, "failed to lookup alertmanager service", "name", key)
	}
}
15 changes: 15 additions & 0 deletions operator/internal/handlers/lokistack_create_or_update.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ import (
lokiv1beta1 "github.com/grafana/loki/operator/apis/loki/v1beta1"
"github.com/grafana/loki/operator/internal/external/k8s"
"github.com/grafana/loki/operator/internal/handlers/internal/gateway"
"github.com/grafana/loki/operator/internal/handlers/internal/openshift"
"github.com/grafana/loki/operator/internal/handlers/internal/rules"
"github.com/grafana/loki/operator/internal/handlers/internal/storage"
"github.com/grafana/loki/operator/internal/handlers/internal/tlsprofile"
"github.com/grafana/loki/operator/internal/manifests"
manifests_openshift "github.com/grafana/loki/operator/internal/manifests/openshift"
storageoptions "github.com/grafana/loki/operator/internal/manifests/storage"
"github.com/grafana/loki/operator/internal/metrics"
"github.com/grafana/loki/operator/internal/status"
Expand Down Expand Up @@ -168,6 +170,7 @@ func CreateOrUpdateLokiStack(
recordingRules []lokiv1beta1.RecordingRule
rulerConfig *lokiv1beta1.RulerConfigSpec
rulerSecret *manifests.RulerSecret
ocpAmEnabled bool
)
if stack.Spec.Rules != nil && stack.Spec.Rules.Enabled {
alertingRules, recordingRules, err = rules.List(ctx, k, req.Namespace, stack.Spec.Rules)
Expand Down Expand Up @@ -203,6 +206,13 @@ func CreateOrUpdateLokiStack(
}
}
}

ocpAmEnabled, err = openshift.AlertManagerSVCExists(ctx, stack.Spec, k)
if err != nil {
ll.Error(err, "failed to check OCP AlertManager")
return err
}

}

// Here we will translate the lokiv1.LokiStack options into manifest options
Expand All @@ -226,6 +236,11 @@ func CreateOrUpdateLokiStack(
Configs: tenantConfigs,
},
TLSProfileType: projectconfigv1.TLSProfileType(fg.TLSProfile),
OpenShiftOptions: manifests_openshift.Options{
BuildOpts: manifests_openshift.BuildOptions{
AlertManagerEnabled: ocpAmEnabled,
},
},
}

ll.Info("begin building manifests")
Expand Down
9 changes: 7 additions & 2 deletions operator/internal/manifests/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ import (
// LokiConfigMap creates the single configmap containing the loki configuration for the whole cluster
func LokiConfigMap(opt Options) (*corev1.ConfigMap, string, error) {
cfg := ConfigOptions(opt)

if opt.Stack.Tenants != nil {
if err := ConfigureOptionsForMode(&cfg, opt); err != nil {
return nil, "", err
}
}

c, rc, err := config.Build(cfg)
if err != nil {
return nil, "", err
Expand Down Expand Up @@ -55,8 +62,6 @@ func ConfigOptions(opt Options) config.Options {
)

if rulerEnabled {
rulerEnabled = true

// Map alertmanager config from CRD to config options
if opt.Ruler.Spec != nil {
evalInterval = string(opt.Ruler.Spec.EvalutionInterval)
Expand Down
204 changes: 203 additions & 1 deletion operator/internal/manifests/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ import (

"github.com/google/uuid"
lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
"github.com/grafana/loki/operator/apis/loki/v1beta1"
"github.com/grafana/loki/operator/internal/manifests"
"github.com/grafana/loki/operator/internal/manifests/internal/config"
"github.com/grafana/loki/operator/internal/manifests/openshift"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
Expand All @@ -28,7 +30,6 @@ func TestConfigOptions_UserOptionsTakePrecedence(t *testing.T) {
// the user-defined values. This creates an all-inclusive manifests.Options and then checks
// that every value is present in the result
opts := randomConfigOptions()

res := manifests.ConfigOptions(opts)

expected, err := json.Marshal(opts.Stack)
Expand Down Expand Up @@ -287,3 +288,204 @@ func TestConfigOptions_RetentionConfig(t *testing.T) {
})
}
}

// TestConfigOptions_RulerAlertManager checks the ruler Alertmanager
// configuration that results from calling ConfigOptions followed by
// ConfigureOptionsForMode when no ruler spec is supplied.
//
// Static and dynamic tenant modes are expected to leave the Alertmanager
// configuration unset. The openshift-logging and openshift-network modes, with
// AlertManagerEnabled set in the OpenShift build options, are expected to get
// the default configuration targeting the alertmanager-operated service in the
// openshift-monitoring namespace.
func TestConfigOptions_RulerAlertManager(t *testing.T) {
	tt := []struct {
		desc        string
		opts        manifests.Options
		wantOptions *config.AlertManagerConfig
	}{
		{
			desc: "static mode",
			opts: manifests.Options{
				Stack: lokiv1.LokiStackSpec{
					Tenants: &lokiv1.TenantsSpec{
						Mode: lokiv1.Static,
					},
				},
			},
			wantOptions: nil,
		},
		{
			desc: "dynamic mode",
			opts: manifests.Options{
				Stack: lokiv1.LokiStackSpec{
					Tenants: &lokiv1.TenantsSpec{
						Mode: lokiv1.Dynamic,
					},
				},
			},
			wantOptions: nil,
		},
		{
			desc: "openshift-logging mode",
			opts: manifests.Options{
				Stack: lokiv1.LokiStackSpec{
					Tenants: &lokiv1.TenantsSpec{
						Mode: lokiv1.OpenshiftLogging,
					},
				},
				OpenShiftOptions: openshift.Options{
					BuildOpts: openshift.BuildOptions{
						AlertManagerEnabled: true,
					},
				},
			},
			wantOptions: &config.AlertManagerConfig{
				EnableV2:        true,
				EnableDiscovery: true,
				RefreshInterval: "1m",
				Hosts:           "https://_web._tcp.alertmanager-operated.openshift-monitoring.svc",
			},
		},
		{
			desc: "openshift-network mode",
			opts: manifests.Options{
				Stack: lokiv1.LokiStackSpec{
					Tenants: &lokiv1.TenantsSpec{
						Mode: lokiv1.OpenshiftNetwork,
					},
				},
				OpenShiftOptions: openshift.Options{
					BuildOpts: openshift.BuildOptions{
						AlertManagerEnabled: true,
					},
				},
			},
			wantOptions: &config.AlertManagerConfig{
				EnableV2:        true,
				EnableDiscovery: true,
				RefreshInterval: "1m",
				Hosts:           "https://_web._tcp.alertmanager-operated.openshift-monitoring.svc",
			},
		},
	}

	for _, tc := range tt {
		// Capture the range variable for the parallel subtest closure
		// (needed on Go versions before 1.22).
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			t.Parallel()

			cfg := manifests.ConfigOptions(tc.opts)
			err := manifests.ConfigureOptionsForMode(&cfg, tc.opts)

			// NoError prints the error message on failure, unlike Nil.
			require.NoError(t, err)
			require.Equal(t, tc.wantOptions, cfg.Ruler.AlertManager)
		})
	}
}

// TestConfigOptions_RulerAlertManager_UserOverride checks that an
// AlertManagerSpec supplied through the ruler spec wins over the OpenShift
// default Alertmanager configuration.
//
// In the openshift-logging and openshift-network cases rules are enabled, a
// custom AlertManagerSpec is provided and AlertManagerEnabled is set; the
// resulting configuration is expected to carry the user-provided values. The
// static and dynamic cases provide no ruler spec and expect the Alertmanager
// configuration to stay unset.
func TestConfigOptions_RulerAlertManager_UserOverride(t *testing.T) {
	tt := []struct {
		desc        string
		opts        manifests.Options
		wantOptions *config.AlertManagerConfig
	}{
		{
			desc: "static mode",
			opts: manifests.Options{
				Stack: lokiv1.LokiStackSpec{
					Tenants: &lokiv1.TenantsSpec{
						Mode: lokiv1.Static,
					},
				},
			},
			wantOptions: nil,
		},
		{
			desc: "dynamic mode",
			opts: manifests.Options{
				Stack: lokiv1.LokiStackSpec{
					Tenants: &lokiv1.TenantsSpec{
						Mode: lokiv1.Dynamic,
					},
				},
			},
			wantOptions: nil,
		},
		{
			desc: "openshift-logging mode",
			opts: manifests.Options{
				Stack: lokiv1.LokiStackSpec{
					Tenants: &lokiv1.TenantsSpec{
						Mode: lokiv1.OpenshiftLogging,
					},
					Rules: &lokiv1.RulesSpec{
						Enabled: true,
					},
				},
				Ruler: manifests.Ruler{
					Spec: &v1beta1.RulerConfigSpec{
						AlertManagerSpec: &v1beta1.AlertManagerSpec{
							EnableV2: false,
							DiscoverySpec: &v1beta1.AlertManagerDiscoverySpec{
								EnableSRV:       false,
								RefreshInterval: "2m",
							},
							Endpoints: []string{"http://my-alertmanager"},
						},
					},
				},
				OpenShiftOptions: openshift.Options{
					BuildOpts: openshift.BuildOptions{
						AlertManagerEnabled: true,
					},
				},
			},
			wantOptions: &config.AlertManagerConfig{
				EnableV2:        false,
				EnableDiscovery: false,
				RefreshInterval: "2m",
				Hosts:           "http://my-alertmanager",
			},
		},
		{
			desc: "openshift-network mode",
			opts: manifests.Options{
				Stack: lokiv1.LokiStackSpec{
					Tenants: &lokiv1.TenantsSpec{
						Mode: lokiv1.OpenshiftNetwork,
					},
					Rules: &lokiv1.RulesSpec{
						Enabled: true,
					},
				},
				Ruler: manifests.Ruler{
					Spec: &v1beta1.RulerConfigSpec{
						AlertManagerSpec: &v1beta1.AlertManagerSpec{
							EnableV2: false,
							DiscoverySpec: &v1beta1.AlertManagerDiscoverySpec{
								EnableSRV:       false,
								RefreshInterval: "2m",
							},
							Endpoints: []string{"http://my-alertmanager"},
						},
					},
				},
				OpenShiftOptions: openshift.Options{
					BuildOpts: openshift.BuildOptions{
						AlertManagerEnabled: true,
					},
				},
			},
			wantOptions: &config.AlertManagerConfig{
				EnableV2:        false,
				EnableDiscovery: false,
				RefreshInterval: "2m",
				Hosts:           "http://my-alertmanager",
			},
		},
	}

	for _, tc := range tt {
		// Capture the range variable for the parallel subtest closure
		// (needed on Go versions before 1.22).
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			t.Parallel()

			cfg := manifests.ConfigOptions(tc.opts)
			err := manifests.ConfigureOptionsForMode(&cfg, tc.opts)

			// NoError prints the error message on failure, unlike Nil.
			require.NoError(t, err)
			require.Equal(t, tc.wantOptions, cfg.Ruler.AlertManager)
		})
	}
}
Loading

0 comments on commit cef696b

Please sign in to comment.