Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

operator: Use cluster monitoring alertmanager by default on openshift clusters #7272

Merged
merged 13 commits into from
Oct 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions operator/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## Main

- [7272](https://github.com/grafana/loki/pull/7272) **aminesnow**: Use cluster monitoring alertmanager by default on openshift clusters
- [7295](https://github.com/grafana/loki/pull/7295) **xperimental**: Add extended-validation for rules on OpenShift
- [6951](https://github.com/grafana/loki/pull/6951) **Red-GV**: Adding operational Lokistack alerts
- [7254](https://github.com/grafana/loki/pull/7254) **periklis**: Expose Loki Ruler API via the lokistack-gateway
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,12 @@ spec:
- get
- patch
- update
- apiGroups:
- monitoring.coreos.com
resources:
- alertmanagers
verbs:
- patch
- apiGroups:
- monitoring.coreos.com
resources:
Expand Down
6 changes: 6 additions & 0 deletions operator/config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ rules:
- get
- patch
- update
- apiGroups:
- monitoring.coreos.com
resources:
- alertmanagers
verbs:
- patch
- apiGroups:
- monitoring.coreos.com
resources:
Expand Down
1 change: 1 addition & 0 deletions operator/controllers/loki/lokistack_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ type LokiStackReconciler struct {
// +kubebuilder:rbac:groups=apps,resources=deployments;statefulsets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings;clusterroles;roles;rolebindings,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors;prometheusrules,verbs=get;list;watch;create;update;delete
// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=alertmanagers,verbs=patch
// +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;create;update
// +kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses,verbs=get;list;watch;create;update
// +kubebuilder:rbac:groups=config.openshift.io,resources=dnses;apiservers,verbs=get;list;watch
Expand Down
30 changes: 30 additions & 0 deletions operator/internal/handlers/internal/openshift/alertmanager.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package openshift

import (
"context"

"github.com/ViaQ/logerr/v2/kverrors"
lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
"github.com/grafana/loki/operator/internal/external/k8s"
"github.com/grafana/loki/operator/internal/manifests/openshift"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// AlertManagerSVCExists reports whether the OpenShift monitoring AlertManager
// service can be found in the cluster.
//
// No lookup is performed, and (false, nil) is returned, when the stack has no
// tenants configuration or its tenant mode is neither lokiv1.OpenshiftLogging
// nor lokiv1.OpenshiftNetwork. A not-found result from the lookup also yields
// (false, nil); any other lookup error is wrapped and returned.
func AlertManagerSVCExists(ctx context.Context, stack lokiv1.LokiStackSpec, k k8s.Client) (bool, error) {
	tenants := stack.Tenants
	if tenants == nil {
		return false, nil
	}

	switch tenants.Mode {
	case lokiv1.OpenshiftLogging, lokiv1.OpenshiftNetwork:
		// Only these modes are checked against the monitoring namespace.
	default:
		return false, nil
	}

	key := client.ObjectKey{
		Namespace: openshift.MonitoringNS,
		Name:      openshift.MonitoringSVCOperated,
	}
	svc := &corev1.Service{}

	err := k.Get(ctx, key, svc)
	switch {
	case err == nil:
		return true, nil
	case apierrors.IsNotFound(err):
		// A missing service is an expected outcome, not a failure.
		return false, nil
	default:
		return false, kverrors.Wrap(err, "failed to lookup alertmanager service", "name", key)
	}
}
15 changes: 15 additions & 0 deletions operator/internal/handlers/lokistack_create_or_update.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ import (
lokiv1beta1 "github.com/grafana/loki/operator/apis/loki/v1beta1"
"github.com/grafana/loki/operator/internal/external/k8s"
"github.com/grafana/loki/operator/internal/handlers/internal/gateway"
"github.com/grafana/loki/operator/internal/handlers/internal/openshift"
"github.com/grafana/loki/operator/internal/handlers/internal/rules"
"github.com/grafana/loki/operator/internal/handlers/internal/storage"
"github.com/grafana/loki/operator/internal/handlers/internal/tlsprofile"
"github.com/grafana/loki/operator/internal/manifests"
manifests_openshift "github.com/grafana/loki/operator/internal/manifests/openshift"
storageoptions "github.com/grafana/loki/operator/internal/manifests/storage"
"github.com/grafana/loki/operator/internal/metrics"
"github.com/grafana/loki/operator/internal/status"
Expand Down Expand Up @@ -168,6 +170,7 @@ func CreateOrUpdateLokiStack(
recordingRules []lokiv1beta1.RecordingRule
rulerConfig *lokiv1beta1.RulerConfigSpec
rulerSecret *manifests.RulerSecret
ocpAmEnabled bool
)
if stack.Spec.Rules != nil && stack.Spec.Rules.Enabled {
alertingRules, recordingRules, err = rules.List(ctx, k, req.Namespace, stack.Spec.Rules)
Expand Down Expand Up @@ -203,6 +206,13 @@ func CreateOrUpdateLokiStack(
}
}
}

ocpAmEnabled, err = openshift.AlertManagerSVCExists(ctx, stack.Spec, k)
if err != nil {
ll.Error(err, "failed to check OCP AlertManager")
return err
}

}

// Here we will translate the lokiv1.LokiStack options into manifest options
Expand All @@ -226,6 +236,11 @@ func CreateOrUpdateLokiStack(
Configs: tenantConfigs,
},
TLSProfileType: projectconfigv1.TLSProfileType(fg.TLSProfile),
OpenShiftOptions: manifests_openshift.Options{
BuildOpts: manifests_openshift.BuildOptions{
AlertManagerEnabled: ocpAmEnabled,
},
},
}

ll.Info("begin building manifests")
Expand Down
9 changes: 7 additions & 2 deletions operator/internal/manifests/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ import (
// LokiConfigMap creates the single configmap containing the loki configuration for the whole cluster
func LokiConfigMap(opt Options) (*corev1.ConfigMap, string, error) {
cfg := ConfigOptions(opt)

if opt.Stack.Tenants != nil {
if err := ConfigureOptionsForMode(&cfg, opt); err != nil {
return nil, "", err
}
}

c, rc, err := config.Build(cfg)
if err != nil {
return nil, "", err
Expand Down Expand Up @@ -55,8 +62,6 @@ func ConfigOptions(opt Options) config.Options {
)

if rulerEnabled {
rulerEnabled = true

// Map alertmanager config from CRD to config options
if opt.Ruler.Spec != nil {
evalInterval = string(opt.Ruler.Spec.EvalutionInterval)
Expand Down
204 changes: 203 additions & 1 deletion operator/internal/manifests/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ import (

"github.com/google/uuid"
lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
"github.com/grafana/loki/operator/apis/loki/v1beta1"
"github.com/grafana/loki/operator/internal/manifests"
"github.com/grafana/loki/operator/internal/manifests/internal/config"
"github.com/grafana/loki/operator/internal/manifests/openshift"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
Expand All @@ -28,7 +30,6 @@ func TestConfigOptions_UserOptionsTakePrecedence(t *testing.T) {
// the user-defined values. This creates an all-inclusive manifests.Options and then checks
// that every value is present in the result
opts := randomConfigOptions()

res := manifests.ConfigOptions(opts)

expected, err := json.Marshal(opts.Stack)
Expand Down Expand Up @@ -287,3 +288,204 @@ func TestConfigOptions_RetentionConfig(t *testing.T) {
})
}
}

// TestConfigOptions_RulerAlertManager is a table-driven test of the ruler
// AlertManager configuration obtained by calling manifests.ConfigOptions and
// then manifests.ConfigureOptionsForMode on the result.
//
// None of the cases supplies a ruler spec. The static and dynamic cases expect
// no AlertManager configuration; the openshift-logging and openshift-network
// cases set AlertManagerEnabled and expect the configuration listed in their
// wantOptions.
func TestConfigOptions_RulerAlertManager(t *testing.T) {
tt := []struct {
// desc is used as the subtest name.
desc string
// opts is the input handed to ConfigOptions and ConfigureOptionsForMode.
opts manifests.Options
// wantOptions is the expected cfg.Ruler.AlertManager; nil means none.
wantOptions *config.AlertManagerConfig
}{
{
// Static tenant mode: no AlertManager configuration is expected.
desc: "static mode",
opts: manifests.Options{
Stack: lokiv1.LokiStackSpec{
Tenants: &lokiv1.TenantsSpec{
Mode: lokiv1.Static,
},
},
},
wantOptions: nil,
},
{
// Dynamic tenant mode: no AlertManager configuration is expected.
desc: "dynamic mode",
opts: manifests.Options{
Stack: lokiv1.LokiStackSpec{
Tenants: &lokiv1.TenantsSpec{
Mode: lokiv1.Dynamic,
},
},
},
wantOptions: nil,
},
{
// OpenshiftLogging mode with AlertManagerEnabled set.
desc: "openshift-logging mode",
periklis marked this conversation as resolved.
Show resolved Hide resolved
opts: manifests.Options{
Stack: lokiv1.LokiStackSpec{
Tenants: &lokiv1.TenantsSpec{
Mode: lokiv1.OpenshiftLogging,
},
},
OpenShiftOptions: openshift.Options{
BuildOpts: openshift.BuildOptions{
AlertManagerEnabled: true,
},
},
},
wantOptions: &config.AlertManagerConfig{
EnableV2: true,
EnableDiscovery: true,
RefreshInterval: "1m",
Hosts: "https://_web._tcp.alertmanager-operated.openshift-monitoring.svc",
},
},
{
// OpenshiftNetwork mode with AlertManagerEnabled set; the expected
// configuration is the same as in the openshift-logging case.
desc: "openshift-network mode",
opts: manifests.Options{
Stack: lokiv1.LokiStackSpec{
Tenants: &lokiv1.TenantsSpec{
Mode: lokiv1.OpenshiftNetwork,
},
},
OpenShiftOptions: openshift.Options{
BuildOpts: openshift.BuildOptions{
AlertManagerEnabled: true,
},
},
},
wantOptions: &config.AlertManagerConfig{
EnableV2: true,
EnableDiscovery: true,
RefreshInterval: "1m",
Hosts: "https://_web._tcp.alertmanager-operated.openshift-monitoring.svc",
},
},
}

for _, tc := range tt {
// Re-declare tc so the closure below, which calls t.Parallel, captures
// this iteration's case.
// NOTE(review): presumably only needed before Go 1.22 - confirm against go.mod.
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()

// Build the base options first, then apply the mode-specific step.
cfg := manifests.ConfigOptions(tc.opts)
err := manifests.ConfigureOptionsForMode(&cfg, tc.opts)

require.Nil(t, err)
require.Equal(t, tc.wantOptions, cfg.Ruler.AlertManager)
})
}
}

// TestConfigOptions_RulerAlertManager_UserOverride is a table-driven test of
// the ruler AlertManager configuration obtained by calling
// manifests.ConfigOptions and then manifests.ConfigureOptionsForMode when the
// options carry a user-supplied AlertManagerSpec.
//
// The static and dynamic cases supply no ruler spec and expect no AlertManager
// configuration. The openshift-logging and openshift-network cases enable rules,
// supply an AlertManagerSpec and set AlertManagerEnabled; they expect
// wantOptions to carry the values from the supplied spec.
func TestConfigOptions_RulerAlertManager_UserOverride(t *testing.T) {
tt := []struct {
// desc is used as the subtest name.
desc string
// opts is the input handed to ConfigOptions and ConfigureOptionsForMode.
opts manifests.Options
// wantOptions is the expected cfg.Ruler.AlertManager; nil means none.
wantOptions *config.AlertManagerConfig
}{
{
// Static tenant mode without a ruler spec: nothing is expected.
desc: "static mode",
opts: manifests.Options{
Stack: lokiv1.LokiStackSpec{
Tenants: &lokiv1.TenantsSpec{
Mode: lokiv1.Static,
},
},
},
wantOptions: nil,
},
{
// Dynamic tenant mode without a ruler spec: nothing is expected.
desc: "dynamic mode",
opts: manifests.Options{
Stack: lokiv1.LokiStackSpec{
Tenants: &lokiv1.TenantsSpec{
Mode: lokiv1.Dynamic,
},
},
},
wantOptions: nil,
},
{
// OpenshiftLogging mode: rules enabled, user AlertManagerSpec supplied
// and AlertManagerEnabled set.
desc: "openshift-logging mode",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a fourth mode to test nowadays openshift-network

opts: manifests.Options{
Stack: lokiv1.LokiStackSpec{
Tenants: &lokiv1.TenantsSpec{
Mode: lokiv1.OpenshiftLogging,
},
Rules: &lokiv1.RulesSpec{
Enabled: true,
},
},
Ruler: manifests.Ruler{
Spec: &v1beta1.RulerConfigSpec{
AlertManagerSpec: &v1beta1.AlertManagerSpec{
EnableV2: false,
DiscoverySpec: &v1beta1.AlertManagerDiscoverySpec{
EnableSRV: false,
RefreshInterval: "2m",
},
Endpoints: []string{"http://my-alertmanager"},
},
},
},
OpenShiftOptions: openshift.Options{
BuildOpts: openshift.BuildOptions{
AlertManagerEnabled: true,
},
},
},
// Expected values match the AlertManagerSpec supplied above.
wantOptions: &config.AlertManagerConfig{
EnableV2: false,
EnableDiscovery: false,
RefreshInterval: "2m",
Hosts: "http://my-alertmanager",
},
},
{
// OpenshiftNetwork mode: same inputs and expectation as the
// openshift-logging case apart from the tenant mode.
desc: "openshift-network mode",
opts: manifests.Options{
Stack: lokiv1.LokiStackSpec{
Tenants: &lokiv1.TenantsSpec{
Mode: lokiv1.OpenshiftNetwork,
},
Rules: &lokiv1.RulesSpec{
Enabled: true,
},
},
Ruler: manifests.Ruler{
Spec: &v1beta1.RulerConfigSpec{
AlertManagerSpec: &v1beta1.AlertManagerSpec{
EnableV2: false,
DiscoverySpec: &v1beta1.AlertManagerDiscoverySpec{
EnableSRV: false,
RefreshInterval: "2m",
},
Endpoints: []string{"http://my-alertmanager"},
},
},
},
OpenShiftOptions: openshift.Options{
BuildOpts: openshift.BuildOptions{
AlertManagerEnabled: true,
},
},
},
// Expected values match the AlertManagerSpec supplied above.
wantOptions: &config.AlertManagerConfig{
EnableV2: false,
EnableDiscovery: false,
RefreshInterval: "2m",
Hosts: "http://my-alertmanager",
},
},
}

for _, tc := range tt {
// Re-declare tc so the closure below, which calls t.Parallel, captures
// this iteration's case.
// NOTE(review): presumably only needed before Go 1.22 - confirm against go.mod.
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()

// Build the base options first, then apply the mode-specific step.
cfg := manifests.ConfigOptions(tc.opts)
err := manifests.ConfigureOptionsForMode(&cfg, tc.opts)
require.Nil(t, err)
require.Equal(t, tc.wantOptions, cfg.Ruler.AlertManager)
})
}
}
Loading