From 93e1e38d050198bfeb0d7a4f14328bc51df3aae9 Mon Sep 17 00:00:00 2001 From: Mohamed Mahmoud Date: Mon, 29 Apr 2024 19:26:30 -0400 Subject: [PATCH] NETOBSERV-1625: Add ebpf alerting for flows drop Signed-off-by: Mohamed Mahmoud --- .../v1beta1/flowcollector_types.go | 16 +++++ .../v1beta1/zz_generated.conversion.go | 2 + .../v1beta1/zz_generated.deepcopy.go | 5 ++ .../v1beta2/flowcollector_types.go | 16 +++++ .../v1beta2/zz_generated.deepcopy.go | 5 ++ .../flows.netobserv.io_flowcollectors.yaml | 28 +++++++++ ...observ-operator.clusterserviceversion.yaml | 2 + .../flows.netobserv.io_flowcollectors.yaml | 28 +++++++++ controllers/constants/constants.go | 1 + controllers/ebpf/agent-metrics.go | 61 +++++++++++++++++++ controllers/ebpf/agent_controller.go | 4 ++ docs/FlowCollector.md | 18 ++++++ 12 files changed, 186 insertions(+) diff --git a/apis/flowcollector/v1beta1/flowcollector_types.go b/apis/flowcollector/v1beta1/flowcollector_types.go index e111e520c..630d86c49 100644 --- a/apis/flowcollector/v1beta1/flowcollector_types.go +++ b/apis/flowcollector/v1beta1/flowcollector_types.go @@ -157,6 +157,16 @@ const ( FlowRTT AgentFeature = "FlowRTT" ) +// Name of a ebpf agent alert. +// Possible values are:
+// - `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+// +kubebuilder:validation:Enum:="NetObservDroppedFlows" +type EBPFAgentAlert string + +const ( + AlertDroppedFlows EBPFAgentAlert = "NetObservDroppedFlows" +) + // `EBPFMetrics` defines the desired eBPF agent configuration regarding metrics type EBPFMetrics struct { // Metrics server endpoint configuration for Prometheus scraper @@ -165,6 +175,12 @@ type EBPFMetrics struct { // Set `enable` to `true` to enable eBPF agent metrics collection. Enable *bool `json:"enable,omitempty"` + + // `disableAlerts` is a list of alerts that should be disabled. + // Possible values are:
+ // `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+ // +optional + DisableAlerts []EBPFAgentAlert `json:"disableAlerts"` } // `EBPFFlowFilter` defines the desired eBPF agent configuration regarding flow filtering diff --git a/apis/flowcollector/v1beta1/zz_generated.conversion.go b/apis/flowcollector/v1beta1/zz_generated.conversion.go index 7b9874908..d4dc657bc 100644 --- a/apis/flowcollector/v1beta1/zz_generated.conversion.go +++ b/apis/flowcollector/v1beta1/zz_generated.conversion.go @@ -500,6 +500,7 @@ func autoConvert_v1beta1_EBPFMetrics_To_v1beta2_EBPFMetrics(in *EBPFMetrics, out return err } out.Enable = (*bool)(unsafe.Pointer(in.Enable)) + out.DisableAlerts = *(*[]v1beta2.EBPFAgentAlert)(unsafe.Pointer(&in.DisableAlerts)) return nil } @@ -513,6 +514,7 @@ func autoConvert_v1beta2_EBPFMetrics_To_v1beta1_EBPFMetrics(in *v1beta2.EBPFMetr return err } out.Enable = (*bool)(unsafe.Pointer(in.Enable)) + out.DisableAlerts = *(*[]EBPFAgentAlert)(unsafe.Pointer(&in.DisableAlerts)) return nil } diff --git a/apis/flowcollector/v1beta1/zz_generated.deepcopy.go b/apis/flowcollector/v1beta1/zz_generated.deepcopy.go index b52593981..a221e4715 100644 --- a/apis/flowcollector/v1beta1/zz_generated.deepcopy.go +++ b/apis/flowcollector/v1beta1/zz_generated.deepcopy.go @@ -164,6 +164,11 @@ func (in *EBPFMetrics) DeepCopyInto(out *EBPFMetrics) { *out = new(bool) **out = **in } + if in.DisableAlerts != nil { + in, out := &in.DisableAlerts, &out.DisableAlerts + *out = make([]EBPFAgentAlert, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EBPFMetrics. diff --git a/apis/flowcollector/v1beta2/flowcollector_types.go b/apis/flowcollector/v1beta2/flowcollector_types.go index 9279fd386..82fcf541a 100644 --- a/apis/flowcollector/v1beta2/flowcollector_types.go +++ b/apis/flowcollector/v1beta2/flowcollector_types.go @@ -164,6 +164,16 @@ const ( FlowRTT AgentFeature = "FlowRTT" ) +// Name of a ebpf agent alert. +// Possible values are:
+// - `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+// +kubebuilder:validation:Enum:="NetObservDroppedFlows" +type EBPFAgentAlert string + +const ( + AlertDroppedFlows EBPFAgentAlert = "NetObservDroppedFlows" +) + // `EBPFMetrics` defines the desired eBPF agent configuration regarding metrics type EBPFMetrics struct { // Metrics server endpoint configuration for Prometheus scraper @@ -172,6 +182,12 @@ type EBPFMetrics struct { // Set `enable` to `true` to enable eBPF agent metrics collection. Enable *bool `json:"enable,omitempty"` + + // `disableAlerts` is a list of alerts that should be disabled. + // Possible values are:
+ // `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+ // +optional + DisableAlerts []EBPFAgentAlert `json:"disableAlerts"` } // `EBPFFlowFilter` defines the desired eBPF agent configuration regarding flow filtering diff --git a/apis/flowcollector/v1beta2/zz_generated.deepcopy.go b/apis/flowcollector/v1beta2/zz_generated.deepcopy.go index ff6c268b8..09b142b35 100644 --- a/apis/flowcollector/v1beta2/zz_generated.deepcopy.go +++ b/apis/flowcollector/v1beta2/zz_generated.deepcopy.go @@ -316,6 +316,11 @@ func (in *EBPFMetrics) DeepCopyInto(out *EBPFMetrics) { *out = new(bool) **out = **in } + if in.DisableAlerts != nil { + in, out := &in.DisableAlerts, &out.DisableAlerts + *out = make([]EBPFAgentAlert, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EBPFMetrics. diff --git a/bundle/manifests/flows.netobserv.io_flowcollectors.yaml b/bundle/manifests/flows.netobserv.io_flowcollectors.yaml index dcfe92885..0f3ddfe56 100644 --- a/bundle/manifests/flows.netobserv.io_flowcollectors.yaml +++ b/bundle/manifests/flows.netobserv.io_flowcollectors.yaml @@ -265,6 +265,20 @@ spec: description: '`metrics` defines the eBPF agent configuration regarding metrics' properties: + disableAlerts: + description: |- + `disableAlerts` is a list of alerts that should be disabled. + Possible values are:
+ `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+ items: + description: |- + Name of a ebpf agent alert. + Possible values are:
+ - `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+ enum: + - NetObservDroppedFlows + type: string + type: array enable: description: Set `enable` to `true` to enable eBPF agent metrics collection. @@ -3637,6 +3651,20 @@ spec: description: '`metrics` defines the eBPF agent configuration regarding metrics' properties: + disableAlerts: + description: |- + `disableAlerts` is a list of alerts that should be disabled. + Possible values are:
+ `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+ items: + description: |- + Name of a ebpf agent alert. + Possible values are:
+ - `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+ enum: + - NetObservDroppedFlows + type: string + type: array enable: description: Set `enable` to `true` to enable eBPF agent metrics collection. diff --git a/bundle/manifests/netobserv-operator.clusterserviceversion.yaml b/bundle/manifests/netobserv-operator.clusterserviceversion.yaml index 16fda41a9..f36c4e0bb 100644 --- a/bundle/manifests/netobserv-operator.clusterserviceversion.yaml +++ b/bundle/manifests/netobserv-operator.clusterserviceversion.yaml @@ -703,6 +703,8 @@ spec: path: agent.ebpf.interfaces - displayName: Metrics path: agent.ebpf.metrics + - displayName: Disable alerts + path: agent.ebpf.metrics.disableAlerts - displayName: Enable path: agent.ebpf.metrics.enable - displayName: Server diff --git a/config/crd/bases/flows.netobserv.io_flowcollectors.yaml b/config/crd/bases/flows.netobserv.io_flowcollectors.yaml index 9fbc092f3..0240eab38 100644 --- a/config/crd/bases/flows.netobserv.io_flowcollectors.yaml +++ b/config/crd/bases/flows.netobserv.io_flowcollectors.yaml @@ -237,6 +237,20 @@ spec: metrics: description: '`metrics` defines the eBPF agent configuration regarding metrics' properties: + disableAlerts: + description: |- + `disableAlerts` is a list of alerts that should be disabled. + Possible values are:
+ `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+ items: + description: |- + Name of a ebpf agent alert. + Possible values are:
+ - `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+ enum: + - NetObservDroppedFlows + type: string + type: array enable: description: Set `enable` to `true` to enable eBPF agent metrics collection. type: boolean @@ -3349,6 +3363,20 @@ spec: metrics: description: '`metrics` defines the eBPF agent configuration regarding metrics' properties: + disableAlerts: + description: |- + `disableAlerts` is a list of alerts that should be disabled. + Possible values are:
+ `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+ items: + description: |- + Name of a ebpf agent alert. + Possible values are:
+ - `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.
+ enum: + - NetObservDroppedFlows + type: string + type: array enable: description: Set `enable` to `true` to enable eBPF agent metrics collection. type: boolean diff --git a/controllers/constants/constants.go b/controllers/constants/constants.go index b571f7544..ba0c8a878 100644 --- a/controllers/constants/constants.go +++ b/controllers/constants/constants.go @@ -18,6 +18,7 @@ const ( EBPFAgentName = "netobserv-ebpf-agent" EBPFAgentMetricsSvcName = "ebpf-agent-svc-prom" EBPFAgentMetricsSvcMonitoringName = "ebpf-agent-svc-monitor" + EBPFAgentPromoAlertRule = "ebpf-agent-prom-alert" EBPFPrivilegedNSSuffix = "-privileged" EBPFServiceAccount = EBPFAgentName EBPFSecurityContext = EBPFAgentName diff --git a/controllers/ebpf/agent-metrics.go b/controllers/ebpf/agent-metrics.go index 3abfb5912..f0f2e5fc7 100644 --- a/controllers/ebpf/agent-metrics.go +++ b/controllers/ebpf/agent-metrics.go @@ -24,6 +24,9 @@ func (c *AgentController) reconcileMetricsService(ctx context.Context, target *f if c.AvailableAPIs.HasSvcMonitor() { c.Managed.TryDelete(ctx, c.serviceMonitor) } + if c.AvailableAPIs.HasPromRule() { + c.Managed.TryDelete(ctx, c.prometheusRule) + } return nil } @@ -37,6 +40,13 @@ func (c *AgentController) reconcileMetricsService(ctx context.Context, target *f return err } } + + if c.AvailableAPIs.HasPromRule() { + promRules := c.agentPrometheusRule(target) + if err := reconcilers.GenericReconcile(ctx, c.Managed, &c.Client, c.prometheusRule, promRules, &report, helper.PrometheusRuleChanged); err != nil { + return err + } + } return nil } @@ -103,3 +113,54 @@ func (c *AgentController) promServiceMonitoring(target *flowslatest.FlowCollecto }, } } + +func (c *AgentController) agentPrometheusRule(target *flowslatest.FlowCollectorEBPF) *monitoringv1.PrometheusRule { + rules := []monitoringv1.Rule{} + d := monitoringv1.Duration("10m") + + // EBPF hashmap table is full Not receiving any new flows + if shouldAddAlert(flowslatest.AlertDroppedFlows, target.Metrics.DisableAlerts) 
{ + + rules = append(rules, monitoringv1.Rule{ + Alert: string(flowslatest.AlertDroppedFlows), + Annotations: map[string]string{ + "description": "NetObserv eBPF agent hashmap table is full, it means that the eBPF agent is not able to process new flows. Please consider to increase the hashmap table size.", + "summary": "NetObserv eBPF is not able to process any new flows", + }, + Expr: intstr.FromString("sum(rate(netobserv_agent_dropped_flows_total[1m])) > 0"), + For: &d, + Labels: map[string]string{ + "severity": "warning", + "app": "netobserv", + }, + }) + } + + prometheusRuleObject := monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.EBPFAgentPromoAlertRule, + Labels: map[string]string{ + "app": constants.EBPFAgentName, + }, + Namespace: c.PrivilegedNamespace(), + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "NetobservEBPFAgentAlerts", + Rules: rules, + }, + }, + }, + } + return &prometheusRuleObject +} + +func shouldAddAlert(name flowslatest.EBPFAgentAlert, disabledList []flowslatest.EBPFAgentAlert) bool { + for _, disabledAlert := range disabledList { + if name == disabledAlert { + return false + } + } + return true +} diff --git a/controllers/ebpf/agent_controller.go b/controllers/ebpf/agent_controller.go index fab0b9aca..de4e54725 100644 --- a/controllers/ebpf/agent_controller.go +++ b/controllers/ebpf/agent_controller.go @@ -105,6 +105,7 @@ type AgentController struct { volumes volumes.Builder promSvc *corev1.Service serviceMonitor *monitoringv1.ServiceMonitor + prometheusRule *monitoringv1.PrometheusRule } func NewAgentController(common *reconcilers.Instance) *AgentController { @@ -117,6 +118,9 @@ func NewAgentController(common *reconcilers.Instance) *AgentController { if common.AvailableAPIs.HasSvcMonitor() { agent.serviceMonitor = common.Managed.NewServiceMonitor(constants.EBPFAgentMetricsSvcMonitoringName) } + if common.AvailableAPIs.HasPromRule() { + agent.prometheusRule =
common.Managed.NewPrometheusRule(constants.EBPFAgentPromoAlertRule) + } return &agent } diff --git a/docs/FlowCollector.md b/docs/FlowCollector.md index a8ebc4789..40543b101 100644 --- a/docs/FlowCollector.md +++ b/docs/FlowCollector.md @@ -534,6 +534,15 @@ To filter a range of ports, use a "start-end" range, string format. For example + disableAlerts + []enum + + `disableAlerts` is a list of alerts that should be disabled. +Possible values are:
+`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.

+ + false + enable boolean @@ -7606,6 +7615,15 @@ To filter a range of ports, use a "start-end" range, string format. For example + disableAlerts + []enum + + `disableAlerts` is a list of alerts that should be disabled. +Possible values are:
+`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.

+ + false + enable boolean