Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NETOBSERV-1625: Add ebpf alerting for flows drop #632

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions apis/flowcollector/v1beta1/flowcollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,16 @@ const (
FlowRTT AgentFeature = "FlowRTT"
)

// Name of an eBPF agent alert.
// Possible values are:<br>
// - `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
// +kubebuilder:validation:Enum:="NetObservDroppedFlows"
type EBPFAgentAlert string

const (
	// AlertDroppedFlows is the alert triggered when the eBPF agent hashmap table is full.
	// Its value must be identical to the entry of the kubebuilder enum marker on
	// EBPFAgentAlert: the enum is the only value a user is allowed to set, so any
	// other string here could never be matched against user-provided values.
	AlertDroppedFlows EBPFAgentAlert = "NetObservDroppedFlows"
)

// `EBPFMetrics` defines the desired eBPF agent configuration regarding metrics
type EBPFMetrics struct {
// Metrics server endpoint configuration for Prometheus scraper
Expand All @@ -165,6 +175,12 @@ type EBPFMetrics struct {

// Set `enable` to `true` to enable eBPF agent metrics collection.
Enable *bool `json:"enable,omitempty"`

// `disableAlerts` is a list of alerts that should be disabled.
// Possible values are:<br>
// `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
// +optional
DisableAlerts []EBPFAgentAlert `json:"disableAlerts"`
}

// `EBPFFlowFilter` defines the desired eBPF agent configuration regarding flow filtering
Expand Down
2 changes: 2 additions & 0 deletions apis/flowcollector/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions apis/flowcollector/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions apis/flowcollector/v1beta2/flowcollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,16 @@ const (
FlowRTT AgentFeature = "FlowRTT"
)

// Name of an eBPF agent alert.
// Possible values are:<br>
// - `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
// +kubebuilder:validation:Enum:="NetObservDroppedFlows"
type EBPFAgentAlert string

const (
	// AlertDroppedFlows is the alert triggered when the eBPF agent hashmap table is full.
	// Its value must be identical to the entry of the kubebuilder enum marker on
	// EBPFAgentAlert: the enum is the only value a user is allowed to set, so any
	// other string here could never be matched against user-provided values.
	AlertDroppedFlows EBPFAgentAlert = "NetObservDroppedFlows"
)

// `EBPFMetrics` defines the desired eBPF agent configuration regarding metrics
type EBPFMetrics struct {
// Metrics server endpoint configuration for Prometheus scraper
Expand All @@ -172,6 +182,12 @@ type EBPFMetrics struct {

// Set `enable` to `true` to enable eBPF agent metrics collection.
Enable *bool `json:"enable,omitempty"`

// `disableAlerts` is a list of alerts that should be disabled.
// Possible values are:<br>
// `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
// +optional
DisableAlerts []EBPFAgentAlert `json:"disableAlerts"`
}

// `EBPFFlowFilter` defines the desired eBPF agent configuration regarding flow filtering
Expand Down
5 changes: 5 additions & 0 deletions apis/flowcollector/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions bundle/manifests/flows.netobserv.io_flowcollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,20 @@ spec:
description: '`metrics` defines the eBPF agent configuration
regarding metrics'
properties:
disableAlerts:
description: |-
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
items:
description: |-
Name of an eBPF agent alert.
Possible values are:<br>
- `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
enum:
- NetObservDroppedFlows
type: string
type: array
enable:
description: Set `enable` to `true` to enable eBPF agent
metrics collection.
Expand Down Expand Up @@ -3637,6 +3651,20 @@ spec:
description: '`metrics` defines the eBPF agent configuration
regarding metrics'
properties:
disableAlerts:
description: |-
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
items:
description: |-
Name of an eBPF agent alert.
Possible values are:<br>
- `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
enum:
- NetObservDroppedFlows
type: string
type: array
enable:
description: Set `enable` to `true` to enable eBPF agent
metrics collection.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,8 @@ spec:
path: agent.ebpf.interfaces
- displayName: Metrics
path: agent.ebpf.metrics
- displayName: Disable alerts
path: agent.ebpf.metrics.disableAlerts
- displayName: Enable
path: agent.ebpf.metrics.enable
- displayName: Server
Expand Down
28 changes: 28 additions & 0 deletions config/crd/bases/flows.netobserv.io_flowcollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,20 @@ spec:
metrics:
description: '`metrics` defines the eBPF agent configuration regarding metrics'
properties:
disableAlerts:
description: |-
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
items:
description: |-
Name of an eBPF agent alert.
Possible values are:<br>
- `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
enum:
- NetObservDroppedFlows
type: string
type: array
enable:
description: Set `enable` to `true` to enable eBPF agent metrics collection.
type: boolean
Expand Down Expand Up @@ -3349,6 +3363,20 @@ spec:
metrics:
description: '`metrics` defines the eBPF agent configuration regarding metrics'
properties:
disableAlerts:
description: |-
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
items:
description: |-
Name of an eBPF agent alert.
Possible values are:<br>
- `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
enum:
- NetObservDroppedFlows
type: string
type: array
enable:
description: Set `enable` to `true` to enable eBPF agent metrics collection.
type: boolean
Expand Down
1 change: 1 addition & 0 deletions controllers/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
EBPFAgentName = "netobserv-ebpf-agent"
EBPFAgentMetricsSvcName = "ebpf-agent-svc-prom"
EBPFAgentMetricsSvcMonitoringName = "ebpf-agent-svc-monitor"
EBPFAgentPromoAlertRule = "ebpf-agent-prom-alert"
EBPFPrivilegedNSSuffix = "-privileged"
EBPFServiceAccount = EBPFAgentName
EBPFSecurityContext = EBPFAgentName
Expand Down
61 changes: 61 additions & 0 deletions controllers/ebpf/agent-metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ func (c *AgentController) reconcileMetricsService(ctx context.Context, target *f
if c.AvailableAPIs.HasSvcMonitor() {
c.Managed.TryDelete(ctx, c.serviceMonitor)
}
if c.AvailableAPIs.HasPromRule() {
c.Managed.TryDelete(ctx, c.prometheusRule)
}
return nil
}

Expand All @@ -37,6 +40,13 @@ func (c *AgentController) reconcileMetricsService(ctx context.Context, target *f
return err
}
}

if c.AvailableAPIs.HasPromRule() {
promRules := c.agentPrometheusRule(target)
if err := reconcilers.GenericReconcile(ctx, c.Managed, &c.Client, c.prometheusRule, promRules, &report, helper.PrometheusRuleChanged); err != nil {
return err
}
}
return nil
}

Expand Down Expand Up @@ -103,3 +113,54 @@ func (c *AgentController) promServiceMonitoring(target *flowslatest.FlowCollecto
},
}
}

// agentPrometheusRule builds the PrometheusRule object carrying the eBPF agent
// alerts. Every alert is added to a single rule group unless it is listed in
// target.Metrics.DisableAlerts; the group is still emitted (with no rules)
// when all alerts are disabled.
func (c *AgentController) agentPrometheusRule(target *flowslatest.FlowCollectorEBPF) *monitoringv1.PrometheusRule {
	pendingFor := monitoringv1.Duration("10m")
	alertRules := []monitoringv1.Rule{}

	// Dropped flows alert: the agent hashmap table is full, so new flows are
	// no longer being recorded.
	if shouldAddAlert(flowslatest.AlertDroppedFlows, target.Metrics.DisableAlerts) {
		droppedFlows := monitoringv1.Rule{
			Alert: string(flowslatest.AlertDroppedFlows),
			Expr:  intstr.FromString("sum(rate(netobserv_agent_dropped_flows_total[1m])) > 0"),
			For:   &pendingFor,
			Labels: map[string]string{
				"severity": "warning",
				"app":      "netobserv",
			},
			Annotations: map[string]string{
				"description": "NetObserv eBPF agent is not able to process new flows as it's hashmap is full. Hashmap table size can be increased by increasing cacheMaxFlows value in Flowcollector resource.",
				"summary":     "NetObserv eBPF is not able to process any new flows",
			},
		}
		alertRules = append(alertRules, droppedFlows)
	}

	group := monitoringv1.RuleGroup{
		Name:  "NetobservEBPFAgentAlerts",
		Rules: alertRules,
	}

	// The rule is created in the privileged namespace and labelled with the
	// agent name.
	return &monitoringv1.PrometheusRule{
		ObjectMeta: metav1.ObjectMeta{
			Name:      constants.EBPFAgentPromoAlertRule,
			Namespace: c.PrivilegedNamespace(),
			Labels: map[string]string{
				"app": constants.EBPFAgentName,
			},
		},
		Spec: monitoringv1.PrometheusRuleSpec{
			Groups: []monitoringv1.RuleGroup{group},
		},
	}
}

// shouldAddAlert reports whether the alert called name has to be generated,
// that is, whether it is absent from disabledList. A nil or empty list
// disables nothing, so the function returns true in that case.
func shouldAddAlert(name flowslatest.EBPFAgentAlert, disabledList []flowslatest.EBPFAgentAlert) bool {
	disabled := false
	// Stop scanning as soon as the alert is found among the disabled ones.
	for i := 0; i < len(disabledList) && !disabled; i++ {
		disabled = disabledList[i] == name
	}
	return !disabled
}
4 changes: 4 additions & 0 deletions controllers/ebpf/agent_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ type AgentController struct {
volumes volumes.Builder
promSvc *corev1.Service
serviceMonitor *monitoringv1.ServiceMonitor
prometheusRule *monitoringv1.PrometheusRule
}

func NewAgentController(common *reconcilers.Instance) *AgentController {
Expand All @@ -117,6 +118,9 @@ func NewAgentController(common *reconcilers.Instance) *AgentController {
if common.AvailableAPIs.HasSvcMonitor() {
agent.serviceMonitor = common.Managed.NewServiceMonitor(constants.EBPFAgentMetricsSvcMonitoringName)
}
if common.AvailableAPIs.HasPromRule() {
agent.prometheusRule = common.Managed.NewPrometheusRule(constants.EBPFAgentPromoAlertRule)
}
return &agent
}

Expand Down
18 changes: 18 additions & 0 deletions docs/FlowCollector.md
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,15 @@ To filter a range of ports, use a "start-end" range, string format. For example
</tr>
</thead>
<tbody><tr>
<td><b>disableAlerts</b></td>
<td>[]enum</td>
<td>
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br><br/>
</td>
<td>false</td>
</tr><tr>
<td><b>enable</b></td>
<td>boolean</td>
<td>
Expand Down Expand Up @@ -7606,6 +7615,15 @@ To filter a range of ports, use a "start-end" range, string format. For example
</tr>
</thead>
<tbody><tr>
<td><b>disableAlerts</b></td>
<td>[]enum</td>
<td>
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br><br/>
</td>
<td>false</td>
</tr><tr>
<td><b>enable</b></td>
<td>boolean</td>
<td>
Expand Down
Loading