From 15b2ef01eae42a19cd0b252df6581f246b6e31fe Mon Sep 17 00:00:00 2001 From: Mario Macias Date: Mon, 5 Sep 2022 17:26:35 +0200 Subject: [PATCH 1/2] NETOBSERV-448: Moving agent type, ipfix and ebpf config into their own common subsection --- Makefile | 23 +- README.md | 2 +- api/v1alpha1/flowcollector_types.go | 55 +- api/v1alpha1/zz_generated.deepcopy.go | 20 +- bundle.Dockerfile | 2 +- .../flows.netobserv.io_flowcollectors.yaml | 360 +++++++------ ...observ-operator.clusterserviceversion.yaml | 55 +- bundle/metadata/annotations.yaml | 2 +- .../flows.netobserv.io_flowcollectors.yaml | 343 ++++++------ .../patches/version_in_flowcollectors.yaml | 2 +- .../version_in_flowcollectors_envtpl.yaml | 2 +- ...observ-operator.clusterserviceversion.yaml | 2 +- .../samples/flows_v1alpha1_flowcollector.yaml | 31 +- ...lows_v1alpha1_flowcollector_versioned.yaml | 31 +- controllers/ebpf/agent_controller.go | 44 +- .../flowcollector_controller_console_test.go | 2 +- .../flowcollector_controller_ebpf_test.go | 32 +- controllers/flowcollector_controller_test.go | 24 +- .../flowlogspipeline/flp_reconciler.go | 4 +- controllers/flowlogspipeline/flp_test.go | 4 +- controllers/ovs/flowsconfig_cno_reconciler.go | 4 +- .../ovs/flowsconfig_ovnk_reconciler.go | 8 +- docs/FlowCollector.md | 493 +++++++++--------- 23 files changed, 820 insertions(+), 725 deletions(-) diff --git a/Makefile b/Makefile index 490dc0118..7b4f31900 100644 --- a/Makefile +++ b/Makefile @@ -10,9 +10,12 @@ BUILD_SHA := $(shell git rev-parse --short HEAD) # Other component versions when building bundle / release PREVIOUS_VERSION ?= tochange -PLG_VERSION ?= v0.1.0 # console plugin -FLP_VERSION ?= v0.1.0 # flowlogs-pipeline -BPF_VERSION ?= v0.1.0 # eBPF agent +PLG_VERSION ?= v0.1.4 # console plugin +FLP_VERSION ?= v0.1.3 # flowlogs-pipeline +BPF_VERSION ?= v0.1.2 # eBPF agent + +# Allows building bundles in Mac replacing BSD 'sed' command by GNU-compatible 'gsed' +SED ?= sed # Port-forward (for loki/grafana deployments) PORT_FWD ?= true @@ -211,12 +214,12 @@ bundle: generate kustomize ## Generate bundle manifests and metadata, then valid operator-sdk generate kustomize manifests -q cd config/manager && $(KUSTOMIZE) edit set image controller=$(IMG) cp config/samples/flows_v1alpha1_flowcollector.yaml config/samples/flows_v1alpha1_flowcollector_versioned.yaml - sed -i -r 's~flowlogs-pipeline:main~flowlogs-pipeline:$(FLP_VERSION)~' config/samples/flows_v1alpha1_flowcollector_versioned.yaml - sed -i -r 's~console-plugin:main~console-plugin:$(PLG_VERSION)~' config/samples/flows_v1alpha1_flowcollector_versioned.yaml - sed -i -r 's~ebpf-agent:main~ebpf-agent:$(BPF_VERSION)~' config/samples/flows_v1alpha1_flowcollector_versioned.yaml - sed -i -r 's~blob/[0-9]+\.[0-9]+\.[0-9]+(-rc[0-9]+)\?/~blob/$(VERSION)/~g' ./config/manifests/bases/netobserv-operator.clusterserviceversion.yaml - sed -i -r 's~replaces: netobserv-operator\.v.*~replaces: netobserv-operator\.$(PREVIOUS_VERSION)~' ./config/manifests/bases/netobserv-operator.clusterserviceversion.yaml - $(KUSTOMIZE) build config/manifests | sed -e 's~:container-image:~$(IMG)~' | sed -e 's~:created-at:~$(DATE)~' | operator-sdk generate bundle -q --overwrite --version $(VERSION) $(BUNDLE_METADATA_OPTS) + $(SED) -i -r 's~flowlogs-pipeline:main~flowlogs-pipeline:$(FLP_VERSION)~' config/samples/flows_v1alpha1_flowcollector_versioned.yaml + $(SED) -i -r 's~console-plugin:main~console-plugin:$(PLG_VERSION)~' config/samples/flows_v1alpha1_flowcollector_versioned.yaml + $(SED) -i -r 
's~ebpf-agent:main~ebpf-agent:$(BPF_VERSION)~' config/samples/flows_v1alpha1_flowcollector_versioned.yaml + $(SED) -i -r 's~blob/[0-9]+\.[0-9]+\.[0-9]+(-rc[0-9]+)\?/~blob/$(VERSION)/~g' ./config/manifests/bases/netobserv-operator.clusterserviceversion.yaml + $(SED) -i -r 's~replaces: netobserv-operator\.v.*~replaces: netobserv-operator\.$(PREVIOUS_VERSION)~' ./config/manifests/bases/netobserv-operator.clusterserviceversion.yaml + $(KUSTOMIZE) build config/manifests | $(SED) -e 's~:container-image:~$(IMG)~' | $(SED) -e 's~:created-at:~$(DATE)~' | operator-sdk generate bundle -q --overwrite --version $(VERSION) $(BUNDLE_METADATA_OPTS) operator-sdk bundle validate ./bundle .PHONY: bundle-build @@ -271,7 +274,7 @@ catalog-push: ## Push a catalog image. # Deploy the catalog. .PHONY: catalog-deploy catalog-deploy: - sed -e 's~~$(CATALOG_IMG)~' ./config/samples/catalog/catalog.yaml | kubectl apply -f - + $(SED) -e 's~~$(CATALOG_IMG)~' ./config/samples/catalog/catalog.yaml | kubectl apply -f - # Undeploy the catalog. .PHONY: catalog-undeploy diff --git a/README.md b/README.md index bac9b798b..fa42c6afc 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ As it operates cluster-wide, only a single `FlowCollector` is allowed, and it ha A couple of settings deserve special attention: -- Agent (`spec.agent`) can be `ebpf` (default) or `ipfix`. eBPF is recommended, as it should work in more situations and offers better performances. If you can't, or don't want to use eBPF, note that the IPFIX option is fully functional only when using OVN-Kubernetes CNI. Other CNIs are not officially supported, but you may still be able to configure them manually if they allow IPFIX exports. +- Agent (`spec.agent`) can be `EBPF` (default) or `IPFIX`. eBPF is recommended, as it should work in more situations and offers better performances. If you can't, or don't want to use eBPF, note that the IPFIX option is fully functional only when using OVN-Kubernetes CNI. Other CNIs are not officially supported, but you may still be able to configure them manually if they allow IPFIX exports. - Sampling (`spec.ebpf.sampling` and `spec.ipfix.sampling`): 24/7, 1:1 sampled flow collection may consume a non-negligible amount of resources. While we are doing our best to make it a viable option in production, it is still sometimes necessary to mitigate by setting a sampling ratio. A value of `100` means: one flow every 100 is sampled. `1` means all flows are sampled. The lower it is, the more flows you get, and the more accurate are derived metrics. By default, sampling is set to 50 (ie. 1:50) for eBPF and 400 (1:400) for IPFIX. Note that more sampled flows also means more storage needed. We recommend to start with default values and refine empirically, to figure out which setting your cluster can manage. diff --git a/api/v1alpha1/flowcollector_types.go b/api/v1alpha1/flowcollector_types.go index 887cdac9c..e7db4404c 100644 --- a/api/v1alpha1/flowcollector_types.go +++ b/api/v1alpha1/flowcollector_types.go @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -24,8 +24,8 @@ import ( // NOTE: json tags are required. 
Any new fields you add must have json tags for the fields to be serialized. const ( - AgentIPFIX = "ipfix" - AgentEBPF = "ebpf" + AgentIPFIX = "IPFIX" + AgentEBPF = "EBPF" ) // Please notice that the FlowCollectorSpec's properties MUST redefine one of the default @@ -39,29 +39,13 @@ const ( type FlowCollectorSpec struct { // Important: Run "make generate" to regenerate code after modifying this file - //+kubebuilder:default:="" // Namespace where NetObserv pods are deployed. // If empty, the namespace of the operator is going to be used. + // +optional Namespace string `json:"namespace,omitempty"` - //+kubebuilder:validation:Enum=ipfix;ebpf - //+kubebuilder:default:=ebpf - // Select the flows tracing agent. Possible values are "ipfix" to use - // the IPFIX collector, or "ebpf" (default) to use NetObserv eBPF agent. - // eBPF is recommended, as it should work in more situations and offers better performances. - // When using IPFIX with OVN-Kubernetes CNI, NetObserv will configure OVN's IPFIX exporter. - // Other CNIs are not supported, they could work but necessitate manual configuration. - Agent string `json:"agent"` - - // Settings related to IPFIX-based flow reporter when the "agent" property is set - // to "ipfix". - // +kubebuilder:default:={sampling:400} - IPFIX FlowCollectorIPFIX `json:"ipfix,omitempty"` - - // Settings related to eBPF-based flow reporter when the "agent" property is set - // to "ebpf". - // +kubebuilder:default={imagePullPolicy:"IfNotPresent"} - EBPF FlowCollectorEBPF `json:"ebpf,omitempty"` + // +kubebuilder:default:={type:"EBPF"} + Agent FlowCollectorAgent `json:"agent"` // Settings related to the flowlogs-pipeline component, which collects and enriches the flows, and produces metrics. FlowlogsPipeline FlowCollectorFLP `json:"flowlogsPipeline,omitempty"` @@ -84,6 +68,31 @@ type FlowCollectorSpec struct { OVNKubernetes OVNKubernetesConfig `json:"ovnKubernetes,omitempty"` } +// FlowCollectorAgent is a discriminated union that allows to select either ipfix or ebpf, but does not +// allow defining both fields. +// +union +type FlowCollectorAgent struct { + // Select the flows tracing agent. Possible values are "IPFIX" (default) to use + // the IPFIX collector, or "EBPF" to use NetObserv eBPF agent. When using IPFIX with OVN-Kubernetes + // CNI, NetObserv will configure OVN's IPFIX exporter. Other CNIs are not supported, they could + // work but require manual configuration. + // +unionDiscriminator + // +kubebuilder:validation:Enum:="IPFIX";"EBPF" + // +kubebuilder:validation:Required + // +kubebuilder:default:=EBPF + Type string `json:"type"` + + // Settings related to IPFIX-based flow reporter when the "agent.type" property is set + // to "IPFIX". + // +optional + IPFIX FlowCollectorIPFIX `json:"ipfix,omitempty"` + + // Settings related to eBPF-based flow reporter when the "agent.type" property is set + // to "EBPF". 
+ // +optional + EBPF FlowCollectorEBPF `json:"ebpf,omitempty"` +} + // FlowCollectorIPFIX defines a FlowCollector that uses IPFIX on OVN-Kubernetes to collect the // flows information type FlowCollectorIPFIX struct { @@ -495,7 +504,7 @@ type FlowCollectorStatus struct { //+kubebuilder:object:root=true //+kubebuilder:subresource:status //+kubebuilder:resource:scope=Cluster -//+kubebuilder:printcolumn:name="Agent",type="string",JSONPath=`.spec.agent` +//+kubebuilder:printcolumn:name="Agent",type="string",JSONPath=`.spec.agent.type` //+kubebuilder:printcolumn:name="Kafka",type="boolean",JSONPath=`.spec.kafka.enable` //+kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.conditions[*].reason" diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 2d032ab50..00890d8a1 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -123,6 +123,23 @@ func (in *FlowCollector) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FlowCollectorAgent) DeepCopyInto(out *FlowCollectorAgent) { + *out = *in + out.IPFIX = in.IPFIX + in.EBPF.DeepCopyInto(&out.EBPF) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FlowCollectorAgent. +func (in *FlowCollectorAgent) DeepCopy() *FlowCollectorAgent { + if in == nil { + return nil + } + out := new(FlowCollectorAgent) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *FlowCollectorConsolePlugin) DeepCopyInto(out *FlowCollectorConsolePlugin) { *out = *in @@ -324,8 +341,7 @@ func (in *FlowCollectorLoki) DeepCopy() *FlowCollectorLoki { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *FlowCollectorSpec) DeepCopyInto(out *FlowCollectorSpec) { *out = *in - out.IPFIX = in.IPFIX - in.EBPF.DeepCopyInto(&out.EBPF) + in.Agent.DeepCopyInto(&out.Agent) in.FlowlogsPipeline.DeepCopyInto(&out.FlowlogsPipeline) in.Loki.DeepCopyInto(&out.Loki) out.Kafka = in.Kafka diff --git a/bundle.Dockerfile b/bundle.Dockerfile index 2078bc634..5be8914b0 100644 --- a/bundle.Dockerfile +++ b/bundle.Dockerfile @@ -6,7 +6,7 @@ LABEL operators.operatorframework.io.bundle.manifests.v1=manifests/ LABEL operators.operatorframework.io.bundle.metadata.v1=metadata/ LABEL operators.operatorframework.io.bundle.package.v1=netobserv-operator LABEL operators.operatorframework.io.bundle.channels.v1=alpha -LABEL operators.operatorframework.io.metrics.builder=operator-sdk-v1.22.2 +LABEL operators.operatorframework.io.metrics.builder=operator-sdk-v1.23.0 LABEL operators.operatorframework.io.metrics.mediatype.v1=metrics+v1 LABEL operators.operatorframework.io.metrics.project_layout=go.kubebuilder.io/v3 diff --git a/bundle/manifests/flows.netobserv.io_flowcollectors.yaml b/bundle/manifests/flows.netobserv.io_flowcollectors.yaml index c4ef4e7a0..ad6210238 100644 --- a/bundle/manifests/flows.netobserv.io_flowcollectors.yaml +++ b/bundle/manifests/flows.netobserv.io_flowcollectors.yaml @@ -15,7 +15,7 @@ spec: scope: Cluster versions: - additionalPrinterColumns: - - jsonPath: .spec.agent + - jsonPath: .spec.agent.type name: Agent type: string - jsonPath: .spec.kafka.enable @@ -50,16 +50,181 @@ spec: description: FlowCollectorSpec defines the desired state of FlowCollector properties: agent: - default: ipfix - description: Select the flows tracing agent. Possible values are "ipfix" - (default) to use the IPFIX collector, or "ebpf" to use NetObserv - eBPF agent. When using IPFIX with OVN-Kubernetes CNI, NetObserv - will configure OVN's IPFIX exporter. Other CNIs are not supported, - they could work but necessitate manual configuration. - enum: - - ipfix - - ebpf - type: string + default: + type: EBPF + description: FlowCollectorAgent is a discriminated union that allows + to select either ipfix or ebpf, but does not allow defining both + fields. + properties: + ebpf: + description: Settings related to eBPF-based flow reporter when + the "agent.type" property is set to "EBPF". + properties: + cacheActiveTimeout: + default: 5s + description: CacheActiveTimeout is the max period during which + the reporter will aggregate flows before sending + pattern: ^\d+(ns|ms|s|m)?$ + type: string + cacheMaxFlows: + default: 1000 + description: CacheMaxFlows is the max number of flows in an + aggregate; when reached, the reporter sends the flows + format: int32 + minimum: 1 + type: integer + env: + additionalProperties: + type: string + description: Env allows passing custom environment variables + to the NetObserv Agent. Useful for passing some very concrete + performance-tuning options (e.g. GOGC, GOMAXPROCS) that + shouldn't be publicly exposed as part of the FlowCollector + descriptor, as they are only useful in edge debug/support + scenarios. + type: object + excludeInterfaces: + default: + - lo + description: ExcludeInterfaces contains the interface names + that will be excluded from flow tracing. If an entry is + enclosed by slashes (e.g. `/br-/`), it will match as regular + expression, otherwise it will be matched as a case-sensitive + string. 
+ items: + type: string + type: array + image: + default: 'quay.io/netobserv/netobserv-ebpf-agent:' + description: Image is the NetObserv Agent image (including + domain and tag) + type: string + imagePullPolicy: + default: IfNotPresent + description: ImagePullPolicy is the Kubernetes pull policy + for the image defined above + enum: + - IfNotPresent + - Always + - Never + type: string + interfaces: + description: Interfaces contains the interface names from + where flows will be collected. If empty, the agent will + fetch all the interfaces in the system, excepting the ones + listed in ExcludeInterfaces. If an entry is enclosed by + slashes (e.g. `/br-/`), it will match as regular expression, + otherwise it will be matched as a case-sensitive string. + items: + type: string + type: array + logLevel: + default: info + description: LogLevel defines the log level for the NetObserv + eBPF Agent + enum: + - trace + - debug + - info + - warn + - error + - fatal + - panic + type: string + privileged: + description: 'Privileged mode for the eBPF Agent container. + If false, the operator will add the following capabilities + to the container, to enable its correct operation: BPF, + PERFMON, NET_ADMIN, SYS_RESOURCE.' + type: boolean + resources: + description: 'Compute Resources required by this container. + Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of compute + resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount of + compute resources required. If Requests is omitted for + a container, it defaults to Limits if that is explicitly + specified, otherwise to an implementation-defined value. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + type: object + type: object + sampling: + default: 50 + description: Sampling is the sampling rate on the reporter. + 100 means one flow on 100 is sent. 0 or 1 means all flows + are sampled. + format: int32 + minimum: 0 + type: integer + type: object + ipfix: + description: Settings related to IPFIX-based flow reporter when + the "agent.type" property is set to "IPFIX". + properties: + cacheActiveTimeout: + default: 20s + description: CacheActiveTimeout is the max period during which + the reporter will aggregate flows before sending + pattern: ^\d+(ns|ms|s|m)?$ + type: string + cacheMaxFlows: + default: 400 + description: CacheMaxFlows is the max number of flows in an + aggregate; when reached, the reporter sends the flows + format: int32 + minimum: 0 + type: integer + forceSampleAll: + default: false + description: It is not recommended to sample all the traffic + with IPFIX, as it may generate cluster instability. If you + REALLY want to do that, set this flag to true. Use at your + own risks. When it is set to true, the value of "sampling" + is ignored. 
+ type: boolean + sampling: + default: 400 + description: Sampling is the sampling rate on the reporter. + 100 means one flow on 100 is sent. To ensure cluster stability, + it is not possible to set a value below 2. If you really + want to sample every packet, which may impact the cluster + stability, refer to "forceSampleAll". Alternatively, you + can use the eBPF Agent instead of IPFIX. + format: int32 + minimum: 2 + type: integer + type: object + type: + default: EBPF + description: Select the flows tracing agent. Possible values are + "IPFIX" (default) to use the IPFIX collector, or "EBPF" to use + NetObserv eBPF agent. When using IPFIX with OVN-Kubernetes CNI, + NetObserv will configure OVN's IPFIX exporter. Other CNIs are + not supported, they could work but require manual configuration. + enum: + - IPFIX + - EBPF + type: string + required: + - type + type: object clusterNetworkOperator: description: Settings related to the OpenShift Cluster Network Operator, when available. @@ -692,120 +857,6 @@ spec: required: - register type: object - ebpf: - default: - imagePullPolicy: IfNotPresent - description: Settings related to eBPF-based flow reporter when the - "agent" property is set to "ebpf". - properties: - cacheActiveTimeout: - default: 5s - description: CacheActiveTimeout is the max period during which - the reporter will aggregate flows before sending - pattern: ^\d+(ns|ms|s|m)?$ - type: string - cacheMaxFlows: - default: 1000 - description: CacheMaxFlows is the max number of flows in an aggregate; - when reached, the reporter sends the flows - format: int32 - minimum: 1 - type: integer - env: - additionalProperties: - type: string - description: Env allows passing custom environment variables to - the NetObserv Agent. Useful for passing some very concrete performance-tuning - options (e.g. GOGC, GOMAXPROCS) that shouldn't be publicly exposed - as part of the FlowCollector descriptor, as they are only useful - in edge debug/support scenarios. - type: object - excludeInterfaces: - default: - - lo - description: ExcludeInterfaces contains the interface names that - will be excluded from flow tracing. If an entry is enclosed - by slashes (e.g. `/br-/`), it will match as regular expression, - otherwise it will be matched as a case-sensitive string. - items: - type: string - type: array - image: - default: quay.io/netobserv/netobserv-ebpf-agent:v0.1.2 - description: Image is the NetObserv Agent image (including domain - and tag) - type: string - imagePullPolicy: - default: IfNotPresent - description: ImagePullPolicy is the Kubernetes pull policy for - the image defined above - enum: - - IfNotPresent - - Always - - Never - type: string - interfaces: - description: Interfaces contains the interface names from where - flows will be collected. If empty, the agent will fetch all - the interfaces in the system, excepting the ones listed in ExcludeInterfaces. - If an entry is enclosed by slashes (e.g. `/br-/`), it will match - as regular expression, otherwise it will be matched as a case-sensitive - string. - items: - type: string - type: array - logLevel: - default: info - description: LogLevel defines the log level for the NetObserv - eBPF Agent - enum: - - trace - - debug - - info - - warn - - error - - fatal - - panic - type: string - privileged: - description: 'Privileged mode for the eBPF Agent container. If - false, the operator will add the following capabilities to the - container, to enable its correct operation: BPF, PERFMON, NET_ADMIN, - SYS_RESOURCE.' 
- type: boolean - resources: - description: 'Compute Resources required by this container. Cannot - be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: 'Limits describes the maximum amount of compute - resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: 'Requests describes the minimum amount of compute - resources required. If Requests is omitted for a container, - it defaults to Limits if that is explicitly specified, otherwise - to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' - type: object - type: object - sampling: - description: Sampling is the sampling rate on the reporter. 100 - means one flow on 100 is sent. 0 or 1 means disabled. - format: int32 - type: integer - type: object flowlogsPipeline: description: Settings related to the flowlogs-pipeline component, which collects and enriches the flows, and produces metrics. @@ -1453,48 +1504,11 @@ spec: type: object type: object type: object - ipfix: - default: - sampling: 400 - description: Settings related to IPFIX-based flow reporter when the - "agent" property is set to "ipfix". - properties: - cacheActiveTimeout: - default: 20s - description: CacheActiveTimeout is the max period during which - the reporter will aggregate flows before sending - pattern: ^\d+(ns|ms|s|m)?$ - type: string - cacheMaxFlows: - default: 400 - description: CacheMaxFlows is the max number of flows in an aggregate; - when reached, the reporter sends the flows - format: int32 - minimum: 0 - type: integer - forceSampleAll: - default: false - description: It is not recommended to sample all the traffic with - IPFIX, as it may generate cluster instability. If you REALLY - want to do that, set this flag to true. Use at your own risks. - When it is set to true, the value of "sampling" is ignored. - type: boolean - sampling: - default: 400 - description: Sampling is the sampling rate on the reporter. 100 - means one flow on 100 is sent. To ensure cluster stability, - it is not possible to set a value below 2. If you really want - to sample every packet, which may impact the cluster stability, - refer to "forceSampleAll". Alternatively, you can use the eBPF - Agent instead of IPFIX. - format: int32 - minimum: 2 - type: integer - type: object kafka: description: Kafka configuration, allowing to use Kafka as a broker - as part of the flow collection pipeline. This is a new and experimental - feature, not yet recommended to use in production. + as part of the flow collection pipeline. Kafka can provide better + scalability, resiliency and high availability (for more details, + see https://www.redhat.com/en/topics/integration/what-is-apache-kafka). properties: address: default: "" @@ -1609,8 +1623,14 @@ spec: description: QuerierURL specifies the address of the Loki querier service, in case it is different from the Loki ingester URL. 
If empty, the URL value will be used (assuming that the Loki - ingester and querier are int he same host). + ingester and querier are in the same server). type: string + sendAuthToken: + default: false + description: SendAuthToken is a flag to enable or disable Authorization + header from service account secret It allows authentication + to loki operator gateway + type: boolean staticLabels: additionalProperties: type: string @@ -1619,6 +1639,13 @@ spec: description: StaticLabels is a map of common labels to set on each flow type: object + statusUrl: + description: StatusURL specifies the address of the Loki /ready + /metrics /config endpoints, in case it is different from the + Loki querier URL. If empty, the QuerierURL value will be used. + This is useful to show error messages and some context in the + frontend + type: string tenantID: default: netobserv description: TenantID is the Loki X-Scope-OrgID that identifies @@ -1693,7 +1720,6 @@ spec: type: string type: object namespace: - default: "" description: Namespace where NetObserv pods are deployed. If empty, the namespace of the operator is going to be used. type: string diff --git a/bundle/manifests/netobserv-operator.clusterserviceversion.yaml b/bundle/manifests/netobserv-operator.clusterserviceversion.yaml index 1fcb5b20e..f00fd816b 100644 --- a/bundle/manifests/netobserv-operator.clusterserviceversion.yaml +++ b/bundle/manifests/netobserv-operator.clusterserviceversion.yaml @@ -11,7 +11,27 @@ metadata: "name": "cluster" }, "spec": { - "agent": "ipfix", + "agent": { + "ebpf": { + "cacheActiveTimeout": "5s", + "cacheMaxFlows": 1000, + "excludeInterfaces": [ + "lo" + ], + "image": "quay.io/netobserv/netobserv-ebpf-agent:v0.1.2", + "imagePullPolicy": "IfNotPresent", + "interfaces": [], + "logLevel": "info", + "privileged": false, + "sampling": 50 + }, + "ipfix": { + "cacheActiveTimeout": "20s", + "cacheMaxFlows": 400, + "sampling": 400 + }, + "type": "EBPF" + }, "clusterNetworkOperator": { "namespace": "openshift-network-operator" }, @@ -28,19 +48,6 @@ metadata: }, "register": true }, - "ebpf": { - "cacheActiveTimeout": "5s", - "cacheMaxFlows": 1000, - "excludeInterfaces": [ - "lo" - ], - "image": "quay.io/netobserv/netobserv-ebpf-agent:v0.1.2", - "imagePullPolicy": "IfNotPresent", - "interfaces": [], - "logLevel": "info", - "privileged": false, - "sampling": 0 - }, "flowlogsPipeline": { "dropUnusedFields": true, "enableKubeProbes": true, @@ -53,11 +60,6 @@ metadata: "port": 2055, "prometheusPort": 9102 }, - "ipfix": { - "cacheActiveTimeout": "20s", - "cacheMaxFlows": 400, - "sampling": 400 - }, "kafka": { "address": "kafka-cluster-kafka-bootstrap.network-observability", "enable": false, @@ -111,7 +113,7 @@ metadata: containerImage: quay.io/netobserv/network-observability-operator:0.1.4 createdAt: "2022-08-08T16:02:25Z" description: Network flows collector and monitoring solution - operators.operatorframework.io/builder: operator-sdk-v1.16.0+git + operators.operatorframework.io/builder: operator-sdk-v1.23.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v3 repository: https://github.com/netobserv/network-observability-operator name: netobserv-operator.v0.1.4 @@ -131,8 +133,6 @@ spec: The operator provides dashboards, metrics, and keeps flows accessible in a queryable log store, Grafana Loki. When used in OpenShift, new dashboards are available in the Console. - This is an early release, we would be grateful if you could inform us of any issues. 
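The alm-examples above illustrate how the previously flat `agent`, `ebpf` and `ipfix` settings now nest under a single `spec.agent` block. Expressed as a FlowCollector custom resource, the same defaults read like this minimal sketch (assembled from the samples and default values in this patch; not an exhaustive spec):

```yaml
apiVersion: flows.netobserv.io/v1alpha1
kind: FlowCollector
metadata:
  name: cluster
spec:
  agent:
    type: EBPF                  # discriminator: "EBPF" or "IPFIX"
    ebpf:                       # used when type is EBPF
      sampling: 50
      cacheActiveTimeout: 5s
      cacheMaxFlows: 1000
      excludeInterfaces: ["lo"]
    ipfix:                      # used when type is IPFIX
      sampling: 400
      cacheActiveTimeout: 20s
      cacheMaxFlows: 400
```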
- ## Dependencies - [Loki](https://grafana.com/oss/loki/) is required, it is used as a store for all collected flows. @@ -155,13 +155,13 @@ spec: A couple of settings deserve special attention: - - Agent (`spec.agent`) can be `ipfix` or `ebpf`. The IPFIX option is fully functional when using [OVN-Kubernetes](https://github.com/ovn-org/ovn-kubernetes/) CNI. Other CNIs are not supported, but you may still be able to configure them manually if they allow IPFIX exports, whereas eBPF is expected to work regardless of the running CNI. + - Agent (`spec.agent`) can be `EBPF` or `IPFIX`. eBPF is recommended, as it should work in more situations and offers better performances. If you can't, or don't want to use eBPF, note that the IPFIX option is fully functional only when using [OVN-Kubernetes](https://github.com/ovn-org/ovn-kubernetes/) CNI. Other CNIs are not officially supported, but you may still be able to configure them manually if they allow IPFIX exports. - - Sampling (`spec.ipfix.sampling` and `spec.ebpf.sampling`): 24/7 unsampled flow collection may consume a non-negligible amount of resources. While we are doing our best to make it a viable option in production, it is still often necessary to mitigate by setting a sampling ratio. A value of `100` means: one flow every 100 is sampled. `1` means no sampling. The lower it is, the more accurate are flows and derived metrics. By default, sampling is set to 400 for IPFIX, and is disabled for eBPF. + - Sampling (`spec.ebpf.sampling` and `spec.ipfix.sampling`): 24/7, 1:1 sampled flow collection may consume a non-negligible amount of resources. While we are doing our best to make it a viable option in production, it is still sometimes necessary to mitigate by setting a sampling ratio. A value of `100` means: one flow every 100 is sampled. `1` means all flows are sampled. The lower it is, the more flows you get, and the more accurate are derived metrics. By default, sampling is set to 50 (ie. 1:50) for eBPF and 400 (1:400) for IPFIX. Note that more sampled flows also means more storage needed. We recommend to start with default values and refine empirically, to figure out which setting your cluster can manage. - Loki (`spec.loki`): configure here how to reach Loki. The default values match the Loki quick install paths mentioned above, but you may have to configure differently if you used another installation method. - - Kafka (`spec.kafka`): _experimental_ - when enabled, integrate the flow collection pipeline with Kafka, by splitting ingestion from transformation (kube enrichment, derived metrics, ...). Assumes Kafka is already deployed and a topic is created. + - Kafka (`spec.kafka`): when enabled, integrate the flow collection pipeline with Kafka, by splitting ingestion from transformation (kube enrichment, derived metrics, ...). Kafka can provide better scalability, resiliency and high availability ([view more details](https://www.redhat.com/en/topics/integration/what-is-apache-kafka)). Assumes Kafka is already deployed and a topic is created. 
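Putting the Loki and Kafka bullets together, a hedged sketch of the corresponding spec fragment follows; the Kafka address is the sample value used elsewhere in this patch, while the Loki `url` key and the topic name are assumptions to adapt to your own deployment:

```yaml
spec:
  loki:
    url: http://loki:3100/      # assumed ingester URL key/value; adjust to your Loki install
    tenantID: netobserv         # default tenant shown in the CRD
  kafka:
    enable: true
    address: kafka-cluster-kafka-bootstrap.network-observability
    topic: network-flows        # assumed topic name; the topic must already exist
```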
## Overview @@ -360,7 +360,10 @@ spec: - create serviceAccountName: netobserv-controller-manager deployments: - - name: netobserv-controller-manager + - label: + app: network-observability-operator + control-plane: controller-manager + name: netobserv-controller-manager spec: replicas: 1 selector: diff --git a/bundle/metadata/annotations.yaml b/bundle/metadata/annotations.yaml index c346d464b..d034d46ee 100644 --- a/bundle/metadata/annotations.yaml +++ b/bundle/metadata/annotations.yaml @@ -5,7 +5,7 @@ annotations: operators.operatorframework.io.bundle.metadata.v1: metadata/ operators.operatorframework.io.bundle.package.v1: netobserv-operator operators.operatorframework.io.bundle.channels.v1: alpha - operators.operatorframework.io.metrics.builder: operator-sdk-v1.22.2 + operators.operatorframework.io.metrics.builder: operator-sdk-v1.23.0 operators.operatorframework.io.metrics.mediatype.v1: metrics+v1 operators.operatorframework.io.metrics.project_layout: go.kubebuilder.io/v3 diff --git a/config/crd/bases/flows.netobserv.io_flowcollectors.yaml b/config/crd/bases/flows.netobserv.io_flowcollectors.yaml index d0ca2b98d..14fbc0978 100644 --- a/config/crd/bases/flows.netobserv.io_flowcollectors.yaml +++ b/config/crd/bases/flows.netobserv.io_flowcollectors.yaml @@ -17,7 +17,7 @@ spec: scope: Cluster versions: - additionalPrinterColumns: - - jsonPath: .spec.agent + - jsonPath: .spec.agent.type name: Agent type: string - jsonPath: .spec.kafka.enable @@ -48,17 +48,181 @@ spec: description: FlowCollectorSpec defines the desired state of FlowCollector properties: agent: - default: ebpf - description: Select the flows tracing agent. Possible values are "ipfix" - to use the IPFIX collector, or "ebpf" (default) to use NetObserv - eBPF agent. eBPF is recommended, as it should work in more situations - and offers better performances. When using IPFIX with OVN-Kubernetes - CNI, NetObserv will configure OVN's IPFIX exporter. Other CNIs are - not supported, they could work but necessitate manual configuration. - enum: - - ipfix - - ebpf - type: string + default: + type: EBPF + description: FlowCollectorAgent is a discriminated union that allows + to select either ipfix or ebpf, but does not allow defining both + fields. + properties: + ebpf: + description: Settings related to eBPF-based flow reporter when + the "agent.type" property is set to "EBPF". + properties: + cacheActiveTimeout: + default: 5s + description: CacheActiveTimeout is the max period during which + the reporter will aggregate flows before sending + pattern: ^\d+(ns|ms|s|m)?$ + type: string + cacheMaxFlows: + default: 1000 + description: CacheMaxFlows is the max number of flows in an + aggregate; when reached, the reporter sends the flows + format: int32 + minimum: 1 + type: integer + env: + additionalProperties: + type: string + description: Env allows passing custom environment variables + to the NetObserv Agent. Useful for passing some very concrete + performance-tuning options (e.g. GOGC, GOMAXPROCS) that + shouldn't be publicly exposed as part of the FlowCollector + descriptor, as they are only useful in edge debug/support + scenarios. + type: object + excludeInterfaces: + default: + - lo + description: ExcludeInterfaces contains the interface names + that will be excluded from flow tracing. If an entry is + enclosed by slashes (e.g. `/br-/`), it will match as regular + expression, otherwise it will be matched as a case-sensitive + string. 
+ items: + type: string + type: array + image: + default: quay.io/netobserv/netobserv-ebpf-agent:main + description: Image is the NetObserv Agent image (including + domain and tag) + type: string + imagePullPolicy: + default: IfNotPresent + description: ImagePullPolicy is the Kubernetes pull policy + for the image defined above + enum: + - IfNotPresent + - Always + - Never + type: string + interfaces: + description: Interfaces contains the interface names from + where flows will be collected. If empty, the agent will + fetch all the interfaces in the system, excepting the ones + listed in ExcludeInterfaces. If an entry is enclosed by + slashes (e.g. `/br-/`), it will match as regular expression, + otherwise it will be matched as a case-sensitive string. + items: + type: string + type: array + logLevel: + default: info + description: LogLevel defines the log level for the NetObserv + eBPF Agent + enum: + - trace + - debug + - info + - warn + - error + - fatal + - panic + type: string + privileged: + description: 'Privileged mode for the eBPF Agent container. + If false, the operator will add the following capabilities + to the container, to enable its correct operation: BPF, + PERFMON, NET_ADMIN, SYS_RESOURCE.' + type: boolean + resources: + description: 'Compute Resources required by this container. + Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of compute + resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount of + compute resources required. If Requests is omitted for + a container, it defaults to Limits if that is explicitly + specified, otherwise to an implementation-defined value. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + type: object + type: object + sampling: + default: 50 + description: Sampling is the sampling rate on the reporter. + 100 means one flow on 100 is sent. 0 or 1 means all flows + are sampled. + format: int32 + minimum: 0 + type: integer + type: object + ipfix: + description: Settings related to IPFIX-based flow reporter when + the "agent.type" property is set to "IPFIX". + properties: + cacheActiveTimeout: + default: 20s + description: CacheActiveTimeout is the max period during which + the reporter will aggregate flows before sending + pattern: ^\d+(ns|ms|s|m)?$ + type: string + cacheMaxFlows: + default: 400 + description: CacheMaxFlows is the max number of flows in an + aggregate; when reached, the reporter sends the flows + format: int32 + minimum: 0 + type: integer + forceSampleAll: + default: false + description: It is not recommended to sample all the traffic + with IPFIX, as it may generate cluster instability. If you + REALLY want to do that, set this flag to true. Use at your + own risks. When it is set to true, the value of "sampling" + is ignored. 
+ type: boolean + sampling: + default: 400 + description: Sampling is the sampling rate on the reporter. + 100 means one flow on 100 is sent. To ensure cluster stability, + it is not possible to set a value below 2. If you really + want to sample every packet, which may impact the cluster + stability, refer to "forceSampleAll". Alternatively, you + can use the eBPF Agent instead of IPFIX. + format: int32 + minimum: 2 + type: integer + type: object + type: + default: EBPF + description: Select the flows tracing agent. Possible values are + "IPFIX" (default) to use the IPFIX collector, or "EBPF" to use + NetObserv eBPF agent. When using IPFIX with OVN-Kubernetes CNI, + NetObserv will configure OVN's IPFIX exporter. Other CNIs are + not supported, they could work but require manual configuration. + enum: + - IPFIX + - EBPF + type: string + required: + - type + type: object clusterNetworkOperator: description: Settings related to the OpenShift Cluster Network Operator, when available. @@ -691,122 +855,6 @@ spec: required: - register type: object - ebpf: - default: - imagePullPolicy: IfNotPresent - description: Settings related to eBPF-based flow reporter when the - "agent" property is set to "ebpf". - properties: - cacheActiveTimeout: - default: 5s - description: CacheActiveTimeout is the max period during which - the reporter will aggregate flows before sending - pattern: ^\d+(ns|ms|s|m)?$ - type: string - cacheMaxFlows: - default: 1000 - description: CacheMaxFlows is the max number of flows in an aggregate; - when reached, the reporter sends the flows - format: int32 - minimum: 1 - type: integer - env: - additionalProperties: - type: string - description: Env allows passing custom environment variables to - the NetObserv Agent. Useful for passing some very concrete performance-tuning - options (e.g. GOGC, GOMAXPROCS) that shouldn't be publicly exposed - as part of the FlowCollector descriptor, as they are only useful - in edge debug/support scenarios. - type: object - excludeInterfaces: - default: - - lo - description: ExcludeInterfaces contains the interface names that - will be excluded from flow tracing. If an entry is enclosed - by slashes (e.g. `/br-/`), it will match as regular expression, - otherwise it will be matched as a case-sensitive string. - items: - type: string - type: array - image: - default: quay.io/netobserv/netobserv-ebpf-agent:main - description: Image is the NetObserv Agent image (including domain - and tag) - type: string - imagePullPolicy: - default: IfNotPresent - description: ImagePullPolicy is the Kubernetes pull policy for - the image defined above - enum: - - IfNotPresent - - Always - - Never - type: string - interfaces: - description: Interfaces contains the interface names from where - flows will be collected. If empty, the agent will fetch all - the interfaces in the system, excepting the ones listed in ExcludeInterfaces. - If an entry is enclosed by slashes (e.g. `/br-/`), it will match - as regular expression, otherwise it will be matched as a case-sensitive - string. - items: - type: string - type: array - logLevel: - default: info - description: LogLevel defines the log level for the NetObserv - eBPF Agent - enum: - - trace - - debug - - info - - warn - - error - - fatal - - panic - type: string - privileged: - description: 'Privileged mode for the eBPF Agent container. If - false, the operator will add the following capabilities to the - container, to enable its correct operation: BPF, PERFMON, NET_ADMIN, - SYS_RESOURCE.' 
- type: boolean - resources: - description: 'Compute Resources required by this container. Cannot - be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' - properties: - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: 'Limits describes the maximum amount of compute - resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' - type: object - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: 'Requests describes the minimum amount of compute - resources required. If Requests is omitted for a container, - it defaults to Limits if that is explicitly specified, otherwise - to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' - type: object - type: object - sampling: - default: 50 - description: Sampling is the sampling rate on the reporter. 100 - means one flow on 100 is sent. 0 or 1 means all flows are sampled. - format: int32 - minimum: 0 - type: integer - type: object flowlogsPipeline: description: Settings related to the flowlogs-pipeline component, which collects and enriches the flows, and produces metrics. @@ -1454,44 +1502,6 @@ spec: type: object type: object type: object - ipfix: - default: - sampling: 400 - description: Settings related to IPFIX-based flow reporter when the - "agent" property is set to "ipfix". - properties: - cacheActiveTimeout: - default: 20s - description: CacheActiveTimeout is the max period during which - the reporter will aggregate flows before sending - pattern: ^\d+(ns|ms|s|m)?$ - type: string - cacheMaxFlows: - default: 400 - description: CacheMaxFlows is the max number of flows in an aggregate; - when reached, the reporter sends the flows - format: int32 - minimum: 0 - type: integer - forceSampleAll: - default: false - description: It is not recommended to sample all the traffic with - IPFIX, as it may generate cluster instability. If you REALLY - want to do that, set this flag to true. Use at your own risks. - When it is set to true, the value of "sampling" is ignored. - type: boolean - sampling: - default: 400 - description: Sampling is the sampling rate on the reporter. 100 - means one flow on 100 is sent. To ensure cluster stability, - it is not possible to set a value below 2. If you really want - to sample every packet, which may impact the cluster stability, - refer to "forceSampleAll". Alternatively, you can use the eBPF - Agent instead of IPFIX. - format: int32 - minimum: 2 - type: integer - type: object kafka: description: Kafka configuration, allowing to use Kafka as a broker as part of the flow collection pipeline. Kafka can provide better @@ -1708,7 +1718,6 @@ spec: type: string type: object namespace: - default: "" description: Namespace where NetObserv pods are deployed. If empty, the namespace of the operator is going to be used. 
type: string diff --git a/config/crd/patches/version_in_flowcollectors.yaml b/config/crd/patches/version_in_flowcollectors.yaml index 1ec37e506..ff3b32417 100644 --- a/config/crd/patches/version_in_flowcollectors.yaml +++ b/config/crd/patches/version_in_flowcollectors.yaml @@ -6,5 +6,5 @@ path: /spec/versions/0/schema/openAPIV3Schema/properties/spec/properties/flowlogsPipeline/properties/image/default value: "quay.io/netobserv/flowlogs-pipeline:v0.1.3" - op: add - path: /spec/versions/0/schema/openAPIV3Schema/properties/spec/properties/ebpf/properties/image/default + path: /spec/versions/0/schema/openAPIV3Schema/properties/spec/properties/agent/properties/ebpf/properties/image/default value: "quay.io/netobserv/netobserv-ebpf-agent:v0.1.2" diff --git a/config/crd/patches/version_in_flowcollectors_envtpl.yaml b/config/crd/patches/version_in_flowcollectors_envtpl.yaml index 0813f4c9b..79c3716fc 100644 --- a/config/crd/patches/version_in_flowcollectors_envtpl.yaml +++ b/config/crd/patches/version_in_flowcollectors_envtpl.yaml @@ -6,5 +6,5 @@ path: /spec/versions/0/schema/openAPIV3Schema/properties/spec/properties/flowlogsPipeline/properties/image/default value: "quay.io/netobserv/flowlogs-pipeline:$FLP_VERSION" - op: add - path: /spec/versions/0/schema/openAPIV3Schema/properties/spec/properties/ebpf/properties/image/default + path: /spec/versions/0/schema/openAPIV3Schema/properties/spec/properties/agent/properties/ebpf/properties/image/default value: "quay.io/netobserv/netobserv-ebpf-agent:$BPF_VERSION" diff --git a/config/manifests/bases/netobserv-operator.clusterserviceversion.yaml b/config/manifests/bases/netobserv-operator.clusterserviceversion.yaml index ccbc7d538..cca616406 100644 --- a/config/manifests/bases/netobserv-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/netobserv-operator.clusterserviceversion.yaml @@ -49,7 +49,7 @@ spec: A couple of settings deserve special attention: - - Agent (`spec.agent`) can be `ebpf` or `ipfix`. eBPF is recommended, as it should work in more situations and offers better performances. If you can't, or don't want to use eBPF, note that the IPFIX option is fully functional only when using [OVN-Kubernetes](https://github.com/ovn-org/ovn-kubernetes/) CNI. Other CNIs are not officially supported, but you may still be able to configure them manually if they allow IPFIX exports. + - Agent (`spec.agent`) can be `EBPF` or `IPFIX`. eBPF is recommended, as it should work in more situations and offers better performances. If you can't, or don't want to use eBPF, note that the IPFIX option is fully functional only when using [OVN-Kubernetes](https://github.com/ovn-org/ovn-kubernetes/) CNI. Other CNIs are not officially supported, but you may still be able to configure them manually if they allow IPFIX exports. - Sampling (`spec.ebpf.sampling` and `spec.ipfix.sampling`): 24/7, 1:1 sampled flow collection may consume a non-negligible amount of resources. While we are doing our best to make it a viable option in production, it is still sometimes necessary to mitigate by setting a sampling ratio. A value of `100` means: one flow every 100 is sampled. `1` means all flows are sampled. The lower it is, the more flows you get, and the more accurate are derived metrics. By default, sampling is set to 50 (ie. 1:50) for eBPF and 400 (1:400) for IPFIX. Note that more sampled flows also means more storage needed. We recommend to start with default values and refine empirically, to figure out which setting your cluster can manage. 
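Note that with this restructuring the sampling knobs referenced in the bullet above live under `spec.agent`, for example `spec.agent.ebpf.sampling`. A short sketch with illustrative values within the documented bounds:

```yaml
spec:
  agent:
    type: EBPF
    ebpf:
      sampling: 100             # 1:100; 0 or 1 means every flow is sampled
    ipfix:
      sampling: 400             # minimum 2, unless forceSampleAll is set to true
```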
diff --git a/config/samples/flows_v1alpha1_flowcollector.yaml b/config/samples/flows_v1alpha1_flowcollector.yaml index 3c48c4d8c..b74f78241 100644 --- a/config/samples/flows_v1alpha1_flowcollector.yaml +++ b/config/samples/flows_v1alpha1_flowcollector.yaml @@ -4,21 +4,22 @@ metadata: name: cluster spec: namespace: "network-observability" - agent: ebpf - ipfix: - cacheActiveTimeout: 20s - cacheMaxFlows: 400 - sampling: 400 - ebpf: - image: 'quay.io/netobserv/netobserv-ebpf-agent:main' - imagePullPolicy: IfNotPresent - sampling: 50 - cacheActiveTimeout: 5s - cacheMaxFlows: 1000 - interfaces: [] - excludeInterfaces: ["lo"] - logLevel: info - privileged: false + agent: + type: EBPF + ipfix: + cacheActiveTimeout: 20s + cacheMaxFlows: 400 + sampling: 400 + ebpf: + image: 'quay.io/netobserv/netobserv-ebpf-agent:main' + imagePullPolicy: IfNotPresent + sampling: 50 + cacheActiveTimeout: 5s + cacheMaxFlows: 1000 + interfaces: [ ] + excludeInterfaces: [ "lo" ] + logLevel: info + privileged: false flowlogsPipeline: kind: DaemonSet # kind: Deployment diff --git a/config/samples/flows_v1alpha1_flowcollector_versioned.yaml b/config/samples/flows_v1alpha1_flowcollector_versioned.yaml index e11bc654d..8292f72dc 100644 --- a/config/samples/flows_v1alpha1_flowcollector_versioned.yaml +++ b/config/samples/flows_v1alpha1_flowcollector_versioned.yaml @@ -4,21 +4,22 @@ metadata: name: cluster spec: namespace: "network-observability" - agent: ipfix - ipfix: - cacheActiveTimeout: 20s - cacheMaxFlows: 400 - sampling: 400 - ebpf: - image: 'quay.io/netobserv/netobserv-ebpf-agent:v0.1.2' - imagePullPolicy: IfNotPresent - sampling: 0 - cacheActiveTimeout: 5s - cacheMaxFlows: 1000 - interfaces: [] - excludeInterfaces: ["lo"] - logLevel: info - privileged: false + agent: + type: EBPF + ipfix: + cacheActiveTimeout: 20s + cacheMaxFlows: 400 + sampling: 400 + ebpf: + image: 'quay.io/netobserv/netobserv-ebpf-agent:v0.1.2' + imagePullPolicy: IfNotPresent + sampling: 50 + cacheActiveTimeout: 5s + cacheMaxFlows: 1000 + interfaces: [ ] + excludeInterfaces: [ "lo" ] + logLevel: info + privileged: false flowlogsPipeline: kind: DaemonSet # kind: Deployment diff --git a/controllers/ebpf/agent_controller.go b/controllers/ebpf/agent_controller.go index 0b4cb65b1..43f0319c3 100644 --- a/controllers/ebpf/agent_controller.go +++ b/controllers/ebpf/agent_controller.go @@ -91,7 +91,7 @@ func (c *AgentController) Reconcile( if err != nil { return fmt.Errorf("fetching current EBPF Agent: %w", err) } - if target.Spec.Agent != flowsv1alpha1.AgentEBPF { + if target.Spec.Agent.Type != flowsv1alpha1.AgentEBPF { if current == nil { rlog.Info("nothing to do, as the requested agent is not eBPF", "currentAgent", target.Spec.Agent) @@ -109,7 +109,7 @@ func (c *AgentController) Reconcile( } } - if err := c.permissions.Reconcile(ctx, &target.Spec.EBPF); err != nil { + if err := c.permissions.Reconcile(ctx, &target.Spec.Agent.EBPF); err != nil { return fmt.Errorf("reconciling permissions: %w", err) } desired := c.desired(target) @@ -143,10 +143,10 @@ func (c *AgentController) current(ctx context.Context) (*v1.DaemonSet, error) { } func (c *AgentController) desired(coll *flowsv1alpha1.FlowCollector) *v1.DaemonSet { - if coll == nil || coll.Spec.Agent != flowsv1alpha1.AgentEBPF { + if coll == nil || coll.Spec.Agent.Type != flowsv1alpha1.AgentEBPF { return nil } - version := helper.ExtractVersion(coll.Spec.EBPF.Image) + version := helper.ExtractVersion(coll.Spec.Agent.EBPF.Image) volumeMounts := []corev1.VolumeMount{} volumes := []corev1.Volume{} if 
coll.Spec.Kafka.Enable && coll.Spec.Kafka.TLS.Enable { @@ -180,9 +180,9 @@ func (c *AgentController) desired(coll *flowsv1alpha1.FlowCollector) *v1.DaemonS Volumes: volumes, Containers: []corev1.Container{{ Name: constants.EBPFAgentName, - Image: coll.Spec.EBPF.Image, - ImagePullPolicy: corev1.PullPolicy(coll.Spec.EBPF.ImagePullPolicy), - Resources: coll.Spec.EBPF.Resources, + Image: coll.Spec.Agent.EBPF.Image, + ImagePullPolicy: corev1.PullPolicy(coll.Spec.Agent.EBPF.ImagePullPolicy), + Resources: coll.Spec.Agent.EBPF.Resources, SecurityContext: c.securityContext(coll), Env: c.envConfig(coll), VolumeMounts: volumeMounts, @@ -195,43 +195,43 @@ func (c *AgentController) desired(coll *flowsv1alpha1.FlowCollector) *v1.DaemonS func (c *AgentController) envConfig(coll *flowsv1alpha1.FlowCollector) []corev1.EnvVar { var config []corev1.EnvVar - if coll.Spec.EBPF.CacheActiveTimeout != "" { + if coll.Spec.Agent.EBPF.CacheActiveTimeout != "" { config = append(config, corev1.EnvVar{ Name: envCacheActiveTimeout, - Value: coll.Spec.EBPF.CacheActiveTimeout, + Value: coll.Spec.Agent.EBPF.CacheActiveTimeout, }) } - if coll.Spec.EBPF.CacheMaxFlows != 0 { + if coll.Spec.Agent.EBPF.CacheMaxFlows != 0 { config = append(config, corev1.EnvVar{ Name: envCacheMaxFlows, - Value: strconv.Itoa(int(coll.Spec.EBPF.CacheMaxFlows)), + Value: strconv.Itoa(int(coll.Spec.Agent.EBPF.CacheMaxFlows)), }) } - if coll.Spec.EBPF.LogLevel != "" { + if coll.Spec.Agent.EBPF.LogLevel != "" { config = append(config, corev1.EnvVar{ Name: envLogLevel, - Value: coll.Spec.EBPF.LogLevel, + Value: coll.Spec.Agent.EBPF.LogLevel, }) } - if len(coll.Spec.EBPF.Interfaces) > 0 { + if len(coll.Spec.Agent.EBPF.Interfaces) > 0 { config = append(config, corev1.EnvVar{ Name: envInterfaces, - Value: strings.Join(coll.Spec.EBPF.Interfaces, envListSeparator), + Value: strings.Join(coll.Spec.Agent.EBPF.Interfaces, envListSeparator), }) } - if len(coll.Spec.EBPF.ExcludeInterfaces) > 0 { + if len(coll.Spec.Agent.EBPF.ExcludeInterfaces) > 0 { config = append(config, corev1.EnvVar{ Name: envExcludeInterfaces, - Value: strings.Join(coll.Spec.EBPF.ExcludeInterfaces, envListSeparator), + Value: strings.Join(coll.Spec.Agent.EBPF.ExcludeInterfaces, envListSeparator), }) } - if coll.Spec.EBPF.Sampling > 1 { + if coll.Spec.Agent.EBPF.Sampling > 1 { config = append(config, corev1.EnvVar{ Name: envSampling, - Value: strconv.Itoa(int(coll.Spec.EBPF.Sampling)), + Value: strconv.Itoa(int(coll.Spec.Agent.EBPF.Sampling)), }) } - for k, v := range coll.Spec.EBPF.Env { + for k, v := range coll.Spec.Agent.EBPF.Env { config = append(config, corev1.EnvVar{Name: k, Value: v}) } if coll.Spec.Kafka.Enable { @@ -297,8 +297,8 @@ func (c *AgentController) securityContext(coll *flowsv1alpha1.FlowCollector) *co RunAsUser: pointer.Int64(0), } - if coll.Spec.EBPF.Privileged { - sc.Privileged = &coll.Spec.EBPF.Privileged + if coll.Spec.Agent.EBPF.Privileged { + sc.Privileged = &coll.Spec.Agent.EBPF.Privileged } else { sc.Capabilities = &corev1.Capabilities{Add: permissions.AllowedCapabilities} } diff --git a/controllers/flowcollector_controller_console_test.go b/controllers/flowcollector_controller_console_test.go index 3d1c57b66..1c32afc13 100644 --- a/controllers/flowcollector_controller_console_test.go +++ b/controllers/flowcollector_controller_console_test.go @@ -71,7 +71,7 @@ func flowCollectorConsolePluginSpecs() { }, Spec: flowsv1alpha1.FlowCollectorSpec{ Namespace: cpNamespace, - Agent: "ipfix", + Agent: flowsv1alpha1.FlowCollectorAgent{Type: "IPFIX"}, ConsolePlugin: 
flowsv1alpha1.FlowCollectorConsolePlugin{ Port: 9001, ImagePullPolicy: "Never", diff --git a/controllers/flowcollector_controller_ebpf_test.go b/controllers/flowcollector_controller_ebpf_test.go index 390e79922..28dafae5e 100644 --- a/controllers/flowcollector_controller_ebpf_test.go +++ b/controllers/flowcollector_controller_ebpf_test.go @@ -43,7 +43,6 @@ func flowCollectorEBPFSpecs() { ObjectMeta: metav1.ObjectMeta{Name: crKey.Name}, Spec: flowsv1alpha1.FlowCollectorSpec{ Namespace: operatorNamespace, - Agent: "ebpf", FlowlogsPipeline: flowsv1alpha1.FlowCollectorFLP{ Kind: "DaemonSet", Port: 9999, @@ -51,16 +50,19 @@ func flowCollectorEBPFSpecs() { LogLevel: "error", Image: "testimg:latest", }, - EBPF: flowsv1alpha1.FlowCollectorEBPF{ - Image: "netobserv-ebpf-agent:latest", - Sampling: 123, - CacheActiveTimeout: "15s", - CacheMaxFlows: 100, - Interfaces: []string{"veth0", "/^br-/"}, - ExcludeInterfaces: []string{"br-3", "lo"}, - LogLevel: "trace", - Env: map[string]string{ - "BUFFERS_LENGTH": "100", + Agent: flowsv1alpha1.FlowCollectorAgent{ + Type: "EBPF", + EBPF: flowsv1alpha1.FlowCollectorEBPF{ + Image: "netobserv-ebpf-agent:latest", + Sampling: 123, + CacheActiveTimeout: "15s", + CacheMaxFlows: 100, + Interfaces: []string{"veth0", "/^br-/"}, + ExcludeInterfaces: []string{"br-3", "lo"}, + LogLevel: "trace", + Env: map[string]string{ + "BUFFERS_LENGTH": "100", + }, }, }, }, @@ -130,9 +132,9 @@ func flowCollectorEBPFSpecs() { It("Should update fields that have changed", func() { UpdateCR(crKey, func(fc *flowsv1alpha1.FlowCollector) { - Expect(fc.Spec.EBPF.Sampling).To(Equal(int32(123))) - fc.Spec.EBPF.Sampling = 4 - fc.Spec.EBPF.Privileged = true + Expect(fc.Spec.Agent.EBPF.Sampling).To(Equal(int32(123))) + fc.Spec.Agent.EBPF.Sampling = 4 + fc.Spec.Agent.EBPF.Privileged = true }) ds := appsv1.DaemonSet{} @@ -237,7 +239,7 @@ func flowCollectorEBPFKafkaSpecs() { ObjectMeta: metav1.ObjectMeta{Name: crKey.Name}, Spec: flowsv1alpha1.FlowCollectorSpec{ Namespace: operatorNamespace, - Agent: "ebpf", + Agent: flowsv1alpha1.FlowCollectorAgent{Type: "EBPF"}, Kafka: flowsv1alpha1.FlowCollectorKafka{ Enable: true, Address: "kafka-cluster-kafka-bootstrap", diff --git a/controllers/flowcollector_controller_test.go b/controllers/flowcollector_controller_test.go index b41ca0da6..4f8f33c0d 100644 --- a/controllers/flowcollector_controller_test.go +++ b/controllers/flowcollector_controller_test.go @@ -111,9 +111,11 @@ func flowCollectorControllerSpecs() { }}, }, }, - Agent: "ipfix", - IPFIX: flowsv1alpha1.FlowCollectorIPFIX{ - Sampling: 200, + Agent: flowsv1alpha1.FlowCollectorAgent{ + Type: "IPFIX", + IPFIX: flowsv1alpha1.FlowCollectorIPFIX{ + Sampling: 200, + }, }, ConsolePlugin: flowsv1alpha1.FlowCollectorConsolePlugin{ Port: 9001, @@ -203,8 +205,8 @@ func flowCollectorControllerSpecs() { It("Should update successfully", func() { UpdateCR(crKey, func(fc *flowsv1alpha1.FlowCollector) { - fc.Spec.IPFIX.CacheActiveTimeout = "30s" - fc.Spec.IPFIX.Sampling = 1234 + fc.Spec.Agent.IPFIX.CacheActiveTimeout = "30s" + fc.Spec.Agent.IPFIX.Sampling = 1234 fc.Spec.FlowlogsPipeline.Port = 1999 }) @@ -238,10 +240,10 @@ func flowCollectorControllerSpecs() { if err := k8sClient.Get(ctx, crKey, &fc); err != nil { return err } - fc.Spec.IPFIX.Sampling = 1 + fc.Spec.Agent.IPFIX.Sampling = 1 return k8sClient.Update(ctx, &fc) }).Should(Satisfy(func(err error) bool { - return err != nil && strings.Contains(err.Error(), "spec.ipfix.sampling: Invalid value: 1") + return err != nil && strings.Contains(err.Error(), 
"spec.agent.ipfix.sampling: Invalid value: 1") }), "Error expected for invalid sampling value") Eventually(func() error { @@ -249,8 +251,8 @@ func flowCollectorControllerSpecs() { if err := k8sClient.Get(ctx, crKey, &fc); err != nil { return err } - fc.Spec.IPFIX.Sampling = 10 - fc.Spec.IPFIX.ForceSampleAll = true + fc.Spec.Agent.IPFIX.Sampling = 10 + fc.Spec.Agent.IPFIX.ForceSampleAll = true return k8sClient.Update(ctx, &fc) }).Should(Succeed()) @@ -324,7 +326,7 @@ func flowCollectorControllerSpecs() { Image: "testimg:latest", } fc.Spec.Loki = flowsv1alpha1.FlowCollectorLoki{} - fc.Spec.IPFIX = flowsv1alpha1.FlowCollectorIPFIX{ + fc.Spec.Agent.IPFIX = flowsv1alpha1.FlowCollectorIPFIX{ Sampling: 200, } }) @@ -476,7 +478,7 @@ func flowCollectorControllerSpecs() { fc.Spec.FlowlogsPipeline.Kind = "Deployment" fc.Spec.FlowlogsPipeline.Port = 9999 fc.Spec.Namespace = otherNamespace - fc.Spec.IPFIX = flowsv1alpha1.FlowCollectorIPFIX{ + fc.Spec.Agent.IPFIX = flowsv1alpha1.FlowCollectorIPFIX{ Sampling: 200, } }) diff --git a/controllers/flowlogspipeline/flp_reconciler.go b/controllers/flowlogspipeline/flp_reconciler.go index 505a53a93..14518a109 100644 --- a/controllers/flowlogspipeline/flp_reconciler.go +++ b/controllers/flowlogspipeline/flp_reconciler.go @@ -141,7 +141,7 @@ func checkDeployNeeded(fc *flowsv1alpha1.FlowCollectorSpec, confKind string) (bo return fc.Kafka.Enable, nil case ConfKafkaIngester: // disabled if ebpf-agent is enabled, as it sends the flows directly to the transformer - return fc.Kafka.Enable && fc.Agent == flowsv1alpha1.AgentIPFIX, nil + return fc.Kafka.Enable && fc.Agent.Type == flowsv1alpha1.AgentIPFIX, nil default: return false, fmt.Errorf("unknown flowlogs-pipelines config kind") } @@ -172,7 +172,7 @@ func (r *singleDeploymentReconciler) Reconcile(ctx context.Context, desired *flo return err } - builder := newBuilder(r.nobjMngr.Namespace, desired.Spec.Agent, desiredFLP, desiredLoki, desiredKafka, r.confKind, r.useOpenShiftSCC) + builder := newBuilder(r.nobjMngr.Namespace, desired.Spec.Agent.Type, desiredFLP, desiredLoki, desiredKafka, r.confKind, r.useOpenShiftSCC) newCM, configDigest, err := builder.configMap() if err != nil { return err diff --git a/controllers/flowlogspipeline/flp_test.go b/controllers/flowlogspipeline/flp_test.go index 16089f667..50a6eeb27 100644 --- a/controllers/flowlogspipeline/flp_test.go +++ b/controllers/flowlogspipeline/flp_test.go @@ -480,7 +480,7 @@ func TestDeployNeeded(t *testing.T) { assert := assert.New(t) spec := flowsv1alpha1.FlowCollectorSpec{ - Agent: "ipfix", + Agent: flowsv1alpha1.FlowCollectorAgent{Type: "IPFIX"}, Kafka: flowsv1alpha1.FlowCollectorKafka{Enable: false, Address: "loaclhost:9092", Topic: "FLP"}, } // Kafka not configured @@ -507,7 +507,7 @@ func TestDeployNeeded(t *testing.T) { assert.NoError(err) // Kafka + eBPF agent configured - spec.Agent = "ebpf" + spec.Agent.Type = "EBPF" res, err = checkDeployNeeded(&spec, ConfSingle) assert.False(res) assert.NoError(err) diff --git a/controllers/ovs/flowsconfig_cno_reconciler.go b/controllers/ovs/flowsconfig_cno_reconciler.go index 85fd1602a..a3ac13589 100644 --- a/controllers/ovs/flowsconfig_cno_reconciler.go +++ b/controllers/ovs/flowsconfig_cno_reconciler.go @@ -47,7 +47,7 @@ func (c *FlowsConfigCNOController) Reconcile( if err != nil { return err } - if target.Spec.Agent != flowsv1alpha1.AgentIPFIX { + if target.Spec.Agent.Type != flowsv1alpha1.AgentIPFIX { if current == nil { return nil } @@ -113,7 +113,7 @@ func (c *FlowsConfigCNOController) current(ctx 
context.Context) (*flowsConfig, e func (c *FlowsConfigCNOController) desired( ctx context.Context, coll *flowsv1alpha1.FlowCollector) (*flowsConfig, error) { - corrected := coll.Spec.IPFIX.DeepCopy() + corrected := coll.Spec.Agent.IPFIX.DeepCopy() corrected.Sampling = getSampling(ctx, corrected) conf := flowsConfig{FlowCollectorIPFIX: *corrected} diff --git a/controllers/ovs/flowsconfig_ovnk_reconciler.go b/controllers/ovs/flowsconfig_ovnk_reconciler.go index eee91056d..e5cafec1c 100644 --- a/controllers/ovs/flowsconfig_ovnk_reconciler.go +++ b/controllers/ovs/flowsconfig_ovnk_reconciler.go @@ -87,20 +87,20 @@ func (c *FlowsConfigOVNKController) getDaemonSet(ctx context.Context) (*appsv1.D } func (c *FlowsConfigOVNKController) desiredEnv(ctx context.Context, coll *flowsv1alpha1.FlowCollector) (map[string]string, error) { - cacheTimeout, err := time.ParseDuration(coll.Spec.IPFIX.CacheActiveTimeout) + cacheTimeout, err := time.ParseDuration(coll.Spec.Agent.IPFIX.CacheActiveTimeout) if err != nil { return nil, err } - sampling := getSampling(ctx, &coll.Spec.IPFIX) + sampling := getSampling(ctx, &coll.Spec.Agent.IPFIX) envs := map[string]string{ "OVN_IPFIX_TARGETS": "", "OVN_IPFIX_CACHE_ACTIVE_TIMEOUT": strconv.Itoa(int(cacheTimeout.Seconds())), - "OVN_IPFIX_CACHE_MAX_FLOWS": strconv.Itoa(int(coll.Spec.IPFIX.CacheMaxFlows)), + "OVN_IPFIX_CACHE_MAX_FLOWS": strconv.Itoa(int(coll.Spec.Agent.IPFIX.CacheMaxFlows)), "OVN_IPFIX_SAMPLING": strconv.Itoa(int(sampling)), } - if coll.Spec.Agent != flowsv1alpha1.AgentIPFIX { + if coll.Spec.Agent.Type != flowsv1alpha1.AgentIPFIX { // No IPFIX => leave target empty and return return envs, nil } diff --git a/docs/FlowCollector.md b/docs/FlowCollector.md index bcd8f3cb8..b219c44b0 100644 --- a/docs/FlowCollector.md +++ b/docs/FlowCollector.md @@ -84,13 +84,12 @@ FlowCollectorSpec defines the desired state of FlowCollector - agent - enum + agent + object - Select the flows tracing agent. Possible values are "ipfix" to use the IPFIX collector, or "ebpf" (default) to use NetObserv eBPF agent. eBPF is recommended, as it should work in more situations and offers better performances. When using IPFIX with OVN-Kubernetes CNI, NetObserv will configure OVN's IPFIX exporter. Other CNIs are not supported, they could work but necessitate manual configuration.
+ FlowCollectorAgent is a discriminated union that allows to select either ipfix or ebpf, but does not allow defining both fields.

- Enum: ipfix, ebpf
- Default: ebpf
+ Default: map[type:EBPF]
true @@ -108,58 +107,293 @@ FlowCollectorSpec defines the desired state of FlowCollector false - ebpf + flowlogsPipeline object - Settings related to eBPF-based flow reporter when the "agent" property is set to "ebpf".
-
- Default: map[imagePullPolicy:IfNotPresent]
+ Settings related to the flowlogs-pipeline component, which collects and enriches the flows, and produces metrics.
false - flowlogsPipeline + kafka object - Settings related to the flowlogs-pipeline component, which collects and enriches the flows, and produces metrics.
+ Kafka configuration, allowing to use Kafka as a broker as part of the flow collection pipeline. Kafka can provide better scalability, resiliency and high availability (for more details, see https://www.redhat.com/en/topics/integration/what-is-apache-kafka).
false - ipfix + loki object - Settings related to IPFIX-based flow reporter when the "agent" property is set to "ipfix".
-
- Default: map[sampling:400]
+ Settings related to the Loki client, used as a flow store.
false - kafka + namespace + string + + Namespace where NetObserv pods are deployed. If empty, the namespace of the operator is going to be used.
+ + false + + ovnKubernetes object - Kafka configuration, allowing to use Kafka as a broker as part of the flow collection pipeline. Kafka can provide better scalability, resiliency and high availability (for more details, see https://www.redhat.com/en/topics/integration/what-is-apache-kafka).
+ Settings related to OVN-Kubernetes CNI, when available. This configuration is used when using OVN's IPFIX exports, without OpenShift. When using OpenShift, refer to the `clusterNetworkOperator` property instead.
false + + + + +### FlowCollector.spec.agent +[↩ Parent](#flowcollectorspec) + + + +FlowCollectorAgent is a discriminated union that allows to select either ipfix or ebpf, but does not allow defining both fields. + + + + + + + + + + + + + + + - + - + + + + + +
NameTypeDescriptionRequired
typeenum + Select the flows tracing agent. Possible values are "IPFIX" to use the IPFIX collector, or "EBPF" (default) to use NetObserv eBPF agent. When using IPFIX with OVN-Kubernetes CNI, NetObserv will configure OVN's IPFIX exporter. Other CNIs are not supported; they could work but require manual configuration.
+
+ Enum: IPFIX, EBPF
+ Default: EBPF
+
true
lokiebpf object - Settings related to the Loki client, used as a flow store.
+ Settings related to eBPF-based flow reporter when the "agent.type" property is set to "EBPF".
false
namespaceipfixobject + Settings related to IPFIX-based flow reporter when the "agent.type" property is set to "IPFIX".
+
false
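
As an illustration of the new layout (not taken verbatim from the samples in this patch), a minimal `FlowCollector` selecting the eBPF agent could look like the sketch below; the resource name `cluster` matches the name used elsewhere in this repository, and the sampling value is simply the documented default:

```yaml
apiVersion: flows.netobserv.io/v1alpha1
kind: FlowCollector
metadata:
  name: cluster
spec:
  agent:
    # Discriminated union: "type" selects the agent; only the matching sub-section is used.
    type: EBPF
    ebpf:
      sampling: 50   # documented default (1:50)
```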
+ + +### FlowCollector.spec.agent.ebpf +[↩ Parent](#flowcollectorspecagent) + + + +Settings related to eBPF-based flow reporter when the "agent.type" property is set to "EBPF". + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
cacheActiveTimeout string - Namespace where NetObserv pods are deployed. If empty, the namespace of the operator is going to be used.
+ CacheActiveTimeout is the max period during which the reporter will aggregate flows before sending

- Default:
+ Default: 5s
false
ovnKubernetescacheMaxFlowsinteger + CacheMaxFlows is the max number of flows in an aggregate; when reached, the reporter sends the flows
+
+ Format: int32
+ Default: 1000
+ Minimum: 1
+
false
envmap[string]string + Env allows passing custom environment variables to the NetObserv Agent. Useful for passing some very concrete performance-tuning options (e.g. GOGC, GOMAXPROCS) that shouldn't be publicly exposed as part of the FlowCollector descriptor, as they are only useful in edge debug/support scenarios.
+
false
excludeInterfaces[]string + ExcludeInterfaces contains the interface names that will be excluded from flow tracing. If an entry is enclosed by slashes (e.g. `/br-/`), it will match as a regular expression; otherwise it will be matched as a case-sensitive string.
+
+ Default: [lo]
+
false
imagestring + Image is the NetObserv Agent image (including domain and tag)
+
+ Default: quay.io/netobserv/netobserv-ebpf-agent:main
+
false
imagePullPolicyenum + ImagePullPolicy is the Kubernetes pull policy for the image defined above
+
+ Enum: IfNotPresent, Always, Never
+ Default: IfNotPresent
+
false
interfaces[]string + Interfaces contains the interface names from where flows will be collected. If empty, the agent will fetch all the interfaces in the system, except the ones listed in ExcludeInterfaces. If an entry is enclosed by slashes (e.g. `/br-/`), it will match as a regular expression; otherwise it will be matched as a case-sensitive string.
+
false
logLevelenum + LogLevel defines the log level for the NetObserv eBPF Agent
+
+ Enum: trace, debug, info, warn, error, fatal, panic
+ Default: info
+
false
privilegedboolean + Privileged mode for the eBPF Agent container. If false, the operator will add the following capabilities to the container, to enable its correct operation: BPF, PERFMON, NET_ADMIN, SYS_RESOURCE.
+
false
resources object - Settings related to OVN-Kubernetes CNI, when available. This configuration is used when using OVN's IPFIX exports, without OpenShift. When using OpenShift, refer to the `clusterNetworkOperator` property instead.
+ Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+
false
samplinginteger + Sampling is the sampling rate on the reporter. 100 means one flow out of every 100 is sent. 0 or 1 means all flows are sampled.
+
+ Format: int32
+ Default: 50
+ Minimum: 0
+
false
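
To show where the knobs above live, here is a hedged sketch of the `spec.agent.ebpf` block built only from the defaults and examples documented in this table (GOGC is the pass-through variable mentioned in the `env` description; all values are illustrative, not recommendations):

```yaml
spec:
  agent:
    type: EBPF
    ebpf:
      cacheActiveTimeout: 5s        # flush aggregates after 5s at the latest...
      cacheMaxFlows: 1000           # ...or as soon as 1000 flows are aggregated
      sampling: 50                  # 1:50 sampling
      interfaces: ["/^br-/"]        # slash-enclosed entries are regular expressions
      excludeInterfaces: ["lo"]     # plain entries are case-sensitive strings
      privileged: false             # keep false to rely on the listed capabilities instead
      env:
        GOGC: "400"                 # example pass-through tuning variable
```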
+ + +### FlowCollector.spec.agent.ebpf.resources +[↩ Parent](#flowcollectorspecagentebpf) + + + +Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
limitsmap[string]int or string + Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+
false
requestsmap[string]int or string + Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+
false
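
For reference, a `resources` block under `spec.agent.ebpf` follows the usual Kubernetes requests/limits shape; the amounts below are placeholders, not values recommended by this project:

```yaml
spec:
  agent:
    ebpf:
      resources:
        requests:
          cpu: 100m
          memory: 64Mi
        limits:
          memory: 256Mi
```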
+ + +### FlowCollector.spec.agent.ipfix +[↩ Parent](#flowcollectorspecagent) + + + +Settings related to IPFIX-based flow reporter when the "agent.type" property is set to "IPFIX". + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1251,157 +1485,6 @@ Configuration of the port to service name translation -Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ - -
NameTypeDescriptionRequired
cacheActiveTimeoutstring + CacheActiveTimeout is the max period during which the reporter will aggregate flows before sending
+
+ Default: 20s
+
false
cacheMaxFlowsinteger + CacheMaxFlows is the max number of flows in an aggregate; when reached, the reporter sends the flows
+
+ Format: int32
+ Default: 400
+ Minimum: 0
+
false
forceSampleAllboolean + It is not recommended to sample all the traffic with IPFIX, as it may cause cluster instability. If you REALLY want to do that, set this flag to true. Use at your own risk. When it is set to true, the value of "sampling" is ignored.
+
+ Default: false
+
false
samplinginteger + Sampling is the sampling rate on the reporter. 100 means one flow out of every 100 is sent. To ensure cluster stability, it is not possible to set a value below 2. If you really want to sample every packet, which may impact cluster stability, refer to "forceSampleAll". Alternatively, you can use the eBPF Agent instead of IPFIX.
+
+ Format: int32
+ Default: 400
+ Minimum: 2
false
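
Putting the IPFIX fields together, a sketch of `spec.agent.ipfix` using only the defaults documented above (values shown purely for illustration):

```yaml
spec:
  agent:
    type: IPFIX
    ipfix:
      cacheActiveTimeout: 20s
      cacheMaxFlows: 400
      sampling: 400          # must be >= 2 unless forceSampleAll is set
      forceSampleAll: false  # only enable if you accept the stability risk
```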
- - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionRequired
limitsmap[string]int or string - Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
-
false
requestsmap[string]int or string - Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
-
false
- - -### FlowCollector.spec.ebpf -[↩ Parent](#flowcollectorspec) - - - -Settings related to eBPF-based flow reporter when the "agent" property is set to "ebpf". - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionRequired
cacheActiveTimeoutstring - CacheActiveTimeout is the max period during which the reporter will aggregate flows before sending
-
- Default: 5s
-
false
cacheMaxFlowsinteger - CacheMaxFlows is the max number of flows in an aggregate; when reached, the reporter sends the flows
-
- Format: int32
- Default: 1000
- Minimum: 1
-
false
envmap[string]string - Env allows passing custom environment variables to the NetObserv Agent. Useful for passing some very concrete performance-tuning options (e.g. GOGC, GOMAXPROCS) that shouldn't be publicly exposed as part of the FlowCollector descriptor, as they are only useful in edge debug/support scenarios.
-
false
excludeInterfaces[]string - ExcludeInterfaces contains the interface names that will be excluded from flow tracing. If an entry is enclosed by slashes (e.g. `/br-/`), it will match as regular expression, otherwise it will be matched as a case-sensitive string.
-
- Default: [lo]
-
false
imagestring - Image is the NetObserv Agent image (including domain and tag)
-
- Default: quay.io/netobserv/netobserv-ebpf-agent:main
-
false
imagePullPolicyenum - ImagePullPolicy is the Kubernetes pull policy for the image defined above
-
- Enum: IfNotPresent, Always, Never
- Default: IfNotPresent
-
false
interfaces[]string - Interfaces contains the interface names from where flows will be collected. If empty, the agent will fetch all the interfaces in the system, excepting the ones listed in ExcludeInterfaces. If an entry is enclosed by slashes (e.g. `/br-/`), it will match as regular expression, otherwise it will be matched as a case-sensitive string.
-
false
logLevelenum - LogLevel defines the log level for the NetObserv eBPF Agent
-
- Enum: trace, debug, info, warn, error, fatal, panic
- Default: info
-
false
privilegedboolean - Privileged mode for the eBPF Agent container. If false, the operator will add the following capabilities to the container, to enable its correct operation: BPF, PERFMON, NET_ADMIN, SYS_RESOURCE.
-
false
resourcesobject - Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
-
false
samplinginteger - Sampling is the sampling rate on the reporter. 100 means one flow on 100 is sent. 0 or 1 means all flows are sampled.
-
- Format: int32
- Default: 50
- Minimum: 0
-
false
- - -### FlowCollector.spec.ebpf.resources -[↩ Parent](#flowcollectorspecebpf) - - - Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ @@ -2521,66 +2604,6 @@ Compute Resources required by this container. Cannot be updated. More info: http
-### FlowCollector.spec.ipfix -[↩ Parent](#flowcollectorspec) - - - -Settings related to IPFIX-based flow reporter when the "agent" property is set to "ipfix". - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescriptionRequired
cacheActiveTimeoutstring - CacheActiveTimeout is the max period during which the reporter will aggregate flows before sending
-
- Default: 20s
-
false
cacheMaxFlowsinteger - CacheMaxFlows is the max number of flows in an aggregate; when reached, the reporter sends the flows
-
- Format: int32
- Default: 400
- Minimum: 0
-
false
forceSampleAllboolean - It is not recommended to sample all the traffic with IPFIX, as it may generate cluster instability. If you REALLY want to do that, set this flag to true. Use at your own risks. When it is set to true, the value of "sampling" is ignored.
-
- Default: false
-
false
samplinginteger - Sampling is the sampling rate on the reporter. 100 means one flow on 100 is sent. To ensure cluster stability, it is not possible to set a value below 2. If you really want to sample every packet, which may impact the cluster stability, refer to "forceSampleAll". Alternatively, you can use the eBPF Agent instead of IPFIX.
-
- Format: int32
- Default: 400
- Minimum: 2
-
false
- - ### FlowCollector.spec.kafka [↩ Parent](#flowcollectorspec) From b51280385ff05a3628dfd12199b22c1bd0b6f499 Mon Sep 17 00:00:00 2001 From: Mario Macias Date: Mon, 5 Sep 2022 17:35:12 +0200 Subject: [PATCH 2/2] fix some spec agent paths --- README.md | 6 +++--- .../manifests/netobserv-operator.clusterserviceversion.yaml | 4 ++-- .../bases/netobserv-operator.clusterserviceversion.yaml | 4 ++-- hack/refresh-ovs.sh | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index fa42c6afc..a592ada85 100644 --- a/README.md +++ b/README.md @@ -107,9 +107,9 @@ As it operates cluster-wide, only a single `FlowCollector` is allowed, and it ha A couple of settings deserve special attention: -- Agent (`spec.agent`) can be `EBPF` (default) or `IPFIX`. eBPF is recommended, as it should work in more situations and offers better performances. If you can't, or don't want to use eBPF, note that the IPFIX option is fully functional only when using OVN-Kubernetes CNI. Other CNIs are not officially supported, but you may still be able to configure them manually if they allow IPFIX exports. +- Agent (`spec.agent.type`) can be `EBPF` (default) or `IPFIX`. eBPF is recommended, as it should work in more situations and offers better performances. If you can't, or don't want to use eBPF, note that the IPFIX option is fully functional only when using OVN-Kubernetes CNI. Other CNIs are not officially supported, but you may still be able to configure them manually if they allow IPFIX exports. -- Sampling (`spec.ebpf.sampling` and `spec.ipfix.sampling`): 24/7, 1:1 sampled flow collection may consume a non-negligible amount of resources. While we are doing our best to make it a viable option in production, it is still sometimes necessary to mitigate by setting a sampling ratio. A value of `100` means: one flow every 100 is sampled. `1` means all flows are sampled. The lower it is, the more flows you get, and the more accurate are derived metrics. By default, sampling is set to 50 (ie. 1:50) for eBPF and 400 (1:400) for IPFIX. Note that more sampled flows also means more storage needed. We recommend to start with default values and refine empirically, to figure out which setting your cluster can manage. +- Sampling (`spec.agent.ebpf.sampling` and `spec.agent.ipfix.sampling`): 24/7, 1:1 sampled flow collection may consume a non-negligible amount of resources. While we are doing our best to make it a viable option in production, it is still sometimes necessary to mitigate by setting a sampling ratio. A value of `100` means: one flow every 100 is sampled. `1` means all flows are sampled. The lower it is, the more flows you get, and the more accurate are derived metrics. By default, sampling is set to 50 (ie. 1:50) for eBPF and 400 (1:400) for IPFIX. Note that more sampled flows also means more storage needed. We recommend to start with default values and refine empirically, to figure out which setting your cluster can manage. - Loki (`spec.loki`): configure here how to reach Loki. The default values match the Loki quick install paths mentioned in the _Getting Started_ section, but you may have to configure differently if you used another installation method. @@ -204,7 +204,7 @@ It should return some json in this form: ### Everything seems correctly deployed but there isn't any flow showing up -If using IPFIX (ie. `spec.agent` is `ipfix` in FlowCollector), wait 10 minutes and check again. There is sometimes a delay, up to 10 minutes, before the flows appear. 
This is due to the IPFIX protocol requiring exporter and collector to exchange record template definitions as a preliminary step. The eBPF agent doesn't have such a delay. +If using IPFIX (ie. `spec.agent.type` is `IPFIX` in FlowCollector), wait 10 minutes and check again. There is sometimes a delay, up to 10 minutes, before the flows appear. This is due to the IPFIX protocol requiring exporter and collector to exchange record template definitions as a preliminary step. The eBPF agent doesn't have such a delay. Else, check for any suspicious error in logs, especially in the `flowlogs-pipeline` pods and the eBPF agent pods. You may also take a look at prometheus metrics prefixed with `netobserv_`: they can give you clues if flows are processed, if errors are reported, etc. diff --git a/bundle/manifests/netobserv-operator.clusterserviceversion.yaml b/bundle/manifests/netobserv-operator.clusterserviceversion.yaml index f00fd816b..7b753a6f9 100644 --- a/bundle/manifests/netobserv-operator.clusterserviceversion.yaml +++ b/bundle/manifests/netobserv-operator.clusterserviceversion.yaml @@ -155,9 +155,9 @@ spec: A couple of settings deserve special attention: - - Agent (`spec.agent`) can be `EBPF` or `IPFIX`. eBPF is recommended, as it should work in more situations and offers better performances. If you can't, or don't want to use eBPF, note that the IPFIX option is fully functional only when using [OVN-Kubernetes](https://github.com/ovn-org/ovn-kubernetes/) CNI. Other CNIs are not officially supported, but you may still be able to configure them manually if they allow IPFIX exports. + - Agent (`spec.agent.type`) can be `EBPF` or `IPFIX`. eBPF is recommended, as it should work in more situations and offers better performances. If you can't, or don't want to use eBPF, note that the IPFIX option is fully functional only when using [OVN-Kubernetes](https://github.com/ovn-org/ovn-kubernetes/) CNI. Other CNIs are not officially supported, but you may still be able to configure them manually if they allow IPFIX exports. - - Sampling (`spec.ebpf.sampling` and `spec.ipfix.sampling`): 24/7, 1:1 sampled flow collection may consume a non-negligible amount of resources. While we are doing our best to make it a viable option in production, it is still sometimes necessary to mitigate by setting a sampling ratio. A value of `100` means: one flow every 100 is sampled. `1` means all flows are sampled. The lower it is, the more flows you get, and the more accurate are derived metrics. By default, sampling is set to 50 (ie. 1:50) for eBPF and 400 (1:400) for IPFIX. Note that more sampled flows also means more storage needed. We recommend to start with default values and refine empirically, to figure out which setting your cluster can manage. + - Sampling (`spec.agent.ebpf.sampling` and `spec.agent.ipfix.sampling`): 24/7, 1:1 sampled flow collection may consume a non-negligible amount of resources. While we are doing our best to make it a viable option in production, it is still sometimes necessary to mitigate by setting a sampling ratio. A value of `100` means: one flow every 100 is sampled. `1` means all flows are sampled. The lower it is, the more flows you get, and the more accurate are derived metrics. By default, sampling is set to 50 (ie. 1:50) for eBPF and 400 (1:400) for IPFIX. Note that more sampled flows also means more storage needed. We recommend to start with default values and refine empirically, to figure out which setting your cluster can manage. 
- Loki (`spec.loki`): configure here how to reach Loki. The default values match the Loki quick install paths mentioned above, but you may have to configure differently if you used another installation method. diff --git a/config/manifests/bases/netobserv-operator.clusterserviceversion.yaml b/config/manifests/bases/netobserv-operator.clusterserviceversion.yaml index cca616406..cd8bb30f1 100644 --- a/config/manifests/bases/netobserv-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/netobserv-operator.clusterserviceversion.yaml @@ -49,9 +49,9 @@ spec: A couple of settings deserve special attention: - - Agent (`spec.agent`) can be `EBPF` or `IPFIX`. eBPF is recommended, as it should work in more situations and offers better performances. If you can't, or don't want to use eBPF, note that the IPFIX option is fully functional only when using [OVN-Kubernetes](https://github.com/ovn-org/ovn-kubernetes/) CNI. Other CNIs are not officially supported, but you may still be able to configure them manually if they allow IPFIX exports. + - Agent (`spec.agent.type`) can be `EBPF` or `IPFIX`. eBPF is recommended, as it should work in more situations and offers better performances. If you can't, or don't want to use eBPF, note that the IPFIX option is fully functional only when using [OVN-Kubernetes](https://github.com/ovn-org/ovn-kubernetes/) CNI. Other CNIs are not officially supported, but you may still be able to configure them manually if they allow IPFIX exports. - - Sampling (`spec.ebpf.sampling` and `spec.ipfix.sampling`): 24/7, 1:1 sampled flow collection may consume a non-negligible amount of resources. While we are doing our best to make it a viable option in production, it is still sometimes necessary to mitigate by setting a sampling ratio. A value of `100` means: one flow every 100 is sampled. `1` means all flows are sampled. The lower it is, the more flows you get, and the more accurate are derived metrics. By default, sampling is set to 50 (ie. 1:50) for eBPF and 400 (1:400) for IPFIX. Note that more sampled flows also means more storage needed. We recommend to start with default values and refine empirically, to figure out which setting your cluster can manage. + - Sampling (`spec.agent.ebpf.sampling` and `spec.agent.ipfix.sampling`): 24/7, 1:1 sampled flow collection may consume a non-negligible amount of resources. While we are doing our best to make it a viable option in production, it is still sometimes necessary to mitigate by setting a sampling ratio. A value of `100` means: one flow every 100 is sampled. `1` means all flows are sampled. The lower it is, the more flows you get, and the more accurate are derived metrics. By default, sampling is set to 50 (ie. 1:50) for eBPF and 400 (1:400) for IPFIX. Note that more sampled flows also means more storage needed. We recommend to start with default values and refine empirically, to figure out which setting your cluster can manage. - Loki (`spec.loki`): configure here how to reach Loki. The default values match the Loki quick install paths mentioned above, but you may have to configure differently if you used another installation method. 
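
To make the path changes described in these descriptors concrete, here is a small, hedged sketch of where the sampling settings now sit in the CR; only the sub-section matching `spec.agent.type` is normally configured:

```yaml
spec:
  agent:
    type: EBPF            # previously spec.agent: ebpf
    ebpf:
      sampling: 50        # previously spec.ebpf.sampling
  # with type: IPFIX, the equivalent knob is spec.agent.ipfix.sampling (default 400)
```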
diff --git a/hack/refresh-ovs.sh b/hack/refresh-ovs.sh index a0c1e89ca..d5f502e86 100755 --- a/hack/refresh-ovs.sh +++ b/hack/refresh-ovs.sh @@ -11,9 +11,9 @@ ovnns=openshift-ovn-kubernetes ovspods=`kubectl get pods -n $ovnns -l app=ovnkube-node --no-headers -o custom-columns=":metadata.name"` -cacheActiveTimeout=`kubectl get flowcollector cluster -o yaml | yq -e .spec.ipfix.cacheActiveTimeout` -cacheMaxFlows=`kubectl get flowcollector cluster -o yaml | yq -e .spec.ipfix.cacheMaxFlows` -sampling=`kubectl get flowcollector cluster -o yaml | yq -e .spec.ipfix.sampling` +cacheActiveTimeout=`kubectl get flowcollector cluster -o yaml | yq -e .spec.agent.ipfix.cacheActiveTimeout` +cacheMaxFlows=`kubectl get flowcollector cluster -o yaml | yq -e .spec.agent.ipfix.cacheMaxFlows` +sampling=`kubectl get flowcollector cluster -o yaml | yq -e .spec.agent.ipfix.sampling` config="cache_active_timeout=${cacheActiveTimeout::-1} cache_max_flows=$cacheMaxFlows sampling=$sampling" echo "Storing config: $config"