From b9e24343597b8a1a48a3aa21ccd8d27b80db83e2 Mon Sep 17 00:00:00 2001 From: Patryk Strusiewicz-Surmacki Date: Mon, 15 Apr 2024 19:57:26 +0200 Subject: [PATCH] Support for plugable reconcilers Signed-off-by: Patryk Strusiewicz-Surmacki --- cmd/manager/main.go | 95 ++++-- pkg/reconciler/layer2.go | 30 +- pkg/reconciler/layer3.go | 54 ++-- pkg/reconciler/layer3.go.orig | 421 --------------------------- pkg/reconciler/legacy_reconciler.go | 85 ++++++ pkg/reconciler/netconf_reconciler.go | 31 ++ pkg/reconciler/reconciler.go | 60 ++-- 7 files changed, 249 insertions(+), 527 deletions(-) delete mode 100644 pkg/reconciler/layer3.go.orig create mode 100644 pkg/reconciler/legacy_reconciler.go create mode 100644 pkg/reconciler/netconf_reconciler.go diff --git a/cmd/manager/main.go b/cmd/manager/main.go index 536390ab..6db7ec61 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -92,6 +92,7 @@ func main() { var onlyBPFMode bool var configFile string var interfacePrefix string + var useNetconf bool flag.StringVar(&configFile, "config", "", "The controller will load its initial configuration from this file. "+ "Omit this flag to use the default configuration values. "+ @@ -100,6 +101,8 @@ func main() { "Only attach BPF to specified interfaces in config. This will not start any reconciliation. Perfect for masters.") flag.StringVar(&interfacePrefix, "macvlan-interface-prefix", "", "Interface prefix for bridge devices for MACVlan sync") + flag.BoolVar(&useNetconf, "use-netconf", false, + "Use NETCONF interface to configure hosts instead of Netlink and FRR.") opts := zap.Options{ Development: true, } @@ -146,7 +149,7 @@ func main() { os.Exit(1) } - if err := initComponents(mgr, anycastTracker, cfg, clientConfig, onlyBPFMode); err != nil { + if err := initComponents(mgr, anycastTracker, cfg, clientConfig, onlyBPFMode, useNetconf); err != nil { setupLog.Error(err, "unable to initialize components") os.Exit(1) } @@ -163,10 +166,22 @@ func main() { } } -func initComponents(mgr manager.Manager, anycastTracker *anycast.Tracker, cfg *config.Config, clientConfig *rest.Config, onlyBPFMode bool) error { +func initComponents(mgr manager.Manager, anycastTracker *anycast.Tracker, cfg *config.Config, + clientConfig *rest.Config, onlyBPFMode bool, useNetconf bool) error { + var r reconciler.Adapter + var err error + if useNetconf { + r, err = reconciler.NewNetconfReconciler() + } else { + r, err = reconciler.NewLegacyReconciler(mgr.GetClient(), anycastTracker, mgr.GetLogger()) + } + + if err != nil { + return fmt.Errorf("unable to create reconciler: %w", err) + } // Start VRFRouteConfigurationReconciler when we are not running in only BPF mode. if !onlyBPFMode { - if err := setupReconcilers(mgr, anycastTracker); err != nil { + if err := setupReconcilers(mgr, anycastTracker, r); err != nil { return fmt.Errorf("unable to setup reconcilers: %w", err) } } @@ -179,18 +194,10 @@ func initComponents(mgr manager.Manager, anycastTracker *anycast.Tracker, cfg *c return fmt.Errorf("unable to set up ready check: %w", err) } - setupLog.Info("load bpf program into Kernel") - if err := bpf.InitBPFRouter(); err != nil { - return fmt.Errorf("unable to init BPF router: %w", err) - } - setupLog.Info("attach bpf to interfaces specified in config") - if err := bpf.AttachInterfaces(cfg.BPFInterfaces); err != nil { - return fmt.Errorf("unable to attach bpf to interfaces: %w", err) + if err := setupBPF(cfg); err != nil { + return fmt.Errorf("uneable to set up BPF: %w", err) } - setupLog.Info("start bpf interface check") - bpf.RunInterfaceCheck() - setupLog.Info("start anycast sync") anycastTracker.RunAnycastSync() @@ -200,33 +207,57 @@ func initComponents(mgr manager.Manager, anycastTracker *anycast.Tracker, cfg *c } if onlyBPFMode { - clusterClient, err := client.New(clientConfig, client.Options{}) - if err != nil { - return fmt.Errorf("error creating controller-runtime client: %w", err) + if err := runBPFOnlyMode(clientConfig); err != nil { + return fmt.Errorf("error running BPF only mode: %w", err) } + } - nc, err := healthcheck.LoadConfig(healthcheck.NetHealthcheckFile) - if err != nil { - return fmt.Errorf("error loading network healthcheck config: %w", err) - } + return nil +} - tcpDialer := healthcheck.NewTCPDialer(nc.Timeout) - hc, err := healthcheck.NewHealthChecker(clusterClient, - healthcheck.NewDefaultHealthcheckToolkit(nil, tcpDialer), - nc) - if err != nil { - return fmt.Errorf("error initializing healthchecker: %w", err) - } - if err = performNetworkingHealthcheck(hc); err != nil { - return fmt.Errorf("error performing healthcheck: %w", err) - } +func runBPFOnlyMode(clientConfig *rest.Config) error { + clusterClient, err := client.New(clientConfig, client.Options{}) + if err != nil { + return fmt.Errorf("error creating controller-runtime client: %w", err) + } + + nc, err := healthcheck.LoadConfig(healthcheck.NetHealthcheckFile) + if err != nil { + return fmt.Errorf("error loading network healthcheck config: %w", err) + } + + tcpDialer := healthcheck.NewTCPDialer(nc.Timeout) + hc, err := healthcheck.NewHealthChecker(clusterClient, + healthcheck.NewDefaultHealthcheckToolkit(nil, tcpDialer), + nc) + if err != nil { + return fmt.Errorf("error initializing healthchecker: %w", err) + } + if err = performNetworkingHealthcheck(hc); err != nil { + return fmt.Errorf("error performing healthcheck: %w", err) + } + + return nil +} + +func setupBPF(cfg *config.Config) error { + setupLog.Info("load bpf program into Kernel") + if err := bpf.InitBPFRouter(); err != nil { + return fmt.Errorf("unable to init BPF router: %w", err) + } + setupLog.Info("attach bpf to interfaces specified in config") + if err := bpf.AttachInterfaces(cfg.BPFInterfaces); err != nil { + return fmt.Errorf("unable to attach bpf to interfaces: %w", err) } + setupLog.Info("start bpf interface check") + bpf.RunInterfaceCheck() + return nil } -func setupReconcilers(mgr manager.Manager, anycastTracker *anycast.Tracker) error { - r, err := reconciler.NewReconciler(mgr.GetClient(), anycastTracker, mgr.GetLogger()) +func setupReconcilers(mgr manager.Manager, anycastTracker *anycast.Tracker, iReconciler reconciler.Adapter) error { + r, err := reconciler.NewReconciler(mgr.GetClient(), anycastTracker, mgr.GetLogger(), iReconciler) if err != nil { return fmt.Errorf("unable to create debounced reconciler: %w", err) } diff --git a/pkg/reconciler/layer2.go b/pkg/reconciler/layer2.go index 941b28f6..214a98c2 100644 --- a/pkg/reconciler/layer2.go +++ b/pkg/reconciler/layer2.go @@ -16,11 +16,11 @@ import ( "k8s.io/apimachinery/pkg/types" ) -func (r *reconcile) fetchLayer2(ctx context.Context) ([]networkv1alpha1.Layer2NetworkConfiguration, error) { +func (r *Reconciler) fetchLayer2(ctx context.Context) ([]networkv1alpha1.Layer2NetworkConfiguration, error) { layer2List := &networkv1alpha1.Layer2NetworkConfigurationList{} err := r.client.List(ctx, layer2List) if err != nil { - r.Logger.Error(err, "error getting list of Layer2s from Kubernetes") + r.logger.Error(err, "error getting list of Layer2s from Kubernetes") return nil, fmt.Errorf("error getting list of Layer2s from Kubernetes: %w", err) } @@ -28,14 +28,14 @@ func (r *reconcile) fetchLayer2(ctx context.Context) ([]networkv1alpha1.Layer2Ne node := &corev1.Node{} err = r.client.Get(ctx, types.NamespacedName{Name: nodeName}, node) if err != nil { - r.Logger.Error(err, "error getting local node name") + r.logger.Error(err, "error getting local node name") return nil, fmt.Errorf("error getting local node name: %w", err) } l2vnis := []networkv1alpha1.Layer2NetworkConfiguration{} for i := range layer2List.Items { item := &layer2List.Items[i] - logger := r.Logger.WithValues("name", item.ObjectMeta.Name, "namespace", item.ObjectMeta.Namespace, "vlan", item.Spec.ID, "vni", item.Spec.VNI) + logger := r.logger.WithValues("name", item.ObjectMeta.Name, "namespace", item.ObjectMeta.Namespace, "vlan", item.Spec.ID, "vni", item.Spec.VNI) if item.Spec.NodeSelector != nil { selector := labels.NewSelector() var reqs labels.Requirements @@ -76,7 +76,7 @@ func (r *reconcile) fetchLayer2(ctx context.Context) ([]networkv1alpha1.Layer2Ne return l2vnis, nil } -func (r *reconcile) reconcileLayer2(l2vnis []networkv1alpha1.Layer2NetworkConfiguration) error { +func (r *LegacyReconciler) reconcileLayer2(l2vnis []networkv1alpha1.Layer2NetworkConfiguration) error { desired, err := r.getDesired(l2vnis) if err != nil { return err @@ -111,10 +111,10 @@ func (r *reconcile) reconcileLayer2(l2vnis []networkv1alpha1.Layer2NetworkConfig } for i := range toDelete { - r.Logger.Info("Deleting Layer2 because it is no longer configured", "vlan", toDelete[i].VlanID, "vni", toDelete[i].VNI) + r.logger.Info("Deleting Layer2 because it is no longer configured", "vlan", toDelete[i].VlanID, "vni", toDelete[i].VNI) errs := r.netlinkManager.CleanupL2(&toDelete[i]) for _, err := range errs { - r.Logger.Error(err, "Error deleting Layer2", "vlan", toDelete[i].VlanID, "vni", toDelete[i].VNI) + r.logger.Error(err, "Error deleting Layer2", "vlan", toDelete[i].VlanID, "vni", toDelete[i].VNI) } } @@ -129,8 +129,8 @@ func (r *reconcile) reconcileLayer2(l2vnis []networkv1alpha1.Layer2NetworkConfig return nil } -func (r *reconcile) createL2(info *nl.Layer2Information, anycastTrackerInterfaces *[]int) error { - r.Logger.Info("Creating Layer2", "vlan", info.VlanID, "vni", info.VNI) +func (r *LegacyReconciler) createL2(info *nl.Layer2Information, anycastTrackerInterfaces *[]int) error { + r.logger.Info("Creating Layer2", "vlan", info.VlanID, "vni", info.VNI) err := r.netlinkManager.CreateL2(info) if err != nil { return fmt.Errorf("error creating layer2 vlan %d vni %d: %w", info.VlanID, info.VNI, err) @@ -145,7 +145,7 @@ func (r *reconcile) createL2(info *nl.Layer2Information, anycastTrackerInterface return nil } -func (r *reconcile) getDesired(l2vnis []networkv1alpha1.Layer2NetworkConfiguration) ([]nl.Layer2Information, error) { +func (r *LegacyReconciler) getDesired(l2vnis []networkv1alpha1.Layer2NetworkConfiguration) ([]nl.Layer2Information, error) { availableVrfs, err := r.netlinkManager.ListL3() if err != nil { return nil, fmt.Errorf("error loading available VRFs: %w", err) @@ -162,7 +162,7 @@ func (r *reconcile) getDesired(l2vnis []networkv1alpha1.Layer2NetworkConfigurati anycastGateways, err := r.netlinkManager.ParseIPAddresses(spec.AnycastGateways) if err != nil { - r.Logger.Error(err, "error parsing anycast gateways", "layer", l2vnis[i].ObjectMeta.Name, "gw", spec.AnycastGateways) + r.logger.Error(err, "error parsing anycast gateways", "layer", l2vnis[i].ObjectMeta.Name, "gw", spec.AnycastGateways) return nil, fmt.Errorf("error parsing anycast gateways: %w", err) } @@ -175,7 +175,7 @@ func (r *reconcile) getDesired(l2vnis []networkv1alpha1.Layer2NetworkConfigurati } } if !vrfAvailable { - r.Logger.Error(err, "VRF of Layer2 not found on node", "layer", l2vnis[i].ObjectMeta.Name, "vrf", spec.VRF) + r.logger.Error(err, "VRF of Layer2 not found on node", "layer", l2vnis[i].ObjectMeta.Name, "vrf", spec.VRF) continue } } @@ -213,8 +213,8 @@ func determineToBeDeleted(existing, desired []nl.Layer2Information) []nl.Layer2I return toDelete } -func (r *reconcile) reconcileExistingLayer(desired, currentConfig *nl.Layer2Information, anycastTrackerInterfaces *[]int) error { - r.Logger.Info("Reconciling existing Layer2", "vlan", desired.VlanID, "vni", desired.VNI) +func (r *LegacyReconciler) reconcileExistingLayer(desired, currentConfig *nl.Layer2Information, anycastTrackerInterfaces *[]int) error { + r.logger.Info("Reconciling existing Layer2", "vlan", desired.VlanID, "vni", desired.VNI) err := r.netlinkManager.ReconcileL2(currentConfig, desired) if err != nil { return fmt.Errorf("error reconciling layer2 vlan %d vni %d: %w", desired.VlanID, desired.VNI, err) @@ -229,7 +229,7 @@ func (r *reconcile) reconcileExistingLayer(desired, currentConfig *nl.Layer2Info return nil } -func (*reconcile) checkL2Duplicates(configs []networkv1alpha1.Layer2NetworkConfiguration) error { +func (*Reconciler) checkL2Duplicates(configs []networkv1alpha1.Layer2NetworkConfiguration) error { for i := range configs { for j := i + 1; j < len(configs); j++ { if configs[i].Spec.ID == configs[j].Spec.ID { diff --git a/pkg/reconciler/layer3.go b/pkg/reconciler/layer3.go index 72508743..871118a6 100644 --- a/pkg/reconciler/layer3.go +++ b/pkg/reconciler/layer3.go @@ -16,22 +16,22 @@ import ( const defaultSleep = 2 * time.Second -func (r *reconcile) fetchLayer3(ctx context.Context) ([]networkv1alpha1.VRFRouteConfiguration, error) { +func (r *Reconciler) fetchLayer3(ctx context.Context) ([]networkv1alpha1.VRFRouteConfiguration, error) { vrfs := &networkv1alpha1.VRFRouteConfigurationList{} err := r.client.List(ctx, vrfs) if err != nil { - r.Logger.Error(err, "error getting list of VRFs from Kubernetes") + r.logger.Error(err, "error getting list of VRFs from Kubernetes") return nil, fmt.Errorf("error getting list of VRFs from Kubernetes: %w", err) } return vrfs.Items, nil } -func (r *reconcile) fetchTaas(ctx context.Context) ([]networkv1alpha1.RoutingTable, error) { +func (r *Reconciler) fetchTaas(ctx context.Context) ([]networkv1alpha1.RoutingTable, error) { tables := &networkv1alpha1.RoutingTableList{} err := r.client.List(ctx, tables) if err != nil { - r.Logger.Error(err, "error getting list of TaaS from Kubernetes") + r.logger.Error(err, "error getting list of TaaS from Kubernetes") return nil, fmt.Errorf("error getting list of TaaS from Kubernetes: %w", err) } @@ -39,7 +39,7 @@ func (r *reconcile) fetchTaas(ctx context.Context) ([]networkv1alpha1.RoutingTab } // nolint: contextcheck // context is not relevant -func (r *reconcile) reconcileLayer3(l3vnis []networkv1alpha1.VRFRouteConfiguration, taas []networkv1alpha1.RoutingTable) error { +func (r *LegacyReconciler) reconcileLayer3(l3vnis []networkv1alpha1.VRFRouteConfiguration, taas []networkv1alpha1.RoutingTable) error { vrfConfigMap, err := r.createVrfConfigMap(l3vnis) if err != nil { return err @@ -65,7 +65,7 @@ func (r *reconcile) reconcileLayer3(l3vnis []networkv1alpha1.VRFRouteConfigurati created, deletedVRF, err := r.reconcileL3Netlink(l3Configs) if err != nil { - r.Logger.Error(err, "error reconciling Netlink") + r.logger.Error(err, "error reconciling Netlink") return err } @@ -87,20 +87,20 @@ func (r *reconcile) reconcileLayer3(l3vnis []networkv1alpha1.VRFRouteConfigurati time.Sleep(defaultSleep) for _, info := range created { if err := r.netlinkManager.UpL3(info); err != nil { - r.Logger.Error(err, "error setting L3 to state UP") + r.logger.Error(err, "error setting L3 to state UP") return fmt.Errorf("error setting L3 to state UP: %w", err) } } return nil } -func (r *reconcile) configureFRR(vrfConfigs []frr.VRFConfiguration, reloadTwice bool) error { +func (r *LegacyReconciler) configureFRR(vrfConfigs []frr.VRFConfiguration, reloadTwice bool) error { changed, err := r.frrManager.Configure(frr.Configuration{ VRFs: vrfConfigs, ASN: r.config.ServerASN, }, r.netlinkManager) if err != nil { - r.Logger.Error(err, "error updating FRR configuration") + r.logger.Error(err, "error updating FRR configuration") return fmt.Errorf("error updating FRR configuration: %w", err) } @@ -125,27 +125,27 @@ func (r *reconcile) configureFRR(vrfConfigs []frr.VRFConfiguration, reloadTwice return nil } -func (r *reconcile) reloadFRR() error { - r.Logger.Info("trying to reload FRR config because it changed") +func (r *LegacyReconciler) reloadFRR() error { + r.logger.Info("trying to reload FRR config because it changed") err := r.frrManager.ReloadFRR() if err != nil { - r.Logger.Error(err, "error reloading FRR systemd unit, trying restart") + r.logger.Error(err, "error reloading FRR systemd unit, trying restart") err = r.frrManager.RestartFRR() if err != nil { - r.Logger.Error(err, "error restarting FRR systemd unit") + r.logger.Error(err, "error restarting FRR systemd unit") return fmt.Errorf("error reloading / restarting FRR systemd unit: %w", err) } } - r.Logger.Info("reloaded FRR config") + r.logger.Info("reloaded FRR config") return nil } -func (r *reconcile) createVrfConfigMap(l3vnis []networkv1alpha1.VRFRouteConfiguration) (map[string]frr.VRFConfiguration, error) { +func (r *LegacyReconciler) createVrfConfigMap(l3vnis []networkv1alpha1.VRFRouteConfiguration) (map[string]frr.VRFConfiguration, error) { vrfConfigMap := map[string]frr.VRFConfiguration{} for i := range l3vnis { spec := l3vnis[i].Spec - logger := r.Logger.WithValues("name", l3vnis[i].ObjectMeta.Name, "namespace", l3vnis[i].ObjectMeta.Namespace, "vrf", spec.VRF) + logger := r.logger.WithValues("name", l3vnis[i].ObjectMeta.Name, "namespace", l3vnis[i].ObjectMeta.Namespace, "vrf", spec.VRF) var vni int var rt string @@ -161,13 +161,13 @@ func (r *reconcile) createVrfConfigMap(l3vnis []networkv1alpha1.VRFRouteConfigur vni = config.SkipVrfTemplateVni } else { err := fmt.Errorf("vrf not in vrf vni map") - r.Logger.Error(err, "VRF does not exist in VRF VNI config, ignoring", "vrf", spec.VRF, "name", l3vnis[i].ObjectMeta.Name, "namespace", l3vnis[i].ObjectMeta.Namespace) + r.logger.Error(err, "VRF does not exist in VRF VNI config, ignoring", "vrf", spec.VRF, "name", l3vnis[i].ObjectMeta.Name, "namespace", l3vnis[i].ObjectMeta.Namespace) continue } if vni == 0 && vni > 16777215 { err := fmt.Errorf("VNI can not be set to 0") - r.Logger.Error(err, "VNI can not be set to 0, ignoring", "vrf", spec.VRF, "name", l3vnis[i].ObjectMeta.Name, "namespace", l3vnis[i].ObjectMeta.Namespace) + r.logger.Error(err, "VNI can not be set to 0, ignoring", "vrf", spec.VRF, "name", l3vnis[i].ObjectMeta.Name, "namespace", l3vnis[i].ObjectMeta.Namespace) continue } @@ -240,7 +240,7 @@ func createVrfConfig(vrfConfigMap map[string]frr.VRFConfiguration, spec *network return &cfg, nil } -func (r *reconcile) reconcileL3Netlink(vrfConfigs []frr.VRFConfiguration) ([]nl.VRFInformation, bool, error) { +func (r *LegacyReconciler) reconcileL3Netlink(vrfConfigs []frr.VRFConfiguration) ([]nl.VRFInformation, bool, error) { existing, err := r.netlinkManager.ListL3() if err != nil { return nil, false, fmt.Errorf("error listing L3 VRF information: %w", err) @@ -260,7 +260,7 @@ func (r *reconcile) reconcileL3Netlink(vrfConfigs []frr.VRFConfiguration) ([]nl. if !stillExists || existing[i].MarkForDelete { toDelete = append(toDelete, existing[i]) } else if err := r.reconcileExisting(existing[i]); err != nil { - r.Logger.Error(err, "error reconciling existing VRF", "vrf", existing[i].Name, "vni", strconv.Itoa(existing[i].VNI)) + r.logger.Error(err, "error reconciling existing VRF", "vrf", existing[i].Name, "vni", strconv.Itoa(existing[i].VNI)) } } @@ -269,15 +269,15 @@ func (r *reconcile) reconcileL3Netlink(vrfConfigs []frr.VRFConfiguration) ([]nl. // Delete / Cleanup VRFs for _, info := range toDelete { - r.Logger.Info("Deleting VRF because it is no longer configured in Kubernetes", "vrf", info.Name, "vni", info.VNI) + r.logger.Info("Deleting VRF because it is no longer configured in Kubernetes", "vrf", info.Name, "vni", info.VNI) errs := r.netlinkManager.CleanupL3(info.Name) for _, err := range errs { - r.Logger.Error(err, "Error deleting VRF", "vrf", info.Name, "vni", strconv.Itoa(info.VNI)) + r.logger.Error(err, "Error deleting VRF", "vrf", info.Name, "vni", strconv.Itoa(info.VNI)) } } // Create VRFs for _, info := range toCreate { - r.Logger.Info("Creating VRF to match Kubernetes", "vrf", info.Name, "vni", info.VNI) + r.logger.Info("Creating VRF to match Kubernetes", "vrf", info.Name, "vni", info.VNI) err := r.netlinkManager.CreateL3(info) if err != nil { return nil, false, fmt.Errorf("error creating VRF %s, VNI %d: %w", info.Name, info.VNI, err) @@ -287,7 +287,7 @@ func (r *reconcile) reconcileL3Netlink(vrfConfigs []frr.VRFConfiguration) ([]nl. return toCreate, len(toDelete) > 0, nil } -func (r *reconcile) reconcileTaasNetlink(vrfConfigs []frr.VRFConfiguration) (bool, error) { +func (r *LegacyReconciler) reconcileTaasNetlink(vrfConfigs []frr.VRFConfiguration) (bool, error) { existing, err := r.netlinkManager.ListTaas() if err != nil { return false, fmt.Errorf("error listing TaaS VRF information: %w", err) @@ -306,7 +306,7 @@ func (r *reconcile) reconcileTaasNetlink(vrfConfigs []frr.VRFConfiguration) (boo return deletedInterface, nil } -func (r *reconcile) cleanupTaasNetlink(existing []nl.TaasInformation, intended []frr.VRFConfiguration) (bool, error) { +func (r *LegacyReconciler) cleanupTaasNetlink(existing []nl.TaasInformation, intended []frr.VRFConfiguration) (bool, error) { deletedInterface := false for _, cfg := range existing { stillExists := false @@ -326,7 +326,7 @@ func (r *reconcile) cleanupTaasNetlink(existing []nl.TaasInformation, intended [ return deletedInterface, nil } -func (r *reconcile) createTaasNetlink(existing []nl.TaasInformation, intended []frr.VRFConfiguration) error { +func (r *LegacyReconciler) createTaasNetlink(existing []nl.TaasInformation, intended []frr.VRFConfiguration) error { for i := range intended { alreadyExists := false for _, cfg := range existing { @@ -349,7 +349,7 @@ func (r *reconcile) createTaasNetlink(existing []nl.TaasInformation, intended [] return nil } -func (r *reconcile) reconcileExisting(cfg nl.VRFInformation) error { +func (r *LegacyReconciler) reconcileExisting(cfg nl.VRFInformation) error { if err := r.netlinkManager.EnsureBPFProgram(cfg); err != nil { return fmt.Errorf("error ensuring BPF program on VRF") } diff --git a/pkg/reconciler/layer3.go.orig b/pkg/reconciler/layer3.go.orig deleted file mode 100644 index 5d765655..00000000 --- a/pkg/reconciler/layer3.go.orig +++ /dev/null @@ -1,421 +0,0 @@ -package reconciler - -import ( - "context" - "fmt" - "net" - "sort" - "strconv" - "time" - - networkv1alpha1 "github.com/telekom/das-schiff-network-operator/api/v1alpha1" - "github.com/telekom/das-schiff-network-operator/pkg/config" - "github.com/telekom/das-schiff-network-operator/pkg/frr" - "github.com/telekom/das-schiff-network-operator/pkg/nl" -) - -const defaultSleep = 2 * time.Second - -func (r *reconcile) fetchLayer3(ctx context.Context) ([]networkv1alpha1.VRFRouteConfiguration, error) { - vrfs := &networkv1alpha1.VRFRouteConfigurationList{} - err := r.client.List(ctx, vrfs) - if err != nil { - r.Logger.Error(err, "error getting list of VRFs from Kubernetes") - return nil, fmt.Errorf("error getting list of VRFs from Kubernetes: %w", err) - } - - return vrfs.Items, nil -} - -func (r *reconcile) fetchTaas(ctx context.Context) ([]networkv1alpha1.RoutingTable, error) { - tables := &networkv1alpha1.RoutingTableList{} - err := r.client.List(ctx, tables) - if err != nil { - r.Logger.Error(err, "error getting list of TaaS from Kubernetes") - return nil, fmt.Errorf("error getting list of TaaS from Kubernetes: %w", err) - } - - return tables.Items, nil -} - -// nolint: contextcheck // context is not relevant -func (r *reconcile) reconcileLayer3(l3vnis []networkv1alpha1.VRFRouteConfiguration, taas []networkv1alpha1.RoutingTable) error { - vrfConfigMap, err := r.createVrfConfigMap(l3vnis) - if err != nil { - return err - } - - vrfFromTaas := createVrfFromTaaS(taas) - - allConfigs := []frr.VRFConfiguration{} - l3Configs := []frr.VRFConfiguration{} - taasConfigs := []frr.VRFConfiguration{} - for key := range vrfConfigMap { - allConfigs = append(allConfigs, vrfConfigMap[key]) - l3Configs = append(l3Configs, vrfConfigMap[key]) - } - for key := range vrfFromTaas { - allConfigs = append(allConfigs, vrfFromTaas[key]) - taasConfigs = append(taasConfigs, vrfFromTaas[key]) - } - - sort.SliceStable(allConfigs, func(i, j int) bool { - return allConfigs[i].VNI < allConfigs[j].VNI - }) - - created, deletedVRF, err := r.reconcileL3Netlink(l3Configs) - if err != nil { - r.Logger.Error(err, "error reconciling Netlink") - return err - } - - deletedTaas, err := r.reconcileTaasNetlink(taasConfigs) - if err != nil { - return err - } - reloadTwice := deletedVRF || deletedTaas - - // We wait here for two seconds to let FRR settle after updating netlink devices - time.Sleep(defaultSleep) - -<<<<<<< HEAD - err = r.configureFRR(allConfigs, reloadTwice) -======= - changed, err := r.frrManager.Configure(frr.Configuration{ - VRFs: vrfConfigs, - ASN: r.config.ServerASN, - }, r.netlinkManager) ->>>>>>> 9c7cb71 (Added unit tests) - if err != nil { - return err - } - - // Make sure that all created netlink VRFs are up after FRR reload - time.Sleep(defaultSleep) - for _, info := range created { - if err := r.netlinkManager.UpL3(info); err != nil { - r.Logger.Error(err, "error setting L3 to state UP") - return fmt.Errorf("error setting L3 to state UP: %w", err) - } - } - return nil -} - -func (r *reconcile) configureFRR(vrfConfigs []frr.VRFConfiguration, reloadTwice bool) error { - changed, err := r.frrManager.Configure(frr.Configuration{ - VRFs: vrfConfigs, - ASN: r.config.ServerASN, - }) - if err != nil { - r.Logger.Error(err, "error updating FRR configuration") - return fmt.Errorf("error updating FRR configuration: %w", err) - } - - if changed || r.dirtyFRRConfig { - err := r.reloadFRR() - if err != nil { - r.dirtyFRRConfig = true - return err - } - - // When a BGP VRF is deleted there is a leftover running configuration after reload - // A second reload fixes this. - if reloadTwice { - err := r.reloadFRR() - if err != nil { - r.dirtyFRRConfig = true - return err - } - } - r.dirtyFRRConfig = false - } - return nil -} - -func (r *reconcile) reloadFRR() error { - r.Logger.Info("trying to reload FRR config because it changed") - err := r.frrManager.ReloadFRR() - if err != nil { - r.Logger.Error(err, "error reloading FRR systemd unit, trying restart") - - err = r.frrManager.RestartFRR() - if err != nil { - r.Logger.Error(err, "error restarting FRR systemd unit") - return fmt.Errorf("error reloading / restarting FRR systemd unit: %w", err) - } - } - r.Logger.Info("reloaded FRR config") - return nil -} - -func (r *reconcile) createVrfConfigMap(l3vnis []networkv1alpha1.VRFRouteConfiguration) (map[string]frr.VRFConfiguration, error) { - vrfConfigMap := map[string]frr.VRFConfiguration{} - for i := range l3vnis { - spec := l3vnis[i].Spec - logger := r.Logger.WithValues("name", l3vnis[i].ObjectMeta.Name, "namespace", l3vnis[i].ObjectMeta.Namespace, "vrf", spec.VRF) - - var vni int - var rt string - - if val, ok := r.config.VRFConfig[spec.VRF]; ok { - vni = val.VNI - rt = val.RT - logger.Info("Configuring VRF from new VRFConfig", "vni", val.VNI, "rt", rt) - } else if val, ok := r.config.VRFToVNI[spec.VRF]; ok { - vni = val - logger.Info("Configuring VRF from old VRFToVNI", "vni", val) - } else if r.config.ShouldSkipVRFConfig(spec.VRF) { - vni = config.SkipVrfTemplateVni - } else { - err := fmt.Errorf("vrf not in vrf vni map") - r.Logger.Error(err, "VRF does not exist in VRF VNI config, ignoring", "vrf", spec.VRF, "name", l3vnis[i].ObjectMeta.Name, "namespace", l3vnis[i].ObjectMeta.Namespace) - continue - } - - cfg, err := createVrfConfig(vrfConfigMap, &spec, vni, rt) - if err != nil { - return nil, err - } - vrfConfigMap[spec.VRF] = *cfg - } - - return vrfConfigMap, nil -} - -func createVrfFromTaaS(taas []networkv1alpha1.RoutingTable) map[string]frr.VRFConfiguration { - vrfConfigMap := map[string]frr.VRFConfiguration{} - - for i := range taas { - spec := taas[i].Spec - - name := fmt.Sprintf("taas.%d", spec.TableID) - - vrfConfigMap[name] = frr.VRFConfiguration{ - Name: name, - VNI: spec.TableID, - IsTaaS: true, - } - } - - return vrfConfigMap -} - -func createVrfConfig(vrfConfigMap map[string]frr.VRFConfiguration, spec *networkv1alpha1.VRFRouteConfigurationSpec, vni int, rt string) (*frr.VRFConfiguration, error) { - // If VRF is not yet in dict, initialize it - if _, ok := vrfConfigMap[spec.VRF]; !ok { - vrfConfigMap[spec.VRF] = frr.VRFConfiguration{ - Name: spec.VRF, - VNI: vni, - RT: rt, - MTU: spec.MTU, - } - } - - cfg := vrfConfigMap[spec.VRF] - - if len(spec.Export) > 0 { - prefixList, err := handlePrefixItemList(spec.Export, spec.Seq, spec.Community) - if err != nil { - return nil, err - } - cfg.Export = append(cfg.Export, prefixList) - } - if len(spec.Import) > 0 { - prefixList, err := handlePrefixItemList(spec.Import, spec.Seq, nil) - if err != nil { - return nil, err - } - cfg.Import = append(cfg.Import, prefixList) - } - for _, aggregate := range spec.Aggregate { - _, network, err := net.ParseCIDR(aggregate) - if err != nil { - return nil, fmt.Errorf("error parsing CIDR %s: %w", aggregate, err) - } - if network.IP.To4() == nil { - cfg.AggregateIPv6 = append(cfg.AggregateIPv6, aggregate) - } else { - cfg.AggregateIPv4 = append(cfg.AggregateIPv4, aggregate) - } - } - return &cfg, nil -} - -func (r *reconcile) reconcileL3Netlink(vrfConfigs []frr.VRFConfiguration) ([]nl.VRFInformation, bool, error) { - existing, err := r.netlinkManager.ListL3() - if err != nil { - return nil, false, fmt.Errorf("error listing L3 VRF information: %w", err) - } - - // Check for VRFs that are configured on the host but no longer in Kubernetes - toDelete := []nl.VRFInformation{} - for i := range existing { - stillExists := false - for j := range vrfConfigs { - if vrfConfigs[j].Name == existing[i].Name && vrfConfigs[j].VNI == existing[i].VNI { - stillExists = true - existing[i].MTU = vrfConfigs[j].MTU - break - } - } - if !stillExists || existing[i].MarkForDelete { - toDelete = append(toDelete, existing[i]) - } else if err := r.reconcileExisting(existing[i]); err != nil { - r.Logger.Error(err, "error reconciling existing VRF", "vrf", existing[i].Name, "vni", strconv.Itoa(existing[i].VNI)) - } - } - - // Check for VRFs that are in Kubernetes but not yet configured on the host - toCreate := prepareVRFsToCreate(vrfConfigs, existing) - - // Delete / Cleanup VRFs - for _, info := range toDelete { - r.Logger.Info("Deleting VRF because it is no longer configured in Kubernetes", "vrf", info.Name, "vni", info.VNI) - errs := r.netlinkManager.CleanupL3(info.Name) - for _, err := range errs { - r.Logger.Error(err, "Error deleting VRF", "vrf", info.Name, "vni", strconv.Itoa(info.VNI)) - } - } - // Create VRFs - for _, info := range toCreate { - r.Logger.Info("Creating VRF to match Kubernetes", "vrf", info.Name, "vni", info.VNI) - err := r.netlinkManager.CreateL3(info) - if err != nil { - return nil, false, fmt.Errorf("error creating VRF %s, VNI %d: %w", info.Name, info.VNI, err) - } - } - - return toCreate, len(toDelete) > 0, nil -} - -func (r *reconcile) reconcileTaasNetlink(vrfConfigs []frr.VRFConfiguration) (bool, error) { - existing, err := r.netlinkManager.ListTaas() - if err != nil { - return false, fmt.Errorf("error listing TaaS VRF information: %w", err) - } - - deletedInterface, err := r.cleanupTaasNetlink(existing, vrfConfigs) - if err != nil { - return false, err - } - - err = r.createTaasNetlink(existing, vrfConfigs) - if err != nil { - return false, err - } - - return deletedInterface, nil -} - -func (r *reconcile) cleanupTaasNetlink(existing []nl.TaasInformation, intended []frr.VRFConfiguration) (bool, error) { - deletedInterface := false - for _, cfg := range existing { - stillExists := false - for i := range intended { - if intended[i].Name == cfg.Name && intended[i].VNI == cfg.Table { - stillExists = true - } - } - if !stillExists { - deletedInterface = true - err := r.netlinkManager.CleanupTaas(cfg) - if err != nil { - return false, fmt.Errorf("error deleting TaaS %s, table %d: %w", cfg.Name, cfg.Table, err) - } - } - } - return deletedInterface, nil -} - -func (r *reconcile) createTaasNetlink(existing []nl.TaasInformation, intended []frr.VRFConfiguration) error { - for i := range intended { - alreadyExists := false - for _, cfg := range existing { - if intended[i].Name == cfg.Name && intended[i].VNI == cfg.Table { - alreadyExists = true - break - } - } - if !alreadyExists { - info := nl.TaasInformation{ - Name: intended[i].Name, - Table: intended[i].VNI, - } - err := r.netlinkManager.CreateTaas(info) - if err != nil { - return fmt.Errorf("error creating Taas %s, table %d: %w", info.Name, info.Table, err) - } - } - } - return nil -} - -func (r *reconcile) reconcileExisting(cfg nl.VRFInformation) error { - if err := r.netlinkManager.EnsureBPFProgram(cfg); err != nil { - return fmt.Errorf("error ensuring BPF program on VRF") - } - if err := r.netlinkManager.EnsureMTU(cfg); err != nil { - return fmt.Errorf("error setting VRF veth link MTU: %d", cfg.MTU) - } - return nil -} - -func prepareVRFsToCreate(vrfConfigs []frr.VRFConfiguration, existing []nl.VRFInformation) []nl.VRFInformation { - create := []nl.VRFInformation{} - for i := range vrfConfigs { - // Skip VRF with VNI SKIP_VRF_TEMPLATE_VNI - if vrfConfigs[i].VNI == config.SkipVrfTemplateVni { - continue - } - alreadyExists := false - for _, cfg := range existing { - if vrfConfigs[i].Name == cfg.Name && vrfConfigs[i].VNI == cfg.VNI && !cfg.MarkForDelete { - alreadyExists = true - break - } - } - if !alreadyExists { - create = append(create, nl.VRFInformation{ - Name: vrfConfigs[i].Name, - VNI: vrfConfigs[i].VNI, - MTU: vrfConfigs[i].MTU, - }) - } - } - return create -} - -func handlePrefixItemList(input []networkv1alpha1.VrfRouteConfigurationPrefixItem, seq int, community *string) (frr.PrefixList, error) { - prefixList := frr.PrefixList{ - Seq: seq + 1, - Community: community, - } - for i, item := range input { - frrItem, err := copyPrefixItemToFRRItem(i, item) - if err != nil { - return frr.PrefixList{}, err - } - prefixList.Items = append(prefixList.Items, frrItem) - } - return prefixList, nil -} - -func copyPrefixItemToFRRItem(n int, item networkv1alpha1.VrfRouteConfigurationPrefixItem) (frr.PrefixedRouteItem, error) { - _, network, err := net.ParseCIDR(item.CIDR) - if err != nil { - return frr.PrefixedRouteItem{}, fmt.Errorf("error parsing CIDR :%s: %w", item.CIDR, err) - } - - seq := item.Seq - if seq <= 0 { - seq = n + 1 - } - return frr.PrefixedRouteItem{ - CIDR: *network, - IPv6: network.IP.To4() == nil, - Seq: seq, - Action: item.Action, - GE: item.GE, - LE: item.LE, - }, nil -} diff --git a/pkg/reconciler/legacy_reconciler.go b/pkg/reconciler/legacy_reconciler.go new file mode 100644 index 00000000..99ad6235 --- /dev/null +++ b/pkg/reconciler/legacy_reconciler.go @@ -0,0 +1,85 @@ +package reconciler + +import ( + "context" + "fmt" + "os" + + "github.com/go-logr/logr" + "github.com/telekom/das-schiff-network-operator/pkg/anycast" + "github.com/telekom/das-schiff-network-operator/pkg/config" + "github.com/telekom/das-schiff-network-operator/pkg/frr" + "github.com/telekom/das-schiff-network-operator/pkg/healthcheck" + "github.com/telekom/das-schiff-network-operator/pkg/nl" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type LegacyReconciler struct { + netlinkManager *nl.Manager + config *config.Config + frrManager *frr.Manager + anycastTracker *anycast.Tracker + dirtyFRRConfig bool + healthChecker *healthcheck.HealthChecker + logger logr.Logger +} + +func NewLegacyReconciler(clusterClient client.Client, anycastTracker *anycast.Tracker, logger logr.Logger) (*LegacyReconciler, error) { + reconciler := &LegacyReconciler{ + netlinkManager: &nl.Manager{}, + frrManager: frr.NewFRRManager(), + anycastTracker: anycastTracker, + logger: logger, + } + + if val := os.Getenv("FRR_CONFIG_FILE"); val != "" { + reconciler.frrManager.ConfigPath = val + } + if err := reconciler.frrManager.Init(); err != nil { + return nil, fmt.Errorf("error trying to init FRR Manager: %w", err) + } + + cfg, err := config.LoadConfig() + if err != nil { + return nil, fmt.Errorf("error loading config: %w", err) + } + reconciler.config = cfg + + nc, err := healthcheck.LoadConfig(healthcheck.NetHealthcheckFile) + if err != nil { + return nil, fmt.Errorf("error loading networking healthcheck config: %w", err) + } + + tcpDialer := healthcheck.NewTCPDialer(nc.Timeout) + reconciler.healthChecker, err = healthcheck.NewHealthChecker(clusterClient, + healthcheck.NewDefaultHealthcheckToolkit(reconciler.frrManager, tcpDialer), + nc) + if err != nil { + return nil, fmt.Errorf("error creating netwokring healthchecker: %w", err) + } + + return reconciler, nil +} + +func (r *LegacyReconciler) checkHealth(ctx context.Context) error { + if !r.healthChecker.IsNetworkingHealthy() { + _, err := r.healthChecker.IsFRRActive() + if err != nil { + return fmt.Errorf("error checking FRR status: %w", err) + } + if err = r.healthChecker.CheckInterfaces(); err != nil { + return fmt.Errorf("error checking network interfaces: %w", err) + } + if err = r.healthChecker.CheckReachability(); err != nil { + return fmt.Errorf("error checking network reachability: %w", err) + } + if err = r.healthChecker.RemoveTaints(ctx); err != nil { + return fmt.Errorf("error removing taint from the node: %w", err) + } + } + return nil +} + +func (r *LegacyReconciler) getConfig() *config.Config { + return r.config +} diff --git a/pkg/reconciler/netconf_reconciler.go b/pkg/reconciler/netconf_reconciler.go new file mode 100644 index 00000000..e7df62ae --- /dev/null +++ b/pkg/reconciler/netconf_reconciler.go @@ -0,0 +1,31 @@ +package reconciler + +import ( + "context" + "errors" + + networkv1alpha1 "github.com/telekom/das-schiff-network-operator/api/v1alpha1" + "github.com/telekom/das-schiff-network-operator/pkg/config" +) + +type NetconfReconciler struct{} + +func NewNetconfReconciler() (Adapter, error) { + return &NetconfReconciler{}, nil +} + +func (*NetconfReconciler) checkHealth(context.Context) error { + return errors.ErrUnsupported +} + +func (*NetconfReconciler) getConfig() *config.Config { + return nil +} + +func (*NetconfReconciler) reconcileLayer3([]networkv1alpha1.VRFRouteConfiguration, []networkv1alpha1.RoutingTable) error { + return errors.ErrUnsupported +} + +func (*NetconfReconciler) reconcileLayer2([]networkv1alpha1.Layer2NetworkConfiguration) error { + return errors.ErrUnsupported +} diff --git a/pkg/reconciler/reconciler.go b/pkg/reconciler/reconciler.go index 040d90c8..f2ef0304 100644 --- a/pkg/reconciler/reconciler.go +++ b/pkg/reconciler/reconciler.go @@ -7,6 +7,7 @@ import ( "time" "github.com/go-logr/logr" + networkv1alpha1 "github.com/telekom/das-schiff-network-operator/api/v1alpha1" "github.com/telekom/das-schiff-network-operator/pkg/anycast" "github.com/telekom/das-schiff-network-operator/pkg/config" "github.com/telekom/das-schiff-network-operator/pkg/debounce" @@ -18,6 +19,13 @@ import ( const defaultDebounceTime = 20 * time.Second +type Adapter interface { + reconcileLayer3([]networkv1alpha1.VRFRouteConfiguration, []networkv1alpha1.RoutingTable) error + reconcileLayer2([]networkv1alpha1.Layer2NetworkConfiguration) error + checkHealth(context.Context) error + getConfig() *config.Config +} + type Reconciler struct { client client.Client netlinkManager *nl.Manager @@ -25,11 +33,10 @@ type Reconciler struct { anycastTracker *anycast.Tracker config *config.Config logger logr.Logger + adapter Adapter healthChecker *healthcheck.HealthChecker debouncer *debounce.Debouncer - - dirtyFRRConfig bool } type reconcile struct { @@ -37,13 +44,14 @@ type reconcile struct { logr.Logger } -func NewReconciler(clusterClient client.Client, anycastTracker *anycast.Tracker, logger logr.Logger) (*Reconciler, error) { +func NewReconciler(clusterClient client.Client, anycastTracker *anycast.Tracker, logger logr.Logger, iReconciler Adapter) (*Reconciler, error) { reconciler := &Reconciler{ client: clusterClient, netlinkManager: nl.NewManager(&nl.Toolkit{}), frrManager: frr.NewFRRManager(), anycastTracker: anycastTracker, logger: logger, + adapter: iReconciler, } reconciler.debouncer = debounce.NewDebouncer(reconciler.reconcileDebounced, defaultDebounceTime, logger) @@ -77,55 +85,43 @@ func NewReconciler(clusterClient client.Client, anycastTracker *anycast.Tracker, return reconciler, nil } -func (reconciler *Reconciler) Reconcile(ctx context.Context) { - reconciler.debouncer.Debounce(ctx) +func (r *Reconciler) Reconcile(ctx context.Context) { + r.debouncer.Debounce(ctx) } -func (reconciler *Reconciler) reconcileDebounced(ctx context.Context) error { - r := &reconcile{ - Reconciler: reconciler, - Logger: reconciler.logger, +func (r *Reconciler) reconcileDebounced(ctx context.Context) error { + reconciler := &reconcile{ + Reconciler: r, + Logger: r.logger, } - r.Logger.Info("Reloading config") - if err := r.config.ReloadConfig(); err != nil { + reconciler.Logger.Info("Reloading config") + if err := reconciler.adapter.getConfig().ReloadConfig(); err != nil { return fmt.Errorf("error reloading network-operator config: %w", err) } - l3vnis, err := r.fetchLayer3(ctx) + l3vnis, err := reconciler.fetchLayer3(ctx) if err != nil { return err } - l2vnis, err := r.fetchLayer2(ctx) + l2vnis, err := reconciler.fetchLayer2(ctx) if err != nil { return err } - taas, err := r.fetchTaas(ctx) + taas, err := reconciler.fetchTaas(ctx) if err != nil { return err } - if err := r.reconcileLayer3(l3vnis, taas); err != nil { - return err + if err := reconciler.adapter.reconcileLayer3(l3vnis, taas); err != nil { + return fmt.Errorf("error while configuring Layer3: %w", err) } - if err := r.reconcileLayer2(l2vnis); err != nil { - return err + if err := reconciler.adapter.reconcileLayer2(l2vnis); err != nil { + return fmt.Errorf("error while configuring Layer2: %w", err) } - if !reconciler.healthChecker.IsNetworkingHealthy() { - _, err := reconciler.healthChecker.IsFRRActive() - if err != nil { - return fmt.Errorf("error checking FRR status: %w", err) - } - if err = reconciler.healthChecker.CheckInterfaces(); err != nil { - return fmt.Errorf("error checking network interfaces: %w", err) - } - if err = reconciler.healthChecker.CheckReachability(); err != nil { - return fmt.Errorf("error checking network reachability: %w", err) - } - if err = reconciler.healthChecker.RemoveTaints(ctx); err != nil { - return fmt.Errorf("error removing taint from the node: %w", err) - } + if err := reconciler.adapter.checkHealth(ctx); err != nil { + return fmt.Errorf("healthcheck error: %w", err) } return nil