diff --git a/cmd/ipam-node/app/app.go b/cmd/ipam-node/app/app.go index 8438981..a1b49f1 100644 --- a/cmd/ipam-node/app/app.go +++ b/cmd/ipam-node/app/app.go @@ -59,6 +59,7 @@ import ( nodectrl "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-node/controllers/node" "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-node/grpc/middleware" "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-node/handlers" + "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-node/migrator" storePkg "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-node/store" poolPkg "github.com/Mellanox/nvidia-k8s-ipam/pkg/pool" "github.com/Mellanox/nvidia-k8s-ipam/pkg/version" @@ -188,6 +189,11 @@ func RunNodeDaemon(ctx context.Context, config *rest.Config, opts *options.Optio return err } s.Cancel() + if err := migrator.New(store).Migrate(ctx); err != nil { + logger.Error(err, fmt.Sprintf("failed to migrate host-local IPAM store, "+ + "set %s env variable to disable migration", migrator.EnvDisableMigration)) + return err + } grpcServer, listener, err := initGRPCServer(opts, logger, poolManager, store) if err != nil { diff --git a/pkg/ipam-node/migrator/migrator.go b/pkg/ipam-node/migrator/migrator.go new file mode 100644 index 0000000..daee30e --- /dev/null +++ b/pkg/ipam-node/migrator/migrator.go @@ -0,0 +1,159 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package migrator + +import ( + "context" + "fmt" + "io/fs" + "net" + "os" + "path/filepath" + "strings" + "time" + + "github.com/go-logr/logr" + + "github.com/Mellanox/nvidia-k8s-ipam/pkg/ip" + storePkg "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-node/store" + "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-node/types" +) + +const ( + // EnvDisableMigration contains the name of the environment variable which can be used + // to disable migration + EnvDisableMigration = "MIGRATOR_DISABLE_MIGRATION" + // EnvHostLocalStorePath contains the name of the environment variable which can be used to + // change host local IPAM store path + EnvHostLocalStorePath = "MIGRATOR_HOST_LOCAL_STORE" + // DefaultHostLocalStorePath contains default path for host-local store which was used + // in the older version + DefaultHostLocalStorePath = "/var/lib/cni/nv-ipam/state/host-local" + // PlaceholderForUnknownField contains placeholder string which will be used as a value + // for fields for which we have no data + PlaceholderForUnknownField = "MIGRATED_NO_DATA" +) + +// New create and initialize new instance of the migrator +func New(store storePkg.Store) Migrator { + return &migrator{ + store: store, + } +} + +// Migrator is the interface implemented by migrator package +type Migrator interface { + // Migrate execute migration logic. + // The implementation will check if host-local IPAM data that were created by the previous version + // of nv-ipam are available on the filesystem. If the data is found, + // the migrator will read and save them into the store and then remove data in the old format. + // Some information required by the new store schema can't be restored from the host-local IPAM store + // format used in the older version of the nv-ipam. Missing data will be replaced by + // a special placeholder which indicates that data is missing. + Migrate(ctx context.Context) error +} + +// default migrator implementation +type migrator struct { + store storePkg.Store +} + +// Migrate is the Migrator interface implementation for the migrator +func (m *migrator) Migrate(ctx context.Context) error { + logger := logr.FromContextOrDiscard(ctx).WithName("migrator") + if os.Getenv(EnvDisableMigration) != "" { + logger.Info(fmt.Sprintf("%s set, skip migration", EnvDisableMigration)) + return nil + } + hostLocalPath := os.Getenv(EnvHostLocalStorePath) + if hostLocalPath == "" { + hostLocalPath = DefaultHostLocalStorePath + } + info, err := os.Stat(hostLocalPath) + if err != nil { + if os.IsNotExist(err) { + logger.Info("host-local IPAM path not found, skip migration", "path", hostLocalPath) + return nil + } + return err + } + if !info.IsDir() { + return fmt.Errorf("host-local IPAM path is not a dir") + } + session, err := m.store.Open(ctx) + if err != nil { + return fmt.Errorf("failed to open store: %v", err) + } + logger.Info("check host-local store path", "path", hostLocalPath) + err = filepath.Walk(hostLocalPath, getWalkFunc(logger, session)) + if err != nil { + session.Cancel() + return err + } + err = session.Commit() + if err != nil { + return err + } + err = os.RemoveAll(hostLocalPath) + if err != nil { + logger.Error(err, "failed to remove data from host-local IPAM path") + return err + } + logger.Info("migration complete. host-local IPAM data removed", "path", hostLocalPath) + return nil +} + +func getWalkFunc(logger logr.Logger, session storePkg.Session) filepath.WalkFunc { + return func(path string, info fs.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + poolName := filepath.Base(filepath.Dir(path)) + addr := ip.NormalizeIP(net.ParseIP(info.Name())) + if addr == nil { + return nil + } + allocationContent, err := os.ReadFile(path) + if err != nil { + logger.Error(err, "failed to read allocation data for IP", + "pool", poolName, "ip", info.Name()) + return err + } + allocData := strings.Split(string(allocationContent), "\n") + if len(allocData) != 2 { + logger.Error(nil, "unexpected allocation data format", + "pool", poolName, "ip", info.Name(), "data", allocData) + return fmt.Errorf("unexpected allocation format") + } + containerID, interfaceName := strings.Trim(allocData[0], "\r"), allocData[1] + if err := session.Reserve(poolName, containerID, interfaceName, types.ReservationMetadata{ + CreateTime: time.Now().Format(time.RFC3339Nano), + PodUUID: PlaceholderForUnknownField, + PodName: PlaceholderForUnknownField, + PodNamespace: PlaceholderForUnknownField, + DeviceID: PlaceholderForUnknownField, + PoolConfigSnapshot: PlaceholderForUnknownField, + }, addr); err != nil { + logger.V(1).Info("failed to reserve IP, ignore allocation", + "pool", poolName, "ip", info.Name(), "reason", err.Error()) + // ignore reservation error and skip the reservation + return nil + } + logger.V(1).Info("IP reservation migrated", "pool", poolName, "ip", info.Name()) + return nil + } +} diff --git a/pkg/ipam-node/migrator/migrator_suite_test.go b/pkg/ipam-node/migrator/migrator_suite_test.go new file mode 100644 index 0000000..c7b332e --- /dev/null +++ b/pkg/ipam-node/migrator/migrator_suite_test.go @@ -0,0 +1,26 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package migrator_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestMigrator(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Migrator Suite") +} diff --git a/pkg/ipam-node/migrator/migrator_test.go b/pkg/ipam-node/migrator/migrator_test.go new file mode 100644 index 0000000..1e04d61 --- /dev/null +++ b/pkg/ipam-node/migrator/migrator_test.go @@ -0,0 +1,121 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package migrator_test + +import ( + "context" + "fmt" + "net" + "os" + "path/filepath" + + "github.com/go-logr/logr" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "k8s.io/klog/v2" + + "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-node/migrator" + storePkg "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-node/store" +) + +const ( + testPool1 = "pool1" + testPool2 = "pool2" + testID1 = "09548191eb996fcce2c6bf4eff0576b8f3df9232931de7e499f084a2c3a34501" + testID2 = "09548191eb996fcce2c6bf4eff0576b8f3df9232931de7e499f084a2c3a34502" + testID3 = "09548191eb996fcce2c6bf4eff0576b8f3df9232931de7e499f084a2c3a34503" + testIF1 = "net1" + testIF2 = "net2" + testPool1IP1 = "192.168.0.2" + testPool1IP2 = "192.168.0.3" + testPool1IP3 = "192.168.0.4" + testPool2IP1 = "172.16.0.2" +) + +var _ = Describe("Migrator", func() { + var ( + hostLocalStorePath string + ctx context.Context + ) + BeforeEach(func() { + ctx = logr.NewContext(context.Background(), klog.NewKlogr()) + hostLocalStorePath = filepath.Join(GinkgoT().TempDir(), "host-local") + Expect(os.Mkdir(hostLocalStorePath, 0744)).NotTo(HaveOccurred()) + pool1Dir := filepath.Join(hostLocalStorePath, testPool1) + pool2Dir := filepath.Join(hostLocalStorePath, testPool2) + Expect(os.Mkdir(pool1Dir, 0744)).NotTo(HaveOccurred()) + Expect(os.Mkdir(pool2Dir, 0744)).NotTo(HaveOccurred()) + + Expect(os.WriteFile(filepath.Join(pool1Dir, "lock"), []byte(""), 0644)).NotTo(HaveOccurred()) + Expect(os.WriteFile(filepath.Join(pool2Dir, "lock"), []byte(""), 0644)).NotTo(HaveOccurred()) + + Expect(os.WriteFile(filepath.Join(pool1Dir, "last_reserved_ip.0"), + []byte(fmt.Sprintf("%s", testPool1IP3)), 0644)).NotTo(HaveOccurred()) + Expect(os.WriteFile(filepath.Join(pool2Dir, "last_reserved_ip.0"), + []byte(fmt.Sprintf("%s", testPool2IP1)), 0644)).NotTo(HaveOccurred()) + + Expect(os.WriteFile(filepath.Join(pool1Dir, testPool1IP1), + []byte(fmt.Sprintf("%s\r\n%s", testID1, testIF1)), 0644)).NotTo(HaveOccurred()) + Expect(os.WriteFile(filepath.Join(pool1Dir, testPool1IP2), + []byte(fmt.Sprintf("%s\r\n%s", testID2, testIF1)), 0644)).NotTo(HaveOccurred()) + Expect(os.WriteFile(filepath.Join(pool1Dir, testPool1IP3), + []byte(fmt.Sprintf("%s\r\n%s", testID3, testIF1)), 0644)).NotTo(HaveOccurred()) + Expect(os.WriteFile(filepath.Join(pool2Dir, testPool2IP1), + []byte(fmt.Sprintf("%s\r\n%s", testID1, testIF2)), 0644)).NotTo(HaveOccurred()) + }) + It("Migrate", func() { + Expect(os.Setenv(migrator.EnvHostLocalStorePath, hostLocalStorePath)).NotTo(HaveOccurred()) + storePath := filepath.Join(GinkgoT().TempDir(), "test_store") + m := migrator.New(storePkg.New(storePath)) + Expect(m.Migrate(ctx)).NotTo(HaveOccurred()) + + session, err := storePkg.New(storePath).Open(ctx) + defer session.Cancel() + Expect(err).NotTo(HaveOccurred()) + + Expect(session.GetLastReservedIP(testPool1)).NotTo(BeNil()) + Expect(session.GetLastReservedIP(testPool2)).NotTo(BeNil()) + + reservationPool1 := session.GetReservationByID(testPool1, testID1, testIF1) + Expect(reservationPool1).NotTo(BeNil()) + Expect(reservationPool1.ContainerID).To(Equal(testID1)) + Expect(reservationPool1.InterfaceName).To(Equal(testIF1)) + Expect(reservationPool1.IPAddress).To(Equal(net.ParseIP(testPool1IP1))) + Expect(reservationPool1.Metadata.PodName).To(Equal(migrator.PlaceholderForUnknownField)) + Expect(reservationPool1.Metadata.PodNamespace).To(Equal(migrator.PlaceholderForUnknownField)) + Expect(reservationPool1.Metadata.PodUUID).To(Equal(migrator.PlaceholderForUnknownField)) + + reservationPool2 := session.GetReservationByID(testPool2, testID1, testIF2) + Expect(reservationPool2).NotTo(BeNil()) + + // check that host local store is removed + _, err = os.Stat(hostLocalStorePath) + Expect(os.IsNotExist(err)).To(BeTrue()) + }) + It("No host-local path", func() { + Expect(os.Setenv(migrator.EnvHostLocalStorePath, + filepath.Join(hostLocalStorePath, "not-exist"))).NotTo(HaveOccurred()) + storePath := filepath.Join(GinkgoT().TempDir(), "test_store") + m := migrator.New(storePkg.New(storePath)) + Expect(m.Migrate(ctx)).NotTo(HaveOccurred()) + }) + It("Empty host-local store", func() { + hostLocalStorePath := filepath.Join(GinkgoT().TempDir(), "host-local2") + Expect(os.Setenv(migrator.EnvHostLocalStorePath, hostLocalStorePath)).NotTo(HaveOccurred()) + Expect(os.Mkdir(hostLocalStorePath, 0744)).NotTo(HaveOccurred()) + storePath := filepath.Join(GinkgoT().TempDir(), "test_store") + m := migrator.New(storePkg.New(storePath)) + Expect(m.Migrate(ctx)).NotTo(HaveOccurred()) + }) +})