From 09b83e1a8c25c0246e8d725a7051462b9cc7cc80 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Wed, 5 Oct 2022 17:09:50 +0200 Subject: [PATCH] rbd: remove dummy image workaround To address the problem that snapshot schedules were not triggered for volumes that are promoted, a dummy image was disabled/enabled for replication. This was done as a workaround, because the promote operation was not triggering the schedules for the image being promoted. The bugs related to the same have been fixed in RBD mirroring functionality and hence the workaround #2656 can be removed from the code base. ceph tracker https://tracker.ceph.com/issues/53914 Signed-off-by: Madhu Rajanna --- internal/rbd/replicationcontrollerserver.go | 141 -------------------- 1 file changed, 141 deletions(-) diff --git a/internal/rbd/replicationcontrollerserver.go b/internal/rbd/replicationcontrollerserver.go index 7b7082c4ed0c..bad710061f88 100644 --- a/internal/rbd/replicationcontrollerserver.go +++ b/internal/rbd/replicationcontrollerserver.go @@ -24,7 +24,6 @@ import ( "regexp" "strconv" "strings" - "sync" "time" "github.com/ceph/ceph-csi/internal/util" @@ -70,18 +69,6 @@ const ( schedulingStartTimeKey = "schedulingStartTime" ) -type operation string - -var ( - // pool+"/"+key to check dummy image is created. - dummyImageCreated operation = "dummyImageCreated" - // Read write lock to ensure that only one operation is happening at a time. - operationLock = sync.Map{} - - // Lock to serialize operations on the dummy image to tickle RBD snapshot schedule. - dummyImageOpsLock sync.Mutex -) - // ReplicationServer struct of rbd CSI driver with supported methods of Replication // controller server spec. 
type ReplicationServer struct { @@ -269,11 +256,6 @@ func (rs *ReplicationServer) EnableVolumeReplication(ctx context.Context, return nil, status.Error(codes.Internal, err.Error()) } - err = createDummyImage(ctx, rbdVol) - if err != nil { - return nil, status.Errorf(codes.Internal, "failed to create dummy image %s", err.Error()) - } - if mirroringInfo.State != librbd.MirrorImageEnabled { err = rbdVol.enableImageMirroring(mirroringMode) if err != nil { @@ -286,117 +268,6 @@ func (rs *ReplicationServer) EnableVolumeReplication(ctx context.Context, return &replication.EnableVolumeReplicationResponse{}, nil } -// getDummyImageName returns the csi-vol-dummy+cluster FSID as the image name. -// each cluster should have a unique dummy image created. choosing the cluster -// FSID for the same reason. -func getDummyImageName(conn *util.ClusterConnection) (string, error) { - id, err := conn.GetFSID() - if err != nil { - return "", err - } - - return fmt.Sprintf("csi-vol-dummy-%s", id), nil -} - -// getOperationName returns the operation name for the given operation type -// combined with the pool name. -func getOperationName(poolName string, optName operation) string { - return fmt.Sprintf("%s/%s", poolName, optName) -} - -// createDummyImage creates a dummy image as a workaround for the rbd -// scheduling problem. -func createDummyImage(ctx context.Context, rbdVol *rbdVolume) error { - var err error - var imgName string - - dummyImageOpsLock.Lock() - defer dummyImageOpsLock.Unlock() - optName := getOperationName(rbdVol.Pool, dummyImageCreated) - if _, ok := operationLock.Load(optName); !ok { - // create a dummy image - imgName, err = getDummyImageName(rbdVol.conn) - if err != nil { - return err - } - dummyVol := *rbdVol - dummyVol.RbdImageName = imgName - // dummyVol holds rbdVol details, reset ImageID or else dummy image cannot be - // deleted from trash during repair operation. 
- dummyVol.ImageID = "" - f := []string{ - librbd.FeatureNameLayering, - librbd.FeatureNameObjectMap, - librbd.FeatureNameExclusiveLock, - librbd.FeatureNameFastDiff, - } - features := librbd.FeatureSetFromNames(f) - dummyVol.ImageFeatureSet = features - // create 1MiB dummy image. 1MiB=1048576 bytes - dummyVol.VolSize = 1048576 - err = createImage(ctx, &dummyVol, dummyVol.conn.Creds) - if err != nil { - if strings.Contains(err.Error(), "File exists") { - err = repairDummyImage(ctx, &dummyVol) - } - } - if err == nil { - operationLock.Store(optName, true) - } - } - - return err -} - -// repairDummyImage deletes and recreates the dummy image. -func repairDummyImage(ctx context.Context, dummyVol *rbdVolume) error { - // instead of checking the images features and than adding missing image - // features, updating the image size to 1Mib. We will delete the image - // and recreate it. - - // deleting and recreating the dummy image will not impact anything as its - // a workaround to fix the scheduling problem. - err := dummyVol.deleteImage(ctx) - if err != nil { - return err - } - - return createImage(ctx, dummyVol, dummyVol.conn.Creds) -} - -// tickleMirroringOnDummyImage disables and reenables mirroring on the dummy image, and sets a -// schedule of a minute for the dummy image, to force a schedule refresh for other mirrored images -// within a minute. 
-func tickleMirroringOnDummyImage(rbdVol *rbdVolume, mirroringMode librbd.ImageMirrorMode) error { - imgName, err := getDummyImageName(rbdVol.conn) - if err != nil { - return err - } - dummyVol := *rbdVol - dummyVol.RbdImageName = imgName - - dummyImageOpsLock.Lock() - defer dummyImageOpsLock.Unlock() - err = dummyVol.disableImageMirroring(false) - if err != nil { - return err - } - - err = dummyVol.enableImageMirroring(mirroringMode) - if err != nil { - return err - } - - if mirroringMode == librbd.ImageMirrorModeSnapshot { - err = dummyVol.addSnapshotScheduling(admin.Interval("1m"), admin.NoStartTime) - if err != nil { - return err - } - } - - return nil -} - // DisableVolumeReplication extracts the RBD volume information from the // volumeID, If the image is present and the mirroring is enabled on the RBD // image it will disable the mirroring. @@ -588,12 +459,6 @@ func (rs *ReplicationServer) PromoteVolume(ctx context.Context, } } - var mode librbd.ImageMirrorMode - mode, err = getMirroringMode(ctx, req.GetParameters()) - if err != nil { - return nil, status.Errorf(codes.Internal, "failed to get mirroring mode %s", err.Error()) - } - interval, startTime := getSchedulingDetails(req.GetParameters()) if interval != admin.NoInterval { err = rbdVol.addSnapshotScheduling(interval, startTime) @@ -608,12 +473,6 @@ func (rs *ReplicationServer) PromoteVolume(ctx context.Context, rbdVol) } - log.DebugLog(ctx, "attempting to tickle dummy image for restarting RBD schedules") - err = tickleMirroringOnDummyImage(rbdVol, mode) - if err != nil { - return nil, status.Errorf(codes.Internal, "failed to enable mirroring on dummy image %s", err.Error()) - } - return &replication.PromoteVolumeResponse{}, nil }