Skip to content

Commit

Permalink
rbd: fix resync issue
Browse files Browse the repository at this point in the history
During the Demote volume store
the image creation timestamp.

During Resync do below operation

* Check image creation timestamp
stored during Demote operation
and current creation timestamp during Resync
and check both are equal and its for
force resync then issue resync
* If the image on both sides is
not in unknown state, check
last_snapshot_timestamp on the
local mirror description, if its present
send volumeReady as false or else return
error message.

If both the images are in up+unknown the
send volumeReady as true.

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
  • Loading branch information
Madhu-1 committed Aug 24, 2023
1 parent 88ccd84 commit 3e4dfd0
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 137 deletions.
117 changes: 86 additions & 31 deletions internal/csi-addons/rbd/replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"regexp"
"strconv"
"strings"
"sync"
"time"

corerbd "github.com/ceph/ceph-csi/internal/rbd"
Expand All @@ -40,6 +41,12 @@ import (
"google.golang.org/protobuf/types/known/timestamppb"
)

// imageResyncMap is used to store the volumeID and image creation time for resync.
// The volumeID gets added to the map when the DemoteVolume RPC is called and
// removed from the map when the PromoteVolume RPC is called or
// DisableVolumeReplication RPC is called.
var imageResyncMap = sync.Map{}

// imageMirroringMode is used to indicate the mirroring mode for an RBD image.
type imageMirroringMode string

Expand Down Expand Up @@ -337,11 +344,15 @@ func (rs *ReplicationServer) DisableVolumeReplication(ctx context.Context,
if err != nil {
return nil, getGRPCError(err)
}
imageResyncMap.Delete(volumeID)

return &replication.DisableVolumeReplicationResponse{}, nil
default:
return nil, status.Errorf(codes.InvalidArgument, "image is in %s Mode", mirroringInfo.State)
}
// remove the volume from the resync map as the image mirroring is disabled and
// during next Demote operation cephcsi will re-add the image to the sync map
imageResyncMap.Delete(volumeID)

return &replication.DisableVolumeReplicationResponse{}, nil
}
Expand Down Expand Up @@ -439,6 +450,13 @@ func (rs *ReplicationServer) PromoteVolume(ctx context.Context,
rbdVol)
}

// remove the volume from the resync map as the image is promoted and
// during next Demote operation cephcsi will re-add the image to the sync
// map.
// This is required if someone does not do DisableVolumeReplication but try
// to toggle between PromoteVolume and DemoteVolume.
imageResyncMap.Delete(volumeID)

return &replication.PromoteVolumeResponse{}, nil
}

Expand Down Expand Up @@ -480,6 +498,18 @@ func (rs *ReplicationServer) DemoteVolume(ctx context.Context,

return nil, err
}

err = rbdVol.GetImageInfo()
if err != nil {
log.ErrorLog(ctx, err.Error())

return nil, status.Error(codes.Internal, err.Error())
}
// store the image creation time for resync
if _, ok := imageResyncMap.Load(volumeID); !ok {
imageResyncMap.Store(volumeID, rbdVol.CreatedAt.AsTime())
}

mirroringInfo, err := rbdVol.GetImageMirroringInfo()
if err != nil {
log.ErrorLog(ctx, err.Error())
Expand Down Expand Up @@ -538,6 +568,8 @@ func checkRemoteSiteStatus(ctx context.Context, mirrorStatus *librbd.GlobalMirro
// ResyncVolume extracts the RBD volume information from the volumeID, If the
// image is present, mirroring is enabled and the image is in demoted state.
// If yes it will resync the image to correct the split-brain.
//
//nolint:gocyclo,cyclop // TODO: reduce complexity
func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
req *replication.ResyncVolumeRequest,
) (*replication.ResyncVolumeResponse, error) {
Expand Down Expand Up @@ -572,22 +604,11 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
return nil, err
}

mirroringInfo, err := rbdVol.GetImageMirroringInfo()
err = rbdVol.CheckImageIsPrimary()
if err != nil {
// in case of Resync the image will get deleted and gets recreated and
// it takes time for this operation.
log.ErrorLog(ctx, err.Error())

return nil, status.Error(codes.Aborted, err.Error())
}

if mirroringInfo.State != librbd.MirrorImageEnabled {
return nil, status.Error(codes.InvalidArgument, "image mirroring is not enabled")
}

// return error if the image is still primary
if mirroringInfo.Primary {
return nil, status.Error(codes.InvalidArgument, "image is in primary state")
return nil, getGRPCError(err)
}

mirrorStatus, err := rbdVol.GetImageMirroringStatus()
Expand Down Expand Up @@ -637,14 +658,28 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
ready = checkRemoteSiteStatus(ctx, mirrorStatus)
}

err = rbdVol.ResyncVol(localStatus, req.Force)
err = rbdVol.GetImageInfo()
if err != nil {
return nil, getGRPCError(err)
return nil, status.Errorf(codes.Internal, "failed to get image info: %s", err.Error())
}

err = checkVolumeResyncStatus(localStatus)
ok, err := resyncVolume(ctx, volumeID, rbdVol.CreatedAt)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
return nil, status.Errorf(codes.Internal, "failed to check image needs resync: %s", err.Error())
}

if req.Force && ok {
err = rbdVol.ResyncVol(localStatus)
if err != nil {
return nil, getGRPCError(err)
}
}

if !ready {
err = checkVolumeResyncStatus(localStatus)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
}

err = rbdVol.RepairResyncedImageID(ctx, ready)
Expand All @@ -659,6 +694,29 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
return resp, nil
}

func resyncVolume(
ctx context.Context,
volumeID string,
currentImageTime *timestamppb.Timestamp,
) (bool, error) {
savedImageTime, ok := imageResyncMap.Load(volumeID)
if !ok {
return false, errors.New("failed to get image creation time")
}

log.UsefulLog(ctx, "savedImageTime=%v, currentImageTime=%v", savedImageTime, currentImageTime)
st, ok := savedImageTime.(time.Time)
if !ok {
return false, errors.New("failed to convert image creation time")
}

if st.Equal(currentImageTime.AsTime()) {
return true, nil
}

return false, nil
}

func getGRPCError(err error) error {
if err == nil {
return status.Error(codes.OK, codes.OK.String())
Expand Down Expand Up @@ -854,20 +912,17 @@ func getLastSyncInfo(description string) (*replication.GetVolumeReplicationInfoR
}

func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error {
// we are considering 2 states to check resync started and resync completed
// as below. all other states will be considered as an error state so that
// cephCSI can return error message and volume replication operator can
// mark the VolumeReplication status as not resyncing for the volume.

// If the state is Replaying means the resync is going on.
// Once the volume on remote cluster is demoted and resync
// is completed the image state will be moved to UNKNOWN.
// RBD mirror daemon should be always running on the primary cluster.
if !localStatus.Up || (localStatus.State != librbd.MirrorImageStatusStateReplaying &&
localStatus.State != librbd.MirrorImageStatusStateUnknown) {
return fmt.Errorf(
"not resyncing. Local status: daemon up=%t image is in %q state",
localStatus.Up, localStatus.State)
// we are considering local snapshot timestamp to check if the resync is
// started or not, if we dont see local_snapshot_timestamp in the
// description of localStatus, we are returning error. if we see the local
// snapshot timestamp in the description we return resyncing started.
description := localStatus.Description
resp, err := getLastSyncInfo(description)
if err != nil {
return fmt.Errorf("failed to get last sync info: %w", err)
}
if resp.LastSyncTime == nil {
return errors.New("last sync time is nil")
}

return nil
Expand Down
66 changes: 9 additions & 57 deletions internal/csi-addons/rbd/replication_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,74 +225,26 @@ func TestCheckVolumeResyncStatus(t *testing.T) {
wantErr bool
}{
{
name: "test when rbd mirror daemon is not running",
name: "test when local_snapshot_timestamp is non zero",
args: librbd.SiteMirrorImageStatus{
State: librbd.MirrorImageStatusStateUnknown,
Up: false,
},
wantErr: true,
},
{
name: "test for unknown state",
args: librbd.SiteMirrorImageStatus{
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
},
wantErr: false,
},
{
name: "test for error state",
args: librbd.SiteMirrorImageStatus{
State: librbd.MirrorImageStatusStateError,
Up: true,
},
wantErr: true,
},
{
name: "test for syncing state",
args: librbd.SiteMirrorImageStatus{
State: librbd.MirrorImageStatusStateSyncing,
Up: true,
},
wantErr: true,
},
{
name: "test for starting_replay state",
args: librbd.SiteMirrorImageStatus{
State: librbd.MirrorImageStatusStateStartingReplay,
Up: true,
},
wantErr: true,
},
{
name: "test for replaying state",
args: librbd.SiteMirrorImageStatus{
State: librbd.MirrorImageStatusStateReplaying,
Up: true,
//nolint:lll // sample output cannot be split into multiple lines.
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"local_snapshot_timestamp":1684675261,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
},
wantErr: false,
},
{
name: "test for stopping_replay state",
args: librbd.SiteMirrorImageStatus{
State: librbd.MirrorImageStatusStateStoppingReplay,
Up: true,
},
wantErr: true,
},
{
name: "test for stopped state",
name: "test when local_snapshot_timestamp is zero",
//nolint:lll // sample output cannot be split into multiple lines.
args: librbd.SiteMirrorImageStatus{
State: librbd.MirrorImageStatusStateStopped,
Up: true,
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"local_snapshot_timestamp":0,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
},
wantErr: true,
},
{
name: "test for invalid state",
name: "test when local_snapshot_timestamp is not present",
//nolint:lll // sample output cannot be split into multiple lines.
args: librbd.SiteMirrorImageStatus{
State: librbd.MirrorImageStatusState(100),
Up: true,
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
},
wantErr: true,
},
Expand Down
2 changes: 1 addition & 1 deletion internal/rbd/nodeserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ func (ns *NodeServer) populateRbdVol(
}
}()
// get the image details from the ceph cluster.
err = rv.getImageInfo()
err = rv.GetImageInfo()
if err != nil {
log.ErrorLog(ctx, "failed to get image details %s: %v", rv, err)

Expand Down
4 changes: 2 additions & 2 deletions internal/rbd/rbd_journal.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ func checkSnapCloneExists(
}
vol.ReservedID = snapUUID
// Fetch on-disk image attributes
err = vol.getImageInfo()
err = vol.GetImageInfo()
if err != nil {
if errors.Is(err, ErrImageNotFound) {
err = parentVol.deleteSnapshot(ctx, rbdSnap)
Expand Down Expand Up @@ -292,7 +292,7 @@ func (rv *rbdVolume) Exists(ctx context.Context, parentVol *rbdVolume) (bool, er
// save it for size checks before fetching image data
requestSize := rv.VolSize //nolint:ifshort // FIXME: rename and split function into helpers
// Fetch on-disk image attributes and compare against request
err = rv.getImageInfo()
err = rv.GetImageInfo()
if err != nil {
if errors.Is(err, ErrImageNotFound) {
// Need to check cloned info here not on createvolume,
Expand Down
Loading

0 comments on commit 3e4dfd0

Please sign in to comment.