Skip to content

Commit

Permalink
Refactor voljournal to aid reuse with CephFS
Browse files Browse the repository at this point in the history
and to also improve the code reuse in rbd itself.

Signed-off-by: ShyamsundarR <srangana@redhat.com>
  • Loading branch information
ShyamsundarR committed May 26, 2019
1 parent d02e50a commit 82e12c9
Show file tree
Hide file tree
Showing 11 changed files with 770 additions and 686 deletions.
10 changes: 5 additions & 5 deletions pkg/rbd/controllerserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
}
defer func() {
if err != nil {
errDefer := unreserveVol(rbdVol, req.GetSecrets())
errDefer := undoVolReservation(rbdVol, req.GetSecrets())
if errDefer != nil {
klog.Warningf("failed undoing reservation of volume: %s (%s)", req.GetName(), errDefer)
}
Expand Down Expand Up @@ -257,7 +257,7 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
}
}()

if err := unreserveVol(rbdVol, req.GetSecrets()); err != nil {
if err := undoVolReservation(rbdVol, req.GetSecrets()); err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
return &csi.DeleteVolumeResponse{}, nil
Expand Down Expand Up @@ -347,7 +347,7 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
// check for the requested source volume id and already allocated source volume id
found, err := checkSnapExists(rbdSnap, req.GetSecrets())
if err != nil {
if _, ok := err.(ErrSnapNameConflict); ok {
if _, ok := err.(util.ErrSnapNameConflict); ok {
return nil, status.Error(codes.AlreadyExists, err.Error())
}

Expand All @@ -371,7 +371,7 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
}
defer func() {
if err != nil {
errDefer := unreserveSnap(rbdSnap, req.GetSecrets())
errDefer := undoSnapReservation(rbdSnap, req.GetSecrets())
if errDefer != nil {
klog.Warningf("failed undoing reservation of snapshot: %s %v", req.GetName(), errDefer)
}
Expand Down Expand Up @@ -483,7 +483,7 @@ func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteS
if _, ok := err.(ErrSnapNotFound); !ok {
return nil, status.Error(codes.Internal, err.Error())
}
if err := unreserveSnap(rbdSnap, req.GetSecrets()); err != nil {
if err := undoSnapReservation(rbdSnap, req.GetSecrets()); err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
return &csi.DeleteSnapshotResponse{}, nil
Expand Down
11 changes: 0 additions & 11 deletions pkg/rbd/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,6 @@ func (e ErrSnapNotFound) Error() string {
return e.err.Error()
}

// ErrSnapNameConflict is generated when a requested CSI snap name already exists on RBD but with
// different properties, and hence is in conflict with the passed in CSI snapshot name
type ErrSnapNameConflict struct {
requestName string
err error
}

func (e ErrSnapNameConflict) Error() string {
return e.err.Error()
}

// ErrVolNameConflict is generated when a requested CSI volume name already exists on RBD but with
// different properties, and hence is in conflict with the passed in CSI volume name
type ErrVolNameConflict struct {
Expand Down
2 changes: 1 addition & 1 deletion pkg/rbd/nodeserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ func (ns *NodeServer) getVolumeName(req *csi.NodePublishVolumeRequest) (string,
return "", status.Error(codes.InvalidArgument, err.Error())
}

return rbdImgNamePrefix + vi.ObjectUUID, nil
return volJournal.NamingPrefix() + vi.ObjectUUID, nil
}

func (ns *NodeServer) mountVolume(req *csi.NodePublishVolumeRequest, devicePath string) error {
Expand Down
114 changes: 17 additions & 97 deletions pkg/rbd/rbd.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,70 +27,6 @@ import (
"k8s.io/utils/nsenter"
)

/*
RADOS omaps usage:
This note details how we preserve idempotent nature of create requests and retain the relationship
between orchestrator (CO) generated Names and plugin generated names for images and snapshots
The implementation uses Ceph RADOS omaps to preserve the relationship between request name and
generated image (or snapshot) name. There are 4 types of omaps in use,
- A "csi.volumes.[csi-id]" (or "csi.volumes"+"."+CSIInstanceID), we call this the csiVolsDirectory
- stores keys named using the CO generated names for volume requests
- keys are named "csi.volume."+[CO generated VolName]
- Key value contains the RBD image uuid that is created or will be created, for the CO provided
name
- A "csi.snaps.[csi-id]" (or "csi.snaps"+"."+CSIInstanceID), we refer to this as the csiSnapsDirectory
- stores keys named using the CO generated names for snapshot requests
- keys are named "csi.snap."+[CO generated SnapName]
- Key value contains the RBD snapshot uuid that is created or will be created, for the CO
provided name
- A per image omap named "csi.volume."+[RBD image uuid], we refer to this as the rbdImageOMap
- stores a single key named "csi.volname", that has the value of the CO generated VolName that
this image refers to
- A per snapshot omap named "csi.snap."+[RBD snapshot uuid], we refer to this as the snapOMap
- stores a key named "csi.snapname", that has the value of the CO generated SnapName that this
snapshot refers to
- also stores another key named "csi.source", that has the value of the image name that is the
source of the snapshot
Creation of omaps:
When a volume create request is received (or a snapshot create, the snapshot is not detailed in this
comment further as the process is similar),
- The csiVolsDirectory is consulted to find if there is already a key with the CO VolName, and if present,
it is used to read its references to reach the RBD image that backs this VolName, to check if the
RBD image can satisfy the requirements for the request
- If during the process of checking the same, it is found that some linking information is stale
or missing, the corresponding keys up to the key in the csiVolsDirectory are cleaned up, to start afresh
- If the key with the CO VolName is not found, or was cleaned up, the request is treated as a
new create request, and an rbdImageOMap is created first with a generated uuid, this ensures that we
do not use a uuid that is already in use
- Next, a key with the VolName is created in the csiVolsDirectory, and its value is updated to store the
generated uuid
- This is followed by updating the rbdImageOMap with the VolName in the rbdImageCSIVolNameKey
- Finally, the image is created (or promoted from a snapshot, if content source was provided) using
the uuid and a corresponding image name prefix (rbdImgNamePrefix or rbdSnapNamePrefix)
The entire operation is locked based on VolName hash, to ensure there is only ever a single entity
modifying the related omaps for a given VolName.
This ensures idempotent nature of creates, as the same CO generated VolName would attempt to use
the same RBD image name to serve the request, as the relations are saved in the respective omaps.
Deletion of omaps:
Delete requests would not contain the VolName, hence deletion uses the volume ID, which is encoded
with the image name in it, to find the image and the rbdImageOMap. The rbdImageOMap is read to get
the VolName that this image points to. This VolName can be further used to read and delete the key
from the csiVolsDirectory.
As we trace back and find the VolName, we also take a hash based lock on the VolName before
proceeding with deleting the image and the related omap entries, to ensure there is only ever a
single entity modifying the related omaps for a given VolName.
*/

const (
// volIDVersion is the version number of volume ID encoding scheme
volIDVersion uint16 = 1
Expand All @@ -99,34 +35,8 @@ const (

// csiConfigFile is the location of the CSI config file
csiConfigFile = "/etc/ceph-csi-config/config.json"

// CSI volume-name keyname prefix, for key in csiVolsDirectory, suffix is the CSI passed volume name
csiVolNameKeyPrefix = "csi.volume."
// Per RBD image object map name prefix, suffix is the RBD image uuid
rbdImageOMapPrefix = "csi.volume."
// CSI volume-name key in per RBD image object map, containing CSI volume-name for which the
// image was created
rbdImageCSIVolNameKey = "csi.volname"
// RBD image name prefix, suffix is a uuid generated per image
rbdImgNamePrefix = "csi-vol-"

//CSI snap-name keyname prefix, for key in csiSnapsDirectory, suffix is the CSI passed snapshot name
csiSnapNameKeyPrefix = "csi.snap."
// Per RBD snapshot object map name prefix, suffix is the RBD snapshot uuid
rbdSnapOMapPrefix = "csi.snap."
// CSI snap-name key in per RBD snapshot object map, containing CSI snapshot-name for which the
// snapshot was created
rbdSnapCSISnapNameKey = "csi.snapname"
// source image name key in per RBD snapshot object map, containing RBD source image name for
// which the snapshot was created
rbdSnapSourceImageKey = "csi.source"
// RBD snapshot name prefix, suffix is a uuid generated per snapshot
rbdSnapNamePrefix = "csi-snap-"
)

// PluginFolder defines the location of ceph plugin
var PluginFolder = "/var/lib/kubelet/plugins/"

// Driver contains the default identity,node and controller struct
type Driver struct {
cd *csicommon.CSIDriver
Expand All @@ -138,14 +48,18 @@ type Driver struct {

var (
version = "1.0.0"

// PluginFolder defines the location of ceph plugin
PluginFolder = "/var/lib/kubelet/plugins/"

// CSIInstanceID is the instance ID that is unique to an instance of CSI, used when sharing
// ceph clusters across CSI instances, to differentiate omap names per CSI instance
CSIInstanceID = "default"
// csiVolsDirectory is the name of the CSI volumes object map that contains CSI volume-name
// based keys
csiVolsDirectory = "csi.volumes"
// csiSnapsDirectory is the name of the CSI snapshots object map that contains CSI snapshot-name based keys
csiSnapsDirectory = "csi.snaps"

// volJournal and snapJournal are used to maintain RADOS based journals for CO generated
// VolumeName to backing RBD images, and CO generated SnapshotName to backing RBD snapshots
volJournal *util.CSIJournal
snapJournal *util.CSIJournal
)

// NewDriver returns new rbd driver
Expand Down Expand Up @@ -199,8 +113,14 @@ func (r *Driver) Run(driverName, nodeID, endpoint, instanceID string, containeri
if instanceID != "" {
CSIInstanceID = instanceID
}
csiVolsDirectory = csiVolsDirectory + "." + CSIInstanceID
csiSnapsDirectory = csiSnapsDirectory + "." + CSIInstanceID

// Get an instance of the volume and snapshot journal keys
volJournal = util.NewCSIVolumeJournal()
snapJournal = util.NewCSISnapshotJournal()

// Update keys with CSI instance suffix
volJournal.SetCSIDirectorySuffix(CSIInstanceID)
snapJournal.SetCSIDirectorySuffix(CSIInstanceID)

// Initialize default library driver
r.cd = csicommon.NewCSIDriver(driverName, version, nodeID)
Expand Down
Loading

0 comments on commit 82e12c9

Please sign in to comment.