Skip to content

Commit

Permalink
Add multiNodeWritable option for RBD Volumes
Browse files Browse the repository at this point in the history
This change adds the ability to define a `multiNodeWritable` option in
the Storage Class.

This change does a number of things:
1. Allow multi-node-multi-writer access modes if the SC option is
enabled
2. Bypass the watcher checks for MultiNodeMultiWriter Volumes
3. Maintains existing watcher checks for SingleNodeWriter access modes
regardless of the StorageClass option.
  • Loading branch information
j-griffith committed Mar 1, 2019
1 parent 49f5d4a commit 9c51bbb
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 11 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@

.PHONY: all rbdplugin cephfsplugin

RBD_IMAGE_NAME=$(if $(ENV_RBD_IMAGE_NAME),$(ENV_RBD_IMAGE_NAME),quay.io/cephcsi/rbdplugin)
RBD_IMAGE_NAME=$(if $(ENV_RBD_IMAGE_NAME),$(ENV_RBD_IMAGE_NAME),quay.io/jgriffith/rbdplugin)
RBD_IMAGE_VERSION=$(if $(ENV_RBD_IMAGE_VERSION),$(ENV_RBD_IMAGE_VERSION),v1.0.0)

CEPHFS_IMAGE_NAME=$(if $(ENV_CEPHFS_IMAGE_NAME),$(ENV_CEPHFS_IMAGE_NAME),quay.io/cephcsi/cephfsplugin)
CEPHFS_IMAGE_NAME=$(if $(ENV_CEPHFS_IMAGE_NAME),$(ENV_CEPHFS_IMAGE_NAME),quay.io/jgriffith/cephfsplugin)
CEPHFS_IMAGE_VERSION=$(if $(ENV_CEPHFS_IMAGE_VERSION),$(ENV_CEPHFS_IMAGE_VERSION),v1.0.0)

$(info rbd image settings: $(RBD_IMAGE_NAME) version $(RBD_IMAGE_VERSION))
Expand All @@ -31,7 +31,7 @@ go-test:
./scripts/test-go.sh

static-check:
./scripts/lint-go.sh
./scripts/lint-go.sh
./scripts/lint-text.sh

rbdplugin:
Expand Down
6 changes: 6 additions & 0 deletions docs/deploy-rbd.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ Parameter | Required | Description
`csi.storage.k8s.io/provisioner-secret-name`, `csi.storage.k8s.io/node-publish-secret-name` | for Kubernetes | name of the Kubernetes Secret object containing Ceph client credentials. Both parameters should have the same value
`csi.storage.k8s.io/provisioner-secret-namespace`, `csi.storage.k8s.io/node-publish-secret-namespace` | for Kubernetes | namespaces of the above Secret objects
`mounter`| no | if set to `rbd-nbd`, use `rbd-nbd` on nodes that have `rbd-nbd` and `nbd` kernel modules to map rbd images
`fsType` | no | allows setting to `ext3`, `ext4` or `xfs`; default is `ext4`
`multiNodeWritable` | no | if set to `enabled` allows RBD volumes with MultiNode Access Modes to bypass watcher checks. By default multiple attachments of an RBD volume are NOT allowed. Even if this option is set in the StorageClass, it's ignored if a standard SingleNodeWriter Access Mode is requested

**Warning for multiNodeWritable:**

*NOTE* the `multiNodeWritable` setting is NOT safe for use by workloads that are not designed to coordinate access. This does NOT add any sort of clustered filesystem or write synchronization; it's specifically for special workloads that handle access coordination on their own (i.e. Active/Passive scenarios). Using this mode in general usage *WILL RESULT IN DATA CORRUPTION*. We attempt to limit exposure to trouble here by ignoring the Storage Class setting unless your Volume explicitly asks for multi-node access, and we assume you know what you're doing.

**Required secrets:**

Expand Down
26 changes: 22 additions & 4 deletions pkg/rbd/controllerserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,16 @@ func (cs *ControllerServer) validateVolumeReq(req *csi.CreateVolumeRequest) erro
func parseVolCreateRequest(req *csi.CreateVolumeRequest) (*rbdVolume, error) {
// TODO (sbezverk) Last check for not exceeding total storage capacity

rbdVol, err := getRBDVolumeOptions(req.GetParameters())
// MultiNodeWriters are accepted but they're only for special cases, and we skip the watcher checks for them, which isn't the greatest;
// let's make sure we ONLY skip that if the user is requesting a MULTI Node accessible mode
ignoreMultiWriterEnabled := true
for _, am := range req.VolumeCapabilities {
if am.GetAccessMode().GetMode() != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
ignoreMultiWriterEnabled = false
}
}

rbdVol, err := getRBDVolumeOptions(req.GetParameters(), ignoreMultiWriterEnabled)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -281,11 +290,20 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
// ValidateVolumeCapabilities checks whether the volume capabilities requested
// are supported.
func (cs *ControllerServer) ValidateVolumeCapabilities(ctx context.Context, req *csi.ValidateVolumeCapabilitiesRequest) (*csi.ValidateVolumeCapabilitiesResponse, error) {
for _, cap := range req.VolumeCapabilities {
if cap.GetAccessMode().GetMode() != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
return &csi.ValidateVolumeCapabilitiesResponse{Message: ""}, nil
params := req.GetParameters()
multiWriter, _ := params["multiNodeWritable"]
if multiWriter == "enabled" {
klog.V(3).Info("detected multiNodeWritable parameter in Storage Class, allowing multi-node access modes")

} else {
for _, cap := range req.VolumeCapabilities {
if cap.GetAccessMode().GetMode() != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
return &csi.ValidateVolumeCapabilitiesResponse{Message: ""}, nil
}
}

}

return &csi.ValidateVolumeCapabilitiesResponse{
Confirmed: &csi.ValidateVolumeCapabilitiesResponse_Confirmed{
VolumeCapabilities: req.VolumeCapabilities,
Expand Down
10 changes: 9 additions & 1 deletion pkg/rbd/nodeserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,18 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
if !notMnt {
return &csi.NodePublishVolumeResponse{}, nil
}
volOptions, err := getRBDVolumeOptions(req.GetVolumeContext())

ignoreMultiWriterEnabled := true
if req.VolumeCapability.AccessMode.Mode != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
ignoreMultiWriterEnabled = false
}

volOptions, err := getRBDVolumeOptions(req.GetVolumeContext(), ignoreMultiWriterEnabled)
if err != nil {
return nil, err
}
// Check access mode settings in the request, even if SC is RW-Many, if the request is a normal Single Writer volume, we ignore this setting and proceed as normal

volOptions.VolName = volName
// Mapping RBD image
devicePath, err := attachRBDImage(volOptions, volOptions.UserID, req.GetSecrets())
Expand Down
7 changes: 6 additions & 1 deletion pkg/rbd/rbd.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,12 @@ func (r *Driver) Run(driverName, nodeID, endpoint string, containerized bool, ca
csi.ControllerServiceCapability_RPC_LIST_SNAPSHOTS,
csi.ControllerServiceCapability_RPC_CLONE_VOLUME,
})
r.cd.AddVolumeCapabilityAccessModes([]csi.VolumeCapability_AccessMode_Mode{csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER})

// TODO: JDG Should also look at remaining modes like MULTI_NODE_READER_ONLY (SINGLE_NODE_READER_ONLY)
r.cd.AddVolumeCapabilityAccessModes(
[]csi.VolumeCapability_AccessMode_Mode{
csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER})

// Create GRPC servers
r.ids = NewIdentityServer(r.cd)
Expand Down
10 changes: 9 additions & 1 deletion pkg/rbd/rbd_attach.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ func attachRBDImage(volOptions *rbdVolume, userID string, credentials map[string
Factor: rbdImageWatcherFactor,
Steps: rbdImageWatcherSteps,
}
err = waitForrbdImage(backoff, volOptions, userID, credentials)
err := waitForrbdImage(backoff, volOptions, userID, credentials)

if err != nil {
return "", err
Expand Down Expand Up @@ -313,8 +313,16 @@ func waitForrbdImage(backoff wait.Backoff, volOptions *rbdVolume, userID string,
if err != nil {
return false, fmt.Errorf("fail to check rbd image status with: (%v), rbd output: (%s)", err, rbdOutput)
}
// In the case of multiattach we want to short circuit the retries when used (so `if used; return used`)
// otherwise we're setting this to false which translates to !ok, which means backoff and try again
// NOTE: we ONLY do this if a multi-node access mode is requested for this volume
if (volOptions.MultiNodeWritable == "enabled") && (used) {
klog.V(2).Info("detected MultiNodeWritable enabled, ignoring watcher in-use result")
return used, nil
}
return !used, nil
})

// return error if rbd image has not become available for the specified timeout
if err == wait.ErrWaitTimeout {
return fmt.Errorf("rbd image %s is still being used", imagePath)
Expand Down
9 changes: 8 additions & 1 deletion pkg/rbd/rbd_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ type rbdVolume struct {
AdminID string `json:"adminId"`
UserID string `json:"userId"`
Mounter string `json:"mounter"`
MultiNodeWritable string `json:"multiNodeWritable"`
}

type rbdSnapshot struct {
Expand Down Expand Up @@ -226,7 +227,7 @@ func execCommand(command string, args []string) ([]byte, error) {
return cmd.CombinedOutput()
}

func getRBDVolumeOptions(volOptions map[string]string) (*rbdVolume, error) {
func getRBDVolumeOptions(volOptions map[string]string, ignoreMultiNodeWritable bool) (*rbdVolume, error) {
var ok bool
rbdVol := &rbdVolume{}
rbdVol.Pool, ok = volOptions["pool"]
Expand Down Expand Up @@ -260,6 +261,12 @@ func getRBDVolumeOptions(volOptions map[string]string) (*rbdVolume, error) {

}
getCredsFromVol(rbdVol, volOptions)

klog.V(3).Infof("ignoreMultiNodeWritable flag in parse getRBDVolumeOptions is: %v", ignoreMultiNodeWritable)
// If the volume we're working with is NOT requesting multi-node attach then don't treat it special, ignore the setting in the SC and just keep our watcher checks
if !ignoreMultiNodeWritable {
rbdVol.MultiNodeWritable, ok = volOptions["multiNodeWritable"]
}
return rbdVol, nil
}

Expand Down

0 comments on commit 9c51bbb

Please sign in to comment.