From b5ff58165f673d58a2a084e789c088f5963bcac6 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Tue, 24 May 2022 09:08:42 +0530 Subject: [PATCH] rbd: add support for rbd striping RBD supports creating rbd images with object size, stripe unit and stripe count to support striping. This PR adds the support for the same. More details about striping at https://docs.ceph.com/en/quincy/man/8/rbd/#striping fixes: #3124 Signed-off-by: Madhu Rajanna --- docs/deploy-rbd.md | 3 + e2e/rbd.go | 147 ++++++++++++++++++++++++++ e2e/rbd_helper.go | 66 ++++++++++++ examples/rbd/storageclass.yaml | 8 ++ internal/rbd/controllerserver.go | 38 +++++++ internal/rbd/controllerserver_test.go | 88 +++++++++++++++ internal/rbd/rbd_util.go | 133 ++++++++++++++++------- 7 files changed, 447 insertions(+), 36 deletions(-) create mode 100644 internal/rbd/controllerserver_test.go diff --git a/docs/deploy-rbd.md b/docs/deploy-rbd.md index 757969967a4a..bdbaf101fc0b 100644 --- a/docs/deploy-rbd.md +++ b/docs/deploy-rbd.md @@ -66,6 +66,9 @@ make image-cephcsi | `mounter` | no | if set to `rbd-nbd`, use `rbd-nbd` on nodes that have `rbd-nbd` and `nbd` kernel modules to map rbd images | | `encrypted` | no | disabled by default, use `"true"` to enable LUKS encryption on PVC and `"false"` to disable it. **Do not change for existing storageclasses** | | `encryptionKMSID` | no | required if encryption is enabled and a kms is used to store passphrases | +| `stripeUnit` | no | stripe unit in bytes | +| `stripeCount` | no | objects to stripe over before looping | +| `objectSize` | no | object size in bytes | **NOTE:** An accompanying CSI configuration file, needs to be provided to the running pods. 
Refer to [Creating CSI configuration](../examples/README.md#creating-csi-configuration) diff --git a/e2e/rbd.go b/e2e/rbd.go index fa925b16afad..e222bb2919cf 100644 --- a/e2e/rbd.go +++ b/e2e/rbd.go @@ -4080,6 +4080,153 @@ var _ = Describe("RBD", func() { }) }) + By("validate rbd image stripe", func() { + stripeUnit := 4096 + stripeCount := 8 + objectSize := 131072 + err := deleteResource(rbdExamplePath + "storageclass.yaml") + if err != nil { + e2elog.Failf("failed to delete storageclass: %v", err) + } + + err = createRBDStorageClass( + f.ClientSet, + f, + defaultSCName, + nil, + map[string]string{ + "stripeUnit": fmt.Sprintf("%d", stripeUnit), + "stripeCount": fmt.Sprintf("%d", stripeCount), + "objectSize": fmt.Sprintf("%d", objectSize), + }, + deletePolicy) + if err != nil { + e2elog.Failf("failed to create storageclass: %v", err) + } + defer func() { + err = deleteResource(rbdExamplePath + "storageclass.yaml") + if err != nil { + e2elog.Failf("failed to delete storageclass: %v", err) + } + err = createRBDStorageClass(f.ClientSet, f, defaultSCName, nil, nil, deletePolicy) + if err != nil { + e2elog.Failf("failed to create storageclass: %v", err) + } + }() + + err = createRBDSnapshotClass(f) + if err != nil { + e2elog.Failf("failed to create storageclass: %v", err) + } + defer func() { + err = deleteRBDSnapshotClass() + if err != nil { + e2elog.Failf("failed to delete VolumeSnapshotClass: %v", err) + } + }() + + // create PVC and bind it to an app + pvc, err := loadPVC(pvcPath) + if err != nil { + e2elog.Failf("failed to load PVC: %v", err) + } + + pvc.Namespace = f.UniqueName + + err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout) + if err != nil { + e2elog.Failf("failed to create PVC and application: %v", err) + } + // validate created backend rbd images + validateRBDImageCount(f, 1, defaultRBDPool) + validateOmapCount(f, 1, rbdType, defaultRBDPool, volumesType) + err = validateStripe(f, pvc, stripeUnit, stripeCount, objectSize) + if err != nil { + 
e2elog.Failf("failed to validate stripe: %v", err) + } + + snap := getSnapshot(snapshotPath) + snap.Namespace = f.UniqueName + snap.Spec.Source.PersistentVolumeClaimName = &pvc.Name + + err = createSnapshot(&snap, deployTimeout) + if err != nil { + e2elog.Failf("failed to create snapshot: %v", err) + } + // validate created backend rbd images + // parent PVC + snapshot + totalImages := 2 + validateRBDImageCount(f, totalImages, defaultRBDPool) + validateOmapCount(f, 1, rbdType, defaultRBDPool, volumesType) + validateOmapCount(f, 1, rbdType, defaultRBDPool, snapsType) + pvcClone, err := loadPVC(pvcClonePath) + if err != nil { + e2elog.Failf("failed to load PVC: %v", err) + } + + // create clone PVC as ROX + pvcClone.Namespace = f.UniqueName + pvcClone.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadOnlyMany} + err = createPVCAndvalidatePV(f.ClientSet, pvcClone, deployTimeout) + if err != nil { + e2elog.Failf("failed to create PVC: %v", err) + } + // validate created backend rbd images + // parent pvc + snapshot + clone + totalImages = 3 + validateRBDImageCount(f, totalImages, defaultRBDPool) + validateOmapCount(f, 2, rbdType, defaultRBDPool, volumesType) + validateOmapCount(f, 1, rbdType, defaultRBDPool, snapsType) + err = validateStripe(f, pvcClone, stripeUnit, stripeCount, objectSize) + if err != nil { + e2elog.Failf("failed to validate stripe for clone: %v", err) + } + // delete snapshot + err = deleteSnapshot(&snap, deployTimeout) + if err != nil { + e2elog.Failf("failed to delete snapshot: %v", err) + } + // delete clone pvc + err = deletePVCAndValidatePV(f.ClientSet, pvcClone, deployTimeout) + if err != nil { + e2elog.Failf("failed to delete PVC: %v", err) + } + + pvcSmartClone, err := loadPVC(pvcSmartClonePath) + if err != nil { + e2elog.Failf("failed to load pvcSmartClone: %v", err) + } + pvcSmartClone.Namespace = f.UniqueName + + err = createPVCAndvalidatePV(f.ClientSet, pvcSmartClone, deployTimeout) + if err != nil { + e2elog.Failf("failed to 
create pvc: %v", err) + } + // validate created backend rbd images + // parent pvc + temp clone + clone + totalImages = 3 + validateRBDImageCount(f, totalImages, defaultRBDPool) + validateOmapCount(f, 2, rbdType, defaultRBDPool, volumesType) + err = validateStripe(f, pvcSmartClone, stripeUnit, stripeCount, objectSize) + if err != nil { + e2elog.Failf("failed to validate stripe for clone: %v", err) + } + // delete parent pvc + err = deletePVCAndValidatePV(f.ClientSet, pvc, deployTimeout) + if err != nil { + e2elog.Failf("failed to delete PVC: %v", err) + } + + // delete clone pvc + err = deletePVCAndValidatePV(f.ClientSet, pvcSmartClone, deployTimeout) + if err != nil { + e2elog.Failf("failed to delete PVC: %v", err) + } + // validate created backend rbd images + validateRBDImageCount(f, 0, defaultRBDPool) + validateOmapCount(f, 0, rbdType, defaultRBDPool, volumesType) + }) + // Make sure this should be last testcase in this file, because // it deletes pool By("Create a PVC and delete PVC when backend pool deleted", func() { diff --git a/e2e/rbd_helper.go b/e2e/rbd_helper.go index c897c9248c24..d30e5f0be140 100644 --- a/e2e/rbd_helper.go +++ b/e2e/rbd_helper.go @@ -942,3 +942,69 @@ func waitToRemoveImagesFromTrash(f *framework.Framework, poolName string, t int) return err } + +// imageInfo strongly typed JSON spec for image info. +type imageInfo struct { + Name string `json:"name"` + StripeUnit int `json:"stripe_unit"` + StripeCount int `json:"stripe_count"` + ObjectSize int `json:"object_size"` +} + +// getImageInfo queries rbd about the given image and returns its metadata, and returns +// error if provided image is not found. 
+func getImageInfo(f *framework.Framework, imageName, poolName string) (imageInfo, error) { + // rbd --format=json info [image-spec | snap-spec] + var imgInfo imageInfo + + stdOut, stdErr, err := execCommandInToolBoxPod( + f, + fmt.Sprintf("rbd info %s %s --format json", rbdOptions(poolName), imageName), + rookNamespace) + if err != nil { + return imgInfo, fmt.Errorf("failed to get rbd info: %w", err) + } + if stdErr != "" { + return imgInfo, fmt.Errorf("failed to get rbd info: %v", stdErr) + } + err = json.Unmarshal([]byte(stdOut), &imgInfo) + if err != nil { + return imgInfo, fmt.Errorf("unmarshal failed: %w. raw buffer response: %s", + err, stdOut) + } + + return imgInfo, nil +} + +// validateStripe validate the stripe count, stripe unit and object size of the +// image. +func validateStripe(f *framework.Framework, + pvc *v1.PersistentVolumeClaim, + stripeUnit, + stripeCount, + objectSize int, +) error { + imageData, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f) + if err != nil { + return err + } + + imgInfo, err := getImageInfo(f, imageData.imageName, defaultRBDPool) + if err != nil { + return err + } + + if imgInfo.ObjectSize != objectSize { + return fmt.Errorf("objectSize %d does not match expected %d", imgInfo.ObjectSize, objectSize) + } + + if imgInfo.StripeUnit != stripeUnit { + return fmt.Errorf("stripeUnit %d does not match expected %d", imgInfo.StripeUnit, stripeUnit) + } + + if imgInfo.StripeCount != stripeCount { + return fmt.Errorf("stripeCount %d does not match expected %d", imgInfo.StripeCount, stripeCount) + } + + return nil +} diff --git a/examples/rbd/storageclass.yaml b/examples/rbd/storageclass.yaml index 94c9413e13c3..a30114667ac6 100644 --- a/examples/rbd/storageclass.yaml +++ b/examples/rbd/storageclass.yaml @@ -134,6 +134,14 @@ parameters: # {"domainLabel":"zone","value":"zone1"}]} # ] + # Image striping, Refer https://docs.ceph.com/en/latest/man/8/rbd/#striping + # For more details + # (optional) stripe unit in bytes. 
+   # stripeUnit: <>
+   # (optional) objects to stripe over before looping.
+   # stripeCount: <>
+   # (optional) The object size in bytes.
+   # objectSize: <>
 reclaimPolicy: Delete
 allowVolumeExpansion: true
 mountOptions:
diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go
index 6e679630908b..b67760ddbbd6 100644
--- a/internal/rbd/controllerserver.go
+++ b/internal/rbd/controllerserver.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"strconv"

 	csicommon "github.com/ceph/ceph-csi/internal/csi-common"
 	"github.com/ceph/ceph-csi/internal/util"
@@ -94,6 +95,62 @@ func (cs *ControllerServer) validateVolumeReq(ctx context.Context, req *csi.Crea
 		return err
 	}

+	err = validateStriping(req.Parameters)
+	if err != nil {
+		return status.Error(codes.InvalidArgument, err.Error())
+	}
+
+	return nil
+}
+
+// validateStriping validates the striping parameters (stripeUnit,
+// stripeCount and objectSize) of a volume request. An empty or missing value
+// means the parameter is not specified. stripeUnit and stripeCount must be
+// specified together and both must be unsigned integers; objectSize must be
+// an unsigned integer that is a power of 2.
+func validateStriping(parameters map[string]string) error {
+	stripeUnit := parameters["stripeUnit"]
+	stripeCount := parameters["stripeCount"]
+	if stripeUnit != "" && stripeCount == "" {
+		return errors.New("stripeCount must be specified when stripeUnit is specified")
+	}
+
+	if stripeUnit == "" && stripeCount != "" {
+		return errors.New("stripeUnit must be specified when stripeCount is specified")
+	}
+
+	// Both are set or both are empty at this point. Reject malformed
+	// numbers here, so that validateVolumeReq reports them as
+	// InvalidArgument.
+	if stripeUnit != "" {
+		_, err := strconv.ParseUint(stripeUnit, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse stripeUnit %s: %w", stripeUnit, err)
+		}
+		_, err = strconv.ParseUint(stripeCount, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse stripeCount %s: %w", stripeCount, err)
+		}
+	}
+
+	objectSize := parameters["objectSize"]
+	if objectSize != "" {
+		objSize, err := strconv.ParseUint(objectSize, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse objectSize %s: %w", objectSize, err)
+		}
+		// check objectSize is power of 2
+		/*
+			Take 2^3=8 for example.
+			x & (x-1)
+			8 & 7
+			1000 & 0111 = 0000
+		*/
+		if objSize == 0 || (objSize&(objSize-1)) != 0 {
+			return fmt.Errorf("objectSize %s is not power of 2", objectSize)
+		}
+	}
+
 	return nil
 }

diff --git a/internal/rbd/controllerserver_test.go b/internal/rbd/controllerserver_test.go
new file mode 100644
index 000000000000..7570c8ab68aa
--- /dev/null
+++ b/internal/rbd/controllerserver_test.go
@@ -0,0 +1,88 @@
+/*
+Copyright 2022 The Ceph-CSI Authors.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rbd + +import "testing" + +func TestValidateStriping(t *testing.T) { + t.Parallel() + tests := []struct { + name string + parameters map[string]string + wantErr bool + }{ + { + name: "when stripeUnit is not specified", + parameters: map[string]string{ + "stripeUnit": "", + "stripeCount": "10", + "objectSize": "2", + }, + wantErr: true, + }, + { + name: "when stripeCount is not specified", + parameters: map[string]string{ + "stripeUnit": "4096", + "stripeCount": "", + "objectSize": "2", + }, + wantErr: true, + }, + { + name: "when objectSize is not power of 2", + parameters: map[string]string{ + "stripeUnit": "4096", + "stripeCount": "8", + "objectSize": "3", + }, + wantErr: true, + }, + { + name: "when objectSize is 0", + parameters: map[string]string{ + "stripeUnit": "4096", + "stripeCount": "8", + "objectSize": "0", + }, + wantErr: true, + }, + { + name: "when valid stripe parameters are specified", + parameters: map[string]string{ + "stripeUnit": "4096", + "stripeCount": "8", + "objectSize": "131072", + }, + wantErr: false, + }, + { + name: "when no stripe parameters are specified", + parameters: map[string]string{}, + wantErr: false, + }, + } + for _, tt := range tests { + ts := tt + t.Run(ts.name, func(t *testing.T) { + t.Parallel() + if err := validateStriping(ts.parameters); (err != nil) != ts.wantErr { + t.Errorf("validateStriping() error = %v, wantErr %v", err, ts.wantErr) + } + }) + } +} diff --git 
a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index ed48bc43ac72..5f26b25793bd 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -21,6 +21,7 @@ import ( "encoding/json" "errors" "fmt" + "math" "os" "path/filepath" "strconv" @@ -99,6 +100,11 @@ type rbdImage struct { // VolSize is the size of the RBD image backing this rbdImage. VolSize int64 + // image striping configurations. + StripeCount uint64 + StripeUnit uint64 + ObjectSize uint64 + Monitors string // JournalPool is the ceph pool in which the CSI Journal/CSI snapshot Journal is // stored @@ -408,27 +414,19 @@ func (rs *rbdSnapshot) String() string { // createImage creates a new ceph image with provision and volume options. func createImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) error { volSzMiB := fmt.Sprintf("%dM", util.RoundOffVolSize(pOpts.VolSize)) - options := librbd.NewRbdImageOptions() - logMsg := "rbd: create %s size %s (features: %s) using mon %s" - if pOpts.DataPool != "" { - logMsg += fmt.Sprintf(", data pool %s", pOpts.DataPool) - err := options.SetString(librbd.RbdImageOptionDataPool, pOpts.DataPool) - if err != nil { - return fmt.Errorf("failed to set data pool: %w", err) - } - } - log.DebugLog(ctx, logMsg, + log.DebugLog(ctx, "rbd: create %s size %s (features: %s) using mon %s", pOpts, volSzMiB, pOpts.ImageFeatureSet.Names(), pOpts.Monitors) - if pOpts.ImageFeatureSet != 0 { - err := options.SetUint64(librbd.RbdImageOptionFeatures, uint64(pOpts.ImageFeatureSet)) - if err != nil { - return fmt.Errorf("failed to set image features: %w", err) - } + options := librbd.NewRbdImageOptions() + defer options.Destroy() + + err := pOpts.setImageOptions(ctx, options) + if err != nil { + return err } - err := pOpts.Connect(cr) + err = pOpts.Connect(cr) if err != nil { return err } @@ -1280,9 +1278,40 @@ func genVolFromVolumeOptions( rbdVol.Mounter) rbdVol.DisableInUseChecks = disableInUseChecks + err = rbdVol.setStripeConfiguration(volOptions) + if err != 
nil {
+		return nil, err
+	}
+
 	return rbdVol, nil
 }

+func (ri *rbdImage) setStripeConfiguration(options map[string]string) error {
+	var err error
+	if val := options["stripeUnit"]; val != "" { // "" means not specified, as in validateStriping
+		ri.StripeUnit, err = strconv.ParseUint(val, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse stripeUnit %s: %w", val, err)
+		}
+	}
+
+	if val := options["stripeCount"]; val != "" {
+		ri.StripeCount, err = strconv.ParseUint(val, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse stripeCount %s: %w", val, err)
+		}
+	}
+
+	if val := options["objectSize"]; val != "" {
+		ri.ObjectSize, err = strconv.ParseUint(val, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse objectSize %s: %w", val, err)
+		}
+	}
+
+	return nil
+}
+
 func (rv *rbdVolume) validateImageFeatures(imageFeatures string) error {
 	// It is possible for image features to be an empty string which
 	// the Go split function would return a single item array with
@@ -1384,7 +1413,8 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(
 	parentVol *rbdVolume,
 ) error {
 	var err error
-	logMsg := "rbd: clone %s %s (features: %s) using mon %s"
+	log.DebugLog(ctx, "rbd: clone %s %s (features: %s) using mon %s",
+		pSnapOpts, rv, rv.ImageFeatureSet.Names(), rv.Monitors)

 	err = parentVol.openIoctx()
 	if err != nil {
@@ -1397,30 +1427,15 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(

 	options := librbd.NewRbdImageOptions()
 	defer options.Destroy()
-
-	if rv.DataPool != "" {
-		logMsg += fmt.Sprintf(", data pool %s", rv.DataPool)
-		err = options.SetString(librbd.RbdImageOptionDataPool, rv.DataPool)
-		if err != nil {
-			return fmt.Errorf("failed to set data pool: %w", err)
-		}
-	}
-
-	log.DebugLog(ctx, logMsg,
-		pSnapOpts, rv, rv.ImageFeatureSet.Names(), rv.Monitors)
-
-	if rv.ImageFeatureSet != 0 {
-		err = options.SetUint64(librbd.RbdImageOptionFeatures, uint64(rv.ImageFeatureSet))
-		if err != nil {
-			return fmt.Errorf("failed to set image features: %w", err)
-		}
+	err = rv.setImageOptions(ctx, options)
+	if err
!= nil {
+		return err
 	}

 	err = options.SetUint64(librbd.ImageOptionCloneFormat, 2)
 	if err != nil {
-		return fmt.Errorf("failed to set image features: %w", err)
+		return fmt.Errorf("failed to set clone format: %w", err)
 	}
-
 	// As the clone is yet to be created, open the Ioctx.
 	err = rv.openIoctx()
 	if err != nil {
@@ -1461,6 +1476,57 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(
 	return nil
 }

+// setImageOptions applies the image settings of rv to options: the data
+// pool, the image features, the stripe count and stripe unit, and the
+// object size. Settings that are empty or zero are not set on options. A
+// summary of what was set is written to the debug log.
+func (rv *rbdVolume) setImageOptions(ctx context.Context, options *librbd.ImageOptions) error {
+	var err error
+
+	logMsg := fmt.Sprintf("setting image options on %s", rv)
+	if rv.DataPool != "" {
+		logMsg += fmt.Sprintf(", data pool %s", rv.DataPool)
+		err = options.SetString(librbd.RbdImageOptionDataPool, rv.DataPool)
+		if err != nil {
+			return fmt.Errorf("failed to set data pool: %w", err)
+		}
+	}
+
+	if rv.ImageFeatureSet != 0 {
+		err = options.SetUint64(librbd.RbdImageOptionFeatures, uint64(rv.ImageFeatureSet))
+		if err != nil {
+			return fmt.Errorf("failed to set image features: %w", err)
+		}
+	}
+
+	if rv.StripeCount != 0 {
+		logMsg += fmt.Sprintf(", stripe count %d, stripe unit %d", rv.StripeCount, rv.StripeUnit)
+		err = options.SetUint64(librbd.RbdImageOptionStripeCount, rv.StripeCount)
+		if err != nil {
+			return fmt.Errorf("failed to set stripe count: %w", err)
+		}
+		err = options.SetUint64(librbd.RbdImageOptionStripeUnit, rv.StripeUnit)
+		if err != nil {
+			return fmt.Errorf("failed to set stripe unit: %w", err)
+		}
+	}
+
+	if rv.ObjectSize != 0 {
+		// the object size is passed as its order, i.e. log2(ObjectSize)
+		order := uint64(math.Log2(float64(rv.ObjectSize)))
+		logMsg += fmt.Sprintf(", object size %d, order %d", rv.ObjectSize, order)
+		err = options.SetUint64(librbd.RbdImageOptionOrder, order)
+		if err != nil {
+			return fmt.Errorf("failed to set object size: %w", err)
+		}
+	}
+
+	// logMsg is data, not a format string: it may contain '%'.
+	log.DebugLog(ctx, "%s", logMsg)
+
+	return nil
+}
+
 // getImageInfo queries rbd about the given image and returns its metadata, and returns
 // ErrImageNotFound if provided image is not found.
 func (ri *rbdImage) getImageInfo() error {