diff --git a/docs/deploy-rbd.md b/docs/deploy-rbd.md
index 757969967a4a..bdbaf101fc0b 100644
--- a/docs/deploy-rbd.md
+++ b/docs/deploy-rbd.md
@@ -66,6 +66,9 @@ make image-cephcsi
 | `mounter` | no | if set to `rbd-nbd`, use `rbd-nbd` on nodes that have `rbd-nbd` and `nbd` kernel modules to map rbd images |
 | `encrypted` | no | disabled by default, use `"true"` to enable LUKS encryption on PVC and `"false"` to disable it. **Do not change for existing storageclasses** |
 | `encryptionKMSID` | no | required if encryption is enabled and a kms is used to store passphrases |
+| `stripeUnit` | no | stripe unit in bytes; must be specified together with `stripeCount` |
+| `stripeCount` | no | number of objects to stripe over before looping; must be specified together with `stripeUnit` |
+| `objectSize` | no | object size in bytes; must be a power of 2 |
 
 **NOTE:** An accompanying CSI configuration file, needs to be provided to the
 running pods. Refer to [Creating CSI configuration](../examples/README.md#creating-csi-configuration)
diff --git a/e2e/rbd.go b/e2e/rbd.go
index fa925b16afad..e222bb2919cf 100644
--- a/e2e/rbd.go
+++ b/e2e/rbd.go
@@ -4080,6 +4080,153 @@ var _ = Describe("RBD", func() {
 				})
 			})
 
+			By("validate rbd image stripe", func() {
+				stripeUnit := 4096
+				stripeCount := 8
+				objectSize := 131072
+				err := deleteResource(rbdExamplePath + "storageclass.yaml")
+				if err != nil {
+					e2elog.Failf("failed to delete storageclass: %v", err)
+				}
+
+				err = createRBDStorageClass(
+					f.ClientSet,
+					f,
+					defaultSCName,
+					nil,
+					map[string]string{
+						"stripeUnit":  fmt.Sprintf("%d", stripeUnit),
+						"stripeCount": fmt.Sprintf("%d", stripeCount),
+						"objectSize":  fmt.Sprintf("%d", objectSize),
+					},
+					deletePolicy)
+				if err != nil {
+					e2elog.Failf("failed to create storageclass: %v", err)
+				}
+				defer func() {
+					err = deleteResource(rbdExamplePath + "storageclass.yaml")
+					if err != nil {
+						e2elog.Failf("failed to delete storageclass: %v", err)
+					}
+					err = createRBDStorageClass(f.ClientSet, f, defaultSCName, nil, nil, deletePolicy)
+					if err != nil {
+						e2elog.Failf("failed to create storageclass: %v", err)
+					}
+				}()
+
+				err = createRBDSnapshotClass(f)
+				if err != nil {
+					e2elog.Failf("failed to create VolumeSnapshotClass: %v", err)
+				}
+				defer func() {
+					err = deleteRBDSnapshotClass()
+					if err != nil {
+						e2elog.Failf("failed to delete VolumeSnapshotClass: %v", err)
+					}
+				}()
+
+				// create the parent PVC from the storageclass that carries the striping parameters
+				pvc, err := loadPVC(pvcPath)
+				if err != nil {
+					e2elog.Failf("failed to load PVC: %v", err)
+				}
+
+				pvc.Namespace = f.UniqueName
+
+				err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to create PVC and application: %v", err)
+				}
+				// validate created backend rbd images
+				validateRBDImageCount(f, 1, defaultRBDPool)
+				validateOmapCount(f, 1, rbdType, defaultRBDPool, volumesType)
+				err = validateStripe(f, pvc, stripeUnit, stripeCount, objectSize)
+				if err != nil {
+					e2elog.Failf("failed to validate stripe: %v", err)
+				}
+
+				snap := getSnapshot(snapshotPath)
+				snap.Namespace = f.UniqueName
+				snap.Spec.Source.PersistentVolumeClaimName = &pvc.Name
+
+				err = createSnapshot(&snap, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to create snapshot: %v", err)
+				}
+				// validate created backend rbd images
+				// parent PVC + snapshot
+				totalImages := 2
+				validateRBDImageCount(f, totalImages, defaultRBDPool)
+				validateOmapCount(f, 1, rbdType, defaultRBDPool, volumesType)
+				validateOmapCount(f, 1, rbdType, defaultRBDPool, snapsType)
+				pvcClone, err := loadPVC(pvcClonePath)
+				if err != nil {
+					e2elog.Failf("failed to load PVC: %v", err)
+				}
+
+				// create clone PVC as ROX
+				pvcClone.Namespace = f.UniqueName
+				pvcClone.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadOnlyMany}
+				err = createPVCAndvalidatePV(f.ClientSet, pvcClone, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to create PVC: %v", err)
+				}
+				// validate created backend rbd images
+				// parent pvc + snapshot + clone
+				totalImages = 3
+				validateRBDImageCount(f, totalImages, defaultRBDPool)
+				validateOmapCount(f, 2, rbdType, defaultRBDPool, volumesType)
+				validateOmapCount(f, 1, rbdType, defaultRBDPool, snapsType)
+				err = validateStripe(f, pvcClone, stripeUnit, stripeCount, objectSize)
+				if err != nil {
+					e2elog.Failf("failed to validate stripe for clone: %v", err)
+				}
+				// delete snapshot
+				err = deleteSnapshot(&snap, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to delete snapshot: %v", err)
+				}
+				// delete clone pvc
+				err = deletePVCAndValidatePV(f.ClientSet, pvcClone, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to delete PVC: %v", err)
+				}
+
+				pvcSmartClone, err := loadPVC(pvcSmartClonePath)
+				if err != nil {
+					e2elog.Failf("failed to load pvcSmartClone: %v", err)
+				}
+				pvcSmartClone.Namespace = f.UniqueName
+
+				err = createPVCAndvalidatePV(f.ClientSet, pvcSmartClone, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to create pvc: %v", err)
+				}
+				// validate created backend rbd images
+				// parent pvc + temp clone + clone
+				totalImages = 3
+				validateRBDImageCount(f, totalImages, defaultRBDPool)
+				validateOmapCount(f, 2, rbdType, defaultRBDPool, volumesType)
+				err = validateStripe(f, pvcSmartClone, stripeUnit, stripeCount, objectSize)
+				if err != nil {
+					e2elog.Failf("failed to validate stripe for clone: %v", err)
+				}
+				// delete parent pvc
+				err = deletePVCAndValidatePV(f.ClientSet, pvc, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to delete PVC: %v", err)
+				}
+
+				// delete clone pvc
+				err = deletePVCAndValidatePV(f.ClientSet, pvcSmartClone, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to delete PVC: %v", err)
+				}
+				// validate that no backend rbd images or volume omap entries are left behind
+				validateRBDImageCount(f, 0, defaultRBDPool)
+				validateOmapCount(f, 0, rbdType, defaultRBDPool, volumesType)
+			})
+
 			// Make sure this should be last testcase in this file, because
 			// it deletes pool
 			By("Create a PVC and delete PVC when backend pool deleted", func() {
diff --git a/e2e/rbd_helper.go b/e2e/rbd_helper.go
index c897c9248c24..d30e5f0be140 100644
--- a/e2e/rbd_helper.go
+++
b/e2e/rbd_helper.go
@@ -942,3 +942,69 @@ func waitToRemoveImagesFromTrash(f *framework.Framework, poolName string, t int)
 
 	return err
 }
+
+// imageInfo holds the fields of the `rbd info --format json` output that the
+// e2e tests compare against.
+type imageInfo struct {
+	Name        string `json:"name"`
+	StripeUnit  int    `json:"stripe_unit"`
+	StripeCount int    `json:"stripe_count"`
+	ObjectSize  int    `json:"object_size"`
+}
+
+// getImageInfo asks rbd (through execCommandInToolBoxPod) for the metadata of
+// imageName in poolName and decodes the JSON reply. A failing command or any
+// output on stderr is returned as an error.
+func getImageInfo(f *framework.Framework, imageName, poolName string) (imageInfo, error) {
+	var info imageInfo
+
+	// rbd --format=json info [image-spec | snap-spec]
+	cmd := fmt.Sprintf("rbd info %s %s --format json", rbdOptions(poolName), imageName)
+	stdOut, stdErr, err := execCommandInToolBoxPod(f, cmd, rookNamespace)
+	if err != nil {
+		return info, fmt.Errorf("failed to get rbd info: %w", err)
+	}
+	if stdErr != "" {
+		return info, fmt.Errorf("failed to get rbd info: %v", stdErr)
+	}
+
+	if err = json.Unmarshal([]byte(stdOut), &info); err != nil {
+		return info, fmt.Errorf("unmarshal failed: %w. raw buffer response: %s",
+			err, stdOut)
+	}
+
+	return info, nil
+}
+
+// validateStripe validates the stripe count, stripe unit and object size of the
+// image.
+func validateStripe(f *framework.Framework,
+	pvc *v1.PersistentVolumeClaim,
+	stripeUnit,
+	stripeCount,
+	objectSize int,
+) error {
+	// look up the rbd image backing the PVC, then read its layout from rbd
+	imageData, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f)
+	if err != nil {
+		return err
+	}
+
+	actual, err := getImageInfo(f, imageData.imageName, defaultRBDPool)
+	if err != nil {
+		return err
+	}
+
+	// the first mismatch wins; the checks run in the order
+	// objectSize, stripeUnit, stripeCount
+	switch {
+	case actual.ObjectSize != objectSize:
+		return fmt.Errorf("objectSize %d does not match expected %d", actual.ObjectSize, objectSize)
+	case actual.StripeUnit != stripeUnit:
+		return fmt.Errorf("stripeUnit %d does not match expected %d", actual.StripeUnit, stripeUnit)
+	case actual.StripeCount != stripeCount:
+		return fmt.Errorf("stripeCount %d does not match expected %d", actual.StripeCount, stripeCount)
+	}
+
+	return nil
+}
diff --git a/examples/rbd/storageclass.yaml b/examples/rbd/storageclass.yaml
index 94c9413e13c3..a30114667ac6 100644
--- a/examples/rbd/storageclass.yaml
+++ b/examples/rbd/storageclass.yaml
@@ -134,6 +134,14 @@ parameters:
   #       {"domainLabel":"zone","value":"zone1"}]}
   #   ]
 
+  # Image striping. Refer to https://docs.ceph.com/en/latest/man/8/rbd/#striping
+  # for more details.
+  # (optional) stripe unit in bytes.
+  # stripeUnit: <>
+  # (optional) objects to stripe over before looping.
+  # stripeCount: <>
+  # (optional) The object size in bytes.
+  # objectSize: <>
 reclaimPolicy: Delete
 allowVolumeExpansion: true
 mountOptions:
diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go
index 6e679630908b..b67760ddbbd6 100644
--- a/internal/rbd/controllerserver.go
+++ b/internal/rbd/controllerserver.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"strconv"
 
 	csicommon "github.com/ceph/ceph-csi/internal/csi-common"
 	"github.com/ceph/ceph-csi/internal/util"
@@ -94,6 +95,60 @@ func (cs *ControllerServer) validateVolumeReq(ctx context.Context, req *csi.Crea
 		return err
 	}
 
+	err = validateStriping(req.Parameters)
+	if err != nil {
+		return status.Error(codes.InvalidArgument, err.Error())
+	}
+
+	return nil
+}
+
+// validateStriping checks the optional image striping parameters of a
+// CreateVolume request. An empty value is treated like a missing one.
+//
+// It returns an error when only one of stripeUnit and stripeCount is set,
+// when either of them is not a base-10 unsigned integer, or when objectSize
+// is set but is not an unsigned integer that is a power of 2.
+func validateStriping(parameters map[string]string) error {
+	stripeUnit := parameters["stripeUnit"]
+	stripeCount := parameters["stripeCount"]
+	if stripeUnit != "" && stripeCount == "" {
+		return errors.New("stripeCount must be specified when stripeUnit is specified")
+	}
+
+	if stripeUnit == "" && stripeCount != "" {
+		return errors.New("stripeUnit must be specified when stripeCount is specified")
+	}
+
+	// Reject malformed numbers here so the request fails as an invalid argument
+	// instead of failing later when the volume options are parsed.
+	if stripeUnit != "" {
+		if _, err := strconv.ParseUint(stripeUnit, 10, 64); err != nil {
+			return fmt.Errorf("failed to parse stripeUnit %s: %w", stripeUnit, err)
+		}
+		if _, err := strconv.ParseUint(stripeCount, 10, 64); err != nil {
+			return fmt.Errorf("failed to parse stripeCount %s: %w", stripeCount, err)
+		}
+	}
+
+	objectSize := parameters["objectSize"]
+	if objectSize != "" {
+		objSize, err := strconv.ParseUint(objectSize, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse objectSize %s: %w", objectSize, err)
+		}
+		// check objectSize is power of 2
+		/*
+			Take 2^3=8 for example.
+			x & (x-1)
+			8 & 7
+			1000 & 0111 = 0000
+		*/
+		if objSize == 0 || (objSize&(objSize-1)) != 0 {
+			return fmt.Errorf("objectSize %s is not power of 2", objectSize)
+		}
+	}
+
 	return nil
 }
 
diff --git a/internal/rbd/controllerserver_test.go b/internal/rbd/controllerserver_test.go
new file mode 100644
index 000000000000..7570c8ab68aa
--- /dev/null
+++ b/internal/rbd/controllerserver_test.go
@@ -0,0 +1,106 @@
+/*
+Copyright 2022 The Ceph-CSI Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package rbd
+
+import "testing"
+
+// TestValidateStriping covers the pairing, numeric and power-of-two checks
+// performed by validateStriping.
+func TestValidateStriping(t *testing.T) {
+	t.Parallel()
+	tests := []struct {
+		name       string
+		parameters map[string]string
+		wantErr    bool
+	}{
+		{
+			name: "when stripeUnit is not specified",
+			parameters: map[string]string{
+				"stripeUnit":  "",
+				"stripeCount": "10",
+				"objectSize":  "2",
+			},
+			wantErr: true,
+		},
+		{
+			name: "when stripeCount is not specified",
+			parameters: map[string]string{
+				"stripeUnit":  "4096",
+				"stripeCount": "",
+				"objectSize":  "2",
+			},
+			wantErr: true,
+		},
+		{
+			name: "when objectSize is not power of 2",
+			parameters: map[string]string{
+				"stripeUnit":  "4096",
+				"stripeCount": "8",
+				"objectSize":  "3",
+			},
+			wantErr: true,
+		},
+		{
+			name: "when objectSize is 0",
+			parameters: map[string]string{
+				"stripeUnit":  "4096",
+				"stripeCount": "8",
+				"objectSize":  "0",
+			},
+			wantErr: true,
+		},
+		{
+			name: "when stripeUnit is not a number",
+			parameters: map[string]string{
+				"stripeUnit":  "4k",
+				"stripeCount": "8",
+			},
+			wantErr: true,
+		},
+		{
+			name: "when stripeCount is not a number",
+			parameters: map[string]string{
+				"stripeUnit":  "4096",
+				"stripeCount": "-8",
+			},
+			wantErr: true,
+		},
+		{
+			name: "when valid stripe parameters are specified",
+			parameters: map[string]string{
+				"stripeUnit":  "4096",
+				"stripeCount": "8",
+				"objectSize":  "131072",
+			},
+			wantErr: false,
+		},
+		{
+			name:       "when no stripe parameters are specified",
+			parameters: map[string]string{},
+			wantErr:    false,
+		},
+	}
+	for _, tt := range tests {
+		ts := tt
+		t.Run(ts.name, func(t *testing.T) {
+			t.Parallel()
+			if err := validateStriping(ts.parameters); (err != nil) != ts.wantErr {
+				t.Errorf("validateStriping() error = %v, wantErr %v", err, ts.wantErr)
+			}
+		})
+	}
+}
diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go
index ed48bc43ac72..5f26b25793bd 100644
--- a/internal/rbd/rbd_util.go
+++
b/internal/rbd/rbd_util.go
@@ -21,6 +21,7 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
+	"math"
 	"os"
 	"path/filepath"
 	"strconv"
@@ -99,6 +100,11 @@ type rbdImage struct {
 	// VolSize is the size of the RBD image backing this rbdImage.
 	VolSize int64
 
+	// image striping configurations; a zero value is not passed to librbd.
+	StripeCount uint64
+	StripeUnit  uint64
+	ObjectSize  uint64
+
 	Monitors string
 	// JournalPool is the ceph pool in which the CSI Journal/CSI snapshot Journal is
 	// stored
@@ -408,27 +414,19 @@ func (rs *rbdSnapshot) String() string {
 // createImage creates a new ceph image with provision and volume options.
 func createImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) error {
 	volSzMiB := fmt.Sprintf("%dM", util.RoundOffVolSize(pOpts.VolSize))
-	options := librbd.NewRbdImageOptions()
 
-	logMsg := "rbd: create %s size %s (features: %s) using mon %s"
-	if pOpts.DataPool != "" {
-		logMsg += fmt.Sprintf(", data pool %s", pOpts.DataPool)
-		err := options.SetString(librbd.RbdImageOptionDataPool, pOpts.DataPool)
-		if err != nil {
-			return fmt.Errorf("failed to set data pool: %w", err)
-		}
-	}
-	log.DebugLog(ctx, logMsg,
+	log.DebugLog(ctx, "rbd: create %s size %s (features: %s) using mon %s",
 		pOpts, volSzMiB, pOpts.ImageFeatureSet.Names(), pOpts.Monitors)
 
-	if pOpts.ImageFeatureSet != 0 {
-		err := options.SetUint64(librbd.RbdImageOptionFeatures, uint64(pOpts.ImageFeatureSet))
-		if err != nil {
-			return fmt.Errorf("failed to set image features: %w", err)
-		}
+	options := librbd.NewRbdImageOptions()
+	defer options.Destroy()
+
+	err := pOpts.setImageOptions(ctx, options)
+	if err != nil {
+		return err
 	}
 
-	err := pOpts.Connect(cr)
+	err = pOpts.Connect(cr)
 	if err != nil {
 		return err
 	}
@@ -1280,9 +1278,40 @@ func genVolFromVolumeOptions(
 		rbdVol.Mounter)
 	rbdVol.DisableInUseChecks = disableInUseChecks
 
+	err = rbdVol.setStripeConfiguration(volOptions)
+	if err != nil {
+		return nil, err
+	}
+
 	return rbdVol, nil
 }
 
+// setStripeConfiguration parses the optional striping parameters into ri;
+// empty or absent values are skipped, matching validateStriping.
+func (ri *rbdImage) setStripeConfiguration(options map[string]string) error {
+	fields := []struct {
+		key string
+		dst *uint64
+	}{
+		{"stripeUnit", &ri.StripeUnit},
+		{"stripeCount", &ri.StripeCount},
+		{"objectSize", &ri.ObjectSize},
+	}
+	for _, field := range fields {
+		val := options[field.key]
+		if val == "" {
+			continue // "" means "not set"; the field keeps its zero value
+		}
+		parsed, err := strconv.ParseUint(val, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse %s %s: %w", field.key, val, err)
+		}
+		*field.dst = parsed
+	}
+
+	return nil
+}
+
 func (rv *rbdVolume) validateImageFeatures(imageFeatures string) error {
 	// It is possible for image features to be an empty string which
 	// the Go split function would return a single item array with
@@ -1384,7 +1413,8 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(
 	parentVol *rbdVolume,
 ) error {
 	var err error
-	logMsg := "rbd: clone %s %s (features: %s) using mon %s"
+	log.DebugLog(ctx, "rbd: clone %s %s (features: %s) using mon %s",
+		pSnapOpts, rv, rv.ImageFeatureSet.Names(), rv.Monitors)
 
 	err = parentVol.openIoctx()
 	if err != nil {
@@ -1397,30 +1427,15 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(
 
 	options := librbd.NewRbdImageOptions()
 	defer options.Destroy()
-
-	if rv.DataPool != "" {
-		logMsg += fmt.Sprintf(", data pool %s", rv.DataPool)
-		err = options.SetString(librbd.RbdImageOptionDataPool, rv.DataPool)
-		if err != nil {
-			return fmt.Errorf("failed to set data pool: %w", err)
-		}
-	}
-
-	log.DebugLog(ctx, logMsg,
-		pSnapOpts, rv, rv.ImageFeatureSet.Names(), rv.Monitors)
-
-	if rv.ImageFeatureSet != 0 {
-		err = options.SetUint64(librbd.RbdImageOptionFeatures, uint64(rv.ImageFeatureSet))
-		if err != nil {
-			return fmt.Errorf("failed to set image features: %w", err)
-		}
+	err = rv.setImageOptions(ctx, options)
+	if err != nil {
+		return err
 	}
 
 	err = options.SetUint64(librbd.ImageOptionCloneFormat, 2)
 	if err != nil {
-		return fmt.Errorf("failed to
set image features: %w", err)
+		return err
 	}
-
 	// As the clone is yet to be created, open the Ioctx.
 	err = rv.openIoctx()
 	if err != nil {
@@ -1461,6 +1476,52 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(
 	return nil
 }
 
+// setImageOptions copies the create-time settings of rv into options: the
+// data pool, the image feature set, striping (stripe count and stripe unit)
+// and the object size. Unset values (empty data pool, zero feature set, zero
+// stripe count, zero object size) are not written to options. The applied
+// settings are reported in a single debug log message.
+func (rv *rbdVolume) setImageOptions(ctx context.Context, options *librbd.ImageOptions) error {
+	logMsg := fmt.Sprintf("setting image options on %s", rv)
+	if rv.DataPool != "" {
+		logMsg += fmt.Sprintf(", data pool %s", rv.DataPool)
+		if err := options.SetString(librbd.RbdImageOptionDataPool, rv.DataPool); err != nil {
+			return fmt.Errorf("failed to set data pool: %w", err)
+		}
+	}
+
+	if rv.ImageFeatureSet != 0 {
+		if err := options.SetUint64(librbd.RbdImageOptionFeatures, uint64(rv.ImageFeatureSet)); err != nil {
+			return fmt.Errorf("failed to set image features: %w", err)
+		}
+	}
+
+	// stripe count and stripe unit are always set as a pair, keyed on the stripe count.
+	if rv.StripeCount != 0 {
+		logMsg += fmt.Sprintf(", stripe count %d, stripe unit %d", rv.StripeCount, rv.StripeUnit)
+		if err := options.SetUint64(librbd.RbdImageOptionStripeCount, rv.StripeCount); err != nil {
+			return fmt.Errorf("failed to set stripe count: %w", err)
+		}
+		if err := options.SetUint64(librbd.RbdImageOptionStripeUnit, rv.StripeUnit); err != nil {
+			return fmt.Errorf("failed to set stripe unit: %w", err)
+		}
+	}
+
+	if rv.ObjectSize != 0 {
+		// the object size is handed over as an order, i.e. log2 of the size in bytes.
+		order := uint64(math.Log2(float64(rv.ObjectSize)))
+		logMsg += fmt.Sprintf(", object size %d, order %d", rv.ObjectSize, order)
+		if err := options.SetUint64(librbd.RbdImageOptionOrder, order); err != nil {
+			return fmt.Errorf("failed to set object size: %w", err)
+		}
+	}
+
+	// logMsg embeds pool and image names, so it must not be used as the format string.
+	log.DebugLog(ctx, "%s", logMsg)
+
+	return nil
+}
+
 // getImageInfo queries rbd about the given image and returns its metadata, and returns
 // ErrImageNotFound if provided image is not found.
 func (ri *rbdImage) getImageInfo() error {