From 2597f683a922ae6e3c86e059e41646628e6267f1 Mon Sep 17 00:00:00 2001
From: Madhu Rajanna
Date: Tue, 24 May 2022 09:08:42 +0530
Subject: [PATCH] rbd: add support for rbd striping

RBD supports creating rbd images with object size, stripe unit and
stripe count to support striping. This PR adds the support for the same.

More details about striping at
https://docs.ceph.com/en/quincy/man/8/rbd/#striping

fixes: #3124

Signed-off-by: Madhu Rajanna
---
 e2e/rbd.go                       | 148 +++++++++++++++++++++++++++++++
 e2e/rbd_helper.go                |  61 +++++++++++++
 examples/rbd/storageclass.yaml   |  10 ++-
 internal/rbd/controllerserver.go |  19 +++++
 internal/rbd/rbd_util.go         |  91 ++++++++++++++++++--
 5 files changed, 321 insertions(+), 8 deletions(-)

diff --git a/e2e/rbd.go b/e2e/rbd.go
index 2b0c14d0108c..279672e015c1 100644
--- a/e2e/rbd.go
+++ b/e2e/rbd.go
@@ -4041,6 +4041,154 @@ var _ = Describe("RBD", func() {
 				})
 			})
 
+			By("validate rbd image strip", func() {
+				stripUnit := 4096
+				stripCount := 8
+				objectSize := 16
+				err := deleteResource(rbdExamplePath + "storageclass.yaml")
+				if err != nil {
+					e2elog.Failf("failed to delete storageclass: %v", err)
+				}
+
+				err = createRBDStorageClass(
+					f.ClientSet,
+					f,
+					defaultSCName,
+					nil,
+					map[string]string{
+						"stripUnit":  fmt.Sprintf("%d", stripUnit),
+						"stripCount": fmt.Sprintf("%d", stripCount),
+						"objectSize": fmt.Sprintf("%d", objectSize),
+					},
+					deletePolicy)
+				if err != nil {
+					e2elog.Failf("failed to create storageclass: %v", err)
+				}
+				defer func() {
+					err = deleteResource(rbdExamplePath + "storageclass.yaml")
+					if err != nil {
+						e2elog.Failf("failed to delete storageclass: %v", err)
+					}
+					err = createRBDStorageClass(f.ClientSet, f, defaultSCName, nil, nil, deletePolicy)
+					if err != nil {
+						e2elog.Failf("failed to create storageclass: %v", err)
+					}
+				}()
+
+				err = createRBDSnapshotClass(f)
+				if err != nil {
+					e2elog.Failf("failed to create VolumeSnapshotClass: %v", err)
+				}
+				defer func() {
+					err = deleteRBDSnapshotClass()
+					if err != nil {
+						e2elog.Failf("failed to delete VolumeSnapshotClass: %v", err)
+					}
+				}()
+
+				// create PVC and bind it to an app
+				pvc, err := loadPVC(pvcPath)
+				if err != nil {
+					e2elog.Failf("failed to load PVC: %v", err)
+				}
+
+				pvc.Namespace = f.UniqueName
+
+				err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to create PVC and application: %v", err)
+				}
+				// validate created backend rbd images
+				validateRBDImageCount(f, 1, defaultRBDPool)
+				validateOmapCount(f, 1, rbdType, defaultRBDPool, volumesType)
+				err = validateStrip(f, pvc, stripUnit, stripCount, objectSize)
+				if err != nil {
+					e2elog.Failf("failed to validate strip %v", err)
+				}
+
+				snap := getSnapshot(snapshotPath)
+				snap.Namespace = f.UniqueName
+				snap.Spec.Source.PersistentVolumeClaimName = &pvc.Name
+
+				err = createSnapshot(&snap, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to create snapshot: %v", err)
+				}
+				// validate created backend rbd images
+				// parent PVC + snapshot
+				totalImages := 2
+				validateRBDImageCount(f, totalImages, defaultRBDPool)
+				validateOmapCount(f, 1, rbdType, defaultRBDPool, volumesType)
+				validateOmapCount(f, 1, rbdType, defaultRBDPool, snapsType)
+				pvcClone, err := loadPVC(pvcClonePath)
+				if err != nil {
+					e2elog.Failf("failed to load PVC: %v", err)
+				}
+
+				// create clone PVC as ROX
+				pvcClone.Namespace = f.UniqueName
+				pvcClone.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadOnlyMany}
+				err = createPVCAndvalidatePV(f.ClientSet, pvcClone, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to create PVC: %v", err)
+				}
+				// validate created backend rbd images
+				// parent pvc+ snapshot + clone
+				totalImages = 3
+				validateRBDImageCount(f, totalImages, defaultRBDPool)
+				validateOmapCount(f, 2, rbdType, defaultRBDPool, volumesType)
+				validateOmapCount(f, 1, rbdType, defaultRBDPool, snapsType)
+				err = validateStrip(f, pvcClone, stripUnit, stripCount, objectSize)
+				if err != nil {
+					e2elog.Failf("failed to validate strip for clone %v", err)
+				}
+				// delete snapshot
+				err = deleteSnapshot(&snap, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to delete snapshot: %v", err)
+				}
+
+				// delete the snapshot-backed clone; the counts below expect only the parent to remain
+				err = deletePVCAndValidatePV(f.ClientSet, pvcClone, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to delete PVC: %v", err)
+				}
+
+				pvcSmartClone, err := loadPVC(pvcSmartClonePath)
+				if err != nil {
+					e2elog.Failf("failed to load pvcSmartClone: %v", err)
+				}
+				pvcSmartClone.Namespace = f.UniqueName
+
+				err = createPVCAndvalidatePV(f.ClientSet, pvcSmartClone, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to create pvc: %v", err)
+				}
+				// validate created backend rbd images
+				// parent pvc + temp clone + clone
+				totalImages = 3
+				validateRBDImageCount(f, totalImages, defaultRBDPool)
+				validateOmapCount(f, 2, rbdType, defaultRBDPool, volumesType)
+				err = validateStrip(f, pvcSmartClone, stripUnit, stripCount, objectSize)
+				if err != nil {
+					e2elog.Failf("failed to validate strip for clone %v", err)
+				}
+				// delete parent pvc
+				err = deletePVCAndValidatePV(f.ClientSet, pvc, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to delete PVC: %v", err)
+				}
+
+				// delete clone pvc
+				err = deletePVCAndValidatePV(f.ClientSet, pvcSmartClone, deployTimeout)
+				if err != nil {
+					e2elog.Failf("failed to delete PVC: %v", err)
+				}
+				// validate created backend rbd images
+				validateRBDImageCount(f, 0, defaultRBDPool)
+				validateOmapCount(f, 0, rbdType, defaultRBDPool, volumesType)
+			})
+
 			// Make sure this should be last testcase in this file, because
 			// it deletes pool
 			By("Create a PVC and delete PVC when backend pool deleted", func() {
diff --git a/e2e/rbd_helper.go b/e2e/rbd_helper.go
index 8dbcfb5a2896..9ae950caf14d 100644
--- a/e2e/rbd_helper.go
+++ b/e2e/rbd_helper.go
@@ -940,3 +940,64 @@ func waitToRemoveImagesFromTrash(f *framework.Framework, poolName string, t int)
 
 	return err
 }
+
+// imageInfo strongly typed JSON spec for image info.
+type imageInfo struct {
+	ObjectUUID  string `json:"name"`
+	Size        int64  `json:"size"`
+	Format      int64  `json:"format"`
+	StripeUnit  int    `json:"stripe_unit"`
+	StripeCount int    `json:"stripe_count"`
+	Order       int    `json:"order"`
+}
+
+// getImageInfo runs "rbd info" for the given image and returns the decoded metadata. It
+// returns an error if the command fails, writes to stderr or its output is not valid JSON.
+func getImageInfo(f *framework.Framework, imageName, poolName string) (imageInfo, error) {
+	// rbd --format=json info [image-spec | snap-spec]
+	var imgInfo imageInfo
+
+	stdOut, stdErr, err := execCommandInToolBoxPod(
+		f,
+		fmt.Sprintf("rbd info %s %s --format json", rbdOptions(poolName), imageName),
+		rookNamespace)
+	if err != nil {
+		return imgInfo, fmt.Errorf("failed to get rbd info %w", err)
+	}
+	if stdErr != "" {
+		return imgInfo, fmt.Errorf("failed to get rbd info %v", stdErr)
+	}
+	err = json.Unmarshal([]byte(stdOut), &imgInfo)
+	if err != nil {
+		return imgInfo, fmt.Errorf("unmarshal failed: %w. raw buffer response: %s",
+			err, stdOut)
+	}
+
+	return imgInfo, nil
+}
+
+func validateStrip(f *framework.Framework, pvc *v1.PersistentVolumeClaim, stripUnit, stripCount, objectSize int) error {
+	imageData, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f)
+	if err != nil {
+		return err
+	}
+
+	imgInfo, err := getImageInfo(f, imageData.imageName, defaultRBDPool)
+	if err != nil {
+		return err
+	}
+
+	if imgInfo.Order != objectSize {
+		return fmt.Errorf("objectSize %d does not match expected %d", imgInfo.Order, objectSize)
+	}
+
+	if imgInfo.StripeUnit != stripUnit {
+		return fmt.Errorf("stripUnit %d does not match expected %d", imgInfo.StripeUnit, stripUnit)
+	}
+
+	if imgInfo.StripeCount != stripCount {
+		return fmt.Errorf("stripCount %d does not match expected %d", imgInfo.StripeCount, stripCount)
+	}
+
+	return nil
+}
diff --git a/examples/rbd/storageclass.yaml b/examples/rbd/storageclass.yaml
index 94c9413e13c3..2a35208cef1d 100644
--- a/examples/rbd/storageclass.yaml
+++ b/examples/rbd/storageclass.yaml
@@ -133,7 +133,15 @@ parameters:
    #      {"domainLabel":"region","value":"west"},
    #      {"domainLabel":"zone","value":"zone1"}]}
    #    ]
-
+
+   # Image striping, refer to https://docs.ceph.com/en/latest/man/8/rbd/#striping
+   # for more details.
+   # (optional) stripe unit in bytes.
+   # stripUnit: <>
+   # (optional) objects to stripe over before looping.
+   # stripCount: <>
+   # (optional) object size given as the RBD image order (objects are 2^order bytes).
+   # objectSize: <>
 reclaimPolicy: Delete
 allowVolumeExpansion: true
 mountOptions:
diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go
index f002896f5749..8ad3080fba44 100644
--- a/internal/rbd/controllerserver.go
+++ b/internal/rbd/controllerserver.go
@@ -91,6 +91,25 @@ func (cs *ControllerServer) validateVolumeReq(ctx context.Context, req *csi.Crea
 		return err
 	}
 
+	err = validateStriping(req.Parameters)
+	if err != nil {
+		return status.Error(codes.InvalidArgument, err.Error())
+	}
+
+	return nil
+}
+
+func validateStriping(parameters map[string]string) error {
+	stripUnit := parameters["stripUnit"]
+	stripCount := parameters["stripCount"]
+	if stripUnit != "" && stripCount == "" {
+		return errors.New("stripCount must be specified when stripUnit is specified")
+	}
+
+	if stripUnit == "" && stripCount != "" {
+		return errors.New("stripUnit must be specified when stripCount is specified")
+	}
+
 	return nil
 }
 
diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go
index ce60e3ee31ab..873da871d218 100644
--- a/internal/rbd/rbd_util.go
+++ b/internal/rbd/rbd_util.go
@@ -96,6 +96,11 @@ type rbdImage struct {
 	// VolSize is the size of the RBD image backing this rbdImage.
 	VolSize int64
 
+	// image striping configurations.
+	StripCount uint64
+	StripUnit  uint64
+	ObjectSize int64
+
 	Monitors string
 	// JournalPool is the ceph pool in which the CSI Journal/CSI snapshot Journal is
 	// stored
@@ -404,7 +409,8 @@ func createImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) er
 	volSzMiB := fmt.Sprintf("%dM", util.RoundOffVolSize(pOpts.VolSize))
 	options := librbd.NewRbdImageOptions()
 
-	logMsg := "rbd: create %s size %s (features: %s) using mon %s"
+	logMsg := fmt.Sprintf("rbd: create %s size %s (features: %s) using mon %s",
+		pOpts, volSzMiB, pOpts.ImageFeatureSet.Names(), pOpts.Monitors)
 	if pOpts.DataPool != "" {
 		logMsg += fmt.Sprintf(", data pool %s", pOpts.DataPool)
 		err := options.SetString(librbd.RbdImageOptionDataPool, pOpts.DataPool)
@@ -412,8 +418,6 @@ func createImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) er
 			return fmt.Errorf("failed to set data pool: %w", err)
 		}
 	}
-	log.DebugLog(ctx, logMsg,
-		pOpts, volSzMiB, pOpts.ImageFeatureSet.Names(), pOpts.Monitors)
 
 	if pOpts.ImageFeatureSet != 0 {
 		err := options.SetUint64(librbd.RbdImageOptionFeatures, uint64(pOpts.ImageFeatureSet))
@@ -422,6 +426,28 @@ func createImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) er
 		}
 	}
 
+	if pOpts.StripCount != 0 {
+		logMsg += fmt.Sprintf(", strip count %d, strip unit %d", pOpts.StripCount, pOpts.StripUnit)
+		err := options.SetUint64(librbd.RbdImageOptionStripeCount, pOpts.StripCount)
+		if err != nil {
+			return fmt.Errorf("failed to set strip count: %w", err)
+		}
+		err = options.SetUint64(librbd.RbdImageOptionStripeUnit, pOpts.StripUnit)
+		if err != nil {
+			return fmt.Errorf("failed to set strip unit: %w", err)
+		}
+	}
+
+	if pOpts.ObjectSize != 0 {
+		logMsg += fmt.Sprintf(", object size %d", pOpts.ObjectSize)
+		err := options.SetUint64(librbd.RbdImageOptionOrder, uint64(pOpts.ObjectSize))
+		if err != nil {
+			return fmt.Errorf("failed to set object size: %w", err)
+		}
+	}
+
+	log.DebugLog(ctx, "%s", logMsg)
+
 	err := pOpts.Connect(cr)
 	if err != nil {
 		return err
@@ -1267,9 +1293,40 @@ func genVolFromVolumeOptions(
 		rbdVol.Mounter)
 	rbdVol.DisableInUseChecks = disableInUseChecks
 
+	err = rbdVol.setStripConfiguration(volOptions)
+	if err != nil {
+		return nil, err
+	}
+
 	return rbdVol, nil
 }
 
+func (ri *rbdImage) setStripConfiguration(options map[string]string) error {
+	var err error
+	if val := options["stripUnit"]; val != "" {
+		ri.StripUnit, err = strconv.ParseUint(val, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse stripUnit %s: %w", val, err)
+		}
+	}
+
+	if val := options["stripCount"]; val != "" {
+		ri.StripCount, err = strconv.ParseUint(val, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse stripCount %s: %w", val, err)
+		}
+	}
+
+	if val := options["objectSize"]; val != "" {
+		ri.ObjectSize, err = strconv.ParseInt(val, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse objectSize %s: %w", val, err)
+		}
+	}
+
+	return nil
+}
+
 func (rv *rbdVolume) validateImageFeatures(imageFeatures string) error {
 	// It is possible for image features to be an empty string which
 	// the Go split function would return a single item array with
@@ -1370,7 +1427,8 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(
 	pSnapOpts *rbdSnapshot,
 	parentVol *rbdVolume) error {
 	var err error
-	logMsg := "rbd: clone %s %s (features: %s) using mon %s"
+	logMsg := fmt.Sprintf("rbd: clone %s %s (features: %s) using mon %s",
+		pSnapOpts, rv, rv.ImageFeatureSet.Names(), rv.Monitors)
 
 	err = parentVol.openIoctx()
 	if err != nil {
@@ -1392,9 +1450,6 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(
 		}
 	}
 
-	log.DebugLog(ctx, logMsg,
-		pSnapOpts, rv, rv.ImageFeatureSet.Names(), rv.Monitors)
-
 	if rv.ImageFeatureSet != 0 {
 		err = options.SetUint64(librbd.RbdImageOptionFeatures, uint64(rv.ImageFeatureSet))
 		if err != nil {
@@ -1407,6 +1462,28 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot(
 		return fmt.Errorf("failed to set image features: %w", err)
 	}
 
+	if rv.StripCount != 0 {
+		logMsg += fmt.Sprintf(", strip count %d, strip unit %d", rv.StripCount, rv.StripUnit)
+		err = options.SetUint64(librbd.RbdImageOptionStripeCount, rv.StripCount)
+		if err != nil {
+			return fmt.Errorf("failed to set strip count: %w", err)
+		}
+		err = options.SetUint64(librbd.RbdImageOptionStripeUnit, rv.StripUnit)
+		if err != nil {
+			return fmt.Errorf("failed to set strip unit: %w", err)
+		}
+	}
+
+	if rv.ObjectSize != 0 {
+		logMsg += fmt.Sprintf(", object size %d", rv.ObjectSize)
+		err = options.SetUint64(librbd.RbdImageOptionOrder, uint64(rv.ObjectSize))
+		if err != nil {
+			return fmt.Errorf("failed to set object size: %w", err)
+		}
+	}
+
+	log.DebugLog(ctx, "%s", logMsg)
+
 	// As the clone is yet to be created, open the Ioctx.
 	err = rv.openIoctx()
 	if err != nil {