From 9614bddd69cce8d5f12d05810d693238216906f3 Mon Sep 17 00:00:00 2001 From: Praveen M Date: Thu, 7 Sep 2023 09:44:08 +0530 Subject: [PATCH 1/2] helm: add option to enable read affinity for rbd This commit adds --enable-read-affinity flag to enable read affinity for rbd Signed-off-by: Praveen M --- charts/ceph-csi-rbd/README.md | 2 ++ .../ceph-csi-rbd/templates/nodeplugin-daemonset.yaml | 4 ++++ charts/ceph-csi-rbd/values.yaml | 11 +++++++++++ scripts/install-helm.sh | 2 +- 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/charts/ceph-csi-rbd/README.md b/charts/ceph-csi-rbd/README.md index 4bb4dcd4b09..165d2bf8f9f 100644 --- a/charts/ceph-csi-rbd/README.md +++ b/charts/ceph-csi-rbd/README.md @@ -176,6 +176,8 @@ charts and their default values. | `provisioner.podSecurityPolicy.enabled` | Specifies whether podSecurityPolicy is enabled | `false` | | `topology.enabled` | Specifies whether topology based provisioning support should be exposed by CSI | `false` | | `topology.domainLabels` | DomainLabels define which node labels to use as domains for CSI nodeplugins to advertise their domains | `{}` | +| `readAffinity.enabled` | Enable read affinity for RBD volumes. Recommended to set to true if running kernel 5.8 or newer. | `false` | +| `readAffinity.crushLocationLabels` | Define which node labels to use as CRUSH location. This should correspond to the values set in the CRUSH map. For more information, click [here](https://github.com/ceph/ceph-csi/blob/v3.9.0/docs/deploy-rbd.md#read-affinity-using-crush-locations-for-rbd-volumes)| `[]` | | `provisionerSocketFile` | The filename of the provisioner socket | `csi-provisioner.sock` | | `pluginSocketFile` | The filename of the plugin socket | `csi.sock` | | `kubeletDir` | kubelet working directory | `/var/lib/kubelet` | diff --git a/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml b/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml index a1744a8954a..f71286c489f 100644 --- a/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml +++ b/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml @@ -86,6 +86,10 @@ spec: {{- end }} {{- if .Values.nodeplugin.profiling.enabled }} - "--enableprofiling={{ .Values.nodeplugin.profiling.enabled }}" +{{- end }} + - "--enable-read-affinity={{ .Values.readAffinity.enabled }}" +{{- if .Values.readAffinity.enabled }} + - "--crush-location-labels={{ .Values.readAffinity.crushLocationLabels | join "," }}" {{- end }} env: - name: POD_IP diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml index e320b736fb4..f263313847b 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ b/charts/ceph-csi-rbd/values.yaml @@ -279,6 +279,17 @@ topology: - failure-domain/region - failure-domain/zone +readAffinity: + # Enable read affinity for RBD volumes. Recommended to + # set to true if running kernel 5.8 or newer. + enabled: false + # Define which node labels to use as CRUSH location. + # This should correspond to the values set in the CRUSH map. + # NOTE: the value here serves as an example + crushLocationLabels: + - topology.kubernetes.io/region + - topology.kubernetes.io/zone + storageClass: # Specifies whether the storageclass should be created create: false diff --git a/scripts/install-helm.sh b/scripts/install-helm.sh index 41c18bd4539..7116180ccdd 100755 --- a/scripts/install-helm.sh +++ b/scripts/install-helm.sh @@ -179,7 +179,7 @@ install_cephcsi_helm_charts() { kubectl_retry delete cm ceph-config --namespace "${NAMESPACE}" # shellcheck disable=SC2086 - "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.replicaCount=1 --set-json='commonLabels={"app.kubernetes.io/name": "ceph-csi-rbd", "app.kubernetes.io/managed-by": "helm"}' ${SET_SC_TEMPLATE_VALUES} ${RBD_SECRET_TEMPLATE_VALUES} ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3 --set provisioner.minSnapshotsOnImage=2 + "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.replicaCount=1 --set-json='commonLabels={"app.kubernetes.io/name": "ceph-csi-rbd", "app.kubernetes.io/managed-by": "helm"}' ${SET_SC_TEMPLATE_VALUES} ${RBD_SECRET_TEMPLATE_VALUES} ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3 --set provisioner.minSnapshotsOnImage=2 --set readAffinity.enabled=true check_deployment_status app=ceph-csi-rbd "${NAMESPACE}" check_daemonset_status app=ceph-csi-rbd "${NAMESPACE}" From 64e87368f70797f19bb8d4e2a00bb5a484b50a23 Mon Sep 17 00:00:00 2001 From: Praveen M Date: Wed, 13 Sep 2023 17:57:45 +0530 Subject: [PATCH 2/2] e2e: added test to verify read affinity functionality e2e test case is added to test if read affinity is enabled by verifying read_from_replica=localize option is passed Signed-off-by: Praveen M --- .../templates/nodeplugin-daemonset.yaml | 4 +- charts/ceph-csi-rbd/values.yaml | 20 ++-- e2e/deployment.go | 18 +++- e2e/pod.go | 100 ++++++++++++++++++ e2e/rbd.go | 39 ++++++- e2e/utils.go | 9 ++ scripts/install-helm.sh | 10 +- 7 files changed, 181 insertions(+), 19 deletions(-) diff --git a/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml b/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml index f71286c489f..18a5929beee 100644 --- a/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml +++ b/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml @@ -87,8 +87,8 @@ spec: {{- if .Values.nodeplugin.profiling.enabled }} - "--enableprofiling={{ .Values.nodeplugin.profiling.enabled }}" {{- end }} - - "--enable-read-affinity={{ .Values.readAffinity.enabled }}" -{{- if .Values.readAffinity.enabled }} + - "--enable-read-affinity={{ and .Values.readAffinity .Values.readAffinity.enabled }}" +{{- if and .Values.readAffinity .Values.readAffinity.enabled }} - "--crush-location-labels={{ .Values.readAffinity.crushLocationLabels | join "," }}" {{- end }} env: diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml index f263313847b..0d43b167110 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ b/charts/ceph-csi-rbd/values.yaml @@ -279,16 +279,16 @@ topology: - failure-domain/region - failure-domain/zone -readAffinity: - # Enable read affinity for RBD volumes. Recommended to - # set to true if running kernel 5.8 or newer. - enabled: false - # Define which node labels to use as CRUSH location. - # This should correspond to the values set in the CRUSH map. - # NOTE: the value here serves as an example - crushLocationLabels: - - topology.kubernetes.io/region - - topology.kubernetes.io/zone +# readAffinity: +# Enable read affinity for RBD volumes. Recommended to +# set to true if running kernel 5.8 or newer. +# enabled: false +# Define which node labels to use as CRUSH location. +# This should correspond to the values set in the CRUSH map. +# NOTE: the value here serves as an example +# crushLocationLabels: +# - topology.kubernetes.io/region +# - topology.kubernetes.io/zone storageClass: # Specifies whether the storageclass should be created diff --git a/e2e/deployment.go b/e2e/deployment.go index f4f63642af0..27a831791e9 100644 --- a/e2e/deployment.go +++ b/e2e/deployment.go @@ -231,15 +231,19 @@ func (yr *yamlResource) Do(action kubectlAction) error { // replaceNamespaceInTemplate() on it. There are several options for adjusting // templates, each has their own comment. type yamlResourceNamespaced struct { - filename string - namespace string + filename string + namespace string + domainLabel string + crushLocationLabels string // set the number of replicas in a Deployment to 1. oneReplica bool // enable topology support (for RBD) enableTopology bool - domainLabel string + + // enable read affinity support (for RBD) + enableReadAffinity bool } func (yrn *yamlResourceNamespaced) Do(action kubectlAction) error { @@ -260,6 +264,14 @@ func (yrn *yamlResourceNamespaced) Do(action kubectlAction) error { data = addTopologyDomainsToDSYaml(data, yrn.domainLabel) } + if yrn.enableReadAffinity { + data = enableReadAffinityInTemplate(data) + } + + if yrn.crushLocationLabels != "" { + data = addCrsuhLocationLabels(data, yrn.crushLocationLabels) + } + err = retryKubectlInput(yrn.namespace, action, data, deployTimeout) if err != nil { return fmt.Errorf("failed to %s resource %q in namespace %q: %w", action, yrn.filename, yrn.namespace, err) diff --git a/e2e/pod.go b/e2e/pod.go index 427189b58bc..84986e2197f 100644 --- a/e2e/pod.go +++ b/e2e/pod.go @@ -20,6 +20,7 @@ import ( "context" "errors" "fmt" + "regexp" "strings" "time" @@ -623,3 +624,102 @@ func verifySeLinuxMountOption( return nil } + +// verifyReadAffinity verifies if read affinity is enabled by checking if read_from_replica +// and crush_location options are present in the device config file (/sys/devices/rbd/0/config_info). +func verifyReadAffinity( + f *framework.Framework, + pvcPath, appPath, daemonSetName, cn, ns string, +) error { + readFromReplicaOption := "read_from_replica=localize" + expectedCrushLocationValues := map[string]string{ + strings.Split(crushLocationRegionLabel, "/")[1]: crushLocationRegionValue, + strings.Split(crushLocationZoneLabel, "/")[1]: crushLocationZoneValue, + } + + // create PVC + pvc, err := loadPVC(pvcPath) + if err != nil { + return fmt.Errorf("failed to load pvc: %w", err) + } + pvc.Namespace = f.UniqueName + err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout) + if err != nil { + return fmt.Errorf("failed to create PVC: %w", err) + } + app, err := loadApp(appPath) + if err != nil { + return fmt.Errorf("failed to load application: %w", err) + } + app.Namespace = f.UniqueName + err = createApp(f.ClientSet, app, deployTimeout) + if err != nil { + return fmt.Errorf("failed to create application: %w", err) + } + + imageInfo, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f) + if err != nil { + return fmt.Errorf("failed to get imageInfo: %w", err) + } + + selector, err := getDaemonSetLabelSelector(f, ns, daemonSetName) + if err != nil { + return fmt.Errorf("failed to get selector label %w", err) + } + + opt := metav1.ListOptions{ + LabelSelector: selector, + } + + command := "cat /sys/devices/rbd/*/config_info" + configInfos, _, err := execCommandInContainer(f, command, ns, cn, &opt) + if err != nil { + return fmt.Errorf("failed to execute command %s: %w", command, err) + } + + var configInfo string + for _, config := range strings.Split(configInfos, "\n") { + if config == "" || !strings.Contains(config, imageInfo.imageName) { + continue + } + configInfo = config + + break + } + + if configInfo == "" { + return errors.New("failed to get config_info file") + } + + if !strings.Contains(configInfo, readFromReplicaOption) { + return fmt.Errorf("option %s not found in config_info: %s", readFromReplicaOption, configInfo) + } + + crushLocationPattern := "crush_location=([^,]+)" + regex := regexp.MustCompile(crushLocationPattern) + match := regex.FindString(configInfo) + if match == "" { + return fmt.Errorf("option crush_location not found in config_info: %s", configInfo) + } + + crushLocationValue := strings.Split(match, "=")[1] + keyValues := strings.Split(crushLocationValue, "|") + actualCrushLocationValues := make(map[string]string) + + for _, keyValue := range keyValues { + s := strings.Split(keyValue, ":") + actualCrushLocationValues[s[0]] = s[1] + } + for key, expectedValue := range expectedCrushLocationValues { + if actualValue, exists := actualCrushLocationValues[key]; !(exists && actualValue == expectedValue) { + return fmt.Errorf("crush location %s:%s not found in config_info : %s", key, expectedValue, configInfo) + } + } + + err = deletePVCAndApp("", f, pvc, app) + if err != nil { + return fmt.Errorf("failed to delete PVC and application: %w", err) + } + + return nil +} diff --git a/e2e/rbd.go b/e2e/rbd.go index 3361a39e5f2..2da732bd148 100644 --- a/e2e/rbd.go +++ b/e2e/rbd.go @@ -65,6 +65,12 @@ var ( rbdTopologyPool = "newrbdpool" rbdTopologyDataPool = "replicapool" // NOTE: should be different than rbdTopologyPool for test to be effective + // CRUSH location node labels & values. + crushLocationRegionLabel = "topology.kubernetes.io/region" + crushLocationRegionValue = "east" + crushLocationZoneLabel = "topology.kubernetes.io/zone" + crushLocationZoneValue = "east-zone1" + // yaml files required for deployment. pvcPath = rbdExamplePath + "pvc.yaml" appPath = rbdExamplePath + "pod.yaml" @@ -161,9 +167,11 @@ func createORDeleteRbdResources(action kubectlAction) { }, // the node-plugin itself &yamlResourceNamespaced{ - filename: rbdDirPath + rbdNodePlugin, - namespace: cephCSINamespace, - domainLabel: nodeRegionLabel + "," + nodeZoneLabel, + filename: rbdDirPath + rbdNodePlugin, + namespace: cephCSINamespace, + domainLabel: nodeRegionLabel + "," + nodeZoneLabel, + enableReadAffinity: true, + crushLocationLabels: crushLocationRegionLabel + "," + crushLocationZoneLabel, }, } @@ -275,6 +283,14 @@ var _ = Describe("RBD", func() { if err != nil { framework.Failf("failed to create node label: %v", err) } + err = createNodeLabel(f, crushLocationRegionLabel, crushLocationRegionValue) + if err != nil { + framework.Failf("failed to create node label: %v", err) + } + err = createNodeLabel(f, crushLocationZoneLabel, crushLocationZoneValue) + if err != nil { + framework.Failf("failed to create node label: %v", err) + } if cephCSINamespace != defaultNs { err = createNamespace(c, cephCSINamespace) if err != nil { @@ -409,6 +425,15 @@ var _ = Describe("RBD", func() { if err != nil { framework.Failf("failed to delete node label: %v", err) } + // Remove the CRUSH Location labels + err = deleteNodeLabel(c, crushLocationRegionLabel) + if err != nil { + framework.Failf("failed to delete node label: %v", err) + } + err = deleteNodeLabel(c, crushLocationZoneLabel) + if err != nil { + framework.Failf("failed to delete node label: %v", err) + } }) Context("Test RBD CSI", func() { @@ -444,6 +469,14 @@ var _ = Describe("RBD", func() { }) } + By("verify readAffinity support", func() { + err := verifyReadAffinity(f, pvcPath, appPath, + rbdDaemonsetName, rbdContainerName, cephCSINamespace) + if err != nil { + framework.Failf("failed to verify readAffinity: %v", err) + } + }) + By("verify mountOptions support", func() { err := verifySeLinuxMountOption(f, pvcPath, appPath, rbdDaemonsetName, rbdContainerName, cephCSINamespace) diff --git a/e2e/utils.go b/e2e/utils.go index 48ee28c92ad..67e413f5e38 100644 --- a/e2e/utils.go +++ b/e2e/utils.go @@ -827,6 +827,15 @@ func enableTopologyInTemplate(data string) string { return strings.ReplaceAll(data, "--feature-gates=Topology=false", "--feature-gates=Topology=true") } +func enableReadAffinityInTemplate(template string) string { + return strings.ReplaceAll(template, "# - \"--enable-read-affinity=true\"", "- \"--enable-read-affinity=true\"") +} + +func addCrsuhLocationLabels(template, labels string) string { + return strings.ReplaceAll(template, "# - \"--crush-location-labels=topology.io/zone,topology.io/rack\"", + "- \"--crush-location-labels="+labels+"\"") +} + func writeDataAndCalChecksum(app *v1.Pod, opt *metav1.ListOptions, f *framework.Framework) (string, error) { filePath := app.Spec.Containers[0].VolumeMounts[0].MountPath + "/test" // write data in PVC diff --git a/scripts/install-helm.sh b/scripts/install-helm.sh index 7116180ccdd..fe4e5d35b20 100755 --- a/scripts/install-helm.sh +++ b/scripts/install-helm.sh @@ -24,6 +24,10 @@ NODE_LABEL_REGION="test.failure-domain/region" NODE_LABEL_ZONE="test.failure-domain/zone" REGION_VALUE="testregion" ZONE_VALUE="testzone" +CRUSH_LOCATION_REGION_LABEL="topology.kubernetes.io/region" +CRUSH_LOCATION_ZONE_LABEL="topology.kubernetes.io/zone" +CRUSH_LOCATION_REGION_VALUE="east" +CRUSH_LOCATION_ZONE_VALUE="east-zone1" example() { echo "examples:" >&2 @@ -154,6 +158,8 @@ install_cephcsi_helm_charts() { for node in $(kubectl_retry get node -o jsonpath='{.items[*].metadata.name}'); do kubectl_retry label node/"${node}" ${NODE_LABEL_REGION}=${REGION_VALUE} kubectl_retry label node/"${node}" ${NODE_LABEL_ZONE}=${ZONE_VALUE} + kubectl_retry label node/"${node}" ${CRUSH_LOCATION_REGION_LABEL}=${CRUSH_LOCATION_REGION_VALUE} + kubectl_retry label node/"${node}" ${CRUSH_LOCATION_ZONE_LABEL}=${CRUSH_LOCATION_ZONE_VALUE} done # deploy storageclass if DEPLOY_SC flag is set @@ -179,7 +185,7 @@ install_cephcsi_helm_charts() { kubectl_retry delete cm ceph-config --namespace "${NAMESPACE}" # shellcheck disable=SC2086 - "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.replicaCount=1 --set-json='commonLabels={"app.kubernetes.io/name": "ceph-csi-rbd", "app.kubernetes.io/managed-by": "helm"}' ${SET_SC_TEMPLATE_VALUES} ${RBD_SECRET_TEMPLATE_VALUES} ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3 --set provisioner.minSnapshotsOnImage=2 --set readAffinity.enabled=true + "${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.replicaCount=1 --set-json='commonLabels={"app.kubernetes.io/name": "ceph-csi-rbd", "app.kubernetes.io/managed-by": "helm"}' ${SET_SC_TEMPLATE_VALUES} ${RBD_SECRET_TEMPLATE_VALUES} ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3 --set provisioner.minSnapshotsOnImage=2 --set readAffinity.enabled=true --set readAffinity.crushLocationLabels="{${CRUSH_LOCATION_REGION_LABEL},${CRUSH_LOCATION_ZONE_LABEL}}" check_deployment_status app=ceph-csi-rbd "${NAMESPACE}" check_daemonset_status app=ceph-csi-rbd "${NAMESPACE}" @@ -191,6 +197,8 @@ cleanup_cephcsi_helm_charts() { for node in $(kubectl_retry get node --no-headers | cut -f 1 -d ' '); do kubectl_retry label node/"$node" test.failure-domain/region- kubectl_retry label node/"$node" test.failure-domain/zone- + kubectl_retry label node/"$node" "${CRUSH_LOCATION_REGION_LABEL}"- + kubectl_retry label node/"$node" "${CRUSH_LOCATION_ZONE_LABEL}"- done # TODO/LATER we could remove the CSI labels that would have been set as well NAMESPACE=$1