helm & e2e: add option to enable read affinity for rbd #4111

Merged (2 commits) on Sep 26, 2023
2 changes: 2 additions & 0 deletions charts/ceph-csi-rbd/README.md
@@ -176,6 +176,8 @@ charts and their default values.
| `provisioner.podSecurityPolicy.enabled` | Specifies whether podSecurityPolicy is enabled | `false` |
| `topology.enabled` | Specifies whether topology based provisioning support should be exposed by CSI | `false` |
| `topology.domainLabels` | DomainLabels define which node labels to use as domains for CSI nodeplugins to advertise their domains | `{}` |
| `readAffinity.enabled` | Enable read affinity for RBD volumes. Recommended to set to true if running kernel 5.8 or newer. | `false` |
| `readAffinity.crushLocationLabels` | Define which node labels to use as CRUSH location. This should correspond to the values set in the CRUSH map. For more information, see [the read affinity documentation](https://github.com/ceph/ceph-csi/blob/v3.9.0/docs/deploy-rbd.md#read-affinity-using-crush-locations-for-rbd-volumes) | `[]` |
| `provisionerSocketFile` | The filename of the provisioner socket | `csi-provisioner.sock` |
| `pluginSocketFile` | The filename of the plugin socket | `csi.sock` |
| `kubeletDir` | kubelet working directory | `/var/lib/kubelet` |
4 changes: 4 additions & 0 deletions charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml
@@ -86,6 +86,10 @@ spec:
{{- end }}
{{- if .Values.nodeplugin.profiling.enabled }}
- "--enableprofiling={{ .Values.nodeplugin.profiling.enabled }}"
{{- end }}
- "--enable-read-affinity={{ and .Values.readAffinity .Values.readAffinity.enabled }}"
{{- if and .Values.readAffinity .Values.readAffinity.enabled }}
- "--crush-location-labels={{ .Values.readAffinity.crushLocationLabels | join "," }}"
{{- end }}
env:
- name: POD_IP
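The `and .Values.readAffinity .Values.readAffinity.enabled` expression appears to guard against values files that omit the `readAffinity` block entirely, so the template does not dereference a missing key. Below is a minimal, self-contained sketch of how these argument lines render when read affinity is configured; it uses plain Go `text/template` with a local stand-in for Sprig's `join`, not Helm's actual render pipeline, and the values shown are illustrative.

```go
package main

import (
	"os"
	"strings"
	"text/template"
)

// Simplified stand-in for the argument lines added to nodeplugin-daemonset.yaml.
const args = `- "--enable-read-affinity={{ and .Values.readAffinity .Values.readAffinity.enabled }}"
{{- if and .Values.readAffinity .Values.readAffinity.enabled }}
- "--crush-location-labels={{ .Values.readAffinity.crushLocationLabels | join "," }}"
{{- end }}
`

func main() {
	tmpl := template.Must(template.New("args").
		Funcs(template.FuncMap{
			// local replacement for Sprig's "join"
			"join": func(sep string, items []string) string { return strings.Join(items, sep) },
		}).
		Parse(args))

	// Illustrative values, mirroring the chart's readAffinity settings.
	values := map[string]interface{}{
		"Values": map[string]interface{}{
			"readAffinity": map[string]interface{}{
				"enabled": true,
				"crushLocationLabels": []string{
					"topology.kubernetes.io/region",
					"topology.kubernetes.io/zone",
				},
			},
		},
	}

	// Prints:
	// - "--enable-read-affinity=true"
	// - "--crush-location-labels=topology.kubernetes.io/region,topology.kubernetes.io/zone"
	_ = tmpl.Execute(os.Stdout, values)
}
```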
11 changes: 11 additions & 0 deletions charts/ceph-csi-rbd/values.yaml
@@ -279,6 +279,17 @@ topology:
- failure-domain/region
- failure-domain/zone

# readAffinity:
# Enable read affinity for RBD volumes. Recommended to
# set to true if running kernel 5.8 or newer.
# enabled: false
# Define which node labels to use as CRUSH location.
# This should correspond to the values set in the CRUSH map.
# NOTE: the value here serves as an example
# crushLocationLabels:
# - topology.kubernetes.io/region
# - topology.kubernetes.io/zone

storageClass:
# Specifies whether the storageclass should be created
create: false
18 changes: 15 additions & 3 deletions e2e/deployment.go
@@ -231,15 +231,19 @@ func (yr *yamlResource) Do(action kubectlAction) error {
// replaceNamespaceInTemplate() on it. There are several options for adjusting
// templates, each has its own comment.
type yamlResourceNamespaced struct {
filename string
namespace string
filename string
namespace string
domainLabel string
crushLocationLabels string

// set the number of replicas in a Deployment to 1.
oneReplica bool

// enable topology support (for RBD)
enableTopology bool
domainLabel string

// enable read affinity support (for RBD)
enableReadAffinity bool
}

func (yrn *yamlResourceNamespaced) Do(action kubectlAction) error {
@@ -260,6 +264,14 @@ func (yrn *yamlResourceNamespaced) Do(action kubectlAction) error {
data = addTopologyDomainsToDSYaml(data, yrn.domainLabel)
}

if yrn.enableReadAffinity {
data = enableReadAffinityInTemplate(data)
}

if yrn.crushLocationLabels != "" {
data = addCrushLocationLabels(data, yrn.crushLocationLabels)
}

err = retryKubectlInput(yrn.namespace, action, data, deployTimeout)
if err != nil {
return fmt.Errorf("failed to %s resource %q in namespace %q: %w", action, yrn.filename, yrn.namespace, err)
100 changes: 100 additions & 0 deletions e2e/pod.go
@@ -20,6 +20,7 @@ import (
"context"
"errors"
"fmt"
"regexp"
"strings"
"time"

@@ -623,3 +624,102 @@ func verifySeLinuxMountOption(

return nil
}

// verifyReadAffinity verifies that read affinity is enabled by checking that the read_from_replica
// and crush_location options are present in the device config file (/sys/devices/rbd/*/config_info).
func verifyReadAffinity(
f *framework.Framework,
pvcPath, appPath, daemonSetName, cn, ns string,
) error {
readFromReplicaOption := "read_from_replica=localize"
expectedCrushLocationValues := map[string]string{
strings.Split(crushLocationRegionLabel, "/")[1]: crushLocationRegionValue,
strings.Split(crushLocationZoneLabel, "/")[1]: crushLocationZoneValue,
}

// create PVC
pvc, err := loadPVC(pvcPath)
if err != nil {
return fmt.Errorf("failed to load pvc: %w", err)
}
pvc.Namespace = f.UniqueName
err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout)
if err != nil {
return fmt.Errorf("failed to create PVC: %w", err)
}
app, err := loadApp(appPath)
if err != nil {
return fmt.Errorf("failed to load application: %w", err)
}
app.Namespace = f.UniqueName
err = createApp(f.ClientSet, app, deployTimeout)
if err != nil {
return fmt.Errorf("failed to create application: %w", err)
}

imageInfo, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f)
if err != nil {
return fmt.Errorf("failed to get imageInfo: %w", err)
}

selector, err := getDaemonSetLabelSelector(f, ns, daemonSetName)
if err != nil {
return fmt.Errorf("failed to get selector label %w", err)
}

opt := metav1.ListOptions{
LabelSelector: selector,
}

command := "cat /sys/devices/rbd/*/config_info"
configInfos, _, err := execCommandInContainer(f, command, ns, cn, &opt)
if err != nil {
return fmt.Errorf("failed to execute command %s: %w", command, err)
}

var configInfo string
for _, config := range strings.Split(configInfos, "\n") {
if config == "" || !strings.Contains(config, imageInfo.imageName) {
continue
}
configInfo = config

break
}

if configInfo == "" {
return errors.New("failed to get config_info file")
}

if !strings.Contains(configInfo, readFromReplicaOption) {
return fmt.Errorf("option %s not found in config_info: %s", readFromReplicaOption, configInfo)
}

crushLocationPattern := "crush_location=([^,]+)"
regex := regexp.MustCompile(crushLocationPattern)
match := regex.FindString(configInfo)
if match == "" {
return fmt.Errorf("option crush_location not found in config_info: %s", configInfo)
}

crushLocationValue := strings.Split(match, "=")[1]
keyValues := strings.Split(crushLocationValue, "|")
actualCrushLocationValues := make(map[string]string)

for _, keyValue := range keyValues {
s := strings.Split(keyValue, ":")
actualCrushLocationValues[s[0]] = s[1]
}
for key, expectedValue := range expectedCrushLocationValues {
if actualValue, exists := actualCrushLocationValues[key]; !(exists && actualValue == expectedValue) {
return fmt.Errorf("crush location %s:%s not found in config_info : %s", key, expectedValue, configInfo)
}
}

err = deletePVCAndApp("", f, pvc, app)
if err != nil {
return fmt.Errorf("failed to delete PVC and application: %w", err)
}

return nil
}
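The crush_location option in config_info is a `|`-separated list of `key:value` pairs (for example `region:east|zone:east-zone1`). The sketch below performs the same extraction as verifyReadAffinity against a hypothetical config_info line; the sample string is illustrative only, not captured from a real device.

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// Hypothetical config_info content for a mapped RBD image with read
	// affinity enabled; the real file lives at /sys/devices/rbd/<id>/config_info.
	configInfo := "ms_mode=prefer-crc,read_from_replica=localize," +
		"crush_location=region:east|zone:east-zone1,queue_depth=128"

	// Grab the crush_location option, as the e2e check does.
	match := regexp.MustCompile(`crush_location=([^,]+)`).FindString(configInfo)
	if match == "" {
		fmt.Println("crush_location option not present")
		return
	}

	// Split "region:east|zone:east-zone1" into key/value pairs.
	locations := make(map[string]string)
	for _, kv := range strings.Split(strings.SplitN(match, "=", 2)[1], "|") {
		parts := strings.SplitN(kv, ":", 2)
		locations[parts[0]] = parts[1]
	}

	fmt.Println(locations) // map[region:east zone:east-zone1]
}
```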
39 changes: 36 additions & 3 deletions e2e/rbd.go
@@ -65,6 +65,12 @@ var (
rbdTopologyPool = "newrbdpool"
rbdTopologyDataPool = "replicapool" // NOTE: should be different than rbdTopologyPool for test to be effective

// CRUSH location node labels & values.
crushLocationRegionLabel = "topology.kubernetes.io/region"
crushLocationRegionValue = "east"
crushLocationZoneLabel = "topology.kubernetes.io/zone"
crushLocationZoneValue = "east-zone1"

// yaml files required for deployment.
pvcPath = rbdExamplePath + "pvc.yaml"
appPath = rbdExamplePath + "pod.yaml"
@@ -161,9 +167,11 @@ func createORDeleteRbdResources(action kubectlAction) {
},
// the node-plugin itself
&yamlResourceNamespaced{
filename: rbdDirPath + rbdNodePlugin,
namespace: cephCSINamespace,
domainLabel: nodeRegionLabel + "," + nodeZoneLabel,
filename: rbdDirPath + rbdNodePlugin,
namespace: cephCSINamespace,
domainLabel: nodeRegionLabel + "," + nodeZoneLabel,
enableReadAffinity: true,
crushLocationLabels: crushLocationRegionLabel + "," + crushLocationZoneLabel,
},
}

@@ -275,6 +283,14 @@ var _ = Describe("RBD", func() {
if err != nil {
framework.Failf("failed to create node label: %v", err)
}
err = createNodeLabel(f, crushLocationRegionLabel, crushLocationRegionValue)
if err != nil {
framework.Failf("failed to create node label: %v", err)
}
err = createNodeLabel(f, crushLocationZoneLabel, crushLocationZoneValue)
if err != nil {
framework.Failf("failed to create node label: %v", err)
}
Review comment on lines +286 to +293 (Collaborator): Let's enhance createNodeLabel to take a map[string]string and set the labels in one call (not as part of this PR).

Reply (Contributor, author): Created an issue: #4146

if cephCSINamespace != defaultNs {
err = createNamespace(c, cephCSINamespace)
if err != nil {
@@ -409,6 +425,15 @@ var _ = Describe("RBD", func() {
if err != nil {
framework.Failf("failed to delete node label: %v", err)
}
// Remove the CRUSH Location labels
Review comment (Collaborator): Same here as well; let's update deleteNodeLabel to take multiple labels (if possible).
err = deleteNodeLabel(c, crushLocationRegionLabel)
if err != nil {
framework.Failf("failed to delete node label: %v", err)
}
err = deleteNodeLabel(c, crushLocationZoneLabel)
if err != nil {
framework.Failf("failed to delete node label: %v", err)
}
})

Context("Test RBD CSI", func() {
@@ -444,6 +469,14 @@ var _ = Describe("RBD", func() {
})
}

By("verify readAffinity support", func() {
err := verifyReadAffinity(f, pvcPath, appPath,
rbdDaemonsetName, rbdContainerName, cephCSINamespace)
if err != nil {
framework.Failf("failed to verify readAffinity: %v", err)
}
})

By("verify mountOptions support", func() {
err := verifySeLinuxMountOption(f, pvcPath, appPath,
rbdDaemonsetName, rbdContainerName, cephCSINamespace)
9 changes: 9 additions & 0 deletions e2e/utils.go
@@ -827,6 +827,15 @@ func enableTopologyInTemplate(data string) string {
return strings.ReplaceAll(data, "--feature-gates=Topology=false", "--feature-gates=Topology=true")
}

func enableReadAffinityInTemplate(template string) string {
return strings.ReplaceAll(template, "# - \"--enable-read-affinity=true\"", "- \"--enable-read-affinity=true\"")
}

func addCrushLocationLabels(template, labels string) string {
return strings.ReplaceAll(template, "# - \"--crush-location-labels=topology.io/zone,topology.io/rack\"",
"- \"--crush-location-labels="+labels+"\"")
}

func writeDataAndCalChecksum(app *v1.Pod, opt *metav1.ListOptions, f *framework.Framework) (string, error) {
filePath := app.Spec.Containers[0].VolumeMounts[0].MountPath + "/test"
// write data in PVC
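The two helpers added above only flip commented-out arguments that are assumed to already exist in the node-plugin manifest under deploy/. A rough sketch of their effect follows; the YAML fragment is a trimmed, hypothetical excerpt, not the full manifest.

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical excerpt of the csi-rbdplugin DaemonSet template used by the e2e tests.
	manifest := `            - "--v=5"
            # - "--enable-read-affinity=true"
            # - "--crush-location-labels=topology.io/zone,topology.io/rack"`

	// Equivalent to enableReadAffinityInTemplate(manifest).
	manifest = strings.ReplaceAll(manifest,
		`# - "--enable-read-affinity=true"`,
		`- "--enable-read-affinity=true"`)

	// Equivalent to addCrushLocationLabels(manifest, labels) with the e2e CRUSH location labels.
	labels := "topology.kubernetes.io/region,topology.kubernetes.io/zone"
	manifest = strings.ReplaceAll(manifest,
		`# - "--crush-location-labels=topology.io/zone,topology.io/rack"`,
		`- "--crush-location-labels=`+labels+`"`)

	fmt.Println(manifest)
	// Output (indentation preserved):
	//             - "--v=5"
	//             - "--enable-read-affinity=true"
	//             - "--crush-location-labels=topology.kubernetes.io/region,topology.kubernetes.io/zone"
}
```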
10 changes: 9 additions & 1 deletion scripts/install-helm.sh
@@ -24,6 +24,10 @@ NODE_LABEL_REGION="test.failure-domain/region"
NODE_LABEL_ZONE="test.failure-domain/zone"
REGION_VALUE="testregion"
ZONE_VALUE="testzone"
CRUSH_LOCATION_REGION_LABEL="topology.kubernetes.io/region"
CRUSH_LOCATION_ZONE_LABEL="topology.kubernetes.io/zone"
CRUSH_LOCATION_REGION_VALUE="east"
CRUSH_LOCATION_ZONE_VALUE="east-zone1"

example() {
echo "examples:" >&2
@@ -154,6 +158,8 @@ install_cephcsi_helm_charts() {
for node in $(kubectl_retry get node -o jsonpath='{.items[*].metadata.name}'); do
kubectl_retry label node/"${node}" ${NODE_LABEL_REGION}=${REGION_VALUE}
kubectl_retry label node/"${node}" ${NODE_LABEL_ZONE}=${ZONE_VALUE}
kubectl_retry label node/"${node}" ${CRUSH_LOCATION_REGION_LABEL}=${CRUSH_LOCATION_REGION_VALUE}
kubectl_retry label node/"${node}" ${CRUSH_LOCATION_ZONE_LABEL}=${CRUSH_LOCATION_ZONE_VALUE}
done

# deploy storageclass if DEPLOY_SC flag is set
Expand All @@ -179,7 +185,7 @@ install_cephcsi_helm_charts() {
kubectl_retry delete cm ceph-config --namespace "${NAMESPACE}"

# shellcheck disable=SC2086
"${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.replicaCount=1 --set-json='commonLabels={"app.kubernetes.io/name": "ceph-csi-rbd", "app.kubernetes.io/managed-by": "helm"}' ${SET_SC_TEMPLATE_VALUES} ${RBD_SECRET_TEMPLATE_VALUES} ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3 --set provisioner.minSnapshotsOnImage=2
"${HELM}" install --namespace ${NAMESPACE} --set provisioner.fullnameOverride=csi-rbdplugin-provisioner --set nodeplugin.fullnameOverride=csi-rbdplugin --set configMapName=ceph-csi-config --set provisioner.replicaCount=1 --set-json='commonLabels={"app.kubernetes.io/name": "ceph-csi-rbd", "app.kubernetes.io/managed-by": "helm"}' ${SET_SC_TEMPLATE_VALUES} ${RBD_SECRET_TEMPLATE_VALUES} ${RBD_CHART_NAME} "${SCRIPT_DIR}"/../charts/ceph-csi-rbd --set topology.enabled=true --set topology.domainLabels="{${NODE_LABEL_REGION},${NODE_LABEL_ZONE}}" --set provisioner.maxSnapshotsOnImage=3 --set provisioner.minSnapshotsOnImage=2 --set readAffinity.enabled=true --set readAffinity.crushLocationLabels="{${CRUSH_LOCATION_REGION_LABEL},${CRUSH_LOCATION_ZONE_LABEL}}"

check_deployment_status app=ceph-csi-rbd "${NAMESPACE}"
check_daemonset_status app=ceph-csi-rbd "${NAMESPACE}"
@@ -191,6 +197,8 @@ cleanup_cephcsi_helm_charts() {
for node in $(kubectl_retry get node --no-headers | cut -f 1 -d ' '); do
kubectl_retry label node/"$node" test.failure-domain/region-
kubectl_retry label node/"$node" test.failure-domain/zone-
kubectl_retry label node/"$node" "${CRUSH_LOCATION_REGION_LABEL}"-
kubectl_retry label node/"$node" "${CRUSH_LOCATION_ZONE_LABEL}"-
done
# TODO/LATER we could remove the CSI labels that would have been set as well
NAMESPACE=$1