Skip to content

Commit

Permalink
deploy: support for read affinity options per cluster
Browse files Browse the repository at this point in the history
Implemented the capability to include read affinity options
for individual clusters within the ceph-csi-config ConfigMap.
This allows users to configure the crush location for each
cluster separately. The read affinity options specified in
the ConfigMap will supersede those provided via command line arguments.

Signed-off-by: Praveen M <m.praveen@ibm.com>
  • Loading branch information
iPraveenParihar committed Oct 26, 2023
1 parent cba5402 commit 04ff893
Show file tree
Hide file tree
Showing 17 changed files with 492 additions and 119 deletions.
5 changes: 5 additions & 0 deletions charts/ceph-csi-rbd/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ serviceAccounts:
# - "<MONValue2>"
# rbd:
# netNamespaceFilePath: "{{ .kubeletDir }}/plugins/{{ .driverName }}/net"
# readAffinity:
# enabled: true
# crushLocationLabels:
# - topology.kubernetes.io/region
# - topology.kubernetes.io/zone
csiConfig: []

# Configuration details of clusterID,PoolID and FscID mapping
Expand Down
2 changes: 2 additions & 0 deletions cmd/cephcsi.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ func init() {
"",
"list of Kubernetes node labels, that determines the"+
" CRUSH location the node belongs to, separated by ','")
flag.StringVar(&conf.ContainerOrchestrator, "container-orchestrator", "kubernetes",
"container orchestrator where cephcsi is running")

// cephfs related flags
flag.BoolVar(
Expand Down
14 changes: 13 additions & 1 deletion deploy/csi-config-map-sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,15 @@ kind: ConfigMap
# path for the Ceph cluster identified by the <cluster-id>, This will be used
# by the RBD CSI plugin to execute the rbd map/unmap in the
# network namespace specified by the "rbd.netNamespaceFilePath".
# The "readAffinity" fields are used to enable read affinity and pass the crush
# location map for the Ceph cluster identified by the <cluster-id>.
# Enabling this will add
# "read_from_replica=localize,crush_location=<label:value>" to the map option.
# If a CSI plugin is using more than one Ceph cluster, repeat the section for
# each such cluster in use.
# NOTE: Changes to the configmap are automatically propagated to the running
# pods, thus restarting existing pods using the configmap is NOT required on
# edits to the configmap.

# Let's look at the different configurations under the cluster-mapping.json key.
# This configuration is needed when volumes are mirrored using the Ceph-CSI.
# clusterIDMapping holds the mapping between two clusterId's of storage
Expand Down Expand Up @@ -66,6 +69,15 @@ data:
}
"nfs": {
"netNamespaceFilePath": "<kubeletRootPath>/plugins/nfs.csi.ceph.com/net",
},
"readAffinity": {
"enabled": false,
"crushLocationLabels": [
"<Label1>",
"<Label2>"
...
"<Label3>"
]
}
}
]
Expand Down
8 changes: 7 additions & 1 deletion docs/deploy-rbd.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ make image-cephcsi
| `--maxsnapshotsonimage` | `450` | Maximum number of snapshots allowed on rbd image without flattening |
| `--setmetadata` | `false` | Set metadata on volume |
| `--enable-read-affinity` | `false` | enable read affinity |
| `--crush-location-labels`| _empty_ | Kubernetes node labels that determine the CRUSH location the node belongs to, separated by ',' |
| `--crush-location-labels`| _empty_ | Kubernetes node labels that determine the CRUSH location the node belongs to, separated by ','.<br>`Note: These labels will be replaced if crush location labels are defined in the ceph-csi-config ConfigMap for the specific cluster.` |

**Available volume parameters:**

Expand Down Expand Up @@ -224,6 +224,12 @@ If enabled, this option will be added to all RBD volumes mapped by Ceph CSI.
Well known labels can be found
[here](https://kubernetes.io/docs/reference/labels-annotations-taints/).

Read affinity can be configured for individual clusters within the
`ceph-csi-config` ConfigMap. This allows configuring the crush location labels
for each ceph cluster separately. The crush location labels specified in the
ConfigMap will supersede those provided via command line argument
`--crush-location-labels`.

>Note: Label values will have all their dots `"."` normalized to dashes `"-"`
in order for them to work with the ceph CRUSH map.

Expand Down
25 changes: 15 additions & 10 deletions internal/rbd/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
csicommon "github.com/ceph/ceph-csi/internal/csi-common"
"github.com/ceph/ceph-csi/internal/rbd"
"github.com/ceph/ceph-csi/internal/util"
"github.com/ceph/ceph-csi/internal/util/k8s"
"github.com/ceph/ceph-csi/internal/util/log"

"github.com/container-storage-interface/spec/lib/go/csi"
Expand Down Expand Up @@ -68,14 +69,14 @@ func NewControllerServer(d *csicommon.CSIDriver) *rbd.ControllerServer {
// NewNodeServer initializes a node server for the rbd CSI driver.
//
// d, t and topology are forwarded to csicommon.NewDefaultNodeServer.
// nodeLabels is stored on the server unchanged. crushLocationMap (derived from
// the command line flags) is converted once into map options with
// util.ConstructReadAffinityMapOption and kept as CLIReadAffinityMapOptions.
//
// The returned error is currently always nil.
func NewNodeServer(
	d *csicommon.CSIDriver,
	t string,
	nodeLabels, topology, crushLocationMap map[string]string,
) (*rbd.NodeServer, error) {
	ns := rbd.NodeServer{
		DefaultNodeServer:         csicommon.NewDefaultNodeServer(d, t, topology),
		VolumeLocks:               util.NewVolumeLocks(),
		NodeLabels:                nodeLabels,
		CLIReadAffinityMapOptions: util.ConstructReadAffinityMapOption(crushLocationMap),
	}

	return &ns, nil
}
Expand All @@ -87,8 +88,8 @@ func NewNodeServer(
// setupCSIAddonsServer().
func (r *Driver) Run(conf *util.Config) {
var (
err error
topology, crushLocationMap map[string]string
err error
nodeLabels, topology, crushLocationMap map[string]string
)
// update clone soft and hard limit
rbd.SetGlobalInt("rbdHardMaxCloneDepth", conf.RbdHardMaxCloneDepth)
Expand Down Expand Up @@ -125,13 +126,17 @@ func (r *Driver) Run(conf *util.Config) {
})
}

if conf.EnableReadAffinity {
crushLocationMap, err = util.GetCrushLocationMap(conf.CrushLocationLabels, conf.NodeID)
if conf.ContainerOrchestrator == "kubernetes" {
nodeLabels, err = k8s.GetNodeLabels(conf.NodeID)
if err != nil {
log.FatalLogMsg(err.Error())
}
}

if conf.EnableReadAffinity {
crushLocationMap = util.GetCrushLocationMap(conf.CrushLocationLabels, nodeLabels)
}

// Create GRPC servers
r.ids = NewIdentityServer(r.cd)

Expand All @@ -140,7 +145,7 @@ func (r *Driver) Run(conf *util.Config) {
if err != nil {
log.FatalLogMsg(err.Error())
}
r.ns, err = NewNodeServer(r.cd, conf.Vtype, topology, crushLocationMap)
r.ns, err = NewNodeServer(r.cd, conf.Vtype, nodeLabels, topology, crushLocationMap)
if err != nil {
log.FatalLogMsg("failed to start node server, err %v\n", err)
}
Expand Down
36 changes: 9 additions & 27 deletions internal/rbd/nodeserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ type NodeServer struct {
// A map storing all volumes with ongoing operations so that additional operations
// for that same volume (as defined by VolumeID) return an Aborted error
VolumeLocks *util.VolumeLocks
// readAffinityMapOptions contains map options to enable read affinity.
readAffinityMapOptions string
// NodeLabels stores the node labels
NodeLabels map[string]string
// CLIReadAffinityMapOptions contains map options passed through command line to enable read affinity.
CLIReadAffinityMapOptions string
}

// stageTransaction struct represents the state a transaction was when it either completed
Expand Down Expand Up @@ -258,11 +260,10 @@ func (ns *NodeServer) populateRbdVol(
rv.Mounter = rbdNbdMounter
}

err = getMapOptions(req, rv)
err = ns.getMapOptions(req, rv)
if err != nil {
return nil, err
}
ns.appendReadAffinityMapOptions(rv)

rv.VolID = volID

Expand All @@ -280,14 +281,14 @@ func (ns *NodeServer) populateRbdVol(

// appendReadAffinityMapOptions appends readAffinityMapOptions to mapOptions
// if mounter is rbdDefaultMounter and readAffinityMapOptions is not empty.
func (ns NodeServer) appendReadAffinityMapOptions(rv *rbdVolume) {
func (rv *rbdVolume) appendReadAffinityMapOptions(readAffinityMapOptions string) {
switch {
case ns.readAffinityMapOptions == "" || rv.Mounter != rbdDefaultMounter:
case readAffinityMapOptions == "" || rv.Mounter != rbdDefaultMounter:
return
case rv.MapOptions != "":
rv.MapOptions += "," + ns.readAffinityMapOptions
rv.MapOptions += "," + readAffinityMapOptions
default:
rv.MapOptions = ns.readAffinityMapOptions
rv.MapOptions = readAffinityMapOptions
}
}

Expand Down Expand Up @@ -1395,22 +1396,3 @@ func getDeviceSize(ctx context.Context, devicePath string) (uint64, error) {

return size, nil
}

// SetReadAffinityMapOptions renders crushLocationMap as rbd map options and
// stores the result in ns.readAffinityMapOptions.
//
// Each entry becomes "key:value"; entries are joined with "|" and prefixed
// with "read_from_replica=localize,crush_location=". A nil or empty map leaves
// the stored options untouched. Entries follow map iteration order, so the
// order of multiple entries is not fixed.
func (ns *NodeServer) SetReadAffinityMapOptions(crushLocationMap map[string]string) {
	if len(crushLocationMap) == 0 {
		return
	}

	locations := make([]string, 0, len(crushLocationMap))
	for bucket, name := range crushLocationMap {
		locations = append(locations, fmt.Sprintf("%s:%s", bucket, name))
	}

	ns.readAffinityMapOptions = "read_from_replica=localize,crush_location=" +
		strings.Join(locations, "|")
}
161 changes: 111 additions & 50 deletions internal/rbd/nodeserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@ package rbd

import (
"context"
"encoding/json"
"os"
"testing"

"github.com/ceph/ceph-csi/internal/util"

"github.com/container-storage-interface/spec/lib/go/csi"
"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -107,53 +111,6 @@ func TestParseBoolOption(t *testing.T) {
}
}

// TestNodeServer_SetReadAffinityMapOptions checks the map options stored by
// SetReadAffinityMapOptions for nil, empty, single-entry and multi-entry crush
// location maps.
func TestNodeServer_SetReadAffinityMapOptions(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name             string
		crushLocationmap map[string]string
		// wantAny lists every acceptable result; more than one is given
		// when the entry order in the output is not fixed.
		wantAny []string
	}

	cases := []testCase{
		{
			name:             "nil crushLocationmap",
			crushLocationmap: nil,
			wantAny:          []string{""},
		},
		{
			name:             "empty crushLocationmap",
			crushLocationmap: map[string]string{},
			wantAny:          []string{""},
		},
		{
			name:             "single entry in crushLocationmap",
			crushLocationmap: map[string]string{"region": "east"},
			wantAny:          []string{"read_from_replica=localize,crush_location=region:east"},
		},
		{
			name: "multiple entries in crushLocationmap",
			crushLocationmap: map[string]string{
				"region": "east",
				"zone":   "east-1",
			},
			wantAny: []string{
				"read_from_replica=localize,crush_location=region:east|zone:east-1",
				"read_from_replica=localize,crush_location=zone:east-1|region:east",
			},
		},
	}

	for i := range cases {
		// Copy the case so the parallel subtest does not share the loop state.
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			ns := new(NodeServer)
			ns.SetReadAffinityMapOptions(tc.crushLocationmap)
			assert.Contains(t, tc.wantAny, ns.readAffinityMapOptions)
		})
	}
}

func TestNodeServer_appendReadAffinityMapOptions(t *testing.T) {
t.Parallel()
type input struct {
Expand Down Expand Up @@ -236,11 +193,115 @@ func TestNodeServer_appendReadAffinityMapOptions(t *testing.T) {
MapOptions: currentTT.args.mapOptions,
Mounter: currentTT.args.mounter,
}
rv.appendReadAffinityMapOptions(currentTT.args.readAffinityMapOptions)
assert.Equal(t, currentTT.want, rv.MapOptions)
})
}
}

// TestReadAffinity_GetReadAffinityMapOptions verifies how the per-cluster read
// affinity settings from the CSI config file combine with the map options
// derived from the command line crush location labels.
//
// The expectations in the table encode the contract under test: a cluster with
// read affinity enabled in the config uses its own labels, while a cluster
// that is disabled or absent in the config falls back to the command line
// options (which may be empty).
func TestReadAffinity_GetReadAffinityMapOptions(t *testing.T) {
	t.Parallel()

	nodeLabels := map[string]string{
		"topology.kubernetes.io/zone":   "east-1",
		"topology.kubernetes.io/region": "east",
	}

	csiConfig := []util.ClusterInfo{
		{
			ClusterID: "cluster-1",
			ReadAffinity: struct {
				Enabled             bool     `json:"enabled"`
				CrushLocationLabels []string `json:"crushLocationLabels"`
			}{
				Enabled: true,
				CrushLocationLabels: []string{
					"topology.kubernetes.io/region",
				},
			},
		},
		{
			ClusterID: "cluster-2",
			ReadAffinity: struct {
				Enabled             bool     `json:"enabled"`
				CrushLocationLabels []string `json:"crushLocationLabels"`
			}{
				Enabled: false,
				CrushLocationLabels: []string{
					"topology.kubernetes.io/region",
				},
			},
		},
	}

	// The fixture below is a precondition for every subtest, so any failure
	// while preparing it aborts the test instead of merely being recorded.
	csiConfigFileContent, err := json.Marshal(csiConfig)
	if err != nil {
		t.Fatalf("failed to marshal csi config info %v", err)
	}
	tmpConfPath := util.CsiConfigFile
	// MkdirAll tolerates a directory left behind by a previous run, and the
	// directory needs the search (x) bit for the file inside to be reachable.
	err = os.MkdirAll("/etc/ceph-csi-config", 0o755)
	if err != nil {
		t.Fatalf("failed to create directory %s: %v", "/etc/ceph-csi-config", err)
	}
	err = os.WriteFile(tmpConfPath, csiConfigFileContent, 0o600)
	if err != nil {
		t.Fatalf("failed to write %s file content: %v", util.CsiConfigFile, err)
	}

	tests := []struct {
		name                   string
		clusterID              string
		CLICrushLocationLabels string
		want                   string
	}{
		{
			name:                   "Enabled in cluster-1 and Enabled in CLI",
			clusterID:              "cluster-1",
			CLICrushLocationLabels: "topology.kubernetes.io/region",
			want:                   "read_from_replica=localize,crush_location=region:east",
		},
		{
			name:                   "Disabled in cluster-2 and Enabled in CLI",
			clusterID:              "cluster-2",
			CLICrushLocationLabels: "topology.kubernetes.io/zone",
			want:                   "read_from_replica=localize,crush_location=zone:east-1",
		},
		{
			name:                   "Disabled in cluster-2 and Disabled in CLI",
			clusterID:              "cluster-2",
			CLICrushLocationLabels: "",
			want:                   "",
		},
		{
			name:                   "Absent is cluster-3 and Enabled in CLI",
			clusterID:              "cluster-3",
			CLICrushLocationLabels: "topology.kubernetes.io/zone",
			want:                   "read_from_replica=localize,crush_location=zone:east-1",
		},
		{
			name:                   "Absent in cluster-3 and Disabled in CLI",
			clusterID:              "cluster-3",
			CLICrushLocationLabels: "",
			want:                   "",
		},
	}

	for _, tt := range tests {
		// Copy the case so the parallel subtest does not share the loop variable.
		tc := tt
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			crushLocationMap := util.GetCrushLocationMap(tc.CLICrushLocationLabels, nodeLabels)
			ns := &NodeServer{
				CLIReadAffinityMapOptions: util.ConstructReadAffinityMapOption(crushLocationMap),
			}
			readAffinityMapOptions, err := util.GetReadAffinityMapOptions(
				tc.clusterID, ns.CLIReadAffinityMapOptions, nodeLabels,
			)
			if err != nil {
				assert.Fail(t, err.Error())
			}

			assert.Equal(t, tc.want, readAffinityMapOptions)
		})
	}
}
Loading

0 comments on commit 04ff893

Please sign in to comment.