add CanaryUpgradeCount to support canary upgrade of a single node (#258)
* add CanaryUpgradeCount to support canary upgrade of a single node

Prior to this change, a canary upgrade was performed on the entire first
rack. This change lets you limit the upgrade to as many nodes as you want in
the first rack (see the sketch below). If CanaryUpgradeCount is 0 or greater
than the rack size, then the whole rack will be upgraded.

* handle default/base case and add some docs

* update CRD
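
For illustration, here is a minimal sketch of the partition math this change introduces. canaryPartition is a hypothetical helper name used only on this page; in the operator the logic lives inline in CheckRackPodTemplate.

// In a StatefulSet rolling update, only pods with ordinal >= Partition are
// moved to the new pod template, so a small canaryUpgradeCount confines the
// new image to the rack's highest-ordinal pods.
func canaryPartition(nodeCount, canaryUpgradeCount int32) int32 {
	if canaryUpgradeCount == 0 || canaryUpgradeCount > nodeCount {
		// default/base case: keep the partition at the full rack size
		return nodeCount
	}
	return nodeCount - canaryUpgradeCount
}

For example, canaryPartition(3, 1) returns 2, so only the rack's last pod (ordinal 2) is rolled onto the new image first.
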
jsanda authored Sep 29, 2020
1 parent b1851d9 commit f73041c
Showing 8 changed files with 234 additions and 0 deletions.
@@ -48,6 +48,12 @@ spec:
description: Indicates that configuration and container image changes
should only be pushed to the first rack of the datacenter
type: boolean
canaryUpgradeCount:
description: The number of nodes that will be updated when CanaryUpgrade
is true. Note that if the value is 0 or greater than the rack
size, then all nodes in the rack will get updated.
format: int32
type: integer
clusterName:
description: The name by which CQL clients and instances will know the
cluster. If the same cluster name is shared by multiple Datacenters
23 changes: 23 additions & 0 deletions mage/ginkgo/lib.go
@@ -316,6 +316,16 @@ func (ns *NsWrapper) WaitForPodStarted(podName string) {
ns.WaitForOutputAndLog(step, k, podName, 60)
}

func (ns *NsWrapper) WaitForCassandraImages(dcName string, expectedImages []string, timeout int) {
step := "verify cassandra image updates"
images := strings.Join(expectedImages, " ")
json := "jsonpath={.items[*].spec.containers[?(@.name == 'cassandra')].image}"
k := kubectl.Get("pods").
WithFlag("selector", fmt.Sprintf("cassandra.datastax.com/datacenter=%s", dcName)).
FormatOutput(json)
ns.WaitForOutputAndLog(step, k, images, timeout)
}

func (ns *NsWrapper) DisableGossipWaitNotReady(podName string) {
ns.DisableGossip(podName)
ns.WaitForPodNotStarted(podName)
@@ -370,6 +380,19 @@ func (ns *NsWrapper) GetDatacenterReadyPodNames(dcName string) []string {
return podNames
}

func (ns *NsWrapper) GetCassandraContainerImages(dcName string) []string {
json := "jsonpath={.items[*].spec.containers[?(@.name == 'cassandra')].image}"
k := kubectl.Get("pods").
WithFlag("selector", fmt.Sprintf("cassandra.datastax.com/datacenter=%s", dcName)).
FormatOutput(json)

output := ns.OutputPanic(k)
images := strings.Split(output, " ")
sort.Strings(images)

return images
}

func (ns *NsWrapper) WaitForOperatorReady() {
step := "waiting for the operator to become ready"
json := "jsonpath={.items[0].status.containerStatuses[0].ready}"
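
As a usage sketch, these two helpers are meant to be combined in a test roughly as follows (a fragment only: ns is the test's NsWrapper, and the datacenter name and image tags are illustrative, borrowed from the canary upgrade test below). Note that GetCassandraContainerImages returns a sorted slice, while WaitForCassandraImages matches the images in kubectl's pod-order output.

// Block until one of the rack's three pods reports the upgraded image.
expected := []string{
	"datastax/cassandra-mgmtapi-3_11_6:v0.1.5",
	"datastax/cassandra-mgmtapi-3_11_6:v0.1.5",
	"datastax/cassandra-mgmtapi-3_11_7:v0.1.12",
}
ns.WaitForCassandraImages("dc1", expected, 300)

// Or take a point-in-time snapshot of the images for direct assertions.
images := ns.GetCassandraContainerImages("dc1")
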
@@ -48,6 +48,12 @@ spec:
description: Indicates that configuration and container image changes
should only be pushed to the first rack of the datacenter
type: boolean
canaryUpgradeCount:
description: The number of nodes that will be updated when CanaryUpgrade
is true. Note that if the value is 0 or greater than the rack
size, then all nodes in the rack will get updated.
format: int32
type: integer
clusterName:
description: The name by which CQL clients and instances will know the
cluster. If the same cluster name is shared by multiple Datacenters
@@ -122,6 +122,10 @@ type CassandraDatacenterSpec struct {
// the first rack of the datacenter
CanaryUpgrade bool `json:"canaryUpgrade,omitempty"`

// The number of nodes that will be updated when CanaryUpgrade is true. Note that if the value
// is 0 or greater than the rack size, then all nodes in the rack will get updated.
CanaryUpgradeCount int32 `json:"canaryUpgradeCount,omitempty"`

// Turning this option on allows multiple server pods to be created on a k8s worker node.
// By default the operator creates just one server pod per k8s worker node using k8s
// podAntiAffinity and requiredDuringSchedulingIgnoredDuringExecution.
16 changes: 16 additions & 0 deletions operator/pkg/reconciliation/reconcile_racks.go
@@ -229,6 +229,22 @@ func (rc *ReconciliationContext) CheckRackPodTemplate() result.ReconcileResult {
desiredSts.Labels = utils.MergeMap(map[string]string{}, statefulSet.Labels, desiredSts.Labels)
desiredSts.Annotations = utils.MergeMap(map[string]string{}, statefulSet.Annotations, desiredSts.Annotations)

if dc.Spec.CanaryUpgrade {
var partition int32
if dc.Spec.CanaryUpgradeCount == 0 || dc.Spec.CanaryUpgradeCount > int32(rc.desiredRackInformation[idx].NodeCount) {
partition = int32(rc.desiredRackInformation[idx].NodeCount)
} else {
partition = int32(rc.desiredRackInformation[idx].NodeCount) - dc.Spec.CanaryUpgradeCount
}
strategy := appsv1.StatefulSetUpdateStrategy{
Type: appsv1.RollingUpdateStatefulSetStrategyType,
RollingUpdate: &appsv1.RollingUpdateStatefulSetStrategy{
Partition: &partition,
},
}
desiredSts.Spec.UpdateStrategy = strategy
}

desiredSts.DeepCopyInto(statefulSet)
}

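To make the partition semantics concrete, here is an illustration (assuming fmt is imported and reusing the canaryPartition sketch above) based on the 3-node, one-rack layout from the e2e test below; the pod names assume the operator's <cluster>-<dc>-<rack>-sts-<ordinal> StatefulSet naming convention.

// Illustration only, not operator code: with 3 nodes and a canary count of 1,
// the partition is 2, so only the highest-ordinal pod is upgraded first.
func printCanaryPlan(nodeCount, canaryCount int32) {
	partition := canaryPartition(nodeCount, canaryCount) // 3, 1 -> 2
	for ordinal := int32(0); ordinal < nodeCount; ordinal++ {
		if ordinal >= partition {
			fmt.Printf("cluster1-dc1-r1-sts-%d rolls to the new image first\n", ordinal)
		} else {
			fmt.Printf("cluster1-dc1-r1-sts-%d stays on the old image\n", ordinal)
		}
	}
}
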
52 changes: 52 additions & 0 deletions operator/pkg/reconciliation/reconcile_racks_test.go
@@ -232,6 +232,58 @@ func TestCheckRackPodTemplate_SetControllerRefOnStatefulSet(t *testing.T) {
assert.Equal(t, rc.statefulSets[0].Name, actualObject.GetName())
}

func TestCheckRackPodTemplate_CanaryUpgrade(t *testing.T) {
rc, _, cleanupMockSrc := setupTest()
defer cleanupMockSrc()

rc.Datacenter.Spec.ServerVersion = "6.8.2"
rc.Datacenter.Spec.Racks = []api.Rack{
{Name: "rack1", Zone: "zone-1"},
}

if err := rc.CalculateRackInformation(); err != nil {
t.Fatalf("failed to calculate rack information: %s", err)
}

result := rc.CheckRackCreation()
assert.False(t, result.Completed(), "expected CheckRackCreation to not be completed yet")

if err := rc.Client.Update(rc.Ctx, rc.Datacenter); err != nil {
t.Fatalf("failed to add rack to cassandradatacenter: %s", err)
}

rc.Datacenter.Spec.CanaryUpgrade = true
rc.Datacenter.Spec.CanaryUpgradeCount = 1
rc.Datacenter.Spec.ServerVersion = "6.8.3"

result = rc.CheckRackPodTemplate()
_, err := result.Output()

assert.True(t, result.Completed())
assert.Nil(t, err)

assert.Equal(t, api.ProgressUpdating, rc.Datacenter.Status.CassandraOperatorProgress)

partition := &rc.Datacenter.Spec.CanaryUpgradeCount
expectedStrategy := appsv1.StatefulSetUpdateStrategy{
Type: appsv1.RollingUpdateStatefulSetStrategyType,
RollingUpdate: &appsv1.RollingUpdateStatefulSetStrategy{
Partition: partition,
},
}

assert.Equal(t, expectedStrategy, rc.statefulSets[0].Spec.UpdateStrategy)

rc.statefulSets[0].Status.Replicas = 2
rc.statefulSets[0].Status.ReadyReplicas = 2
rc.statefulSets[0].Status.CurrentReplicas = 1
rc.statefulSets[0].Status.UpdatedReplicas = 1

result = rc.CheckRackPodTemplate()

assert.True(t, result.Completed())
}

func TestReconcilePods(t *testing.T) {
t.Skip()
rc, _, cleanupMockScr := setupTest()
100 changes: 100 additions & 0 deletions tests/canary_upgrade/canary_upgrade_test.go
@@ -0,0 +1,100 @@
// Copyright DataStax, Inc.
// Please see the included license file for details.

package canary_upgrade

import (
"fmt"
"testing"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"

corev1 "k8s.io/api/core/v1"

ginkgo_util "github.com/datastax/cass-operator/mage/ginkgo"
"github.com/datastax/cass-operator/mage/kubectl"
)

var (
testName = "OSS test canary upgrade"
namespace = "test-canary-upgrade"
dcName = "dc1"
dcYaml = "../testdata/oss-upgrade-dc.yaml"
operatorYaml = "../testdata/operator.yaml"
dcResource = fmt.Sprintf("CassandraDatacenter/%s", dcName)
dcLabel = fmt.Sprintf("cassandra.datastax.com/datacenter=%s", dcName)
ns = ginkgo_util.NewWrapper(testName, namespace)
)

func TestLifecycle(t *testing.T) {
AfterSuite(func() {
logPath := fmt.Sprintf("%s/aftersuite", ns.LogDir)
kubectl.DumpAllLogs(logPath).ExecV()
fmt.Printf("\n\tPost-run logs dumped at: %s\n\n", logPath)
ns.Terminate()
})

RegisterFailHandler(Fail)
RunSpecs(t, testName)
}

var _ = Describe(testName, func() {
Context("when in a new cluster", func() {
Specify("the operator can perform a canary upgrade", func() {
By("creating a namespace")
err := kubectl.CreateNamespace(namespace).ExecV()
Expect(err).ToNot(HaveOccurred())

step := "setting up cass-operator resources via helm chart"
ns.HelmInstall("../../charts/cass-operator-chart")

ns.WaitForOperatorReady()

step = "creating a datacenter"
k := kubectl.ApplyFiles(dcYaml)
ns.ExecAndLog(step, k)

ns.WaitForSuperUserUpserted(dcName, 600)

step = "check recorded host IDs"
nodeStatusesHostIds := ns.GetNodeStatusesHostIds(dcName)
Expect(len(nodeStatusesHostIds)).To(Equal(3))

ns.WaitForDatacenterReady(dcName)
ns.WaitForDatacenterCondition(dcName, "Initialized", string(corev1.ConditionTrue))

step = "prepare for canary upgrade"
json := "{\"spec\": {\"canaryUpgrade\": true, \"canaryUpgradeCount\": 1}}"
k = kubectl.PatchMerge(dcResource, json)
ns.ExecAndLog(step, k)

step = "perform canary upgrade"
json = "{\"spec\": {\"serverVersion\": \"3.11.7\"}}"
k = kubectl.PatchMerge(dcResource, json)
ns.ExecAndLog(step, k)

ns.WaitForDatacenterOperatorProgress(dcName, "Updating", 30)
ns.WaitForDatacenterReadyPodCount(dcName, 3)

images := []string{
"datastax/cassandra-mgmtapi-3_11_6:v0.1.5",
"datastax/cassandra-mgmtapi-3_11_6:v0.1.5",
"datastax/cassandra-mgmtapi-3_11_7:v0.1.12",
}
ns.WaitForCassandraImages(dcName, images, 300)
ns.WaitForDatacenterReadyPodCount(dcName, 3)

step = "deleting the dc"
k = kubectl.DeleteFromFiles(dcYaml)
ns.ExecAndLog(step, k)

step = "checking that the dc no longer exists"
json = "jsonpath={.items}"
k = kubectl.Get("CassandraDatacenter").
WithLabel(dcLabel).
FormatOutput(json)
ns.WaitForOutputAndLog(step, k, "[]", 300)
})
})
})
27 changes: 27 additions & 0 deletions tests/testdata/oss-upgrade-dc.yaml
@@ -0,0 +1,27 @@
apiVersion: cassandra.datastax.com/v1beta1
kind: CassandraDatacenter
metadata:
name: dc1
spec:
clusterName: cluster1
serverType: cassandra
# Do not change serverVersion. This cassdc is intended for testing a canary
# upgrade specifically from this version.
serverVersion: "3.11.6"
managementApiAuth:
insecure: {}
size: 3
storageConfig:
cassandraDataVolumeClaimSpec:
storageClassName: server-storage
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
racks:
- name: r1
config:
jvm-options:
initial_heap_size: "800m"
max_heap_size: "800m"
