Skip to content

Commit

Permalink
e2e: Cherry-pick #790, #801 to oadp-1.1 - enable restic data verifica…
Browse files Browse the repository at this point in the history
…tion, tuning timeouts, reliability (#806)

* cherry-pick #790

set defaultVolumesToRestic in e2e for restic backup

fix restic prebackup verify

delete backupFile if it exists

Print command output, fix err handling.

ignore log `num errors=0`

merge fix

fix cutset

variable renames, DpaCustomResource struct cleanup

eliminate custom "credentialsFile", preferring bsl.Credentials

trim restore name, run post restore script

keep 43 chars for restore name with uuid

restore name no longer needs trimming

Fix creating restore name

detect sha256sum is working else use shasum

update checksum_cmd to uppercase and actually use var

remove two-phase restore. Use single restore with post restore script.

refactor DoesBSLExist, DoesVSLExist

print diff

Make nil velero config map equal to empty map.

cherry-pick of #801

e2e: Increase app build timeouts to 5 minutes

Create PVC for CSI before installing app.

Add mongo pvc for ibm cloud

InstallApplication Update if already exists. Resolves issue if existing is outdated

Revert "Create PVC for CSI before installing app."

This reverts commit efdb994.

e2e: resolve pvc create failure if previous test fails to uninstall app namespace

Change backup timeout to 12 minutes

Example of test failing because it didn't wait long enough https://prow.ci.openshift.org/view/gs/origin-ci-test/pr-logs/pull/openshift_oadp-operator/801/pull-ci-openshift-oadp-operator-master-4.11-operator-e2e-aws/1561223345897213952#1:build-log.txt%3A7247
Default velero csi snapshot timeout is 10 minutes.

Resolve azure 4.8, 4.9 fallback namespace deletion failure due to empty resource name.

Revert "cherry-pick of #801"

This reverts commit b9bc43e.

* e2e: print container logs on failure only, increase app build timeouts (#801)

* e2e: do not print container logs on server version skip

* e2e: Increase app build timeouts to 5 minutes

* Create PVC for CSI before installing app.

* Add mongo pvc for ibm cloud

* InstallApplication Update if already exists. Resolves issue if existing is outdated

* Revert "Create PVC for CSI before installing app."

This reverts commit efdb994.

* e2e: resolve pvc create failure if previous test fails to uninstall app namespace

* Change backup timeout to 12 minutes

Example of test failing because it didn't wait long enough https://prow.ci.openshift.org/view/gs/origin-ci-test/pr-logs/pull/openshift_oadp-operator/801/pull-ci-openshift-oadp-operator-master-4.11-operator-e2e-aws/1561223345897213952#1:build-log.txt%3A7247
Default velero csi snapshot timeout is 10 minutes.

* Resolve azure 4.8, 4.9 fallback namespace deletion failure due to empty resource name.

* DeepEqual check resource fields before making update call

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>
  • Loading branch information
kaovilai committed Aug 26, 2022
1 parent 2cde554 commit 2b11fe1
Show file tree
Hide file tree
Showing 9 changed files with 223 additions and 141 deletions.
110 changes: 46 additions & 64 deletions tests/e2e/backup_restore_suite_test.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
package e2e_test

import (
"context"
"errors"
"fmt"
"log"
"time"

"github.com/google/uuid"
. "github.com/onsi/ginkgo/v2"
"github.com/onsi/ginkgo/v2/types"
. "github.com/onsi/gomega"
. "github.com/openshift/oadp-operator/tests/e2e/lib"
utils "github.com/openshift/oadp-operator/tests/e2e/utils"
corev1 "k8s.io/api/core/v1"
k8serror "k8s.io/apimachinery/pkg/api/errors"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

Expand Down Expand Up @@ -53,32 +57,38 @@ var _ = Describe("AWS backup restore tests", func() {
// Per-spec setup: name the DPA custom resource for this test-suite instance
// and create the credentials secret from the contents of credFile.
var _ = BeforeEach(func() {
// The DPA CR name is the instance name with a "ts-" prefix.
testSuiteInstanceName := "ts-" + instanceName
dpaCR.Name = testSuiteInstanceName

// Read credFile and hand its contents to CreateCredentialsSecret together
// with namespace and GetSecretRef(credSecretRef); either error is asserted
// not to have occurred.
credData, err := utils.ReadFile(credFile)
Expect(err).NotTo(HaveOccurred())
err = CreateCredentialsSecret(credData, namespace, GetSecretRef(credSecretRef))
Expect(err).NotTo(HaveOccurred())
})

var _ = AfterEach(func() {
GinkgoWriter.Println("Printing velero deployment pod logs")
logs, err := GetVeleroContainerLogs(namespace)
Expect(err).NotTo(HaveOccurred())
GinkgoWriter.Println(logs)
GinkgoWriter.Println("End of velero deployment pod logs")
err = dpaCR.Delete()
Expect(err).ToNot(HaveOccurred())

})
var lastInstallingApplicationNamespace string
var lastInstallTime time.Time
// Per-spec teardown that receives the spec report: skipped specs are ignored,
// failed specs get extra diagnostics printed, and afterwards the last app
// namespace and the DPA CR are deleted.
var _ = ReportAfterEach(func(report SpecReport) {
if report.State == types.SpecStateSkipped {
// do not run if the test is skipped
return
}
GinkgoWriter.Println("Report after each: state: ", report.State.String())
if report.Failed() {
// print namespace error events for app namespace
// (only when an app namespace was recorded for this spec)
if lastInstallingApplicationNamespace != "" {
PrintNamespaceEventsAfterTime(lastInstallingApplicationNamespace, lastInstallTime)
}
// Dump the velero container logs on failure only.
GinkgoWriter.Println("Printing velero deployment pod logs")
logs, err := GetVeleroContainerLogs(namespace)
Expect(err).NotTo(HaveOccurred())
GinkgoWriter.Println(logs)
GinkgoWriter.Println("End of velero deployment pod logs")
}
// Remove the app namespace if it is left over (likely because a previous
// failure happened before reaching uninstall applications), clearing items
// such as PVCs, which are immutable, so that the next test can create new ones.
// NOTE(review): this Delete is issued even when
// lastInstallingApplicationNamespace is "" (the non-empty guard above only
// covers event printing) — confirm an empty name cannot produce an error
// other than NotFound here.
err := dpaCR.Client.Delete(context.Background(), &corev1.Namespace{ObjectMeta: v1.ObjectMeta{
Name: lastInstallingApplicationNamespace,
Namespace: lastInstallingApplicationNamespace,
}}, &client.DeleteOptions{})
// A namespace that is already gone is not treated as an error.
if k8serror.IsNotFound(err) {
err = nil
}
Expect(err).ToNot(HaveOccurred())
// Finally delete the DPA CR and assert that the deletion succeeded.
err = dpaCR.Delete()
Expect(err).ToNot(HaveOccurred())
})

type BackupRestoreCase struct {
Expand Down Expand Up @@ -160,7 +170,7 @@ var _ = Describe("AWS backup restore tests", func() {
}

// wait for pods to be running
Eventually(AreAppBuildsReady(dpaCR.Client, brCase.ApplicationNamespace), timeoutMultiplier*time.Minute*3, time.Second*5).Should(BeTrue())
Eventually(AreAppBuildsReady(dpaCR.Client, brCase.ApplicationNamespace), timeoutMultiplier*time.Minute*5, time.Second*5).Should(BeTrue())
Eventually(AreApplicationPodsRunning(brCase.ApplicationNamespace), timeoutMultiplier*time.Minute*9, time.Second*5).Should(BeTrue())

// Run optional custom verification
Expand All @@ -172,11 +182,11 @@ var _ = Describe("AWS backup restore tests", func() {
Expect(err).ToNot(HaveOccurred())
// create backup
log.Printf("Creating backup %s for case %s", backupName, brCase.Name)
backup, err := CreateBackupForNamespaces(dpaCR.Client, namespace, backupName, []string{brCase.ApplicationNamespace})
backup, err := CreateBackupForNamespaces(dpaCR.Client, namespace, backupName, []string{brCase.ApplicationNamespace}, brCase.BackupRestoreType == RESTIC)
Expect(err).ToNot(HaveOccurred())

// wait for backup to not be running
Eventually(IsBackupDone(dpaCR.Client, namespace, backupName), timeoutMultiplier*time.Minute*4, time.Second*10).Should(BeTrue())
Eventually(IsBackupDone(dpaCR.Client, namespace, backupName), timeoutMultiplier*time.Minute*12, time.Second*10).Should(BeTrue())
GinkgoWriter.Println(DescribeBackup(dpaCR.Client, backup))
Expect(BackupErrorLogs(dpaCR.Client, backup)).To(Equal([]string{}))

Expand All @@ -200,52 +210,24 @@ var _ = Describe("AWS backup restore tests", func() {
Eventually(IsNamespaceDeleted(brCase.ApplicationNamespace), timeoutMultiplier*time.Minute*2, time.Second*5).Should(BeTrue())

updateLastInstallingNamespace(brCase.ApplicationNamespace)
// Check if backup needs restic deploymentconfig workaround. https://github.com/openshift/oadp-operator/blob/master/docs/TROUBLESHOOTING.md#deployconfig
if brCase.BackupRestoreType == RESTIC && nsRequiresResticDCWorkaround {
log.Printf("DC found in backup namespace, using DC restic workaround")
var dcWorkaroundResources = []string{"replicationcontroller", "deploymentconfig", "templateinstances.template.openshift.io"}
// run restore
log.Printf("Creating restore %s excluding DC workaround resources for case %s", restoreName, brCase.Name)
noDcDrestoreName := fmt.Sprintf("%s-no-dc-workaround", restoreName)
restore, err := CreateRestoreFromBackup(dpaCR.Client, namespace, backupName, noDcDrestoreName, WithExcludedResources(dcWorkaroundResources))
Expect(err).ToNot(HaveOccurred())
Eventually(IsRestoreDone(dpaCR.Client, namespace, noDcDrestoreName), timeoutMultiplier*time.Minute*4, time.Second*10).Should(BeTrue())
GinkgoWriter.Println(DescribeRestore(dpaCR.Client, restore))
Expect(RestoreErrorLogs(dpaCR.Client, restore)).To(Equal([]string{}))

// Check if restore succeeded
succeeded, err = IsRestoreCompletedSuccessfully(dpaCR.Client, namespace, noDcDrestoreName)
Expect(err).ToNot(HaveOccurred())
Expect(succeeded).To(Equal(true))
Eventually(AreAppBuildsReady(dpaCR.Client, brCase.ApplicationNamespace), timeoutMultiplier*time.Minute*3, time.Second*5).Should(BeTrue())

// run restore
log.Printf("Creating restore %s including DC workaround resources for case %s", restoreName, brCase.Name)
withDcRestoreName := fmt.Sprintf("%s-with-dc-workaround", restoreName)
restore, err = CreateRestoreFromBackup(dpaCR.Client, namespace, backupName, withDcRestoreName, WithIncludedResources(dcWorkaroundResources))
Expect(err).ToNot(HaveOccurred())
Eventually(IsRestoreDone(dpaCR.Client, namespace, withDcRestoreName), timeoutMultiplier*time.Minute*4, time.Second*10).Should(BeTrue())
GinkgoWriter.Println(DescribeRestore(dpaCR.Client, restore))
Expect(RestoreErrorLogs(dpaCR.Client, restore)).To(Equal([]string{}))

// Check if restore succeeded
succeeded, err = IsRestoreCompletedSuccessfully(dpaCR.Client, namespace, withDcRestoreName)
Expect(err).ToNot(HaveOccurred())
Expect(succeeded).To(Equal(true))
// run restore
log.Printf("Creating restore %s for case %s", restoreName, brCase.Name)
restore, err := CreateRestoreFromBackup(dpaCR.Client, namespace, backupName, restoreName)
Expect(err).ToNot(HaveOccurred())
Eventually(IsRestoreDone(dpaCR.Client, namespace, restoreName), timeoutMultiplier*time.Minute*4, time.Second*10).Should(BeTrue())
GinkgoWriter.Println(DescribeRestore(dpaCR.Client, restore))
Expect(RestoreErrorLogs(dpaCR.Client, restore)).To(Equal([]string{}))

} else {
// run restore
log.Printf("Creating restore %s for case %s", restoreName, brCase.Name)
restore, err := CreateRestoreFromBackup(dpaCR.Client, namespace, backupName, restoreName)
Expect(err).ToNot(HaveOccurred())
Eventually(IsRestoreDone(dpaCR.Client, namespace, restoreName), timeoutMultiplier*time.Minute*4, time.Second*10).Should(BeTrue())
GinkgoWriter.Println(DescribeRestore(dpaCR.Client, restore))
Expect(RestoreErrorLogs(dpaCR.Client, restore)).To(Equal([]string{}))
// Check if restore succeeded
succeeded, err = IsRestoreCompletedSuccessfully(dpaCR.Client, namespace, restoreName)
Expect(err).ToNot(HaveOccurred())
Expect(succeeded).To(Equal(true))

// Check if restore succeeded
succeeded, err = IsRestoreCompletedSuccessfully(dpaCR.Client, namespace, restoreName)
if brCase.BackupRestoreType == RESTIC && nsRequiresResticDCWorkaround {
// run the restic post restore script if restore type is RESTIC
log.Printf("Running restic post restore script for case %s", brCase.Name)
err = RunResticPostRestoreScript(restoreName)
Expect(err).ToNot(HaveOccurred())
Expect(succeeded).To(Equal(true))
}

// verify app is running
Expand Down Expand Up @@ -294,15 +276,15 @@ var _ = Describe("AWS backup restore tests", func() {
ApplicationNamespace: "mongo-persistent",
Name: "mongo-restic-e2e",
BackupRestoreType: RESTIC,
PreBackupVerify: mongoready(false, RESTIC),
PreBackupVerify: mongoready(true, RESTIC),
PostRestoreVerify: mongoready(false, RESTIC),
}, nil),
Entry("MySQL application RESTIC", BackupRestoreCase{
ApplicationTemplate: "./sample-applications/mysql-persistent/mysql-persistent.yaml",
ApplicationNamespace: "mysql-persistent",
Name: "mysql-restic-e2e",
BackupRestoreType: RESTIC,
PreBackupVerify: mysqlReady(false, RESTIC),
PreBackupVerify: mysqlReady(true, RESTIC),
PostRestoreVerify: mysqlReady(false, RESTIC),
}, nil),
)
Expand Down
36 changes: 30 additions & 6 deletions tests/e2e/dpa_deployment_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
provider := Dpa.Spec.BackupLocations[0].Velero.Provider
bucket := Dpa.Spec.BackupLocations[0].Velero.ObjectStorage.Bucket
bslConfig := Dpa.Spec.BackupLocations[0].Velero.Config
bslCredential := corev1.SecretKeySelector{
LocalObjectReference: corev1.LocalObjectReference{
Name: "bsl-cloud-credentials-" + provider,
},
Key: "cloud",
}

type InstallCase struct {
Name string
Expand Down Expand Up @@ -57,6 +63,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -90,6 +97,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -131,6 +139,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -177,6 +186,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -212,6 +222,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -245,6 +256,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -277,6 +289,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -311,6 +324,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand All @@ -333,6 +347,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -369,6 +384,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -431,6 +447,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -466,6 +483,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -499,6 +517,7 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
Prefix: VeleroPrefix,
},
},
Credential: &bslCredential,
},
},
},
Expand Down Expand Up @@ -536,10 +555,15 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
err := dpaCR.Build(installCase.BRestoreType)
Expect(err).NotTo(HaveOccurred())
if len(installCase.DpaSpec.BackupLocations) > 0 {
if installCase.DpaSpec.BackupLocations[0].Velero.Config != nil {
installCase.DpaSpec.BackupLocations[0].Velero.Config["credentialsFile"] = "bsl-cloud-credentials-" + dpaCR.Provider + "/cloud"
if installCase.TestCarriageReturn {
installCase.DpaSpec.BackupLocations[0].Velero.Config["credentialsFile"] = "bsl-cloud-credentials-" + dpaCR.Provider + "-with-carriage-return/cloud"
if installCase.DpaSpec.BackupLocations[0].Velero.Credential == nil {
installCase.DpaSpec.BackupLocations[0].Velero.Credential = &bslCredential
}
if installCase.TestCarriageReturn {
installCase.DpaSpec.BackupLocations[0].Velero.Credential = &corev1.SecretKeySelector{
LocalObjectReference: corev1.LocalObjectReference{
Name: "bsl-cloud-credentials-" + dpaCR.Provider + "-with-carriage-return",
},
Key: bslCredential.Key,
}
}
}
Expand All @@ -564,13 +588,13 @@ var _ = Describe("Configuration testing for DPA Custom Resource", func() {
log.Printf("Checking for bsl spec")
for _, bsl := range dpa.Spec.BackupLocations {
// Check if bsl matches the spec
Eventually(DoesBSLExist(namespace, *bsl.Velero, installCase.DpaSpec), timeoutMultiplier*time.Minute*3, time.Second*5).Should(BeTrue())
Expect(DoesBSLSpecMatchesDpa(namespace, *bsl.Velero, installCase.DpaSpec)).To(BeTrue())
}
}
if len(dpa.Spec.SnapshotLocations) > 0 {
log.Printf("Checking for vsl spec")
for _, vsl := range dpa.Spec.SnapshotLocations {
Eventually(DoesVSLExist(namespace, *vsl.Velero, installCase.DpaSpec), timeoutMultiplier*time.Minute*3, time.Second*5).Should(BeTrue())
Expect(DoesVSLSpecMatchesDpa(namespace, *vsl.Velero, installCase.DpaSpec)).To(BeTrue())
}
}

Expand Down
Loading

0 comments on commit 2b11fe1

Please sign in to comment.