Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change the selection of downgrade version in TestUpgradeBrokenPackageVersion #3458

Closed
wants to merge 3 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 66 additions & 43 deletions testing/integration/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ package integration
import (
"context"
"encoding/json"
"errors"
"fmt"
"io/fs"
"net/http"
Expand Down Expand Up @@ -45,7 +44,14 @@ import (
agtversion "github.com/elastic/elastic-agent/version"
)

const fastWatcherCfg = `
// The watcher will need the default 10 minutes to complete for a Fleet managed agent, see https://github.com/elastic/elastic-agent/issues/2977.
const defaultWatcherDuration = 10 * time.Minute

// Configure standalone agents to complete faster to speed up tests.
const standaloneWatcherDuration = time.Minute

// Note: this configuration can't apply to Fleet managed upgrades until https://github.com/elastic/elastic-agent/issues/2977 is resolved
var fastWatcherCfg = `
agent.upgrade.watcher:
grace_period: 1m
error_check.interval: 15s
Expand Down Expand Up @@ -94,8 +100,6 @@ func TestFleetManagedUpgrade(t *testing.T) {
err = agentFixture.Prepare(ctx)
require.NoError(t, err, "error preparing agent fixture")

err = agentFixture.Configure(ctx, []byte(fastWatcherCfg))
require.NoError(t, err, "error configuring agent fixture")
testUpgradeFleetManagedElasticAgent(t, ctx, info, agentFixture, parsedVersion, define.Version())
})
}
Expand Down Expand Up @@ -159,6 +163,11 @@ func testUpgradeFleetManagedElasticAgent(t *testing.T, ctx context.Context, info
err = tools.UpgradeAgent(kibClient, policy.ID, toVersion)
require.NoError(t, err)

t.Cleanup(func() {
// The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371
waitForUpgradeWatcherToComplete(t, agentFixture, parsedFromVersion, defaultWatcherDuration)
})

t.Log(`Waiting for enrolled Agent status to be "online"...`)
require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, policy.ID, "online"), 10*time.Minute, 15*time.Second, "Agent status is not online")

Expand Down Expand Up @@ -386,12 +395,7 @@ func TestStandaloneDowngradeToPreviousSnapshotBuild(t *testing.T) {
testStandaloneUpgrade(ctx, t, agentFixture, parsedFromVersion, upgradeInputVersion, expectedAgentHashAfterUpgrade, false, true, false, CustomPGP{})
}

func getUpgradableVersions(ctx context.Context, t *testing.T, upgradeToVersion string) (upgradableVersions []*version.ParsedSemVer) {
t.Helper()

const currentMajorVersions = 2
const previousMajorVersions = 1

func getNoOfUpgradableVersions(ctx context.Context, t *testing.T, upgradeToVersion string, noOfCurrentMajor, noOfPreviousMajor int, skipSnapshots bool) (upgradableVersions []*version.ParsedSemVer) {
aac := tools.NewArtifactAPIClient()
vList, err := aac.GetVersions(ctx)
require.NoError(t, err, "error retrieving versions from Artifact API")
Expand All @@ -416,7 +420,7 @@ func getUpgradableVersions(ctx context.Context, t *testing.T, upgradeToVersion s
sort.Sort(sort.Reverse(sortedParsedVersions))

for _, parsedVersion := range sortedParsedVersions {
if currentMajorSelected == currentMajorVersions && previousMajorSelected == previousMajorVersions {
if currentMajorSelected == noOfCurrentMajor && previousMajorSelected == noOfPreviousMajor {
// we got all the versions we need, break the loop
break
}
Expand All @@ -427,18 +431,18 @@ func getUpgradableVersions(ctx context.Context, t *testing.T, upgradeToVersion s
continue
}

if parsedVersion.IsSnapshot() {
if skipSnapshots && parsedVersion.IsSnapshot() {
// skip all snapshots
continue
}

if parsedVersion.Major() == currentMajor && currentMajorSelected < currentMajorVersions {
if parsedVersion.Major() == currentMajor && currentMajorSelected < noOfCurrentMajor {
upgradableVersions = append(upgradableVersions, parsedVersion)
currentMajorSelected++
continue
}

if parsedVersion.Major() < currentMajor && previousMajorSelected < previousMajorVersions {
if parsedVersion.Major() < currentMajor && previousMajorSelected < noOfPreviousMajor {
upgradableVersions = append(upgradableVersions, parsedVersion)
previousMajorSelected++
continue
Expand All @@ -448,6 +452,15 @@ func getUpgradableVersions(ctx context.Context, t *testing.T, upgradeToVersion s
return
}

// getUpgradableVersions is a convenience wrapper around
// getNoOfUpgradableVersions with fixed selection limits: at most two versions
// from the current major, at most one from an older major, and snapshot
// builds skipped. The result is returned to the caller unchanged.
func getUpgradableVersions(ctx context.Context, t *testing.T, upgradeToVersion string) (upgradableVersions []*version.ParsedSemVer) {
	t.Helper()

	// Fixed selection limits handed to the generic helper.
	const (
		currentMajorVersions  = 2
		previousMajorVersions = 1
		skipSnapshots         = true
	)

	upgradableVersions = getNoOfUpgradableVersions(
		ctx,
		t,
		upgradeToVersion,
		currentMajorVersions,
		previousMajorVersions,
		skipSnapshots,
	)
	return upgradableVersions
}

func testStandaloneUpgrade(
ctx context.Context,
t *testing.T,
Expand Down Expand Up @@ -526,12 +539,15 @@ func testStandaloneUpgrade(
require.NoErrorf(t, err, "error triggering agent upgrade to version %q, output:\n%s",
parsedUpgradeVersion, upgradeTriggerOutput)

t.Cleanup(func() {
// The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371
waitForUpgradeWatcherToComplete(t, f, parsedFromVersion, standaloneWatcherDuration)
})

require.Eventuallyf(t, func() bool {
return checkAgentHealthAndVersion(t, ctx, f, parsedUpgradeVersion.CoreVersion(), parsedUpgradeVersion.IsSnapshot(), expectedAgentHashAfterUpgrade)
}, 5*time.Minute, 1*time.Second, "agent never upgraded to expected version")

checkUpgradeWatcherRan(t, f, parsedFromVersion)

if expectedAgentHashAfterUpgrade != "" {
aVersion, err := c.Version(ctx)
assert.NoError(t, err, "error checking version after upgrade")
Expand Down Expand Up @@ -609,28 +625,17 @@ func checkLegacyAgentHealthAndVersion(t *testing.T, ctx context.Context, f *ates

}

// checkUpgradeWatcherRan asserts that the Upgrade Watcher finished running. We use the
// presence of the update marker file as evidence that the Upgrade Watcher is still running
// and the absence of that file as evidence that the Upgrade Watcher is no longer running.
func checkUpgradeWatcherRan(t *testing.T, agentFixture *atesting.Fixture, fromVersion *version.ParsedSemVer) {
// waitForUpgradeWatcherToComplete asserts that the Upgrade Watcher finished running.
func waitForUpgradeWatcherToComplete(t *testing.T, f *atesting.Fixture, fromVersion *version.ParsedSemVer, timeout time.Duration) {
t.Helper()

if fromVersion.Less(*version_8_9_0_SNAPSHOT) {
t.Logf("Version %q is too old for a quick update marker check, skipping...", fromVersion)
return
t.Logf("Version %q is too old for a quick update marker check", fromVersion)
timeout = defaultWatcherDuration
}

t.Log("Waiting for upgrade watcher to finish running...")

updateMarkerFile := filepath.Join(agentFixture.WorkDir(), "data", ".update-marker")
require.FileExists(t, updateMarkerFile)

now := time.Now()
require.Eventuallyf(t, func() bool {
_, err := os.Stat(updateMarkerFile)
return errors.Is(err, fs.ErrNotExist)
}, 2*time.Minute, 15*time.Second, "agent never removed update marker")
t.Logf("Upgrade Watcher completed in %s", time.Now().Sub(now))
t.Logf("Waiting %s for upgrade watcher to finish running", timeout)
time.Sleep(timeout)
}

func extractCommitHashFromArtifact(t *testing.T, ctx context.Context, artifactVersion *version.ParsedSemVer, agentProject tools.Project) string {
Expand Down Expand Up @@ -711,6 +716,11 @@ func TestStandaloneUpgradeRetryDownload(t *testing.T) {
err = agentFixture.Configure(ctx, []byte(fastWatcherCfg))
require.NoError(t, err, "error configuring agent fixture")

t.Cleanup(func() {
// The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371
waitForUpgradeWatcherToComplete(t, agentFixture, upgradeFromVersion, standaloneWatcherDuration)
})

t.Log("Install the built Agent")
output, err := tools.InstallStandaloneAgent(agentFixture)
t.Log(string(output))
Expand Down Expand Up @@ -797,8 +807,6 @@ func TestStandaloneUpgradeRetryDownload(t *testing.T) {
t.Log("Waiting for upgrade to finish")
wg.Wait()

checkUpgradeWatcherRan(t, agentFixture, upgradeFromVersion)

t.Log("Check Agent version to ensure upgrade is successful")
currentVersion, err = getVersion(t, ctx, agentFixture)
require.NoError(t, err)
Expand Down Expand Up @@ -860,6 +868,9 @@ func TestUpgradeBrokenPackageVersion(t *testing.T) {
f, err := define.NewFixture(t, define.Version())
require.NoError(t, err)

fromVersion, err := version.ParseVersion(define.Version())
require.NoError(t, err)

// Prepare the Elastic Agent so the binary is extracted and ready to use.
err = f.Prepare(context.Background())
require.NoError(t, err)
Expand Down Expand Up @@ -908,18 +919,27 @@ func TestUpgradeBrokenPackageVersion(t *testing.T) {
versionList, err := aac.GetVersions(ctx)
require.NoError(t, err)
require.NotEmpty(t, versionList.Versions, "Artifact API returned no versions")
latestVersion := versionList.Versions[len(versionList.Versions)-1]

t.Logf("Upgrading to version %q", latestVersion)
// transform and reverse the version list and find the most recent version that is different from the broken version returned

// look for 1 version with the current major that is lower than the current one
Comment on lines +923 to +925
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing it's 1 thing? Am I right?

Suggested change
// transform and reverse the version list and find the most recent version that is different from the broken version returned
// look for 1 version with the current major that is lower than the current one
// transform and reverse the version list and find the most recent version that is different from the broken version returned
// look for 1 version with the current major that is lower than the current one

upgradableVersions := getNoOfUpgradableVersions(ctx, t, actualVersion, 1, 0, false)
require.NotEmpty(t, upgradableVersions, "No version found to downgrade to from Artifact API")

err = c.Connect(ctx)
require.NoError(t, err, "error connecting client to agent")
defer c.Disconnect()

_, err = c.Upgrade(ctx, latestVersion, "", false, false)
require.NoErrorf(t, err, "error triggering agent upgrade to version %q", latestVersion)
parsedLatestVersion, err := version.ParseVersion(latestVersion)
require.NoError(t, err)
downgradeParsedVersion := upgradableVersions[0]
downgradeVersion := downgradeParsedVersion.VersionWithPrerelease()

_, err = c.Upgrade(ctx, downgradeVersion, "", false, false)
require.NoErrorf(t, err, "error triggering agent upgrade to version %q", downgradeVersion)

t.Cleanup(func() {
// The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371
waitForUpgradeWatcherToComplete(t, f, fromVersion, standaloneWatcherDuration)
})

require.Eventuallyf(t, func() bool {
state, err := c.State(ctx)
Expand All @@ -928,8 +948,8 @@ func TestUpgradeBrokenPackageVersion(t *testing.T) {
return false
}
t.Logf("current agent state: %+v", state)
return state.Info.Version == parsedLatestVersion.CoreVersion() &&
state.Info.Snapshot == parsedLatestVersion.IsSnapshot() &&
return state.Info.Version == downgradeParsedVersion.CoreVersion() &&
state.Info.Snapshot == downgradeParsedVersion.IsSnapshot() &&
state.State == v2proto.State_HEALTHY
}, 5*time.Minute, 10*time.Second, "agent never upgraded to expected version")
}
Expand Down Expand Up @@ -1049,6 +1069,9 @@ inputs:
return checkAgentHealthAndVersion(t, ctx, agentFixture, upgradeToVersion.CoreVersion(), upgradeToVersion.IsSnapshot(), "")
}, 2*time.Minute, 250*time.Millisecond, "Upgraded Agent never became healthy")

// Wait for upgrade watcher to finish running
waitForUpgradeWatcherToComplete(t, agentFixture, upgradeFromVersion, standaloneWatcherDuration)

t.Log("Ensure the we have rolled back and the correct version is running")
require.Eventually(t, func() bool {
return checkAgentHealthAndVersion(t, ctx, agentFixture, upgradeFromVersion.CoreVersion(), upgradeFromVersion.IsSnapshot(), "")
Expand Down
Loading