Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Elastic Agent non-fleet broken upgrade between 8.3.x releases #701

Merged
merged 9 commits into from
Jul 19, 2022
2 changes: 1 addition & 1 deletion internal/pkg/agent/application/paths/paths_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ const defaultAgentVaultPath = "vault"

// AgentVaultPath is the directory that contains all the files for the value
func AgentVaultPath() string {
return filepath.Join(Home(), defaultAgentVaultPath)
return filepath.Join(Top(), defaultAgentVaultPath)
}
2 changes: 1 addition & 1 deletion internal/pkg/agent/application/paths/paths_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,5 @@ func ArePathsEqual(expected, actual string) bool {

// AgentVaultPath is the directory that contains all the files for the value
func AgentVaultPath() string {
return filepath.Join(Home(), defaultAgentVaultPath)
return filepath.Join(Top(), defaultAgentVaultPath)
}
36 changes: 29 additions & 7 deletions internal/pkg/agent/application/secret/secret.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,23 +80,25 @@ func Create(key string, opts ...OptionFunc) error {
CreatedOn: time.Now().UTC(),
}

b, err := json.Marshal(secret)
if err != nil {
return err
}

return v.Set(key, b)
return set(v, key, secret)
}

// GetAgentSecret read the agent secret from the vault
func GetAgentSecret(opts ...OptionFunc) (secret Secret, err error) {
return Get(agentSecretKey, opts...)
}

// SetAgentSecret saves the agent secret from the vault
// This is needed for migration from 8.3.0-8.3.2 to higher versions
func SetAgentSecret(secret Secret, opts ...OptionFunc) error {
return Set(agentSecretKey, secret, opts...)
}

// Get reads the secret key from the vault
func Get(key string, opts ...OptionFunc) (secret Secret, err error) {
options := applyOptions(opts...)
v, err := vault.New(options.vaultPath)
// open vault readonly, will not create the vault directory or the seed it was not created before
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo?

Suggested change
// open vault readonly, will not create the vault directory or the seed it was not created before
// open vault readonly, it will not create the vault directory or the seed if it was not created before

v, err := vault.New(options.vaultPath, vault.WithReadonly(true))
if err != nil {
return secret, err
}
Expand All @@ -111,6 +113,26 @@ func Get(key string, opts ...OptionFunc) (secret Secret, err error) {
return secret, err
}

// Set saves the secret key to the vault
func Set(key string, secret Secret, opts ...OptionFunc) error {
options := applyOptions(opts...)
v, err := vault.New(options.vaultPath)
if err != nil {
return err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Suggestion]
Give more context to the error

Suggested change
return err
return fmt.Errorf("could not create new vault: %w", err)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}
defer v.Close()
return set(v, key, secret)
}

func set(v *vault.Vault, key string, secret Secret) error {
b, err := json.Marshal(secret)
if err != nil {
return err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Suggestion]
Wrap the error to add more context

Suggested change
return err
return fmt.Errorf("could not marshal secret: %w", err)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}

return v.Set(key, b)
}

// Remove removes the secret key from the vault
func Remove(key string, opts ...OptionFunc) error {
options := applyOptions(opts...)
Expand Down
37 changes: 0 additions & 37 deletions internal/pkg/agent/application/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"io/ioutil"
"os"
"path/filepath"
"runtime"
"strings"

"github.com/otiai10/copy"
Expand All @@ -33,7 +32,6 @@ const (
agentName = "elastic-agent"
hashLen = 6
agentCommitFile = ".elastic-agent.active.commit"
darwin = "darwin"
)

var (
Expand Down Expand Up @@ -161,11 +159,6 @@ func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ ree
return nil, nil
}

// Copy vault directory for linux/windows only
if err := copyVault(newHash); err != nil {
return nil, errors.New(err, "failed to copy vault")
}

if err := copyActionStore(newHash); err != nil {
return nil, errors.New(err, "failed to copy action store")
}
Expand Down Expand Up @@ -300,36 +293,6 @@ func copyActionStore(newHash string) error {
return nil
}

func getVaultPath(newHash string) string {
vaultPath := paths.AgentVaultPath()
if runtime.GOOS == darwin {
return vaultPath
}
newHome := filepath.Join(filepath.Dir(paths.Home()), fmt.Sprintf("%s-%s", agentName, newHash))
return filepath.Join(newHome, filepath.Base(vaultPath))
}

// Copies the vault files for windows and linux
func copyVault(newHash string) error {
// No vault files to copy on darwin
if runtime.GOOS == darwin {
return nil
}

vaultPath := paths.AgentVaultPath()
newVaultPath := getVaultPath(newHash)

err := copyDir(vaultPath, newVaultPath)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}

return nil
}

// shutdownCallback returns a callback function to be executing during shutdown once all processes are closed.
// this goes through runtime directory of agent and copies all the state files created by processes to new versioned
// home directory with updated process name to match new version.
Expand Down
16 changes: 16 additions & 0 deletions internal/pkg/agent/cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"
"github.com/elastic/elastic-agent/internal/pkg/agent/control/server"
"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/internal/pkg/agent/migration"
"github.com/elastic/elastic-agent/internal/pkg/agent/storage"
"github.com/elastic/elastic-agent/internal/pkg/cli"
"github.com/elastic/elastic-agent/internal/pkg/config"
Expand Down Expand Up @@ -121,6 +122,21 @@ func run(override cfgOverrider) error {
createAgentID = false
}

// This is specific for the agent upgrade from 8.3.0 - 8.3.2 to 8.x and above on Linux and Windows platforms.
// Addresses the issue: https://github.com/elastic/elastic-agent/issues/682
// The vault directory was located in the hash versioned "Home" directory of the agent.
// This moves the vault directory two levels up into the "Config" directory next to fleet.enc file
// in order to be able to "upgrade" the agent from deb/rpm that is not invoking the upgrade handle and
// doesn't perform the migration of the state or vault.
// If the agent secret doesn't exist, then search for the newest agent secret in the agent data directories
// and migrate it into the new vault location.
err = migration.MigrateAgentSecret(logger)
logger.Debug("migration of agent secret completed, err: %v", err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it need to be Debugf for a line like this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

think it's fine, it's noop on Darwin and would only do this kind of migration if the 8.3.0-8.3.2 agents where previously installed.

if err != nil {
logger.Error(err)
AndersonQ marked this conversation as resolved.
Show resolved Hide resolved
return err
}

// Ensure we have the agent secret created.
// The secret is not created here if it exists already from the previous enrollment.
// This is needed for compatibility with agent running in standalone mode,
Expand Down
136 changes: 136 additions & 0 deletions internal/pkg/agent/migration/migrate_secret.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package migration

import (
"errors"
"io/fs"
"os"
"path/filepath"
"runtime"
"strings"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/secret"
)

const (
darwin = "darwin"
)

// MigrateAgentSecret migrates agent secret if the secret doesn't exists agent upgrade from 8.3.0 - 8.3.2 to 8.x and above on Linux and Windows platforms.
func MigrateAgentSecret(log *logp.Logger) error {
// Nothing to migrate for darwin
if runtime.GOOS == darwin {
return nil
}

// Check if the secret already exists
log.Debug("migrate agent secret, check if secret already exists")
_, err := secret.GetAgentSecret()
if err != nil {
if errors.Is(err, fs.ErrNotExist) {
// The secret doesn't exists, perform migration below
log.Debug("agent secret doesn't exists, perform migration")
} else {
log.Errorf("failed read the agent secret: %v", err)
return err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Blocker]
It's important to add more context to the error, also, I'd avoid the double logging and would just wrap the error instead.

Suggested change
log.Errorf("failed read the agent secret: %v", err)
return err
log.Errorf()
return fmt.Errorf("failed read the agent secret: %w", err)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}
} else {
// The secret already exists, nothing to migrate
log.Debug("secret already exists nothing to migrate")
return nil
}

// Check if the secret was copied by the fleet upgrade handler to the legacy location
log.Debug("check if secret was copied over by 8.3.0-8.3.2 version of the agent")
sec, err := getAgentSecretFromHomePath(paths.Home())
if err != nil {
if errors.Is(err, fs.ErrNotExist) {
// The secret is not found in this instance of the vault, continue with migration
log.Debug("agent secret copied from 8.3.0-8.3.2 doesn't exists, continue with migration")
} else {
log.Errorf("failed agent 8.3.0-8.3.2 secret check: %v", err)
return err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Blocker]
It's important to add more context to the error, also, I'd avoid the double logging and would just wrap the error instead.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}
} else {
// The secret is found, save in the new agent vault
log.Debug("agent secret from 8.3.0-8.3.2 is found, migrate to the new vault")
return secret.SetAgentSecret(sec)
}

// Scan other agent data directories, find the latest agent secret
log.Debug("search for possible latest agent 8.3.0-8.3.2 secret")
dataDir := paths.Data()

sec, err = findPreviousAgentSecret(dataDir)
if err != nil {
if errors.Is(err, fs.ErrNotExist) {
// The secret is not found
log.Debug("no previous agent 8.3.0-8.3.2 secrets found, nothing to migrate")
return nil
}
log.Errorf("search for possible latest agent 8.3.0-8.3.2 secret failed: %v", err)
return err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Blocker]
It's important to add more context to the error, also, I'd avoid the double logging and would just wrap the error instead.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}
log.Debug("found previous agent 8.3.0-8.3.2 secret, migrate to the new vault")
AndersonQ marked this conversation as resolved.
Show resolved Hide resolved
return secret.SetAgentSecret(sec)
}

func findPreviousAgentSecret(dataDir string) (sec secret.Secret, err error) {
found := false
fileSystem := os.DirFS(dataDir)
err = fs.WalkDir(fileSystem, ".", func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
if d.IsDir() {
if strings.HasPrefix(d.Name(), "elastic-agent-") {
vaultPath := getLegacyVaultPathFromPath(filepath.Join(dataDir, path))
s, err := secret.GetAgentSecret(secret.WithVaultPath(vaultPath))
// Ignore if error, keep scanning
if err != nil {
if errors.Is(err, fs.ErrNotExist) {
return nil
}
return err
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a wee confused here. The comment says it'll ignore the error, but you handle it and return on error. Is it "ignore fs.ErrNotExist errors only"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated the comment and location

if s.CreatedOn.After(sec.CreatedOn) {
sec = s
found = true
}
} else if d.Name() != "." {
return fs.SkipDir
}
}
return nil
})
if !found {
return sec, fs.ErrNotExist
AndersonQ marked this conversation as resolved.
Show resolved Hide resolved
}
return sec, err
AndersonQ marked this conversation as resolved.
Show resolved Hide resolved
}

func getAgentSecretFromHomePath(homePath string) (sec secret.Secret, err error) {
vaultPath := getLegacyVaultPathFromPath(homePath)
fi, err := os.Stat(vaultPath)
if err != nil {
return
}

if !fi.IsDir() {
return sec, fs.ErrNotExist
}
return secret.GetAgentSecret(secret.WithVaultPath(vaultPath))
}

func getLegacyVaultPath() string {
return getLegacyVaultPathFromPath(paths.Home())
}

func getLegacyVaultPathFromPath(homePath string) string {
return filepath.Join(homePath, "vault")
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Nit]
Keep it consistent with the function name

Suggested change
func getLegacyVaultPathFromPath(homePath string) string {
return filepath.Join(homePath, "vault")
}
func getLegacyVaultPathFromPath(path string) string {
return filepath.Join(path, "vault")
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Loading