Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Agent upgrade 8.2->8.3 #578

Merged
merged 6 commits into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 2 additions & 74 deletions internal/pkg/agent/application/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
package upgrade

import (
"bytes"
"context"
"fmt"
"io/ioutil"
Expand All @@ -20,10 +19,8 @@ import (
"github.com/elastic/elastic-agent/internal/pkg/agent/application/info"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/reexec"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/secret"
"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/internal/pkg/agent/program"
"github.com/elastic/elastic-agent/internal/pkg/agent/storage"
"github.com/elastic/elastic-agent/internal/pkg/artifact"
"github.com/elastic/elastic-agent/internal/pkg/capabilities"
"github.com/elastic/elastic-agent/internal/pkg/core/state"
Expand Down Expand Up @@ -173,10 +170,6 @@ func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ ree
return nil, errors.New(err, "failed to copy action store")
}

if err := encryptConfigIfNeeded(u.log, newHash); err != nil {
return nil, errors.New(err, "failed to encrypt the configuration")
}

if err := ChangeSymlink(ctx, newHash); err != nil {
rollbackInstall(ctx, newHash)
return nil, err
Expand Down Expand Up @@ -220,6 +213,8 @@ func (u *Upgrader) Ack(ctx context.Context) error {
return err
}

marker.Acked = true
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@michel-laterman this seems to be needed here, since the action was just acked so we can persist the market with correct "acked" value. Please double check.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is correct, however I didn't alter any of the upgrade action steps when I added the queue


return saveMarker(marker)
}

Expand Down Expand Up @@ -335,73 +330,6 @@ func copyVault(newHash string) error {
return nil
}

// Create the key if it doesn't exist and encrypt the fleet.yml and state.yml
func encryptConfigIfNeeded(log *logger.Logger, newHash string) (err error) {
vaultPath := getVaultPath(newHash)

err = secret.CreateAgentSecret(secret.WithVaultPath(vaultPath))
if err != nil {
return err
}

newHome := filepath.Join(filepath.Dir(paths.Home()), fmt.Sprintf("%s-%s", agentName, newHash))
ymlStateStorePath := filepath.Join(newHome, filepath.Base(paths.AgentStateStoreYmlFile()))
stateStorePath := filepath.Join(newHome, filepath.Base(paths.AgentStateStoreFile()))

files := []struct {
Src string
Dst string
}{
{
Src: ymlStateStorePath,
Dst: stateStorePath,
},
{
Src: paths.AgentConfigYmlFile(),
Dst: paths.AgentConfigFile(),
},
}
for _, f := range files {
var b []byte
b, err = ioutil.ReadFile(f.Src)
if err != nil {
if os.IsNotExist(err) {
continue
}
return err
}

// Encrypt yml file
store := storage.NewEncryptedDiskStore(f.Dst, storage.WithVaultPath(vaultPath))
err = store.Save(bytes.NewReader(b))
if err != nil {
return err
}

// Remove yml file if no errors
defer func(fp string) {
if err != nil {
return
}
if rerr := os.Remove(fp); rerr != nil {
log.Warnf("failed to remove file: %s, err: %v", fp, rerr)
}
}(f.Src)
}

// Do not remove AgentConfigYmlFile lock file if any error happened.
if err != nil {
return err
}

lockFp := paths.AgentConfigYmlFile() + ".lock"
if rerr := os.Remove(lockFp); rerr != nil {
log.Warnf("failed to remove file: %s, err: %v", lockFp, rerr)
}

return err
}

// shutdownCallback returns a callback function to be executing during shutdown once all processes are closed.
// this goes through runtime directory of agent and copies all the state files created by processes to new versioned
// home directory with updated process name to match new version.
Expand Down
92 changes: 92 additions & 0 deletions internal/pkg/agent/cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package cmd

import (
"bytes"
"context"
"fmt"
"io/ioutil"
Expand Down Expand Up @@ -126,6 +127,16 @@ func run(override cfgOverrider) error {
return err
}

// Check if the fleet.yml or state.yml exists and encrypt them.
// This is needed to handle upgrade properly.
// On agent upgrade the older version for example 8.2 unpacks the 8.3 agent
// and tries to run it.
// The new version of the agent requires encrypted configuration files or it will not start and upgrade will fail and revert.
err = encryptConfigIfNeeded(logger)
if err != nil {
return err
}

agentInfo, err := info.NewAgentInfoWithLog(defaultLogLevel(cfg), createAgentID)
if err != nil {
return errors.New(err,
Expand Down Expand Up @@ -476,3 +487,84 @@ func initTracer(agentName, version string, mcfg *monitoringCfg.MonitoringConfig)
Transport: ts,
})
}

// encryptConfigIfNeeded encrypts fleet.yml or state.yml if fleet.enc or state.enc does not exist already.
func encryptConfigIfNeeded(log *logger.Logger) (err error) {
log.Debug("encrypt config if needed")

files := []struct {
Src string
Dst string
}{
{
Src: paths.AgentStateStoreYmlFile(),
Dst: paths.AgentStateStoreFile(),
},
{
Src: paths.AgentConfigYmlFile(),
Dst: paths.AgentConfigFile(),
},
}
for _, f := range files {
var b []byte

log.Debugf("check if the file %v exists", f.Dst)
exists, err := fileExists(f.Dst)
if err != nil {
// log and continue
log.Debugf("failed to access file %v", f.Dst)
err = nil
}

// If .enc file already exists, continue
if exists {
log.Debugf("file %v already exists", f.Dst)
continue
}

log.Debugf("read file: %v", f.Src)
b, err = ioutil.ReadFile(f.Src)
if err != nil {
log.Debugf("read file: %v, err: %v", f.Src, err)
if os.IsNotExist(err) {
log.Debugf("file: %v doesn't exists, continue", f.Src)
continue
}
return err
}

// Encrypt yml file
log.Debugf("encrypt file %v into %v", f.Src, f.Dst)
store := storage.NewEncryptedDiskStore(f.Dst)
err = store.Save(bytes.NewReader(b))
if err != nil {
log.Debugf("failed to encrypt file: %v, err: %v", f.Dst, err)
return err
}
}

if err != nil {
return err
}

// Remove state.yml file if no errors
fp := paths.AgentStateStoreYmlFile()
if err := os.Remove(fp); err != nil {
// Log only
log.Warnf("failed to remove file: %s, err: %v", fp, err)
}

// The agent can't remove fleet.yml, because it can be rolled back by the older version of the agent "watcher"
// and pre 8.3 version needs unencrypted fleet.yml file in order to start.

return nil
}

func fileExists(fp string) (ok bool, err error) {
if _, err = os.Stat(fp); err == nil {
ok = true
} else if os.IsNotExist(err) {
err = nil
}
return
}
2 changes: 1 addition & 1 deletion internal/pkg/fleetapi/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func (a *ActionPolicyChange) Expiration() (time.Time, error) {

// ActionUpgrade is a request for agent to upgrade.
type ActionUpgrade struct {
ActionID string `yaml:"action_id"`
ActionID string `yaml:"id"`
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@michel-laterman I had to change the yaml serialization field name back to what it was before in order to fix the issue with .update-market. Please double check that has no other side-effects in the context of the original changes mentioned in the description of this PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can do 2 step migration when now, we will support action_id and id
onload it will load both and if action_id is not present id will be written to action_id and then we will empty id
on write we will omitempty of id
this way we get rid of id in favor of action_id, next releases we can drop the field completely

ActionType string `yaml:"type"`
ActionStartTime string `json:"start_time" yaml:"start_time,omitempty"` // TODO change to time.Time in unmarshal
ActionExpiration string `json:"expiration" yaml:"expiration,omitempty"`
Expand Down