From 717708a72c0c81aded34e93e61b77c9b0cf4e16a Mon Sep 17 00:00:00 2001 From: Michel Laterman <82832767+michel-laterman@users.noreply.github.com> Date: Mon, 26 Sep 2022 10:22:48 -0700 Subject: [PATCH 01/63] Expand status reporter/controller interfaces to allow local reporters (#1285) * Expand status reporter/controller interfaces to allow local reporters Add a local reporter map to the status controller. These reporters are not used when updating status with fleet-server, they are only used to gather local state information - specifically if the agent is degraded because checkin with fleet-server has failed. This bypasses the bug that was introduced with the liveness endpoint where the agent could checkin (to fleet-server) with a degraded status because a previous checkin failed. Local reporters are used to generate a separate status. This status is used in the liveness endpoint. * fix linter --- CHANGELOG.next.asciidoc | 1 + .../gateway/fleet/fleet_gateway.go | 9 +- .../gateway/fleet/fleet_gateway_test.go | 8 ++ .../fleet/noop_status_controller_test.go | 12 ++- internal/pkg/core/status/handler.go | 3 +- internal/pkg/core/status/reporter.go | 84 +++++++++++++++++-- internal/pkg/testutils/status_reporter.go | 10 +++ 7 files changed, 110 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 8aa29f93e7f..35d7c2b95a5 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -118,6 +118,7 @@ - Use at least warning level for all status logs {pull}1218[1218] - Remove fleet event reporter and events from checkin body. {issue}993[993] - Fix unintended reset of source URI when downloading components {pull}1252[1252] +- Create separate status reporter for local only events so that degraded fleet-checkins no longer affect health on successful fleet-checkins. 
{issue}1157[1157] {pull}1285[1285] ==== New features diff --git a/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go b/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go index f6ff9b504f5..6df9f171fbe 100644 --- a/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go +++ b/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go @@ -88,6 +88,7 @@ type fleetGateway struct { checkinFailCounter int statusController status.Controller statusReporter status.Reporter + localReporter status.Reporter stateStore stateStore queue actionQueue } @@ -156,6 +157,7 @@ func newFleetGatewayWithScheduler( done: done, acker: acker, statusReporter: statusController.RegisterComponent("gateway"), + localReporter: statusController.RegisterLocalComponent("gateway-checkin"), statusController: statusController, stateStore: stateStore, queue: queue, @@ -208,6 +210,7 @@ func (f *fleetGateway) worker() { f.statusReporter.Update(state.Failed, errMsg, nil) } else { f.statusReporter.Update(state.Healthy, "", nil) + f.localReporter.Update(state.Healthy, "", nil) // we don't need to specifically set the local reporter to failed above, but it needs to be reset to healthy if a checkin succeeds } case <-f.bgContext.Done(): @@ -291,12 +294,11 @@ func (f *fleetGateway) doExecute() (*fleetapi.CheckinResponse, error) { ) f.log.Error(err) + f.localReporter.Update(state.Failed, err.Error(), nil) return nil, err } if f.checkinFailCounter > 1 { - // do not update status reporter with failure - // status reporter would report connection failure on first successful connection, leading to - // stale result for certain period causing slight confusion. 
+ f.localReporter.Update(state.Degraded, fmt.Sprintf("checkin failed: %v", err), nil) f.log.Errorf("checking number %d failed: %s", f.checkinFailCounter, err.Error()) } continue @@ -386,6 +388,7 @@ func (f *fleetGateway) stop() { f.log.Info("Fleet gateway is stopping") defer f.scheduler.Stop() f.statusReporter.Unregister() + f.localReporter.Unregister() close(f.done) f.wg.Wait() } diff --git a/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go b/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go index 99cb0630385..2d691185c1c 100644 --- a/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go +++ b/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go @@ -25,6 +25,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/storage" "github.com/elastic/elastic-agent/internal/pkg/agent/storage/store" + "github.com/elastic/elastic-agent/internal/pkg/core/state" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" noopacker "github.com/elastic/elastic-agent/internal/pkg/fleetapi/acker/noop" "github.com/elastic/elastic-agent/internal/pkg/scheduler" @@ -703,12 +704,18 @@ func TestRetriesOnFailures(t *testing.T) { queue.On("DequeueActions").Return([]fleetapi.Action{}) queue.On("Actions").Return([]fleetapi.Action{}) + localReporter := &testutils.MockReporter{} + localReporter.On("Update", state.Degraded, mock.Anything, mock.Anything).Times(2) + localReporter.On("Update", mock.Anything, mock.Anything, mock.Anything).Maybe() + localReporter.On("Unregister").Maybe() + fleetReporter := &testutils.MockReporter{} fleetReporter.On("Update", mock.Anything, mock.Anything, mock.Anything).Maybe() fleetReporter.On("Unregister").Maybe() statusController := &testutils.MockController{} statusController.On("RegisterComponent", "gateway").Return(fleetReporter).Once() + statusController.On("RegisterLocalComponent", 
"gateway-checkin").Return(localReporter).Once() statusController.On("StatusString").Return("string") gateway, err := newFleetGatewayWithScheduler( @@ -767,6 +774,7 @@ func TestRetriesOnFailures(t *testing.T) { waitFn() statusController.AssertExpectations(t) fleetReporter.AssertExpectations(t) + localReporter.AssertExpectations(t) }) t.Run("The retry loop is interruptible", diff --git a/internal/pkg/agent/application/gateway/fleet/noop_status_controller_test.go b/internal/pkg/agent/application/gateway/fleet/noop_status_controller_test.go index bbae6958ab6..18c84f5fc5e 100644 --- a/internal/pkg/agent/application/gateway/fleet/noop_status_controller_test.go +++ b/internal/pkg/agent/application/gateway/fleet/noop_status_controller_test.go @@ -13,13 +13,17 @@ import ( type noopController struct{} -func (*noopController) SetAgentID(_ string) {} -func (*noopController) RegisterComponent(_ string) status.Reporter { return &noopReporter{} } +func (*noopController) SetAgentID(_ string) {} +func (*noopController) RegisterComponent(_ string) status.Reporter { return &noopReporter{} } +func (*noopController) RegisterLocalComponent(_ string) status.Reporter { return &noopReporter{} } func (*noopController) RegisterComponentWithPersistance(_ string, _ bool) status.Reporter { return &noopReporter{} } -func (*noopController) RegisterApp(_ string, _ string) status.Reporter { return &noopReporter{} } -func (*noopController) Status() status.AgentStatus { return status.AgentStatus{Status: status.Healthy} } +func (*noopController) RegisterApp(_ string, _ string) status.Reporter { return &noopReporter{} } +func (*noopController) Status() status.AgentStatus { return status.AgentStatus{Status: status.Healthy} } +func (*noopController) LocalStatus() status.AgentStatus { + return status.AgentStatus{Status: status.Healthy} +} func (*noopController) StatusCode() status.AgentStatusCode { return status.Healthy } func (*noopController) UpdateStateID(_ string) {} func (*noopController) 
StatusString() string { return "online" } diff --git a/internal/pkg/core/status/handler.go b/internal/pkg/core/status/handler.go index e82f73fb216..1fa72a10f93 100644 --- a/internal/pkg/core/status/handler.go +++ b/internal/pkg/core/status/handler.go @@ -19,10 +19,11 @@ type LivenessResponse struct { } // ServeHTTP is an HTTP Handler for the status controller. +// It uses the local agent status so it is able to report a degraded state if the fleet-server checkin has issues. // Respose code is 200 for a healthy agent, and 503 otherwise. // Response body is a JSON object that contains the agent ID, status, message, and the last status update time. func (r *controller) ServeHTTP(wr http.ResponseWriter, req *http.Request) { - s := r.Status() + s := r.LocalStatus() lr := LivenessResponse{ ID: r.agentID, Status: s.Status.String(), diff --git a/internal/pkg/core/status/reporter.go b/internal/pkg/core/status/reporter.go index 848a69326e6..50f34651fa1 100644 --- a/internal/pkg/core/status/reporter.go +++ b/internal/pkg/core/status/reporter.go @@ -58,9 +58,11 @@ type AgentStatus struct { type Controller interface { SetAgentID(string) RegisterComponent(string) Reporter + RegisterLocalComponent(string) Reporter RegisterComponentWithPersistance(string, bool) Reporter RegisterApp(id string, name string) Reporter Status() AgentStatus + LocalStatus() AgentStatus StatusCode() AgentStatusCode StatusString() string UpdateStateID(string) @@ -68,15 +70,19 @@ type Controller interface { } type controller struct { - updateTime time.Time - log *logger.Logger - reporters map[string]*reporter - appReporters map[string]*reporter - stateID string - message string - agentID string - status AgentStatusCode - mx sync.Mutex + updateTime time.Time + log *logger.Logger + reporters map[string]*reporter + localReporters map[string]*reporter + appReporters map[string]*reporter + stateID string + message string + agentID string + status AgentStatusCode + localStatus AgentStatusCode + localMessage 
string + localTime time.Time + mx sync.Mutex } // NewController creates a new reporter. @@ -126,6 +132,28 @@ func (r *controller) UpdateStateID(stateID string) { r.updateStatus() } +// RegisterLocalComponent registers new component for local-only status updates. +func (r *controller) RegisterLocalComponent(componentIdentifier string) Reporter { + id := componentIdentifier + "-" + uuid.New().String()[:8] + rep := &reporter{ + name: componentIdentifier, + isRegistered: true, + unregisterFunc: func() { + r.mx.Lock() + delete(r.localReporters, id) + r.mx.Unlock() + }, + notifyChangeFunc: r.updateStatus, + isPersistent: false, + } + + r.mx.Lock() + r.localReporters[id] = rep + r.mx.Unlock() + + return rep +} + // Register registers new component for status updates. func (r *controller) RegisterComponent(componentIdentifier string) Reporter { return r.RegisterComponentWithPersistance(componentIdentifier, false) @@ -199,6 +227,25 @@ func (r *controller) Status() AgentStatus { } } +// LocalStatus returns the status from the local registered components if they are different from the agent status. +// If the agent status is more severe than the local status (failed vs degraded for example) agent status is used. +// If they are equal (healthy and healthy) agent status is used. +func (r *controller) LocalStatus() AgentStatus { + status := r.Status() + r.mx.Lock() + defer r.mx.Unlock() + + if r.localStatus > status.Status { + return AgentStatus{ + Status: r.localStatus, + Message: r.localMessage, + UpdateTime: r.localTime, + } + } + return status + +} + // StatusCode retrieves current agent status code. 
func (r *controller) StatusCode() AgentStatusCode { r.mx.Lock() @@ -208,9 +255,23 @@ func (r *controller) StatusCode() AgentStatusCode { func (r *controller) updateStatus() { status := Healthy + lStatus := Healthy message := "" + lMessage := "" r.mx.Lock() + for id, rep := range r.localReporters { + s := statusToAgentStatus(rep.status) + if s > lStatus { + lStatus = s + lMessage = fmt.Sprintf("component %s: %s", id, rep.message) + } + r.log.Debugf("local component '%s' has status '%s'", id, s) + if status == Failed { + break + } + } + for id, rep := range r.reporters { s := statusToAgentStatus(rep.status) if s > status { @@ -244,6 +305,11 @@ func (r *controller) updateStatus() { r.message = message r.updateTime = time.Now().UTC() } + if r.localStatus != lStatus { + r.localStatus = lStatus + r.localMessage = lMessage + r.localTime = time.Now().UTC() + } r.mx.Unlock() diff --git a/internal/pkg/testutils/status_reporter.go b/internal/pkg/testutils/status_reporter.go index 45448aa53b2..1d4fded4c0a 100644 --- a/internal/pkg/testutils/status_reporter.go +++ b/internal/pkg/testutils/status_reporter.go @@ -25,6 +25,11 @@ func (m *MockController) RegisterComponent(id string) status.Reporter { return args.Get(0).(status.Reporter) } +func (m *MockController) RegisterLocalComponent(id string) status.Reporter { + args := m.Called(id) + return args.Get(0).(status.Reporter) +} + func (m *MockController) RegisterComponentWithPersistance(id string, b bool) status.Reporter { args := m.Called(id, b) return args.Get(0).(status.Reporter) @@ -40,6 +45,11 @@ func (m *MockController) Status() status.AgentStatus { return args.Get(0).(status.AgentStatus) } +func (m *MockController) LocalStatus() status.AgentStatus { + args := m.Called() + return args.Get(0).(status.AgentStatus) +} + func (m *MockController) StatusCode() status.AgentStatusCode { args := m.Called() return args.Get(0).(status.AgentStatusCode) From 5225e5408683319713c2b6603d6f55b8b359ed0a Mon Sep 17 00:00:00 2001 From: Craig 
MacKenzie Date: Mon, 26 Sep 2022 18:18:51 -0400 Subject: [PATCH 02/63] Improve logging for agent upgrades. (#1287) --- CHANGELOG.next.asciidoc | 1 + .../handlers/handler_action_upgrade.go | 8 ++++ .../pkg/agent/application/upgrade/cleanup.go | 10 ++-- .../agent/application/upgrade/cleanup_test.go | 16 ++++++- .../pkg/agent/application/upgrade/rollback.go | 20 +++++--- .../application/upgrade/step_download.go | 4 ++ .../agent/application/upgrade/step_mark.go | 12 +++-- .../agent/application/upgrade/step_relink.go | 4 +- .../agent/application/upgrade/step_unpack.go | 16 +++++-- .../pkg/agent/application/upgrade/upgrade.go | 48 +++++++++++-------- internal/pkg/agent/cmd/watch.go | 25 +++++----- internal/pkg/agent/control/server/server.go | 2 + 12 files changed, 115 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 35d7c2b95a5..faef2861ba9 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -206,3 +206,4 @@ - Fix incorrectly creating a filebeat redis input when a policy contains a packetbeat redis input. {issue}[427] {pull}[700] - Add `lumberjack` input type to the Filebeat spec. {pull}[959] - Add support for hints' based autodiscovery in kubernetes provider. {pull}[698] +- Improve logging during upgrades. {pull}[1287] diff --git a/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_upgrade.go b/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_upgrade.go index cfc7ea83749..a0d78a91622 100644 --- a/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_upgrade.go +++ b/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_upgrade.go @@ -39,6 +39,14 @@ func (h *Upgrade) Handle(ctx context.Context, a fleetapi.Action, acker store.Fle } _, err := h.upgrader.Upgrade(ctx, &upgradeAction{action}, true) + if err != nil { + // Always log upgrade failures at the error level. 
Action errors are logged at debug level + // by default higher up the stack in ActionDispatcher.Dispatch() + h.log.Errorw("Upgrade action failed", "error.message", err, + "action.version", action.Version, "action.source_uri", action.SourceURI, "action.id", action.ActionID, + "action.start_time", action.StartTime, "action.expiration", action.ActionExpiration) + } + return err } diff --git a/internal/pkg/agent/application/upgrade/cleanup.go b/internal/pkg/agent/application/upgrade/cleanup.go index 5e0618dfe78..2581e30a1d9 100644 --- a/internal/pkg/agent/application/upgrade/cleanup.go +++ b/internal/pkg/agent/application/upgrade/cleanup.go @@ -13,11 +13,15 @@ import ( "github.com/hashicorp/go-multierror" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/pkg/core/logger" ) -// preUpgradeCleanup will remove files that do not have the passed version number from the downloads directory. -func preUpgradeCleanup(version string) error { - files, err := os.ReadDir(paths.Downloads()) +// cleanNonMatchingVersionsFromDownloads will remove files that do not have the passed version number from the downloads directory. 
+func cleanNonMatchingVersionsFromDownloads(log *logger.Logger, version string) error { + downloadsPath := paths.Downloads() + log.Debugw("Cleaning up non-matching downloaded versions", "version", version, "downloads.path", downloadsPath) + + files, err := os.ReadDir(downloadsPath) if err != nil { return fmt.Errorf("unable to read directory %q: %w", paths.Downloads(), err) } diff --git a/internal/pkg/agent/application/upgrade/cleanup_test.go b/internal/pkg/agent/application/upgrade/cleanup_test.go index 736a9c42b3d..1170c26946d 100644 --- a/internal/pkg/agent/application/upgrade/cleanup_test.go +++ b/internal/pkg/agent/application/upgrade/cleanup_test.go @@ -9,7 +9,9 @@ import ( "path/filepath" "testing" + "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/pkg/core/logger" "github.com/stretchr/testify/require" ) @@ -31,7 +33,8 @@ func setupDir(t *testing.T) { func TestPreUpgradeCleanup(t *testing.T) { setupDir(t) - err := preUpgradeCleanup("8.4.0") + log := newErrorLogger(t) + err := cleanNonMatchingVersionsFromDownloads(log, "8.4.0") require.NoError(t, err) files, err := os.ReadDir(paths.Downloads()) @@ -42,3 +45,14 @@ func TestPreUpgradeCleanup(t *testing.T) { require.NoError(t, err) require.Equal(t, []byte("hello, world!"), p) } + +func newErrorLogger(t *testing.T) *logger.Logger { + t.Helper() + + loggerCfg := logger.DefaultLoggingConfig() + loggerCfg.Level = logp.ErrorLevel + + log, err := logger.NewFromConfig("", loggerCfg, false) + require.NoError(t, err) + return log +} diff --git a/internal/pkg/agent/application/upgrade/rollback.go b/internal/pkg/agent/application/upgrade/rollback.go index 8ce6958beae..b4f6014fb3d 100644 --- a/internal/pkg/agent/application/upgrade/rollback.go +++ b/internal/pkg/agent/application/upgrade/rollback.go @@ -31,33 +31,35 @@ const ( ) // Rollback rollbacks to previous version which was functioning before upgrade. 
-func Rollback(ctx context.Context, prevHash, currentHash string) error { +func Rollback(ctx context.Context, log *logger.Logger, prevHash string, currentHash string) error { // change symlink - if err := ChangeSymlink(ctx, prevHash); err != nil { + if err := ChangeSymlink(ctx, log, prevHash); err != nil { return err } // revert active commit - if err := UpdateActiveCommit(prevHash); err != nil { + if err := UpdateActiveCommit(log, prevHash); err != nil { return err } // Restart + log.Info("Restarting the agent after rollback") if err := restartAgent(ctx); err != nil { return err } // cleanup everything except version we're rolling back into - return Cleanup(prevHash, true) + return Cleanup(log, prevHash, true) } // Cleanup removes all artifacts and files related to a specified version. -func Cleanup(currentHash string, removeMarker bool) error { +func Cleanup(log *logger.Logger, currentHash string, removeMarker bool) error { + log.Debugw("Cleaning up upgrade", "hash", currentHash, "remove_marker", removeMarker) <-time.After(afterRestartDelay) // remove upgrade marker if removeMarker { - if err := CleanMarker(); err != nil { + if err := CleanMarker(log); err != nil { return err } } @@ -74,7 +76,9 @@ func Cleanup(currentHash string, removeMarker bool) error { } // remove symlink to avoid upgrade failures, ignore error - _ = os.Remove(prevSymlinkPath()) + prevSymlink := prevSymlinkPath() + log.Debugw("Removing previous symlink path", "file.path", prevSymlinkPath()) + _ = os.Remove(prevSymlink) dirPrefix := fmt.Sprintf("%s-", agentName) currentDir := fmt.Sprintf("%s-%s", agentName, currentHash) @@ -88,6 +92,7 @@ func Cleanup(currentHash string, removeMarker bool) error { } hashedDir := filepath.Join(paths.Data(), dir) + log.Debugw("Removing hashed data directory", "file.path", hashedDir) if cleanupErr := install.RemovePath(hashedDir); cleanupErr != nil { err = multierror.Append(err, cleanupErr) } @@ -113,6 +118,7 @@ func InvokeWatcher(log *logger.Logger) error { } }() 
+ log.Debugw("Starting upgrade watcher", "path", cmd.Path, "args", cmd.Args, "env", cmd.Env, "dir", cmd.Dir) return cmd.Start() } diff --git a/internal/pkg/agent/application/upgrade/step_download.go b/internal/pkg/agent/application/upgrade/step_download.go index 27e4b9c9e9c..3190303b84e 100644 --- a/internal/pkg/agent/application/upgrade/step_download.go +++ b/internal/pkg/agent/application/upgrade/step_download.go @@ -40,6 +40,10 @@ func (u *Upgrader) downloadArtifact(ctx context.Context, version, sourceURI stri } } + u.log.Debugw("Downloading upgrade artifact", "version", version, + "source_uri", settings.SourceURI, "drop_path", settings.DropPath, + "target_path", settings.TargetDirectory, "install_path", settings.InstallPath) + verifier, err := newVerifier(version, u.log, &settings) if err != nil { return "", errors.New(err, "initiating verifier") diff --git a/internal/pkg/agent/application/upgrade/step_mark.go b/internal/pkg/agent/application/upgrade/step_mark.go index 66924337699..80bfaab6c44 100644 --- a/internal/pkg/agent/application/upgrade/step_mark.go +++ b/internal/pkg/agent/application/upgrade/step_mark.go @@ -17,6 +17,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/errors" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" "github.com/elastic/elastic-agent/internal/pkg/release" + "github.com/elastic/elastic-agent/pkg/core/logger" ) const markerFilename = ".update-marker" @@ -91,7 +92,7 @@ func newMarkerSerializer(m *UpdateMarker) *updateMarkerSerializer { } // markUpgrade marks update happened so we can handle grace period -func (u *Upgrader) markUpgrade(_ context.Context, hash string, action Action) error { +func (u *Upgrader) markUpgrade(_ context.Context, log *logger.Logger, hash string, action Action) error { prevVersion := release.Version() prevHash := release.Commit() if len(prevHash) > hashLen { @@ -112,11 +113,12 @@ func (u *Upgrader) markUpgrade(_ context.Context, hash string, action Action) er } markerPath := 
markerFilePath() + log.Infow("Writing upgrade marker file", "file.path", markerPath, "hash", marker.Hash, "prev_hash", prevHash) if err := ioutil.WriteFile(markerPath, markerBytes, 0600); err != nil { return errors.New(err, errors.TypeFilesystem, "failed to create update marker file", errors.M(errors.MetaKeyPath, markerPath)) } - if err := UpdateActiveCommit(hash); err != nil { + if err := UpdateActiveCommit(log, hash); err != nil { return err } @@ -124,8 +126,9 @@ func (u *Upgrader) markUpgrade(_ context.Context, hash string, action Action) er } // UpdateActiveCommit updates active.commit file to point to active version. -func UpdateActiveCommit(hash string) error { +func UpdateActiveCommit(log *logger.Logger, hash string) error { activeCommitPath := filepath.Join(paths.Top(), agentCommitFile) + log.Infow("Updating active commit", "file.path", activeCommitPath, "hash", hash) if err := ioutil.WriteFile(activeCommitPath, []byte(hash), 0600); err != nil { return errors.New(err, errors.TypeFilesystem, "failed to update active commit", errors.M(errors.MetaKeyPath, activeCommitPath)) } @@ -134,8 +137,9 @@ func UpdateActiveCommit(hash string) error { } // CleanMarker removes a marker from disk. 
-func CleanMarker() error { +func CleanMarker(log *logger.Logger) error { markerFile := markerFilePath() + log.Debugw("Removing marker file", "file.path", markerFile) if err := os.Remove(markerFile); !os.IsNotExist(err) { return err } diff --git a/internal/pkg/agent/application/upgrade/step_relink.go b/internal/pkg/agent/application/upgrade/step_relink.go index 9c998262ecd..e56b5a6642e 100644 --- a/internal/pkg/agent/application/upgrade/step_relink.go +++ b/internal/pkg/agent/application/upgrade/step_relink.go @@ -14,10 +14,11 @@ import ( "github.com/elastic/elastic-agent-libs/file" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/errors" + "github.com/elastic/elastic-agent/pkg/core/logger" ) // ChangeSymlink updates symlink paths to match current version. -func ChangeSymlink(ctx context.Context, targetHash string) error { +func ChangeSymlink(ctx context.Context, log *logger.Logger, targetHash string) error { // create symlink to elastic-agent-{hash} hashedDir := fmt.Sprintf("%s-%s", agentName, targetHash) @@ -31,6 +32,7 @@ func ChangeSymlink(ctx context.Context, targetHash string) error { } prevNewPath := prevSymlinkPath() + log.Infow("Changing symlink", "symlink_path", symlinkPath, "new_path", newPath, "prev_path", prevNewPath) // remove symlink to avoid upgrade failures if err := os.Remove(prevNewPath); !os.IsNotExist(err) { diff --git a/internal/pkg/agent/application/upgrade/step_unpack.go b/internal/pkg/agent/application/upgrade/step_unpack.go index 108593c5083..4a9538a7e07 100644 --- a/internal/pkg/agent/application/upgrade/step_unpack.go +++ b/internal/pkg/agent/application/upgrade/step_unpack.go @@ -21,6 +21,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/errors" + "github.com/elastic/elastic-agent/pkg/core/logger" ) // unpack unpacks archive correctly, skips root (symlink, config...) 
unpacks data/* @@ -30,18 +31,21 @@ func (u *Upgrader) unpack(ctx context.Context, version, archivePath string) (str var hash string var err error if runtime.GOOS == "windows" { - hash, err = unzip(version, archivePath) + hash, err = unzip(u.log, version, archivePath) } else { - hash, err = untar(version, archivePath) + hash, err = untar(u.log, version, archivePath) } + if err != nil { + u.log.Errorw("Failed to unpack upgrade artifact", "error.message", err, "version", version, "file.path", archivePath, "hash", hash) return "", err } + u.log.Infow("Unpacked upgrade artifact", "version", version, "file.path", archivePath, "hash", hash) return hash, nil } -func unzip(version, archivePath string) (string, error) { +func unzip(log *logger.Logger, version string, archivePath string) (string, error) { var hash, rootDir string r, err := zip.OpenReader(archivePath) if err != nil { @@ -82,8 +86,10 @@ func unzip(version, archivePath string) (string, error) { path := filepath.Join(paths.Data(), strings.TrimPrefix(fileName, "data/")) if f.FileInfo().IsDir() { + log.Debugw("Unpacking directory", "archive", "zip", "file.path", path) os.MkdirAll(path, f.Mode()) } else { + log.Debugw("Unpacking file", "archive", "zip", "file.path", path) os.MkdirAll(filepath.Dir(path), f.Mode()) f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode()) if err != nil { @@ -119,7 +125,7 @@ func unzip(version, archivePath string) (string, error) { return hash, nil } -func untar(version, archivePath string) (string, error) { +func untar(log *logger.Logger, version string, archivePath string) (string, error) { r, err := os.Open(archivePath) if err != nil { return "", errors.New(fmt.Sprintf("artifact for 'elastic-agent' version '%s' could not be found at '%s'", version, archivePath), errors.TypeFilesystem, errors.M(errors.MetaKeyPath, archivePath)) @@ -183,6 +189,7 @@ func untar(version, archivePath string) (string, error) { mode := fi.Mode() switch { case mode.IsRegular(): + 
log.Debugw("Unpacking file", "archive", "tar", "file.path", abs) // just to be sure, it should already be created by Dir type if err := os.MkdirAll(filepath.Dir(abs), 0755); err != nil { return "", errors.New(err, "TarInstaller: creating directory for file "+abs, errors.TypeFilesystem, errors.M(errors.MetaKeyPath, abs)) @@ -201,6 +208,7 @@ func untar(version, archivePath string) (string, error) { return "", fmt.Errorf("TarInstaller: error writing to %s: %w", abs, err) } case mode.IsDir(): + log.Debugw("Unpacking directory", "archive", "tar", "file.path", abs) if err := os.MkdirAll(abs, 0755); err != nil { return "", errors.New(err, "TarInstaller: creating directory for file "+abs, errors.TypeFilesystem, errors.M(errors.MetaKeyPath, abs)) } diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index 1c6a85fa9d9..e31c8ef0378 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -108,6 +108,7 @@ func (u *Upgrader) Upgradeable() bool { // Upgrade upgrades running agent, function returns shutdown callback if some needs to be executed for cases when // reexec is called by caller. 
func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ reexec.ShutdownCallbackFn, err error) { + u.log.Infow("Upgrading agent", "version", a.Version(), "source_uri", a.SourceURI()) span, ctx := apm.StartSpan(ctx, "upgrade", "app.internal") defer span.End() // report failed @@ -126,9 +127,9 @@ func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ ree "running under control of the systems supervisor") } - err = preUpgradeCleanup(u.agentInfo.Version()) + err = cleanNonMatchingVersionsFromDownloads(u.log, u.agentInfo.Version()) if err != nil { - u.log.Errorf("Unable to clean downloads dir %q before update: %v", paths.Downloads(), err) + u.log.Errorw("Unable to clean downloads before update", "error.message", err, "downloads.path", paths.Downloads()) } if u.caps != nil { @@ -142,10 +143,10 @@ func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ ree sourceURI := u.sourceURI(a.SourceURI()) archivePath, err := u.downloadArtifact(ctx, a.Version(), sourceURI) if err != nil { - // Run the same preUpgradeCleanup task to get rid of any newly downloaded files + // Run the same pre-upgrade cleanup task to get rid of any newly downloaded files // This may have an issue if users are upgrading to the same version number. 
- if dErr := preUpgradeCleanup(u.agentInfo.Version()); dErr != nil { - u.log.Errorf("Unable to remove file after verification failure: %v", dErr) + if dErr := cleanNonMatchingVersionsFromDownloads(u.log, u.agentInfo.Version()); dErr != nil { + u.log.Errorw("Unable to remove file after verification failure", "error.message", dErr) } return nil, err } @@ -169,39 +170,47 @@ func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ ree return nil, nil } - if err := copyActionStore(newHash); err != nil { + if err := copyActionStore(u.log, newHash); err != nil { return nil, errors.New(err, "failed to copy action store") } - if err := ChangeSymlink(ctx, newHash); err != nil { - rollbackInstall(ctx, newHash) + if err := ChangeSymlink(ctx, u.log, newHash); err != nil { + u.log.Errorw("Rolling back: changing symlink failed", "error.message", err) + rollbackInstall(ctx, u.log, newHash) return nil, err } - if err := u.markUpgrade(ctx, newHash, a); err != nil { - rollbackInstall(ctx, newHash) + if err := u.markUpgrade(ctx, u.log, newHash, a); err != nil { + u.log.Errorw("Rolling back: marking upgrade failed", "error.message", err) + rollbackInstall(ctx, u.log, newHash) return nil, err } if err := InvokeWatcher(u.log); err != nil { - rollbackInstall(ctx, newHash) + u.log.Errorw("Rolling back: starting watcher failed", "error.message", err) + rollbackInstall(ctx, u.log, newHash) return nil, errors.New("failed to invoke rollback watcher", err) } - cb := shutdownCallback(u.log, paths.Home(), release.Version(), a.Version(), release.TrimCommit(newHash)) + trimmedNewHash := release.TrimCommit(newHash) + cb := shutdownCallback(u.log, paths.Home(), release.Version(), a.Version(), trimmedNewHash) if reexecNow { + u.log.Debugw("Removing downloads directory", "file.path", paths.Downloads(), "rexec", reexecNow) err = os.RemoveAll(paths.Downloads()) if err != nil { - u.log.Errorf("Unable to clean downloads dir %q after update: %v", paths.Downloads(), err) + 
u.log.Errorw("Unable to clean downloads after update", "error.message", err, "downloads.path", paths.Downloads()) } + u.log.Infow("Restarting after upgrade", "new_version", release.Version(), "prev_version", a.Version(), + "hash", trimmedNewHash, "home", paths.Home()) u.reexec.ReExec(cb) return nil, nil } // Clean everything from the downloads dir + u.log.Debugw("Removing downloads directory", "file.path", paths.Downloads(), "rexec", reexecNow) err = os.RemoveAll(paths.Downloads()) if err != nil { - u.log.Errorf("Unable to clean downloads dir %q after update: %v", paths.Downloads(), err) + u.log.Errorw("Unable to clean downloads after update", "error.message", err, "file.path", paths.Downloads()) } return cb, nil @@ -283,19 +292,20 @@ func (u *Upgrader) reportUpdating(version string) { ) } -func rollbackInstall(ctx context.Context, hash string) { +func rollbackInstall(ctx context.Context, log *logger.Logger, hash string) { os.RemoveAll(filepath.Join(paths.Data(), fmt.Sprintf("%s-%s", agentName, hash))) - _ = ChangeSymlink(ctx, release.ShortCommit()) + _ = ChangeSymlink(ctx, log, release.ShortCommit()) } -func copyActionStore(newHash string) error { +func copyActionStore(log *logger.Logger, newHash string) error { // copies legacy action_store.yml, state.yml and state.enc encrypted file if exists storePaths := []string{paths.AgentActionStoreFile(), paths.AgentStateStoreYmlFile(), paths.AgentStateStoreFile()} + newHome := filepath.Join(filepath.Dir(paths.Home()), fmt.Sprintf("%s-%s", agentName, newHash)) + log.Debugw("Copying action store", "new_home_path", newHome) for _, currentActionStorePath := range storePaths { - newHome := filepath.Join(filepath.Dir(paths.Home()), fmt.Sprintf("%s-%s", agentName, newHash)) newActionStorePath := filepath.Join(newHome, filepath.Base(currentActionStorePath)) - + log.Debugw("Copying action store path", "from", currentActionStorePath, "to", newActionStorePath) currentActionStore, err := ioutil.ReadFile(currentActionStorePath) if 
os.IsNotExist(err) { // nothing to copy diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 64bd604cd85..353017b714e 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -15,6 +15,7 @@ import ( "github.com/spf13/cobra" + "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade" @@ -40,8 +41,13 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command Short: "Watch watches Elastic Agent for failures and initiates rollback.", Long: `Watch watches Elastic Agent for failures and initiates rollback.`, Run: func(_ *cobra.Command, _ []string) { - if err := watchCmd(); err != nil { - fmt.Fprintf(streams.Err, "Error: %v\n%s\n", err, troubleshootMessage()) + log, err := configuredLogger() + if err != nil { + fmt.Fprintf(streams.Err, "Error configuring logger: %v\n%s\n", err, troubleshootMessage()) + } + if err := watchCmd(log); err != nil { + log.Errorw("Watch command failed", "error.message", err) + fmt.Fprintf(streams.Err, "Watch command failed: %v\n%s\n", err, troubleshootMessage()) os.Exit(1) } }, @@ -50,12 +56,7 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command return cmd } -func watchCmd() error { - log, err := configuredLogger() - if err != nil { - return err - } - +func watchCmd(log *logp.Logger) error { marker, err := upgrade.LoadMarker() if err != nil { log.Error("failed to load marker", err) @@ -88,7 +89,7 @@ func watchCmd() error { // if we're not within grace and marker is still there it might mean // that cleanup was not performed ok, cleanup everything except current version // hash is the same as hash of agent which initiated watcher. 
- if err := upgrade.Cleanup(release.ShortCommit(), true); err != nil { + if err := upgrade.Cleanup(log, release.ShortCommit(), true); err != nil { log.Error("rollback failed", err) } // exit nicely @@ -97,8 +98,8 @@ func watchCmd() error { ctx := context.Background() if err := watch(ctx, tilGrace, log); err != nil { - log.Debugf("Error detected proceeding to rollback: %v", err) - err = upgrade.Rollback(ctx, marker.PrevHash, marker.Hash) + log.Error("Error detected proceeding to rollback: %v", err) + err = upgrade.Rollback(ctx, log, marker.PrevHash, marker.Hash) if err != nil { log.Error("rollback failed", err) } @@ -109,7 +110,7 @@ func watchCmd() error { // in windows it might leave self untouched, this will get cleaned up // later at the start, because for windows we leave marker untouched. removeMarker := !isWindows() - err = upgrade.Cleanup(marker.Hash, removeMarker) + err = upgrade.Cleanup(log, marker.Hash, removeMarker) if err != nil { log.Error("rollback failed", err) } diff --git a/internal/pkg/agent/control/server/server.go b/internal/pkg/agent/control/server/server.go index 0b89ccd8f71..7cebc84084c 100644 --- a/internal/pkg/agent/control/server/server.go +++ b/internal/pkg/agent/control/server/server.go @@ -181,6 +181,7 @@ func (s *Server) Upgrade(ctx context.Context, request *proto.UpgradeRequest) (*p } cb, err := u.Upgrade(ctx, &upgradeRequest{request}, false) if err != nil { + s.logger.Errorw("Upgrade failed", "error.message", err, "version", request.Version, "source_uri", request.SourceURI) return &proto.UpgradeResponse{ Status: proto.ActionStatus_FAILURE, Error: err.Error(), @@ -190,6 +191,7 @@ func (s *Server) Upgrade(ctx context.Context, request *proto.UpgradeRequest) (*p // this ensures that the upgrade response over GRPC is returned go func() { <-time.After(time.Second) + s.logger.Info("Restarting after upgrade", "version", request.Version) s.rex.ReExec(cb) }() return &proto.UpgradeResponse{ From 6a0fd1bf6d6066d0f9296ab1fcc1e72f0a5ab1d9 Mon Sep 
17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Tue, 27 Sep 2022 01:37:16 -0400 Subject: [PATCH 03/63] [Automation] Update elastic stack version to 8.6.0-326f84b0 for testing (#1318) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 4e2277d2edf..a27574c766c 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-21651da3-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-326f84b0-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-21651da3-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-326f84b0-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From faf98e7c8a1c4289cbdc8f80813576e3a2d3206b Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Wed, 28 Sep 2022 01:35:32 -0400 Subject: [PATCH 04/63] [Automation] Update elastic stack version to 8.6.0-df00693f for testing (#1334) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index a27574c766c..cd80d94a564 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-326f84b0-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-df00693f-SNAPSHOT # When 
extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-326f84b0-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-df00693f-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From e6143213c1cbc69fd83aac31b1c2730b5f670fe2 Mon Sep 17 00:00:00 2001 From: Josh Dover <1813008+joshdover@users.noreply.github.com> Date: Wed, 28 Sep 2022 12:22:34 +0200 Subject: [PATCH 05/63] Add success log message after previous checkin failures (#1327) --- CHANGELOG.next.asciidoc | 1 + .../application/gateway/fleet/fleet_gateway.go | 16 +++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index faef2861ba9..7d500cf8f53 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -119,6 +119,7 @@ - Remove fleet event reporter and events from checkin body. {issue}993[993] - Fix unintended reset of source URI when downloading components {pull}1252[1252] - Create separate status reporter for local only events so that degraded fleet-checkins no longer affect health on successful fleet-checkins. 
{issue}1157[1157] {pull}1285[1285] +- Add success log message after previous checkin failures {pull}1327[1327] ==== New features diff --git a/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go b/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go index 6df9f171fbe..b88a0cafee0 100644 --- a/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go +++ b/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go @@ -210,7 +210,7 @@ func (f *fleetGateway) worker() { f.statusReporter.Update(state.Failed, errMsg, nil) } else { f.statusReporter.Update(state.Healthy, "", nil) - f.localReporter.Update(state.Healthy, "", nil) // we don't need to specifically set the local reporter to failed above, but it needs to be reset to healthy if a checking succeeds + f.localReporter.Update(state.Healthy, "", nil) // we don't need to specifically set the local reporter to failed above, but it needs to be reset to healthy if a checkin succeeds } case <-f.bgContext.Done(): @@ -280,11 +280,11 @@ func (f *fleetGateway) doExecute() (*fleetapi.CheckinResponse, error) { // Guard if the context is stopped by a out of bound call, // this mean we are rebooting to change the log level or the system is shutting us down. for f.bgContext.Err() == nil { - f.log.Debugf("Checking started") + f.log.Debugf("Checkin started") resp, err := f.execute(f.bgContext) if err != nil { f.checkinFailCounter++ - f.log.Errorf("Could not communicate with fleet-server Checking API will retry, error: %s", err) + f.log.Errorf("Could not communicate with fleet-server checkin API will retry, error: %s", err) if !f.backoff.Wait() { // Something bad has happened and we log it and we should update our current state. 
err := errors.New( @@ -299,10 +299,16 @@ func (f *fleetGateway) doExecute() (*fleetapi.CheckinResponse, error) { } if f.checkinFailCounter > 1 { f.localReporter.Update(state.Degraded, fmt.Sprintf("checkin failed: %v", err), nil) - f.log.Errorf("checking number %d failed: %s", f.checkinFailCounter, err.Error()) + f.log.Errorf("checkin number %d failed: %s", f.checkinFailCounter, err.Error()) } continue } + + if f.checkinFailCounter > 0 { + // Log at same level as error logs above so subsequent successes are visible when log level is set to 'error'. + f.log.Errorf("Checkin request to fleet-server succeeded after %d failures", f.checkinFailCounter) + } + f.checkinFailCounter = 0 // Request was successful, return the collected actions. return resp, nil @@ -338,7 +344,7 @@ func (f *fleetGateway) execute(ctx context.Context) (*fleetapi.CheckinResponse, f.unauthCounter++ if f.shouldUnenroll() { - f.log.Warnf("retrieved an invalid api key error '%d' times. Starting to unenroll the elastic agent.", f.unauthCounter) + f.log.Warnf("received an invalid api key error '%d' times. Starting to unenroll the elastic agent.", f.unauthCounter) return &fleetapi.CheckinResponse{ Actions: []fleetapi.Action{&fleetapi.ActionUnenroll{ActionID: "", ActionType: "UNENROLL", IsDetected: true}}, }, nil From 177b5fbb92bead26bef2492cdc69871ef518ae69 Mon Sep 17 00:00:00 2001 From: Anderson Queiroz Date: Wed, 28 Sep 2022 13:51:25 +0200 Subject: [PATCH 06/63] Fix status reporter initialization (#1341) --- internal/pkg/core/status/reporter.go | 13 +++++++------ internal/pkg/core/status/reporter_test.go | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/internal/pkg/core/status/reporter.go b/internal/pkg/core/status/reporter.go index 50f34651fa1..e123beab2a2 100644 --- a/internal/pkg/core/status/reporter.go +++ b/internal/pkg/core/status/reporter.go @@ -88,10 +88,11 @@ type controller struct { // NewController creates a new reporter. 
func NewController(log *logger.Logger) Controller { return &controller{ - status: Healthy, - reporters: make(map[string]*reporter), - appReporters: make(map[string]*reporter), - log: log, + status: Healthy, + reporters: make(map[string]*reporter), + localReporters: make(map[string]*reporter), + appReporters: make(map[string]*reporter), + log: log, } } @@ -154,12 +155,12 @@ func (r *controller) RegisterLocalComponent(componentIdentifier string) Reporter return rep } -// Register registers new component for status updates. +// RegisterComponent registers new component for status updates. func (r *controller) RegisterComponent(componentIdentifier string) Reporter { return r.RegisterComponentWithPersistance(componentIdentifier, false) } -// Register registers new component for status updates. +// RegisterComponentWithPersistance registers new component for status updates. func (r *controller) RegisterComponentWithPersistance(componentIdentifier string, persistent bool) Reporter { id := componentIdentifier + "-" + uuid.New().String()[:8] rep := &reporter{ diff --git a/internal/pkg/core/status/reporter_test.go b/internal/pkg/core/status/reporter_test.go index 09a66661fc5..c4f6796fb30 100644 --- a/internal/pkg/core/status/reporter_test.go +++ b/internal/pkg/core/status/reporter_test.go @@ -14,6 +14,24 @@ import ( "github.com/elastic/elastic-agent/pkg/core/logger" ) +func TestNewController_ensure_all_is_initialzed(t *testing.T) { + l, _ := logger.New("", false) + + newController := NewController(l) + + c, ok := newController.(*controller) + if !ok { + t.Fatalf("expected c %T, not c %T", controller{}, newController) + } + + c.reporters["ignore"] = &reporter{} + c.localReporters["ignore"] = &reporter{} + c.appReporters["ignore"] = &reporter{} + if c.log == nil { + t.Error("logger shouldn't be nil, it was not correctly assigned") + } +} + func TestReporter(t *testing.T) { l, _ := logger.New("", false) t.Run("healthy by default", func(t *testing.T) { From 
c35935d8d66e7755b475a5938630b1e7b1b0bdfe Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Thu, 29 Sep 2022 01:37:08 -0400 Subject: [PATCH 07/63] [Automation] Update elastic stack version to 8.6.0-a2f4f140 for testing (#1362) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index cd80d94a564..084b8b035c4 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-df00693f-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-a2f4f140-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-df00693f-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-a2f4f140-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From 4d7fae9f3c82222ac342733b6c7c1f91beed731e Mon Sep 17 00:00:00 2001 From: Julia Bardi <90178898+juliaElastic@users.noreply.github.com> Date: Thu, 29 Sep 2022 14:10:54 +0200 Subject: [PATCH 08/63] Added status message to CheckinRequest (#1369) * Added status message to CheckinRequest * added changelog * updated test * added omitempty --- CHANGELOG.next.asciidoc | 1 + internal/pkg/agent/application/gateway/fleet/fleet_gateway.go | 1 + .../pkg/agent/application/gateway/fleet/fleet_gateway_test.go | 2 ++ internal/pkg/fleetapi/checkin_cmd.go | 1 + 4 files changed, 5 insertions(+) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 7d500cf8f53..2ce614336a4 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -208,3 +208,4 @@ - Add 
`lumberjack` input type to the Filebeat spec. {pull}[959] - Add support for hints' based autodiscovery in kubernetes provider. {pull}[698] - Improve logging during upgrades. {pull}[1287] +- Added status message to CheckinRequest {pull}[1369] diff --git a/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go b/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go index b88a0cafee0..897b81ea0d3 100644 --- a/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go +++ b/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go @@ -337,6 +337,7 @@ func (f *fleetGateway) execute(ctx context.Context) (*fleetapi.CheckinResponse, AckToken: ackToken, Metadata: ecsMeta, Status: f.statusController.StatusString(), + Message: f.statusController.Status().Message, } resp, err := cmd.Execute(ctx, req) diff --git a/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go b/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go index 2d691185c1c..0cc00e739a8 100644 --- a/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go +++ b/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go @@ -26,6 +26,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/storage" "github.com/elastic/elastic-agent/internal/pkg/agent/storage/store" "github.com/elastic/elastic-agent/internal/pkg/core/state" + "github.com/elastic/elastic-agent/internal/pkg/core/status" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" noopacker "github.com/elastic/elastic-agent/internal/pkg/fleetapi/acker/noop" "github.com/elastic/elastic-agent/internal/pkg/scheduler" @@ -717,6 +718,7 @@ func TestRetriesOnFailures(t *testing.T) { statusController.On("RegisterComponent", "gateway").Return(fleetReporter).Once() statusController.On("RegisterLocalComponent", "gateway-checkin").Return(localReporter).Once() statusController.On("StatusString").Return("string") + statusController.On("Status").Return(status.AgentStatus{Message: "message"}) 
gateway, err := newFleetGatewayWithScheduler( ctx, diff --git a/internal/pkg/fleetapi/checkin_cmd.go b/internal/pkg/fleetapi/checkin_cmd.go index e225aababb9..d6a63a45e29 100644 --- a/internal/pkg/fleetapi/checkin_cmd.go +++ b/internal/pkg/fleetapi/checkin_cmd.go @@ -23,6 +23,7 @@ const checkingPath = "/api/fleet/agents/%s/checkin" // CheckinRequest consists of multiple events reported to fleet ui. type CheckinRequest struct { Status string `json:"status"` + Message string `json:"message,omitempty"` AckToken string `json:"ack_token,omitempty"` Metadata *info.ECSMeta `json:"local_metadata,omitempty"` } From e184051717ba412a78e75994e647738229120365 Mon Sep 17 00:00:00 2001 From: Michal Pristas Date: Thu, 29 Sep 2022 16:08:12 +0200 Subject: [PATCH 09/63] Fix failures when using npipe monitoring endpoints (#1371) --- internal/pkg/core/monitoring/beats/sidecar_monitor.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/internal/pkg/core/monitoring/beats/sidecar_monitor.go b/internal/pkg/core/monitoring/beats/sidecar_monitor.go index aa249bafa0f..c5d45c1c82d 100644 --- a/internal/pkg/core/monitoring/beats/sidecar_monitor.go +++ b/internal/pkg/core/monitoring/beats/sidecar_monitor.go @@ -88,6 +88,10 @@ func (b *SidecarMonitor) EnrichArgs(spec program.Spec, pipelineID string, args [ func (b *SidecarMonitor) Cleanup(spec program.Spec, pipelineID string) error { endpoint := MonitoringEndpoint(spec, b.operatingSystem, pipelineID, true) drop := monitoringDrop(endpoint) + if drop == "" { + // not exposed using sockets + return nil + } return os.RemoveAll(drop) } @@ -104,6 +108,11 @@ func (b *SidecarMonitor) Prepare(spec program.Spec, pipelineID string, uid, gid endpoint := MonitoringEndpoint(spec, b.operatingSystem, pipelineID, true) drop := monitoringDrop(endpoint) + if drop == "" { + // not exposed using sockets + return nil + } + if err := os.MkdirAll(drop, 0775); err != nil { return errors.New(err, fmt.Sprintf("failed to create a directory %q", drop)) } From 
c6a22d4b165dd72167e90aea7e548c39cdcf9c63 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Fri, 30 Sep 2022 01:36:59 -0400 Subject: [PATCH 10/63] [Automation] Update elastic stack version to 8.6.0-158a13db for testing (#1379) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 084b8b035c4..bd04ca78f28 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-a2f4f140-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-158a13db-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-a2f4f140-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-158a13db-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From 90c2c00beaf50c9ab11d048188bc210905c4cc1d Mon Sep 17 00:00:00 2001 From: Yash Tewari Date: Mon, 3 Oct 2022 12:24:43 +0530 Subject: [PATCH 11/63] Mount /etc directory in Kubernetes DaemonSet manifests. (#1382) Changes made to files like `/etc/passwd` using Linux tools like `useradd` are not reflected in the mounted file on the Agent, because the tool replaces the file instead of changing it in-place. Mounting the parent directory solves this problem. 
--- .../elastic-agent-managed-kubernetes.yaml | 34 ++++--------------- .../elastic-agent-managed-daemonset.yaml | 34 ++++--------------- .../elastic-agent-standalone-kubernetes.yaml | 34 ++++--------------- .../elastic-agent-standalone-daemonset.yaml | 34 ++++--------------- 4 files changed, 28 insertions(+), 108 deletions(-) diff --git a/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml b/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml index acb8f8d5ea2..0f7bf79f107 100644 --- a/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml +++ b/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml @@ -83,21 +83,12 @@ spec: - name: varlog mountPath: /var/log readOnly: true - - name: etc-kubernetes - mountPath: /hostfs/etc/kubernetes + - name: etc-full + mountPath: /hostfs/etc readOnly: true - name: var-lib mountPath: /hostfs/var/lib readOnly: true - - name: passwd - mountPath: /hostfs/etc/passwd - readOnly: true - - name: group - mountPath: /hostfs/etc/group - readOnly: true - - name: etcsysmd - mountPath: /hostfs/etc/systemd - readOnly: true - name: etc-mid mountPath: /etc/machine-id readOnly: true @@ -114,26 +105,15 @@ spec: - name: varlog hostPath: path: /var/log - # Needed for cloudbeat - - name: etc-kubernetes + # The following volumes are needed for Cloud Security Posture integration (cloudbeat) + # If you are not using this integration, then these volumes and the corresponding + # mounts can be removed. 
+ - name: etc-full hostPath: - path: /etc/kubernetes - # Needed for cloudbeat + path: /etc - name: var-lib hostPath: path: /var/lib - # Needed for cloudbeat - - name: passwd - hostPath: - path: /etc/passwd - # Needed for cloudbeat - - name: group - hostPath: - path: /etc/group - # Needed for cloudbeat - - name: etcsysmd - hostPath: - path: /etc/systemd # Mount /etc/machine-id from the host to determine host ID # Needed for Elastic Security integration - name: etc-mid diff --git a/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-daemonset.yaml b/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-daemonset.yaml index 878b15b8a6e..17959a4febe 100644 --- a/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-daemonset.yaml +++ b/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-daemonset.yaml @@ -83,21 +83,12 @@ spec: - name: varlog mountPath: /var/log readOnly: true - - name: etc-kubernetes - mountPath: /hostfs/etc/kubernetes + - name: etc-full + mountPath: /hostfs/etc readOnly: true - name: var-lib mountPath: /hostfs/var/lib readOnly: true - - name: passwd - mountPath: /hostfs/etc/passwd - readOnly: true - - name: group - mountPath: /hostfs/etc/group - readOnly: true - - name: etcsysmd - mountPath: /hostfs/etc/systemd - readOnly: true - name: etc-mid mountPath: /etc/machine-id readOnly: true @@ -114,26 +105,15 @@ spec: - name: varlog hostPath: path: /var/log - # Needed for cloudbeat - - name: etc-kubernetes + # The following volumes are needed for Cloud Security Posture integration (cloudbeat) + # If you are not using this integration, then these volumes and the corresponding + # mounts can be removed. 
+ - name: etc-full hostPath: - path: /etc/kubernetes - # Needed for cloudbeat + path: /etc - name: var-lib hostPath: path: /var/lib - # Needed for cloudbeat - - name: passwd - hostPath: - path: /etc/passwd - # Needed for cloudbeat - - name: group - hostPath: - path: /etc/group - # Needed for cloudbeat - - name: etcsysmd - hostPath: - path: /etc/systemd # Mount /etc/machine-id from the host to determine host ID # Needed for Elastic Security integration - name: etc-mid diff --git a/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml b/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml index d6ce952dadd..e43a251408f 100644 --- a/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml +++ b/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml @@ -722,21 +722,12 @@ spec: - name: varlog mountPath: /var/log readOnly: true - - name: etc-kubernetes - mountPath: /hostfs/etc/kubernetes + - name: etc-full + mountPath: /hostfs/etc readOnly: true - name: var-lib mountPath: /hostfs/var/lib readOnly: true - - name: passwd - mountPath: /hostfs/etc/passwd - readOnly: true - - name: group - mountPath: /hostfs/etc/group - readOnly: true - - name: etcsysmd - mountPath: /hostfs/etc/systemd - readOnly: true volumes: - name: datastreams configMap: @@ -757,26 +748,15 @@ spec: - name: varlog hostPath: path: /var/log - # Needed for cloudbeat - - name: etc-kubernetes + # The following volumes are needed for Cloud Security Posture integration (cloudbeat) + # If you are not using this integration, then these volumes and the corresponding + # mounts can be removed. 
+ - name: etc-full hostPath: - path: /etc/kubernetes - # Needed for cloudbeat + path: /etc - name: var-lib hostPath: path: /var/lib - # Needed for cloudbeat - - name: passwd - hostPath: - path: /etc/passwd - # Needed for cloudbeat - - name: group - hostPath: - path: /etc/group - # Needed for cloudbeat - - name: etcsysmd - hostPath: - path: /etc/systemd --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset.yaml b/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset.yaml index 675c68c6dfb..9d865811e46 100644 --- a/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset.yaml +++ b/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset.yaml @@ -94,21 +94,12 @@ spec: - name: varlog mountPath: /var/log readOnly: true - - name: etc-kubernetes - mountPath: /hostfs/etc/kubernetes + - name: etc-full + mountPath: /hostfs/etc readOnly: true - name: var-lib mountPath: /hostfs/var/lib readOnly: true - - name: passwd - mountPath: /hostfs/etc/passwd - readOnly: true - - name: group - mountPath: /hostfs/etc/group - readOnly: true - - name: etcsysmd - mountPath: /hostfs/etc/systemd - readOnly: true volumes: - name: datastreams configMap: @@ -129,23 +120,12 @@ spec: - name: varlog hostPath: path: /var/log - # Needed for cloudbeat - - name: etc-kubernetes + # The following volumes are needed for Cloud Security Posture integration (cloudbeat) + # If you are not using this integration, then these volumes and the corresponding + # mounts can be removed. 
+ - name: etc-full hostPath: - path: /etc/kubernetes - # Needed for cloudbeat + path: /etc - name: var-lib hostPath: path: /var/lib - # Needed for cloudbeat - - name: passwd - hostPath: - path: /etc/passwd - # Needed for cloudbeat - - name: group - hostPath: - path: /etc/group - # Needed for cloudbeat - - name: etcsysmd - hostPath: - path: /etc/systemd From 6d4087597dbf0f8a17988688140ec8cc3bcc1e78 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Tue, 4 Oct 2022 01:39:28 -0400 Subject: [PATCH 12/63] [Automation] Update elastic stack version to 8.6.0-aea1c645 for testing (#1405) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index bd04ca78f28..04187c29869 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-158a13db-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-aea1c645-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-158a13db-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-aea1c645-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From 7155492418e499f5bc854b9edc88300dbab1eb07 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Wed, 5 Oct 2022 01:38:22 -0400 Subject: [PATCH 13/63] [Automation] Update elastic stack version to 8.6.0-0fca2953 for testing (#1412) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 04187c29869..29ca952ce45 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-aea1c645-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-0fca2953-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-aea1c645-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-0fca2953-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From f4b8e2ece0b5a9451fa2c7a5789fda0ea4abfca5 Mon Sep 17 00:00:00 2001 From: Victor Martinez Date: Wed, 5 Oct 2022 14:58:38 +0100 Subject: [PATCH 14/63] ci: 7.17 is not available for the daily run (#1417) --- .ci/schedule-daily.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/schedule-daily.groovy b/.ci/schedule-daily.groovy index 5c1d7134858..adc1ec0f02e 100644 --- a/.ci/schedule-daily.groovy +++ b/.ci/schedule-daily.groovy @@ -20,7 +20,7 @@ pipeline { stages { stage('Nighly beats builds') { steps { - runBuilds(quietPeriodFactor: 2000, branches: ['main', '8.', '8.', '7.']) + runBuilds(quietPeriodFactor: 2000, branches: ['main', '8.', '8.']) } } } From 166e7f69c231abd1e5f4f6bb74b0abf865784e49 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Thu, 6 Oct 2022 01:38:23 -0400 Subject: [PATCH 15/63] [Automation] Update elastic stack version to 8.6.0-e4c15f15 for testing (#1425) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 
29ca952ce45..a4101c6a007 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-0fca2953-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-e4c15f15-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-0fca2953-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-e4c15f15-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From bd06f46da995a70e8d2d8809f3c14ad0a51169f9 Mon Sep 17 00:00:00 2001 From: Aleksandr Maus Date: Thu, 6 Oct 2022 06:50:42 -0400 Subject: [PATCH 16/63] [backport main] Fix: Agent failed to upgrade from 8.4.2 to 8.5.0 BC1 for MAC 12 agent using agent binary. (#1401) [backport main] Fix: Agent failed to upgrade from 8.4.2 to 8.5.0 BC1 for MAC 12 agent using agent binary. 
(#1401) --- dev-tools/packaging/packages.yml | 3 + .../templates/darwin/elastic-agent.tmpl | 11 +++ internal/pkg/agent/application/info/state.go | 25 +---- .../pkg/agent/application/info/state_test.go | 53 ----------- .../pkg/agent/application/paths/common.go | 49 +++++++--- .../agent/application/paths/common_test.go | 92 +++++++++++++++++++ .../application/upgrade/service_darwin.go | 11 +-- .../agent/application/upgrade/step_relink.go | 17 +++- .../agent/application/upgrade/step_unpack.go | 20 ++-- .../pkg/agent/application/upgrade/upgrade.go | 7 +- internal/pkg/agent/install/install.go | 19 ++-- 11 files changed, 178 insertions(+), 129 deletions(-) create mode 100644 dev-tools/packaging/templates/darwin/elastic-agent.tmpl delete mode 100644 internal/pkg/agent/application/info/state_test.go create mode 100644 internal/pkg/agent/application/paths/common_test.go diff --git a/dev-tools/packaging/packages.yml b/dev-tools/packaging/packages.yml index 860e86e97a7..d2e8df06e4f 100644 --- a/dev-tools/packaging/packages.yml +++ b/dev-tools/packaging/packages.yml @@ -1089,6 +1089,9 @@ specs: <<: *agent_darwin_binary_spec <<: *elastic_license_for_binaries files: + 'data/{{.BeatName}}-{{ commit_short }}/elastic-agent': + template: '{{ elastic_beats_dir }}/dev-tools/packaging/templates/darwin/elastic-agent.tmpl' + mode: 0755 '{{.BeatName}}{{.BinaryExt}}': source: data/{{.BeatName}}-{{ commit_short }}/elastic-agent.app/Contents/MacOS/{{.BeatName}}{{.BinaryExt}} symlink: true diff --git a/dev-tools/packaging/templates/darwin/elastic-agent.tmpl b/dev-tools/packaging/templates/darwin/elastic-agent.tmpl new file mode 100644 index 00000000000..74c0f238c28 --- /dev/null +++ b/dev-tools/packaging/templates/darwin/elastic-agent.tmpl @@ -0,0 +1,11 @@ +#!/bin/sh +# Fix up the symlink and exit + +set -e + +symlink="/Library/Elastic/Agent/elastic-agent" + +if test -L "$symlink"; then + ln -sfn "data/elastic-agent-{{ commit_short }}/elastic-agent.app/Contents/MacOS/elastic-agent" "$symlink" 
+fi + diff --git a/internal/pkg/agent/application/info/state.go b/internal/pkg/agent/application/info/state.go index e00948fab58..b9d73504d06 100644 --- a/internal/pkg/agent/application/info/state.go +++ b/internal/pkg/agent/application/info/state.go @@ -5,14 +5,11 @@ package info import ( - "fmt" "os" "path/filepath" "runtime" - "strings" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" - "github.com/elastic/elastic-agent/internal/pkg/release" ) const ( @@ -31,17 +28,8 @@ func RunningInstalled() bool { } execPath, _ := os.Executable() execPath, _ = filepath.Abs(execPath) - execName := filepath.Base(execPath) - execDir := filepath.Dir(execPath) - if IsInsideData(execDir) { - // executable path is being reported as being down inside of data path - // move up to directories to perform the comparison - execDir = filepath.Dir(filepath.Dir(execDir)) - if runtime.GOOS == darwin { - execDir = filepath.Dir(filepath.Dir(filepath.Dir(execDir))) - } - execPath = filepath.Join(execDir, execName) - } + + execPath = filepath.Join(paths.ExecDir(filepath.Dir(execPath)), filepath.Base(execPath)) for _, expected := range expectedPaths { if paths.ArePathsEqual(expected, execPath) { return true @@ -49,12 +37,3 @@ func RunningInstalled() bool { } return false } - -// IsInsideData returns true when the exePath is inside of the current Agents data path. -func IsInsideData(exePath string) bool { - expectedPath := filepath.Join("data", fmt.Sprintf("elastic-agent-%s", release.ShortCommit())) - if runtime.GOOS == darwin { - expectedPath = filepath.Join(expectedPath, "elastic-agent.app", "Contents", "MacOS") - } - return strings.HasSuffix(exePath, expectedPath) -} diff --git a/internal/pkg/agent/application/info/state_test.go b/internal/pkg/agent/application/info/state_test.go deleted file mode 100644 index 39f5b7e9738..00000000000 --- a/internal/pkg/agent/application/info/state_test.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package info - -import ( - "fmt" - "path/filepath" - "runtime" - "testing" - - "github.com/elastic/elastic-agent/internal/pkg/release" - "github.com/google/go-cmp/cmp" -) - -func TestIsInsideData(t *testing.T) { - - validExePath := filepath.Join("data", fmt.Sprintf("elastic-agent-%s", release.ShortCommit())) - - if runtime.GOOS == darwin { - validExePath = filepath.Join(validExePath, "elastic-agent.app", "Contents", "MacOS") - } - - tests := []struct { - name string - exePath string - res bool - }{ - { - name: "empty", - }, - { - name: "invalid", - exePath: "data/elastic-agent", - }, - { - name: "valid", - exePath: validExePath, - res: true, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - res := IsInsideData(tc.exePath) - diff := cmp.Diff(tc.res, res) - if diff != "" { - t.Error(diff) - } - }) - } -} diff --git a/internal/pkg/agent/application/paths/common.go b/internal/pkg/agent/application/paths/common.go index 3bebe122154..8bf37d36c88 100644 --- a/internal/pkg/agent/application/paths/common.go +++ b/internal/pkg/agent/application/paths/common.go @@ -177,21 +177,14 @@ func SetInstall(path string) { // initialTop returns the initial top-level path for the binary // // When nested in top-level/data/elastic-agent-${hash}/ the result is top-level/. 
-// The agent fexecutable for MacOS is wrappend in the bundle, so the path to the binary is +// The agent executable for MacOS is wrapped in the app bundle, so the path to the binary is // top-level/data/elastic-agent-${hash}/elastic-agent.app/Contents/MacOS func initialTop() string { - exePath := retrieveExecutablePath() - if insideData(exePath) { - exePath = filepath.Dir(filepath.Dir(exePath)) - if runtime.GOOS == darwin { - exePath = filepath.Dir(filepath.Dir(filepath.Dir(exePath))) - } - } - return exePath + return ExecDir(retrieveExecutableDir()) } // retrieveExecutablePath returns the executing binary, even if the started binary was a symlink -func retrieveExecutablePath() string { +func retrieveExecutableDir() string { execPath, err := os.Executable() if err != nil { panic(err) @@ -203,11 +196,37 @@ func retrieveExecutablePath() string { return filepath.Dir(evalPath) } -// insideData returns true when the exePath is inside of the current Agents data path. -func insideData(exePath string) bool { - expectedPath := filepath.Join("data", fmt.Sprintf("elastic-agent-%s", release.ShortCommit())) +// isInsideData returns true when the exePath is inside of the current Agents data path. +func isInsideData(exeDir string) bool { + expectedDir := binaryDir(filepath.Join("data", fmt.Sprintf("elastic-agent-%s", release.ShortCommit()))) + return strings.HasSuffix(exeDir, expectedDir) +} + +// ExecDir returns the "executable" directory which is: +// 1. The same if the execDir is not inside of the data path +// 2. Two levels up if the execDir inside of the data path on non-macOS platforms +// 3. 
Five levels up if the execDir inside of the dataPath on macOS platform +func ExecDir(execDir string) string { + if isInsideData(execDir) { + execDir = filepath.Dir(filepath.Dir(execDir)) + if runtime.GOOS == darwin { + execDir = filepath.Dir(filepath.Dir(filepath.Dir(execDir))) + } + } + return execDir +} + +// binaryDir returns the application binary directory +// For macOS it appends the path inside of the app bundle +// For other platforms it returns the same dir +func binaryDir(baseDir string) string { if runtime.GOOS == darwin { - expectedPath = filepath.Join(expectedPath, "elastic-agent.app", "Contents", "MacOS") + baseDir = filepath.Join(baseDir, "elastic-agent.app", "Contents", "MacOS") } - return strings.HasSuffix(exePath, expectedPath) + return baseDir +} + +// BinaryPath returns the application binary path that is concatenation of the directory and the agentName +func BinaryPath(baseDir, agentName string) string { + return filepath.Join(binaryDir(baseDir), agentName) } diff --git a/internal/pkg/agent/application/paths/common_test.go b/internal/pkg/agent/application/paths/common_test.go new file mode 100644 index 00000000000..a5d76b405be --- /dev/null +++ b/internal/pkg/agent/application/paths/common_test.go @@ -0,0 +1,92 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package paths + +import ( + "fmt" + "path/filepath" + "runtime" + "testing" + + "github.com/elastic/elastic-agent/internal/pkg/release" + "github.com/google/go-cmp/cmp" +) + +func validTestPath() string { + validPath := filepath.Join("data", fmt.Sprintf("elastic-agent-%s", release.ShortCommit())) + if runtime.GOOS == darwin { + validPath = filepath.Join(validPath, "elastic-agent.app", "Contents", "MacOS") + } + return validPath +} + +func TestIsInsideData(t *testing.T) { + tests := []struct { + name string + exePath string + res bool + }{ + { + name: "empty", + }, + { + name: "invalid", + exePath: "data/elastic-agent", + }, + { + name: "valid", + exePath: validTestPath(), + res: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + res := isInsideData(tc.exePath) + diff := cmp.Diff(tc.res, res) + if diff != "" { + t.Error(diff) + } + }) + } +} + +func TestExecDir(t *testing.T) { + base := filepath.Join(string(filepath.Separator), "Library", "Elastic", "Agent") + tests := []struct { + name string + execDir string + resDir string + }{ + { + name: "empty", + }, + { + name: "non-data path", + execDir: "data/elastic-agent", + resDir: "data/elastic-agent", + }, + { + name: "valid", + execDir: validTestPath(), + resDir: ".", + }, + { + name: "valid abs", + execDir: filepath.Join(base, validTestPath()), + resDir: base, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + resDir := ExecDir(tc.execDir) + diff := cmp.Diff(tc.resDir, resDir) + if diff != "" { + t.Error(diff) + } + }) + } +} diff --git a/internal/pkg/agent/application/upgrade/service_darwin.go b/internal/pkg/agent/application/upgrade/service_darwin.go index 2bdb435147b..58709dd3e53 100644 --- a/internal/pkg/agent/application/upgrade/service_darwin.go +++ b/internal/pkg/agent/application/upgrade/service_darwin.go @@ -14,7 +14,6 @@ import ( "fmt" "os" "os/exec" - "path/filepath" "regexp" "strconv" "strings" @@ -50,13 +49,13 @@ func (p 
*darwinPidProvider) Close() {} func (p *darwinPidProvider) PID(ctx context.Context) (int, error) { piders := []func(context.Context) (int, error){ - p.piderFromCmd(ctx, "launchctl", "list", paths.ServiceName), + p.piderFromCmd("launchctl", "list", paths.ServiceName), } // if release is specifically built to be upgradeable (using DEV flag) // we dont require to run as a service and will need sudo fallback if release.Upgradeable() { - piders = append(piders, p.piderFromCmd(ctx, "sudo", "launchctl", "list", paths.ServiceName)) + piders = append(piders, p.piderFromCmd("sudo", "launchctl", "list", paths.ServiceName)) } var pidErrors error @@ -72,7 +71,7 @@ func (p *darwinPidProvider) PID(ctx context.Context) (int, error) { return 0, pidErrors } -func (p *darwinPidProvider) piderFromCmd(ctx context.Context, name string, args ...string) func(context.Context) (int, error) { +func (p *darwinPidProvider) piderFromCmd(name string, args ...string) func(context.Context) (int, error) { return func(context.Context) (int, error) { listCmd := exec.Command(name, args...) listCmd.SysProcAttr = &syscall.SysProcAttr{ @@ -115,8 +114,8 @@ func (p *darwinPidProvider) piderFromCmd(ctx context.Context, name string, args } func invokeCmd(topPath string) *exec.Cmd { - homeExePath := filepath.Join(topPath, agentName) - + // paths.BinaryPath properly derives the newPath depending on the platform. The path to the binary for macOS is inside of the app bundle. 
+ homeExePath := paths.BinaryPath(topPath, agentName) cmd := exec.Command(homeExePath, watcherSubcommand, "--path.config", paths.Config(), "--path.home", paths.Top(), diff --git a/internal/pkg/agent/application/upgrade/step_relink.go b/internal/pkg/agent/application/upgrade/step_relink.go index e56b5a6642e..13c49693062 100644 --- a/internal/pkg/agent/application/upgrade/step_relink.go +++ b/internal/pkg/agent/application/upgrade/step_relink.go @@ -17,18 +17,25 @@ import ( "github.com/elastic/elastic-agent/pkg/core/logger" ) +const ( + windows = "windows" + exe = ".exe" +) + // ChangeSymlink updates symlink paths to match current version. func ChangeSymlink(ctx context.Context, log *logger.Logger, targetHash string) error { // create symlink to elastic-agent-{hash} hashedDir := fmt.Sprintf("%s-%s", agentName, targetHash) symlinkPath := filepath.Join(paths.Top(), agentName) - newPath := filepath.Join(paths.Top(), "data", hashedDir, agentName) + + // paths.BinaryPath properly derives the binary directory depending on the platform. The path to the binary for macOS is inside of the app bundle. 
+ newPath := paths.BinaryPath(filepath.Join(paths.Top(), "data", hashedDir), agentName) // handle windows suffixes - if runtime.GOOS == "windows" { - symlinkPath += ".exe" - newPath += ".exe" + if runtime.GOOS == windows { + symlinkPath += exe + newPath += exe } prevNewPath := prevSymlinkPath() @@ -51,7 +58,7 @@ func prevSymlinkPath() string { agentPrevName := agentName + ".prev" // handle windows suffixes - if runtime.GOOS == "windows" { + if runtime.GOOS == windows { agentPrevName = agentName + ".exe.prev" } diff --git a/internal/pkg/agent/application/upgrade/step_unpack.go b/internal/pkg/agent/application/upgrade/step_unpack.go index 4a9538a7e07..45d007e55f4 100644 --- a/internal/pkg/agent/application/upgrade/step_unpack.go +++ b/internal/pkg/agent/application/upgrade/step_unpack.go @@ -8,10 +8,8 @@ import ( "archive/tar" "archive/zip" "compress/gzip" - "context" "fmt" "io" - "io/ioutil" "os" "path/filepath" "runtime" @@ -25,13 +23,13 @@ import ( ) // unpack unpacks archive correctly, skips root (symlink, config...) 
unpacks data/* -func (u *Upgrader) unpack(ctx context.Context, version, archivePath string) (string, error) { +func (u *Upgrader) unpack(version, archivePath string) (string, error) { // unpack must occur in directory that holds the installation directory // or the extraction will be double nested var hash string var err error - if runtime.GOOS == "windows" { - hash, err = unzip(u.log, version, archivePath) + if runtime.GOOS == windows { + hash, err = unzip(u.log, archivePath) } else { hash, err = untar(u.log, version, archivePath) } @@ -45,7 +43,7 @@ func (u *Upgrader) unpack(ctx context.Context, version, archivePath string) (str return hash, nil } -func unzip(log *logger.Logger, version string, archivePath string) (string, error) { +func unzip(log *logger.Logger, archivePath string) (string, error) { var hash, rootDir string r, err := zip.OpenReader(archivePath) if err != nil { @@ -69,7 +67,7 @@ func unzip(log *logger.Logger, version string, archivePath string) (string, erro //get hash fileName := strings.TrimPrefix(f.Name, fileNamePrefix) if fileName == agentCommitFile { - hashBytes, err := ioutil.ReadAll(rc) + hashBytes, err := io.ReadAll(rc) if err != nil || len(hashBytes) < hashLen { return err } @@ -87,10 +85,10 @@ func unzip(log *logger.Logger, version string, archivePath string) (string, erro if f.FileInfo().IsDir() { log.Debugw("Unpacking directory", "archive", "zip", "file.path", path) - os.MkdirAll(path, f.Mode()) + _ = os.MkdirAll(path, f.Mode()) } else { log.Debugw("Unpacking file", "archive", "zip", "file.path", path) - os.MkdirAll(filepath.Dir(path), f.Mode()) + _ = os.MkdirAll(filepath.Dir(path), f.Mode()) f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode()) if err != nil { return err @@ -101,6 +99,7 @@ func unzip(log *logger.Logger, version string, archivePath string) (string, erro } }() + //nolint:gosec // legacy if _, err = io.Copy(f, rc); err != nil { return err } @@ -163,7 +162,7 @@ func untar(log *logger.Logger, version 
string, archivePath string) (string, erro fileName := strings.TrimPrefix(f.Name, fileNamePrefix) if fileName == agentCommitFile { - hashBytes, err := ioutil.ReadAll(tr) + hashBytes, err := io.ReadAll(tr) if err != nil || len(hashBytes) < hashLen { return "", err } @@ -200,6 +199,7 @@ func untar(log *logger.Logger, version string, archivePath string) (string, erro return "", errors.New(err, "TarInstaller: creating file "+abs, errors.TypeFilesystem, errors.M(errors.MetaKeyPath, abs)) } + //nolint:gosec // legacy _, err = io.Copy(wf, tr) if closeErr := wf.Close(); closeErr != nil && err == nil { err = closeErr diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index e31c8ef0378..d8c55e17806 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -7,7 +7,6 @@ package upgrade import ( "context" "fmt" - "io/ioutil" "os" "path/filepath" "strings" @@ -151,7 +150,7 @@ func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ ree return nil, err } - newHash, err := u.unpack(ctx, a.Version(), archivePath) + newHash, err := u.unpack(a.Version(), archivePath) if err != nil { return nil, err } @@ -306,7 +305,7 @@ func copyActionStore(log *logger.Logger, newHash string) error { for _, currentActionStorePath := range storePaths { newActionStorePath := filepath.Join(newHome, filepath.Base(currentActionStorePath)) log.Debugw("Copying action store path", "from", currentActionStorePath, "to", newActionStorePath) - currentActionStore, err := ioutil.ReadFile(currentActionStorePath) + currentActionStore, err := os.ReadFile(currentActionStorePath) if os.IsNotExist(err) { // nothing to copy continue @@ -315,7 +314,7 @@ func copyActionStore(log *logger.Logger, newHash string) error { return err } - if err := ioutil.WriteFile(newActionStorePath, currentActionStore, 0600); err != nil { + if err := os.WriteFile(newActionStorePath, 
currentActionStore, 0600); err != nil { return err } } diff --git a/internal/pkg/agent/install/install.go b/internal/pkg/agent/install/install.go index a5b02eb015b..431fd1db931 100644 --- a/internal/pkg/agent/install/install.go +++ b/internal/pkg/agent/install/install.go @@ -6,14 +6,12 @@ package install import ( "fmt" - "io/ioutil" "os" "path/filepath" "runtime" "github.com/otiai10/copy" - "github.com/elastic/elastic-agent/internal/pkg/agent/application/info" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/errors" ) @@ -58,7 +56,10 @@ func Install(cfgFile string) error { // place shell wrapper, if present on platform if paths.ShellWrapperPath != "" { - // Install symlink for darwin instead + // Install symlink for darwin instead of the wrapper script. + // Elastic-agent should be first process that launchd starts in order to be able to grant + // the Full-Disk Access (FDA) to the agent and it's child processes. + // This is specifically important for osquery FDA permissions at the moment. 
if runtime.GOOS == darwin { // Check if previous shell wrapper or symlink exists and remove it so it can be overwritten if _, err := os.Lstat(paths.ShellWrapperPath); err == nil { @@ -80,7 +81,7 @@ func Install(cfgFile string) error { err = os.MkdirAll(filepath.Dir(paths.ShellWrapperPath), 0755) if err == nil { //nolint: gosec // this is intended to be an executable shell script, not chaning the permissions for the linter - err = ioutil.WriteFile(paths.ShellWrapperPath, []byte(paths.ShellWrapper), 0755) + err = os.WriteFile(paths.ShellWrapperPath, []byte(paths.ShellWrapper), 0755) } if err != nil { return errors.New( @@ -172,15 +173,7 @@ func findDirectory() (string, error) { if err != nil { return "", err } - sourceDir := filepath.Dir(execPath) - if info.IsInsideData(sourceDir) { - // executable path is being reported as being down inside of data path - // move up to directories to perform the copy - sourceDir = filepath.Dir(filepath.Dir(sourceDir)) - if runtime.GOOS == darwin { - sourceDir = filepath.Dir(filepath.Dir(filepath.Dir(sourceDir))) - } - } + sourceDir := paths.ExecDir(filepath.Dir(execPath)) err = verifyDirectory(sourceDir) if err != nil { return "", err From 22231384f80fa5c0bd6a31472808fec9c35e6841 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Thu, 6 Oct 2022 18:59:15 +0200 Subject: [PATCH 17/63] Fix docker provider add_fields processors (#1420) The Docker provider was using a wrong key when defining the `add_fields` processor, this causes Filebeat not to start the input and stay in an unhealthy state. This commit fixes it. 
Fixes https://github.com/elastic/beats/issues/29030 --- ...989867-fix-docker-provider-processors.yaml | 31 +++++++++++++++++++ .../pkg/composable/providers/docker/docker.go | 2 +- .../providers/docker/docker_test.go | 2 +- 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 changelog/fragments/1664989867-fix-docker-provider-processors.yaml diff --git a/changelog/fragments/1664989867-fix-docker-provider-processors.yaml b/changelog/fragments/1664989867-fix-docker-provider-processors.yaml new file mode 100644 index 00000000000..c7c87152479 --- /dev/null +++ b/changelog/fragments/1664989867-fix-docker-provider-processors.yaml @@ -0,0 +1,31 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: bug-fix + +# Change summary; a 80ish characters long description of the change. +summary: Fix docker provider add_fields processors + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +#description: + +# Affected component; a word indicating the component this changeset affects. +component: providers + +# PR number; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. 
+# Please provide it if you are adding a fragment for a different PR. +#pr: 1234 + +# Issue number; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +#issue: 1234 diff --git a/internal/pkg/composable/providers/docker/docker.go b/internal/pkg/composable/providers/docker/docker.go index b832cbb6c92..f4b4afb9c70 100644 --- a/internal/pkg/composable/providers/docker/docker.go +++ b/internal/pkg/composable/providers/docker/docker.go @@ -149,7 +149,7 @@ func generateData(event bus.Event) (*dockerContainerData, error) { "image": container.Image, "labels": processorLabelMap, }, - "to": "container", + "target": "container", }, }, }, diff --git a/internal/pkg/composable/providers/docker/docker_test.go b/internal/pkg/composable/providers/docker/docker_test.go index d0b5c69ba4d..a035fe06a58 100644 --- a/internal/pkg/composable/providers/docker/docker_test.go +++ b/internal/pkg/composable/providers/docker/docker_test.go @@ -53,7 +53,7 @@ func TestGenerateData(t *testing.T) { "co_elastic_logs/disable": "true", }, }, - "to": "container", + "target": "container", }, }, } From d8c993922550f858845f4fa5b9a8da7d4c502333 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Fri, 7 Oct 2022 01:36:35 -0400 Subject: [PATCH 18/63] [Automation] Update elastic stack version to 8.6.0-d939cfde for testing (#1436) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index a4101c6a007..0886e299887 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-e4c15f15-SNAPSHOT + image: 
docker.elastic.co/elasticsearch/elasticsearch:8.6.0-d939cfde-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-e4c15f15-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-d939cfde-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From b0a98e2fac32dea33fd98f58660fff61633363ef Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Mon, 10 Oct 2022 01:38:38 -0400 Subject: [PATCH 19/63] [Automation] Update elastic stack version to 8.6.0-7c9f25a9 for testing (#1446) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 0886e299887..df51cdf11c0 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-d939cfde-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-7c9f25a9-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-d939cfde-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-7c9f25a9-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From a3ea75072acaf6b4ba104377c0a78a6c1b320fcf Mon Sep 17 00:00:00 2001 From: Chris Mark Date: Mon, 10 Oct 2022 12:52:22 +0300 Subject: [PATCH 20/63] Enable integration only when datastreams are not defined (#1456) --- .../composable/providers/kubernetes/hints.go | 7 +- 
.../providers/kubernetes/hints_test.go | 73 ++++++++++++++++++- 2 files changed, 74 insertions(+), 6 deletions(-) diff --git a/internal/pkg/composable/providers/kubernetes/hints.go b/internal/pkg/composable/providers/kubernetes/hints.go index 1a779e0c2c6..5499d1408cb 100644 --- a/internal/pkg/composable/providers/kubernetes/hints.go +++ b/internal/pkg/composable/providers/kubernetes/hints.go @@ -144,9 +144,7 @@ func GenerateHintsMapping(hints mapstr.M, kubeMeta mapstr.M, logger *logp.Logger if integration == "" { return hintsMapping } - integrationHints := mapstr.M{ - "enabled": true, - } + integrationHints := mapstr.M{} if containerID != "" { _, _ = hintsMapping.Put("container_id", containerID) @@ -194,6 +192,9 @@ func GenerateHintsMapping(hints mapstr.M, kubeMeta mapstr.M, logger *logp.Logger } dataStreams := builder.getDataStreams(hints) + if len(dataStreams) == 0 { + _, _ = integrationHints.Put("enabled", true) + } for _, dataStream := range dataStreams { streamHints := mapstr.M{ "enabled": true, diff --git a/internal/pkg/composable/providers/kubernetes/hints_test.go b/internal/pkg/composable/providers/kubernetes/hints_test.go index e23296d09a7..04c25575f26 100644 --- a/internal/pkg/composable/providers/kubernetes/hints_test.go +++ b/internal/pkg/composable/providers/kubernetes/hints_test.go @@ -78,7 +78,6 @@ func TestGenerateHintsMapping(t *testing.T) { expected := mapstr.M{ "redis": mapstr.M{ - "enabled": true, "host": "127.0.0.5:6379", "metrics_path": "/metrics", "username": "username", @@ -118,6 +117,76 @@ func TestGenerateHintsMapping(t *testing.T) { assert.Equal(t, expected, hintsMapping) } +func TestGenerateHintsMappingWithDefaults(t *testing.T) { + logger := getLogger() + pod := &kubernetes.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testpod", + UID: types.UID(uid), + Namespace: "testns", + Labels: map[string]string{ + "foo": "bar", + "with-dash": "dash-value", + "with/slash": "some/path", + }, + Annotations: map[string]string{ + "app": "production", 
+ }, + }, + TypeMeta: metav1.TypeMeta{ + Kind: "Pod", + APIVersion: "v1", + }, + Spec: kubernetes.PodSpec{ + NodeName: "testnode", + }, + Status: kubernetes.PodStatus{PodIP: "127.0.0.5"}, + } + + mapping := map[string]interface{}{ + "namespace": pod.GetNamespace(), + "pod": mapstr.M{ + "uid": string(pod.GetUID()), + "name": pod.GetName(), + "ip": pod.Status.PodIP, + }, + "namespace_annotations": mapstr.M{ + "nsa": "nsb", + }, + "labels": mapstr.M{ + "foo": "bar", + "with-dash": "dash-value", + "with/slash": "some/path", + }, + "annotations": mapstr.M{ + "app": "production", + }, + } + hints := mapstr.M{ + "hints": mapstr.M{ + "host": "${kubernetes.pod.ip}:6379", + "package": "redis", + "metrics_path": "/metrics", + "timeout": "42s", + "period": "42s", + }, + } + + expected := mapstr.M{ + "redis": mapstr.M{ + "enabled": true, + "host": "127.0.0.5:6379", + "metrics_path": "/metrics", + "timeout": "42s", + "period": "42s", + }, + } + + hintsMapping := GenerateHintsMapping(hints, mapping, logger, "") + + assert.Equal(t, expected, hintsMapping) +} + func TestGenerateHintsMappingWithContainerID(t *testing.T) { logger := getLogger() pod := &kubernetes.Pod{ @@ -184,7 +253,6 @@ func TestGenerateHintsMappingWithContainerID(t *testing.T) { "container_logs": mapstr.M{ "enabled": true, }, - "enabled": true, "host": "127.0.0.5:6379", "metrics_path": "/metrics", "username": "username", @@ -281,7 +349,6 @@ func TestGenerateHintsMappingWithLogStream(t *testing.T) { expected := mapstr.M{ "container_id": "asdfghjkl", "apache": mapstr.M{ - "enabled": true, "container_logs": mapstr.M{ "enabled": true, }, From f772a3deab4bab4894e37f11ad731b1be3ea93aa Mon Sep 17 00:00:00 2001 From: Michael Katsoulis Date: Mon, 10 Oct 2022 15:15:40 +0300 Subject: [PATCH 21/63] Add not dedoted k8s pod labels in autodiscover provider to be used for templating, exactly like annotations (#1398) --- CHANGELOG.next.asciidoc | 1 + .../pkg/composable/providers/kubernetes/pod.go | 15 ++++++++++++++- 2 files 
changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 2ce614336a4..f178d80c735 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -120,6 +120,7 @@ - Fix unintended reset of source URI when downloading components {pull}1252[1252] - Create separate status reporter for local only events so that degraded fleet-checkins no longer affect health on successful fleet-checkins. {issue}1157[1157] {pull}1285[1285] - Add success log message after previous checkin failures {pull}1327[1327] +- Fix inconsistency between kubernetes pod annotations and labels in autodiscovery templates {pull}1327[1327] ==== New features diff --git a/internal/pkg/composable/providers/kubernetes/pod.go b/internal/pkg/composable/providers/kubernetes/pod.go index d4553dda6d3..27c9b53bec2 100644 --- a/internal/pkg/composable/providers/kubernetes/pod.go +++ b/internal/pkg/composable/providers/kubernetes/pod.go @@ -267,6 +267,12 @@ func generatePodData( _ = safemapstr.Put(annotations, k, v) } k8sMapping["annotations"] = annotations + // Pass labels(not dedoted) to all events so that they can be used in templating. + labels := mapstr.M{} + for k, v := range pod.GetObjectMeta().GetLabels() { + _ = safemapstr.Put(labels, k, v) + } + k8sMapping["labels"] = labels processors := []map[string]interface{}{} // meta map includes metadata that go under kubernetes.* @@ -305,6 +311,12 @@ func generateContainerData( _ = safemapstr.Put(annotations, k, v) } + // Pass labels to all events so that it can be used in templating. 
+ labels := mapstr.M{} + for k, v := range pod.GetObjectMeta().GetLabels() { + _ = safemapstr.Put(labels, k, v) + } + for _, c := range containers { // If it doesn't have an ID, container doesn't exist in // the runtime, emit only an event if we are stopping, so @@ -329,8 +341,9 @@ func generateContainerData( if len(namespaceAnnotations) != 0 { k8sMapping["namespace_annotations"] = namespaceAnnotations } - // add annotations to be discoverable by templates + // add annotations and labels to be discoverable by templates k8sMapping["annotations"] = annotations + k8sMapping["labels"] = labels //container ECS fields cmeta := mapstr.M{ From 0efbca645122704ed2698e6216a75b2e7da10638 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Tue, 11 Oct 2022 01:41:08 -0400 Subject: [PATCH 22/63] [Automation] Update elastic stack version to 8.6.0-c49fac70 for testing (#1464) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index df51cdf11c0..66813f6f057 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-7c9f25a9-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-c49fac70-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-7c9f25a9-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-c49fac70-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From 7f813fa99607a76da365e04cfedf8a5e424c2e74 Mon Sep 17 00:00:00 2001 From: Michael Katsoulis Date: Tue, 11 Oct 2022 13:58:52 +0300 
Subject: [PATCH 23/63] Add storageclass permissions in agent clusterrole (#1470) * Add storageclass permissions in agent clusterrole --- CHANGELOG.next.asciidoc | 2 +- deploy/kubernetes/elastic-agent-managed-kubernetes.yaml | 4 ++++ .../elastic-agent-managed/elastic-agent-managed-role.yaml | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index f178d80c735..f8033809cb4 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -121,7 +121,7 @@ - Create separate status reporter for local only events so that degraded fleet-checkins no longer affect health on successful fleet-checkins. {issue}1157[1157] {pull}1285[1285] - Add success log message after previous checkin failures {pull}1327[1327] - Fix inconsistency between kubernetes pod annotations and labels in autodiscovery templates {pull}1327[1327] - +- Add permissions to elastic-agent-managed clusterrole to get, list, watch storageclasses {pull}1470[1470] ==== New features - Prepare packaging for endpoint and asc files {pull-beats}[20186] diff --git a/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml b/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml index 0f7bf79f107..1f3c3d8ec9b 100644 --- a/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml +++ b/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml @@ -227,6 +227,10 @@ rules: resources: - podsecuritypolicies verbs: ["get", "list", "watch"] + - apiGroups: [ "storage.k8s.io" ] + resources: + - storageclasses + verbs: [ "get", "list", "watch" ] --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role diff --git a/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-role.yaml b/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-role.yaml index 0d961215f4e..778a4ba5520 100644 --- a/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-role.yaml +++ b/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-role.yaml @@ -63,6 +63,10 @@ 
rules: resources: - podsecuritypolicies verbs: ["get", "list", "watch"] + - apiGroups: [ "storage.k8s.io" ] + resources: + - storageclasses + verbs: [ "get", "list", "watch" ] --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role From 6223273d2e18b50ab9f29b99091b7540da476860 Mon Sep 17 00:00:00 2001 From: Julien Lind Date: Tue, 11 Oct 2022 14:02:07 +0200 Subject: [PATCH 24/63] Remote QA-labels automation (#1455) --- .github/workflows/qa-labels.yml | 93 --------------------------------- 1 file changed, 93 deletions(-) delete mode 100644 .github/workflows/qa-labels.yml diff --git a/.github/workflows/qa-labels.yml b/.github/workflows/qa-labels.yml deleted file mode 100644 index bbbd4439847..00000000000 --- a/.github/workflows/qa-labels.yml +++ /dev/null @@ -1,93 +0,0 @@ -name: Add QA labels to Elastic Agent issues -on: - # pull_request_target allows running actions on PRs from forks with a read/write GITHUB_TOKEN, but it will not allow - # running workflows defined in the PRs itself, only workflows already merged into the target branch. This avoids - # potential vulnerabilities that could allow someone to open a PR and retrieve secrets. - # It's important that this workflow never runs any checkout actions which could be used to circumvent this protection. 
- # See these links for more information: - # - https://github.blog/2020-08-03-github-actions-improvements-for-fork-and-pull-request-workflows/ - # - https://nathandavison.com/blog/github-actions-and-the-threat-of-malicious-pull-requests - pull_request_target: - types: - - closed - -jobs: - fetch_issues_to_label: - runs-on: ubuntu-latest - # Only run on PRs that were merged for the Elastic Agent teams - if: | - github.event.pull_request.merged_at && - ( - contains(github.event.pull_request.labels.*.name, 'Team:Elastic-Agent') || - contains(github.event.pull_request.labels.*.name, 'Team:Elastic-Agent-Data-Plane') || - contains(github.event.pull_request.labels.*.name, 'Team:Elastic-Agent-Control-Plane') - ) - outputs: - issue_ids: ${{ steps.issues_to_label.outputs.value }} - label_ids: ${{ steps.label_ids.outputs.value }} - steps: - - uses: octokit/graphql-action@v2.x - id: closing_issues - with: - query: | - query closingIssueNumbersQuery($prnumber: Int!) { - repository(owner: "elastic", name: "elastic-agent") { - pullRequest(number: $prnumber) { - closingIssuesReferences(first: 10) { - nodes { - id - labels(first: 20) { - nodes { - id - name - } - } - } - } - } - } - } - prnumber: ${{ github.event.number }} - token: ${{ secrets.GITHUB_TOKEN }} - - uses: sergeysova/jq-action@v2 - id: issues_to_label - with: - # Map to the issues' node id - cmd: echo $CLOSING_ISSUES | jq -c '.repository.pullRequest.closingIssuesReferences.nodes | map(.id)' - multiline: true - env: - CLOSING_ISSUES: ${{ steps.closing_issues.outputs.data }} - - uses: sergeysova/jq-action@v2 - id: label_ids - with: - # Get list of version labels on pull request and map to label's node id, append 'QA:Ready For Testing' id ("LA_kwDOGgEmJc7mkkl9]") - cmd: echo $PR_LABELS | jq -c 'map(select(.name | test("v[0-9]+\\.[0-9]+\\.[0-9]+")) | .node_id) + ["LA_kwDOGgEmJc7mkkl9]' - multiline: true - env: - PR_LABELS: ${{ toJSON(github.event.pull_request.labels) }} - - label_issues: - needs: fetch_issues_to_label - 
runs-on: ubuntu-latest - # For each issue closed by the PR run this job - if: | - fromJSON(needs.fetch_issues_to_label.outputs.issue_ids).length > 0 && - fromJSON(needs.fetch_issues_to_label.outputs.label_ids).length > 0 - strategy: - matrix: - issueNodeId: ${{ fromJSON(needs.fetch_issues_to_label.outputs.issue_ids) }} - labelId: ${{ fromJSON(needs.fetch_issues_to_label.outputs.label_ids) }} - name: Label issue ${{ matrix.issueNodeId }} - steps: - - uses: octokit/graphql-action@v2.x - id: add_labels_to_closed_issue - with: - query: | - mutation add_label($issueid:ID!, $labelids:[String!]!) { - addLabelsToLabelable(input: {labelableId: $issueid, labelIds: $labelids}) { - clientMutationId - } - } - issueid: ${{ matrix.issueNodeId }} - labelids: ${{ matrix.labelId }} - token: ${{ secrets.GITHUB_TOKEN }} From 6c325d09e7ea1e9b67bd5347d2d6c94fa0e7d3fb Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Tue, 11 Oct 2022 10:03:54 -0400 Subject: [PATCH 25/63] [Automation] Update go release version to 1.18.7 (#1444) Co-authored-by: apmmachine --- .go-version | 2 +- Dockerfile | 2 +- version/docs/version.asciidoc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.go-version b/.go-version index 04a8bc26d16..d6f3a382b34 100644 --- a/.go-version +++ b/.go-version @@ -1 +1 @@ -1.18.6 +1.18.7 diff --git a/Dockerfile b/Dockerfile index 78bc8928198..fd56ef5e2ff 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.18.6 +ARG GO_VERSION=1.18.7 FROM circleci/golang:${GO_VERSION} diff --git a/version/docs/version.asciidoc b/version/docs/version.asciidoc index db48ba622f8..0485d65c441 100644 --- a/version/docs/version.asciidoc +++ b/version/docs/version.asciidoc @@ -1,6 +1,6 @@ :stack-version: 8.3.0 :doc-branch: main -:go-version: 1.18.6 +:go-version: 1.18.7 :release-state: unreleased :python: 3.7 :docker: 1.12 From 070af5fcaee916edd0a11a3cf52a00e7f5733499 Mon Sep 17 00:00:00 2001 From: apmmachine 
<58790750+apmmachine@users.noreply.github.com> Date: Wed, 12 Oct 2022 01:37:52 -0400 Subject: [PATCH 26/63] [Automation] Update elastic stack version to 8.6.0-5a8d757d for testing (#1480) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 66813f6f057..976d846eb52 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-c49fac70-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-5a8d757d-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-c49fac70-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-5a8d757d-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From cd6ad3d121d97bbfc09444f13bf0622f2e6425f1 Mon Sep 17 00:00:00 2001 From: Craig MacKenzie Date: Wed, 12 Oct 2022 16:45:28 -0400 Subject: [PATCH 27/63] Improve logging around agent checkins. (#1477) Improve logging around agent checkins. - Log transient checkin errors at Info. - Upgrade to an Error log after 2 repeated failures. - Log the wait time for the next retry. - Only update local state after repeated failures. 
--- CHANGELOG.next.asciidoc | 1 + ...5517984-improve-checkin-error-logging.yaml | 5 ++ .../gateway/fleet/fleet_gateway.go | 39 +++++++++------ .../gateway/fleet/fleet_gateway_test.go | 20 ++++++-- internal/pkg/core/backoff/backoff.go | 5 ++ internal/pkg/core/backoff/backoff_test.go | 50 +++++++++++++++---- internal/pkg/core/backoff/equal_jitter.go | 17 +++++-- internal/pkg/core/backoff/exponential.go | 17 ++++--- internal/pkg/fleetapi/checkin_cmd.go | 23 +++++---- internal/pkg/fleetapi/checkin_cmd_test.go | 20 +++++--- 10 files changed, 140 insertions(+), 57 deletions(-) create mode 100644 changelog/fragments/1665517984-improve-checkin-error-logging.yaml diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index f8033809cb4..e937813e86d 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -210,3 +210,4 @@ - Add support for hints' based autodiscovery in kubernetes provider. {pull}[698] - Improve logging during upgrades. {pull}[1287] - Added status message to CheckinRequest {pull}[1369] +- Improve logging of Fleet checkins errors. {pull}[1477] diff --git a/changelog/fragments/1665517984-improve-checkin-error-logging.yaml b/changelog/fragments/1665517984-improve-checkin-error-logging.yaml new file mode 100644 index 00000000000..7bf2777d9d5 --- /dev/null +++ b/changelog/fragments/1665517984-improve-checkin-error-logging.yaml @@ -0,0 +1,5 @@ +kind: enhancement +summary: Improve logging of Fleet check-in errors. +description: Improve logging of Fleet check-in errors and only report the local state as degraded after two consecutive failed check-ins. 
+pr: 1477 +issue: 1154 diff --git a/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go b/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go index 897b81ea0d3..9ebebcf2c0f 100644 --- a/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go +++ b/internal/pkg/agent/application/gateway/fleet/fleet_gateway.go @@ -173,7 +173,7 @@ func (f *fleetGateway) worker() { // Execute the checkin call and for any errors returned by the fleet-server API // the function will retry to communicate with fleet-server with an exponential delay and some // jitter to help better distribute the load from a fleet of agents. - resp, err := f.doExecute() + resp, err := f.executeCheckinWithRetries() if err != nil { continue } @@ -274,21 +274,34 @@ func (f *fleetGateway) gatherQueuedActions(ts time.Time) (queued, expired []flee return queued, expired } -func (f *fleetGateway) doExecute() (*fleetapi.CheckinResponse, error) { +func (f *fleetGateway) executeCheckinWithRetries() (*fleetapi.CheckinResponse, error) { f.backoff.Reset() // Guard if the context is stopped by a out of bound call, // this mean we are rebooting to change the log level or the system is shutting us down. for f.bgContext.Err() == nil { f.log.Debugf("Checkin started") - resp, err := f.execute(f.bgContext) + resp, took, err := f.executeCheckin(f.bgContext) if err != nil { f.checkinFailCounter++ - f.log.Errorf("Could not communicate with fleet-server checkin API will retry, error: %s", err) + + // Report the first two failures at warn level as they may be recoverable with retries. 
+ if f.checkinFailCounter <= 2 { + f.log.Warnw("Possible transient error during checkin with fleet-server, retrying", + "error.message", err, "request_duration_ns", took, "failed_checkins", f.checkinFailCounter, + "retry_after_ns", f.backoff.NextWait()) + } else { + // Only update the local status after repeated failures: https://github.com/elastic/elastic-agent/issues/1148 + f.localReporter.Update(state.Degraded, fmt.Sprintf("checkin failed: %v", err), nil) + f.log.Errorw("Cannot checkin in with fleet-server, retrying", + "error.message", err, "request_duration_ns", took, "failed_checkins", f.checkinFailCounter, + "retry_after_ns", f.backoff.NextWait()) + } + if !f.backoff.Wait() { // Something bad has happened and we log it and we should update our current state. err := errors.New( - "execute retry loop was stopped", + "checkin retry loop was stopped", errors.TypeNetwork, errors.M(errors.MetaKeyURI, f.client.URI()), ) @@ -297,10 +310,6 @@ func (f *fleetGateway) doExecute() (*fleetapi.CheckinResponse, error) { f.localReporter.Update(state.Failed, err.Error(), nil) return nil, err } - if f.checkinFailCounter > 1 { - f.localReporter.Update(state.Degraded, fmt.Sprintf("checkin failed: %v", err), nil) - f.log.Errorf("checkin number %d failed: %s", f.checkinFailCounter, err.Error()) - } continue } @@ -319,7 +328,7 @@ func (f *fleetGateway) doExecute() (*fleetapi.CheckinResponse, error) { return nil, f.bgContext.Err() } -func (f *fleetGateway) execute(ctx context.Context) (*fleetapi.CheckinResponse, error) { +func (f *fleetGateway) executeCheckin(ctx context.Context) (*fleetapi.CheckinResponse, time.Duration, error) { ecsMeta, err := info.Metadata() if err != nil { f.log.Error(errors.New("failed to load metadata", err)) @@ -340,7 +349,7 @@ func (f *fleetGateway) execute(ctx context.Context) (*fleetapi.CheckinResponse, Message: f.statusController.Status().Message, } - resp, err := cmd.Execute(ctx, req) + resp, took, err := cmd.Execute(ctx, req) if isUnauth(err) { 
f.unauthCounter++ @@ -348,15 +357,15 @@ func (f *fleetGateway) execute(ctx context.Context) (*fleetapi.CheckinResponse, f.log.Warnf("received an invalid api key error '%d' times. Starting to unenroll the elastic agent.", f.unauthCounter) return &fleetapi.CheckinResponse{ Actions: []fleetapi.Action{&fleetapi.ActionUnenroll{ActionID: "", ActionType: "UNENROLL", IsDetected: true}}, - }, nil + }, took, nil } - return nil, err + return nil, took, err } f.unauthCounter = 0 if err != nil { - return nil, err + return nil, took, err } // Save the latest ackToken @@ -368,7 +377,7 @@ func (f *fleetGateway) execute(ctx context.Context) (*fleetapi.CheckinResponse, } } - return resp, nil + return resp, took, nil } // shouldUnenroll checks if the max number of trying an invalid key is reached diff --git a/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go b/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go index 0cc00e739a8..1860782a1e7 100644 --- a/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go +++ b/internal/pkg/agent/application/gateway/fleet/fleet_gateway_test.go @@ -21,6 +21,7 @@ import ( "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent/internal/pkg/agent/application/gateway" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/storage" @@ -693,7 +694,7 @@ func TestRetriesOnFailures(t *testing.T) { scheduler := scheduler.NewStepper() client := newTestingClient() dispatcher := newTestingDispatcher() - log, _ := logger.New("fleet_gateway", false) + log := newInfoLogger(t, "fleet_gateway") ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -706,8 +707,8 @@ func TestRetriesOnFailures(t *testing.T) { queue.On("Actions").Return([]fleetapi.Action{}) localReporter := &testutils.MockReporter{} - localReporter.On("Update", 
state.Degraded, mock.Anything, mock.Anything).Times(2) - localReporter.On("Update", mock.Anything, mock.Anything, mock.Anything).Maybe() + // The local state should only be reported as degraded after two consecutive failures. + localReporter.On("Update", state.Degraded, mock.Anything, mock.Anything).Once() localReporter.On("Unregister").Maybe() fleetReporter := &testutils.MockReporter{} @@ -814,3 +815,16 @@ type testAgentInfo struct{} func (testAgentInfo) AgentID() string { return "agent-secret" } type request struct{} + +func newInfoLogger(t *testing.T, name string) *logger.Logger { + t.Helper() + + loggerCfg := logger.DefaultLoggingConfig() + loggerCfg.Level = logp.InfoLevel + loggerCfg.ToFiles = false + loggerCfg.ToStderr = true + + log, err := logger.NewFromConfig("", loggerCfg, false) + require.NoError(t, err) + return log +} diff --git a/internal/pkg/core/backoff/backoff.go b/internal/pkg/core/backoff/backoff.go index 06723e7db9a..c97eaae199d 100644 --- a/internal/pkg/core/backoff/backoff.go +++ b/internal/pkg/core/backoff/backoff.go @@ -4,11 +4,16 @@ package backoff +import "time" + // Backoff defines the interface for backoff strategies. type Backoff interface { // Wait blocks for a duration of time governed by the backoff strategy. Wait() bool + // NextWait returns the duration of the next call to Wait(). + NextWait() time.Duration + // Reset resets the backoff duration to an initial value governed by the backoff strategy. 
Reset() } diff --git a/internal/pkg/core/backoff/backoff_test.go b/internal/pkg/core/backoff/backoff_test.go index 88498ff5a58..12332eb15f2 100644 --- a/internal/pkg/core/backoff/backoff_test.go +++ b/internal/pkg/core/backoff/backoff_test.go @@ -14,14 +14,9 @@ import ( type factory func(<-chan struct{}) Backoff -func TestBackoff(t *testing.T) { - t.Run("test close channel", testCloseChannel) - t.Run("test unblock after some time", testUnblockAfterInit) -} - -func testCloseChannel(t *testing.T) { - init := 2 * time.Second - max := 5 * time.Minute +func TestCloseChannel(t *testing.T) { + init := 2 * time.Millisecond + max := 5 * time.Second tests := map[string]factory{ "ExpBackoff": func(done <-chan struct{}) Backoff { @@ -42,9 +37,9 @@ func testCloseChannel(t *testing.T) { } } -func testUnblockAfterInit(t *testing.T) { - init := 1 * time.Second - max := 5 * time.Minute +func TestUnblockAfterInit(t *testing.T) { + init := 1 * time.Millisecond + max := 5 * time.Second tests := map[string]factory{ "ExpBackoff": func(done <-chan struct{}) Backoff { @@ -68,3 +63,36 @@ func testUnblockAfterInit(t *testing.T) { }) } } + +func TestNextWait(t *testing.T) { + init := time.Millisecond + max := 5 * time.Second + + tests := map[string]factory{ + "ExpBackoff": func(done <-chan struct{}) Backoff { + return NewExpBackoff(done, init, max) + }, + "EqualJitterBackoff": func(done <-chan struct{}) Backoff { + return NewEqualJitterBackoff(done, init, max) + }, + } + + for name, f := range tests { + t.Run(name, func(t *testing.T) { + c := make(chan struct{}) + b := f(c) + + startWait := b.NextWait() + assert.Equal(t, startWait, b.NextWait(), "next wait not stable") + + startedAt := time.Now() + b.Wait() + waitDuration := time.Now().Sub(startedAt) + nextWait := b.NextWait() + + t.Logf("actualWait: %s startWait: %s nextWait: %s", waitDuration, startWait, nextWait) + assert.Less(t, startWait, nextWait, "wait value did not increase") + assert.GreaterOrEqual(t, waitDuration, startWait, "next 
wait duration <= actual wait duration") + }) + } +} diff --git a/internal/pkg/core/backoff/equal_jitter.go b/internal/pkg/core/backoff/equal_jitter.go index d87077397cd..671201f5892 100644 --- a/internal/pkg/core/backoff/equal_jitter.go +++ b/internal/pkg/core/backoff/equal_jitter.go @@ -16,8 +16,9 @@ type EqualJitterBackoff struct { duration time.Duration done <-chan struct{} - init time.Duration - max time.Duration + init time.Duration + max time.Duration + nextRand time.Duration last time.Time } @@ -29,6 +30,7 @@ func NewEqualJitterBackoff(done <-chan struct{}, init, max time.Duration) Backof done: done, init: init, max: max, + nextRand: time.Duration(rand.Int63n(int64(init))), //nolint:gosec } } @@ -38,13 +40,18 @@ func (b *EqualJitterBackoff) Reset() { b.duration = b.init * 2 } +func (b *EqualJitterBackoff) NextWait() time.Duration { + // Make sure we have always some minimal back off and jitter. + temp := b.duration / 2 + return temp + b.nextRand +} + // Wait block until either the timer is completed or channel is done. func (b *EqualJitterBackoff) Wait() bool { - // Make sure we have always some minimal back off and jitter. - temp := int64(b.duration / 2) - backoff := time.Duration(temp + rand.Int63n(temp)) + backoff := b.NextWait() // increase duration for next wait. + b.nextRand = time.Duration(rand.Int63n(int64(b.duration))) b.duration *= 2 if b.duration > b.max { b.duration = b.max diff --git a/internal/pkg/core/backoff/exponential.go b/internal/pkg/core/backoff/exponential.go index 81224b95eb5..51b5b4e0cb5 100644 --- a/internal/pkg/core/backoff/exponential.go +++ b/internal/pkg/core/backoff/exponential.go @@ -36,18 +36,23 @@ func (b *ExpBackoff) Reset() { b.duration = b.init } +func (b *ExpBackoff) NextWait() time.Duration { + nextWait := b.duration + nextWait *= 2 + if nextWait > b.max { + nextWait = b.max + } + return nextWait +} + // Wait block until either the timer is completed or channel is done. 
func (b *ExpBackoff) Wait() bool { - backoff := b.duration - b.duration *= 2 - if b.duration > b.max { - b.duration = b.max - } + b.duration = b.NextWait() select { case <-b.done: return false - case <-time.After(backoff): + case <-time.After(b.duration): b.last = time.Now() return true } diff --git a/internal/pkg/fleetapi/checkin_cmd.go b/internal/pkg/fleetapi/checkin_cmd.go index d6a63a45e29..f79c6bab8bc 100644 --- a/internal/pkg/fleetapi/checkin_cmd.go +++ b/internal/pkg/fleetapi/checkin_cmd.go @@ -78,23 +78,26 @@ func NewCheckinCmd(info agentInfo, client client.Sender) *CheckinCmd { } } -// Execute enroll the Agent in the Fleet Server. -func (e *CheckinCmd) Execute(ctx context.Context, r *CheckinRequest) (*CheckinResponse, error) { +// Execute enroll the Agent in the Fleet Server. Returns the decoded check in response, a duration indicating +// how long the request took, and an error. +func (e *CheckinCmd) Execute(ctx context.Context, r *CheckinRequest) (*CheckinResponse, time.Duration, error) { if err := r.Validate(); err != nil { - return nil, err + return nil, 0, err } b, err := json.Marshal(r) if err != nil { - return nil, errors.New(err, + return nil, 0, errors.New(err, "fail to encode the checkin request", errors.TypeUnexpected) } cp := fmt.Sprintf(checkingPath, e.info.AgentID()) + sendStart := time.Now() resp, err := e.client.Send(ctx, "POST", cp, nil, nil, bytes.NewBuffer(b)) + sendDuration := time.Now().Sub(sendStart) if err != nil { - return nil, errors.New(err, + return nil, sendDuration, errors.New(err, "fail to checkin to fleet-server", errors.TypeNetwork, errors.M(errors.MetaKeyURI, cp)) @@ -102,26 +105,26 @@ func (e *CheckinCmd) Execute(ctx context.Context, r *CheckinRequest) (*CheckinRe defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return nil, client.ExtractError(resp.Body) + return nil, sendDuration, client.ExtractError(resp.Body) } rs, err := ioutil.ReadAll(resp.Body) if err != nil { - return nil, errors.New(err, "failed to 
read checkin response") + return nil, sendDuration, errors.New(err, "failed to read checkin response") } checkinResponse := &CheckinResponse{} decoder := json.NewDecoder(bytes.NewReader(rs)) if err := decoder.Decode(checkinResponse); err != nil { - return nil, errors.New(err, + return nil, sendDuration, errors.New(err, "fail to decode checkin response", errors.TypeNetwork, errors.M(errors.MetaKeyURI, cp)) } if err := checkinResponse.Validate(); err != nil { - return nil, err + return nil, sendDuration, err } - return checkinResponse, nil + return checkinResponse, sendDuration, nil } diff --git a/internal/pkg/fleetapi/checkin_cmd_test.go b/internal/pkg/fleetapi/checkin_cmd_test.go index 2d9aef2741a..56726bb5559 100644 --- a/internal/pkg/fleetapi/checkin_cmd_test.go +++ b/internal/pkg/fleetapi/checkin_cmd_test.go @@ -11,6 +11,7 @@ import ( "io/ioutil" "net/http" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -25,6 +26,7 @@ func (*agentinfo) AgentID() string { return "id" } func TestCheckin(t *testing.T) { const withAPIKey = "secret" + const requestDelay = time.Millisecond ctx := context.Background() agentInfo := &agentinfo{} @@ -39,6 +41,8 @@ func TestCheckin(t *testing.T) { mux.HandleFunc(path, authHandler(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusInternalServerError) fmt.Fprint(w, raw) + // Introduce a small delay to test the request time measurment. + time.Sleep(requestDelay) }, withAPIKey)) return mux }, withAPIKey, @@ -47,8 +51,10 @@ func TestCheckin(t *testing.T) { request := CheckinRequest{} - _, err := cmd.Execute(ctx, &request) + _, took, err := cmd.Execute(ctx, &request) require.Error(t, err) + // Ensure the request took at least as long as the artificial delay. 
+ require.GreaterOrEqual(t, took, requestDelay) }, )) @@ -96,7 +102,7 @@ func TestCheckin(t *testing.T) { request := CheckinRequest{} - r, err := cmd.Execute(ctx, &request) + r, _, err := cmd.Execute(ctx, &request) require.NoError(t, err) require.Equal(t, 1, len(r.Actions)) @@ -157,7 +163,7 @@ func TestCheckin(t *testing.T) { request := CheckinRequest{} - r, err := cmd.Execute(ctx, &request) + r, _, err := cmd.Execute(ctx, &request) require.NoError(t, err) require.Equal(t, 2, len(r.Actions)) @@ -173,7 +179,7 @@ func TestCheckin(t *testing.T) { }, )) - t.Run("When we receive no action", withServerWithAuthClient( + t.Run("When we receive no action with delay", withServerWithAuthClient( func(t *testing.T) *http.ServeMux { raw := `{ "actions": [] }` mux := http.NewServeMux() @@ -189,7 +195,7 @@ func TestCheckin(t *testing.T) { request := CheckinRequest{} - r, err := cmd.Execute(ctx, &request) + r, _, err := cmd.Execute(ctx, &request) require.NoError(t, err) require.Equal(t, 0, len(r.Actions)) @@ -223,7 +229,7 @@ func TestCheckin(t *testing.T) { request := CheckinRequest{Metadata: testMetadata()} - r, err := cmd.Execute(ctx, &request) + r, _, err := cmd.Execute(ctx, &request) require.NoError(t, err) require.Equal(t, 0, len(r.Actions)) @@ -257,7 +263,7 @@ func TestCheckin(t *testing.T) { request := CheckinRequest{} - r, err := cmd.Execute(ctx, &request) + r, _, err := cmd.Execute(ctx, &request) require.NoError(t, err) require.Equal(t, 0, len(r.Actions)) From 12c55534a90e22ad4f641fcfef7090f99e3a1a75 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Thu, 13 Oct 2022 01:40:06 -0400 Subject: [PATCH 28/63] [Automation] Update elastic stack version to 8.6.0-40086bc7 for testing (#1496) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 976d846eb52..d466b0bdc05 100644 --- 
a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-5a8d757d-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-40086bc7-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-5a8d757d-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-40086bc7-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From d5cfe6ffb18335332f940aaacb0b0381c5f28b72 Mon Sep 17 00:00:00 2001 From: Andrew Gizas Date: Thu, 13 Oct 2022 10:04:53 +0300 Subject: [PATCH 29/63] Fixing makefile check (#1490) * Fixing makefile check --- deploy/kubernetes/Makefile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/deploy/kubernetes/Makefile b/deploy/kubernetes/Makefile index 295b238cef5..1163b1d00a6 100644 --- a/deploy/kubernetes/Makefile +++ b/deploy/kubernetes/Makefile @@ -49,11 +49,10 @@ ci-clone-kibana-repository: cp $(FILE_REPO) $(ELASTIC_AGENT_REPO)/$(ELASTIC_AGENT_REPO_PATH) ## ci-create-kubernetes-templates-pull-request : Create the pull request for the kubernetes templates +$(eval HASDIFF =$(shell sh -c "git status | grep $(FILE_REPO) | wc -l")) .PHONY: ci-create-kubernetes-templates-pull-request ci-create-kubernetes-templates-pull-request: - HASDIFF=`git status | grep $(FILE_REPO) | wc -l`; \ - echo $${HASDIFF} -ifeq ($${HASDIFF},1) +ifeq ($(HASDIFF),1) echo "INFO: Create branch to update k8s templates" git config user.name obscloudnativemonitoring git config user.email obs-cloudnative-monitoring@elastic.co @@ -79,8 +78,7 @@ else --base main \ --head $(ELASTIC_AGENT_BRANCH) \ --reviewer elastic/obs-cloudnative-monitoring -endif - +endif else echo "No differences found 
with kibana git repository" endif From 259682d3eb95330619e1f0993ef35f46feb046fd Mon Sep 17 00:00:00 2001 From: Victor Martinez Date: Thu, 13 Oct 2022 09:16:59 +0100 Subject: [PATCH 30/63] action: validate changelog fragment (#1488) --- .github/workflows/changelog.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/workflows/changelog.yml diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml new file mode 100644 index 00000000000..d0f29a0fd25 --- /dev/null +++ b/.github/workflows/changelog.yml @@ -0,0 +1,17 @@ +name: Changelog +on: [pull_request] + +jobs: + fragments: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Fetch Go version from .go-version + run: echo "GO_VERSION=$(cat .go-version)" >> $GITHUB_ENV + - uses: actions/setup-go@v3 + with: + go-version: ${{ env.GO_VERSION }} + - name: check pr-has-fragment + run: | + GOBIN=$PWD/bin go install github.com/elastic/elastic-agent-changelog-tool@latest + ./bin/elastic-agent-changelog-tool pr-has-fragment --repo ${{ github.event.repository.name }} ${{github.event.number}} From 5505f5862e158e926cc72fc331a30573f1c5dbe7 Mon Sep 17 00:00:00 2001 From: Andrew Gizas Date: Thu, 13 Oct 2022 11:51:35 +0300 Subject: [PATCH 31/63] Allign managed with standalone role (#1500) --- deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml | 4 ++++ .../elastic-agent-standalone-role.yaml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml b/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml index e43a251408f..baf0ce00c94 100644 --- a/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml +++ b/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml @@ -864,6 +864,10 @@ rules: resources: - podsecuritypolicies verbs: ["get", "list", "watch"] + - apiGroups: [ "storage.k8s.io" ] + resources: + - storageclasses + verbs: [ "get", "list", "watch" ] --- apiVersion: 
rbac.authorization.k8s.io/v1 kind: Role diff --git a/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-role.yaml b/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-role.yaml index 8a644f3aadf..a0cd80b456a 100644 --- a/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-role.yaml +++ b/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-role.yaml @@ -63,6 +63,10 @@ rules: resources: - podsecuritypolicies verbs: ["get", "list", "watch"] + - apiGroups: [ "storage.k8s.io" ] + resources: + - storageclasses + verbs: [ "get", "list", "watch" ] --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role From 890483300aba0fcd33714fa4fe9a81fb0bc4e533 Mon Sep 17 00:00:00 2001 From: Chris Mark Date: Thu, 13 Oct 2022 12:19:26 +0300 Subject: [PATCH 32/63] Fix k8s template link versioning (#1504) --- deploy/kubernetes/Makefile | 18 ++++++++++-------- .../elastic-agent-standalone-kubernetes.yaml | 2 +- .../elastic-agent-standalone-daemonset.yaml | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/deploy/kubernetes/Makefile b/deploy/kubernetes/Makefile index 1163b1d00a6..d7eb3241161 100644 --- a/deploy/kubernetes/Makefile +++ b/deploy/kubernetes/Makefile @@ -1,5 +1,6 @@ ALL=elastic-agent-standalone elastic-agent-managed BEAT_VERSION=$(shell head -n 1 ../../version/docs/version.asciidoc | cut -c 17- ) +BRANCH_VERSION=$(shell sed -n '2p' ../../version/docs/version.asciidoc | cut -c 14- ) #variables needed for ci-create-kubernetes-templates-pull-request ELASTIC_AGENT_REPO=kibana @@ -9,7 +10,7 @@ ELASTIC_AGENT_BRANCH=update-k8s-templates-$(shell date "+%Y%m%d%H%M%S") .PHONY: generate-k8s $(ALL) generate-k8s: $(ALL) - + test: generate-k8s for FILE in $(shell ls *-kubernetes.yaml); do \ BEAT=$$(echo $$FILE | cut -d \- -f 1); \ @@ -19,21 +20,21 @@ test: generate-k8s clean: @for f in $(ALL); do rm -f "$$f-kubernetes.yaml"; done -$(ALL): +$(ALL): ifdef WITHOUTCONFIG @echo "Generating 
$@-kubernetes-without-configmap.yaml" @rm -f $@-kubernetes-without-configmap.yaml @for f in $(shell ls $@/*.yaml | grep -v daemonset-configmap); do \ - sed "s/%VERSION%/VERSION/g" $$f >> $@-kubernetes-without-configmap.yaml; \ + sed -e "s/%VERSION%/VERSION/g" -e "s/%BRANCH%/${BRANCH_VERSION}/g" $$f >> $@-kubernetes-without-configmap.yaml; \ echo --- >> $@-kubernetes-without-configmap.yaml; \ done else - @echo "Generating $@-kubernetes.yaml" - @rm -f $@-kubernetes.yaml + @echo "Generating $@-kubernetes.yaml" + @rm -f $@-kubernetes.yaml @for f in $(shell ls $@/*.yaml); do \ - sed "s/%VERSION%/${BEAT_VERSION}/g" $$f >> $@-kubernetes.yaml; \ + sed -e "s/%VERSION%/${BEAT_VERSION}/g" -e "s/%BRANCH%/${BRANCH_VERSION}/g" $$f >> $@-kubernetes.yaml; \ echo --- >> $@-kubernetes.yaml; \ - done + done endif CHDIR_SHELL := $(SHELL) @@ -47,7 +48,7 @@ endef ci-clone-kibana-repository: git clone git@github.com:elastic/kibana.git cp $(FILE_REPO) $(ELASTIC_AGENT_REPO)/$(ELASTIC_AGENT_REPO_PATH) - + ## ci-create-kubernetes-templates-pull-request : Create the pull request for the kubernetes templates $(eval HASDIFF =$(shell sh -c "git status | grep $(FILE_REPO) | wc -l")) .PHONY: ci-create-kubernetes-templates-pull-request @@ -79,6 +80,7 @@ else --head $(ELASTIC_AGENT_BRANCH) \ --reviewer elastic/obs-cloudnative-monitoring endif + else echo "No differences found with kibana git repository" endif diff --git a/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml b/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml index baf0ce00c94..6de0d0b9270 100644 --- a/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml +++ b/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml @@ -662,7 +662,7 @@ spec: # - -c # - >- # mkdir -p /etc/elastic-agent/inputs.d && - # wget -O - https://github.com/elastic/elastic-agent/archive/8.3.0.tar.gz | tar xz -C /etc/elastic-agent/inputs.d --strip=5 "elastic-agent-main/deploy/kubernetes/elastic-agent-standalone/templates.d" + # wget -O - 
https://github.com/elastic/elastic-agent/archive/main.tar.gz | tar xz -C /etc/elastic-agent/inputs.d --strip=5 "elastic-agent-main/deploy/kubernetes/elastic-agent-standalone/templates.d" # volumeMounts: # - name: external-inputs # mountPath: /etc/elastic-agent/inputs.d diff --git a/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset.yaml b/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset.yaml index 9d865811e46..d40291d2ed1 100644 --- a/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset.yaml +++ b/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset.yaml @@ -34,7 +34,7 @@ spec: # - -c # - >- # mkdir -p /etc/elastic-agent/inputs.d && - # wget -O - https://github.com/elastic/elastic-agent/archive/%VERSION%.tar.gz | tar xz -C /etc/elastic-agent/inputs.d --strip=5 "elastic-agent-main/deploy/kubernetes/elastic-agent-standalone/templates.d" + # wget -O - https://github.com/elastic/elastic-agent/archive/%BRANCH%.tar.gz | tar xz -C /etc/elastic-agent/inputs.d --strip=5 "elastic-agent-main/deploy/kubernetes/elastic-agent-standalone/templates.d" # volumeMounts: # - name: external-inputs # mountPath: /etc/elastic-agent/inputs.d From 7f5450b9843de1b5d9aa725faff2d5661735c162 Mon Sep 17 00:00:00 2001 From: Andrew Gizas Date: Thu, 13 Oct 2022 13:32:31 +0300 Subject: [PATCH 33/63] Allighningmanifests (#1507) * Allign managed with standalone role * Fixing missing Label --- deploy/kubernetes/Makefile | 1 - deploy/kubernetes/elastic-agent-managed-kubernetes.yaml | 2 +- .../elastic-agent-managed/elastic-agent-managed-daemonset.yaml | 2 +- deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml | 2 +- .../elastic-agent-standalone-daemonset-configmap.yaml | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/deploy/kubernetes/Makefile b/deploy/kubernetes/Makefile index d7eb3241161..98e216142b7 100644 --- a/deploy/kubernetes/Makefile +++ b/deploy/kubernetes/Makefile 
@@ -75,7 +75,6 @@ else --title "Update kubernetes templates for elastic-agent" \ --body "Automated by ${BUILD_URL}" \ --label automation \ - --label release_note:automation \ --base main \ --head $(ELASTIC_AGENT_BRANCH) \ --reviewer elastic/obs-cloudnative-monitoring diff --git a/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml b/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml index 1f3c3d8ec9b..3a41910c51a 100644 --- a/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml +++ b/deploy/kubernetes/elastic-agent-managed-kubernetes.yaml @@ -1,4 +1,4 @@ -# For more information refer to https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-managed-by-fleet.html +# For more information https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-managed-by-fleet.html apiVersion: apps/v1 kind: DaemonSet metadata: diff --git a/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-daemonset.yaml b/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-daemonset.yaml index 17959a4febe..e1b85082ac3 100644 --- a/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-daemonset.yaml +++ b/deploy/kubernetes/elastic-agent-managed/elastic-agent-managed-daemonset.yaml @@ -1,4 +1,4 @@ -# For more information refer to https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-managed-by-fleet.html +# For more information https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-managed-by-fleet.html apiVersion: apps/v1 kind: DaemonSet metadata: diff --git a/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml b/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml index 6de0d0b9270..373282a4c1b 100644 --- a/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml +++ b/deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml @@ -1,4 +1,4 @@ -# For more information refer https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-standalone.html +# For more information 
https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-standalone.html apiVersion: v1 kind: ConfigMap metadata: diff --git a/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset-configmap.yaml b/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset-configmap.yaml index 15a24fc3c59..1a52302826d 100644 --- a/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset-configmap.yaml +++ b/deploy/kubernetes/elastic-agent-standalone/elastic-agent-standalone-daemonset-configmap.yaml @@ -1,4 +1,4 @@ -# For more information refer https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-standalone.html +# For more information https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-standalone.html apiVersion: v1 kind: ConfigMap metadata: From 35f12d52f77b013156acc876665a06cc17c9f95c Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Fri, 14 Oct 2022 01:37:34 -0400 Subject: [PATCH 34/63] [Automation] Update elastic stack version to 8.6.0-233dc5d4 for testing (#1515) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index d466b0bdc05..41048fde8fc 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-40086bc7-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-233dc5d4-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-40086bc7-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-233dc5d4-SNAPSHOT environment: - 
"ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From 6eadeeff9a8a4ae043d053308deebee07d2f0175 Mon Sep 17 00:00:00 2001 From: Edoardo Tenani <526307+endorama@users.noreply.github.com> Date: Fri, 14 Oct 2022 09:53:20 +0200 Subject: [PATCH 35/63] Convert CHANGELOG.next to fragments (#1244) --- CHANGELOG.next.asciidoc | 213 ------------------ README.md | 1 + ...ion-when-installing-the-Elastic-Agent.yaml | 3 + ...SHA-1-are-now-rejected-See-the-Go-118.yaml | 3 + ...rjack-input-type-to-the-Filebeat-spec.yaml | 3 + ...-autodiscovery-in-kubernetes-provider.yaml | 3 + ...ource-URI-when-downloading-components.yaml | 3 + ...nly-events-so-that-degraded-fleet-che.yaml | 4 + ...30732-Improve-logging-during-upgrades.yaml | 3 + ...ssage-after-previous-checkin-failures.yaml | 3 + 10 files changed, 26 insertions(+), 213 deletions(-) delete mode 100644 CHANGELOG.next.asciidoc create mode 100644 changelog/fragments/1660139385-Fix-a-panic-caused-by-a-race-condition-when-installing-the-Elastic-Agent.yaml create mode 100644 changelog/fragments/1660158319-Upgrade-to-Go-118-Certificates-signed-with-SHA-1-are-now-rejected-See-the-Go-118.yaml create mode 100644 changelog/fragments/1661188787-Add-lumberjack-input-type-to-the-Filebeat-spec.yaml create mode 100644 changelog/fragments/1663143487-Add-support-for-hints-based-autodiscovery-in-kubernetes-provider.yaml create mode 100644 changelog/fragments/1664177394-Fix-unintended-reset-of-source-URI-when-downloading-components.yaml create mode 100644 changelog/fragments/1664212969-Create-separate-status-reporter-for-local-only-events-so-that-degraded-fleet-che.yaml create mode 100644 changelog/fragments/1664230732-Improve-logging-during-upgrades.yaml create mode 100644 changelog/fragments/1664360554-Add-success-log-message-after-previous-checkin-failures.yaml diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc deleted file mode 100644 index e937813e86d..00000000000 --- a/CHANGELOG.next.asciidoc +++ 
/dev/null @@ -1,213 +0,0 @@ -// Use these for links to issue and pulls. Note issues and pulls redirect one to -// each other on Github, so don't worry too much on using the right prefix. -:issue-beats: https://github.com/elastic/beats/issues/ -:pull-beats: https://github.com/elastic/beats/pull/ - -:issue: https://github.com/elastic/elastic-agent/issues/ -:pull: https://github.com/elastic/elastic-agent/pull/ - -=== Elastic Agent version HEAD - -==== Breaking changes - -- Docker container is not run as root by default. {pull-beats}[21213] -- Read Fleet connection information from `fleet.*` instead of `fleet.kibana.*`. {pull-beats}[24713] -- Beats build for 32Bit Windows or Linux system will refuse to run on a 64bit system. {pull-beats}[25186] -- Remove the `--kibana-url` from `install` and `enroll` command. {pull-beats}[25529] -- Default to port 80 and 443 for Kibana and Fleet Server connections. {pull-beats}[25723] -- Remove deprecated/undocumented IncludeCreatorMetadata setting from kubernetes metadata config options {pull-beats}[28006] -- The `/processes/` endpoint proxies to the subprocess's monitoring endpoint, instead of querying its `/stats` endpoint {pull-beats}[28165] -- Remove username/password for fleet-server authentication. {pull-beats}[29458] -- Upgrade to Go 1.18. Certificates signed with SHA-1 are now rejected. See the Go 1.18 https://tip.golang.org/doc/go1.18#sha1[release notes] for details. {pull}832[832] - -==== Bugfixes -- Fix rename *ConfigChange to *PolicyChange to align on changes in the UI. 
{pull-beats}[20779] -- Thread safe sorted set {pull-beats}[21290] -- Copy Action store on upgrade {pull-beats}[21298] -- Include inputs in action store actions {pull-beats}[21298] -- Fix issue where inputs without processors defined would panic {pull-beats}[21628] -- Prevent reporting ecs version twice {pull-beats}[21616] -- Partial extracted beat result in failure to spawn beat {issue-beats}[21718] -- Use symlink path for reexecutions {pull-beats}[21835] -- Use ML_SYSTEM to detect if agent is running as a service {pull-beats}[21884] -- Use local temp instead of system one {pull-beats}[21883] -- Rename monitoring index from `elastic.agent` to `elastic_agent` {pull-beats}[21932] -- Fix issue with named pipes on Windows 7 {pull-beats}[21931] -- Fix missing elastic_agent event data {pull-beats}[21994] -- Ensure shell wrapper path exists before writing wrapper on install {pull-beats}[22144] -- Fix deb/rpm packaging for Elastic Agent {pull-beats}[22153] -- Fix composable input processor promotion to fix duplicates {pull-beats}[22344] -- Fix sysv init files for deb/rpm installation {pull-beats}[22543] -- Fix shell wrapper for deb/rpm packaging {pull-beats}[23038] -- Fixed parsing of npipe URI {pull-beats}[22978] -- Select default agent policy if no enrollment token provided. 
{pull-beats}[23973] -- Remove artifacts on transient download errors {pull-beats}[23235] -- Support for linux/arm64 {pull-beats}[23479] -- Skip top level files when unziping archive during upgrade {pull-beats}[23456] -- Do not take ownership of Endpoint log path {pull-beats}[23444] -- Fixed fetching DBus service PID {pull-beats}[23496] -- Fix issue of missing log messages from filebeat monitor {pull-beats}[23514] -- Increase checkin grace period to 30 seconds {pull-beats}[23568] -- Fix libbeat from reporting back degraded on config update {pull-beats}[23537] -- Rewrite check if agent is running with admin rights on Windows {pull-beats}[23970] -- Fix issues with dynamic inputs and conditions {pull-beats}[23886] -- Fix bad substitution of API key. {pull-beats}[24036] -- Fix docker enrollment issue related to Fleet Server change. {pull-beats}[24155] -- Improve log on failure of Endpoint Security installation. {pull-beats}[24429] -- Verify communication to Kibana before updating Fleet client. {pull-beats}[24489] -- Fix nil pointer when null is generated as list item. {issue-beats}[23734] -- Add support for filestream input. {pull-beats}[24820] -- Add check for URL set when cert and cert key. 
{pull-beats}[24904] -- Fix install command for Fleet Server bootstrap, remove need for --enrollment-token when using --fleet-server {pull-beats}[24981] -- Respect host configuration for exposed processes endpoint {pull-beats}[25114] -- Set --inscure in container when FLEET_SERVER_ENABLE and FLEET_INSECURE set {pull-beats}[25137] -- Fixed: limit for retries to Kibana configurable {issue-beats}[25063] -- Fix issue with status and inspect inside of container {pull-beats}[25204] -- Remove FLEET_SERVER_POLICY_NAME env variable as it was not used {pull-beats}[25149] -- Reduce log level for listener cleanup to debug {pull-beats} -- Passing in policy id to container command works {pull-beats}[25352] -- Reduce log level for listener cleanup to debug {pull-beats}[25274] -- Delay the restart of application when a status report of failure is given {pull-beats}[25339] -- Don't log when upgrade capability doesn't apply {pull-beats}[25386] -- Fixed issue when unversioned home is set and invoked watcher failing with ENOENT {issue-beats}[25371] -- Fixed Elastic Agent: expecting Dict and received *transpiler.Key for '0' {issue-beats}[24453] -- Fix AckBatch to do nothing when no actions passed {pull-beats}[25562] -- Add error log entry when listener creation fails {issue-beats}[23482] -- Handle case where policy doesn't contain Fleet connection information {pull-beats}[25707] -- Fix fleet-server.yml spec to not overwrite existing keys {pull-beats}[25741] -- Agent sends wrong log level to Endpoint {issue-beats}[25583] -- Fix startup with failing configuration {pull-beats}[26057] -- Change timestamp in elatic-agent-json.log to use UTC {issue-beats}[25391] -- Fix add support for Logstash output. {pull-beats}[24305] -- Do not log Elasticsearch configuration for monitoring output when running with debug. {pull-beats}[26583] -- Fix issue where proxy enrollment options broke enrollment command. 
{pull-beats}[26749] -- Remove symlink.prev from previously failed upgrade {pull-beats}[26785] -- Fix apm-server supported outputs not being in sync with supported output types. {pull-beats}[26885] -- Set permissions during installation {pull-beats}[26665] -- Disable monitoring during fleet-server bootstrapping. {pull-beats}[27222] -- Fix issue with atomic extract running in K8s {pull-beats}[27396] -- Fix issue with install directory in state path in K8s {pull-beats}[27396] -- Disable monitoring during fleet-server bootstrapping. {pull-beats}[27222] -- Change output.elasticsearch.proxy_disabled flag to output.elasticsearch.proxy_disable so fleet uses it. {issue-beats}[27670] {pull-beats}[27671] -- Add validation for certificate flags to ensure they are absolute paths. {pull-beats}[27779] -- Migrate state on upgrade {pull-beats}[27825] -- Add "_monitoring" suffix to monitoring instance names to remove ambiguity with the status command. {issue-beats}[25449] -- Ignore ErrNotExists when fixing permissions. {issue-beats}[27836] {pull-beats}[27846] -- Snapshot artifact lookup will use agent.download proxy settings. {issue-beats}[27903] {pull-beats}[27904] -- Fix lazy acker to only add new actions to the batch. {pull-beats}[27981] -- Allow HTTP metrics to run in bootstrap mode. Add ability to adjust timeouts for Fleet Server. {pull-beats}[28260] -- Fix agent configuration overwritten by default fleet config. {pull-beats}[29297] -- Allow agent containers to use basic auth to create a service token. {pull-beats}[29651] -- Fix issue where a failing artifact verification does not remove the bad artifact. {pull-beats}[30281] -- Reduce Elastic Agent shut down time by stopping processes concurrently {pull-beats}[29650] -- Move `context cancelled` error from fleet gateway into debug level. {pull}187[187] -- Update library containerd to 1.5.10. {pull}186[186] -- Add fleet-server to output of elastic-agent inspect output command (and diagnostic bundle). 
{pull}243[243] -- Update API calls that the agent makes to Kibana when running the container command. {pull}253[253] -- diagnostics collect log names are fixed on Windows machines, command will ignore failures. AgentID is included in diagnostics(and diagnostics collect) output. {issue}81[81] {issue}92[92] {issue}190[190] {pull}262[262] -- Collects stdout and stderr of applications run as a process and logs them. {issue}[88] -- Remove VerificationMode option to empty string. Default value is `full`. {issue}[184] -- diagnostics collect file mod times are set. {pull}570[570] -- Allow ':' characters in dynamic variables {issue}624[624] {pull}680[680] -- Allow the - char to appear as part of variable names in eql expressions. {issue}709[709] {pull}710[710] -- Allow the / char in variable names in eql and transpiler. {issue}715[715] {pull}718[718] -- Fix data duplication for standalone agent on Kubernetes using the default manifest {issue-beats}31512[31512] {pull}742[742] -- Agent updates will clean up unneeded artifacts. {issue}693[693] {issue}694[694] {pull}752[752] -- Use the Elastic Agent configuration directory as the root of the `inputs.d` folder. {issues}663[663] -- Fix a panic caused by a race condition when installing the Elastic Agent. {issues}806[806] -- Use at least warning level for all status logs {pull}1218[1218] -- Remove fleet event reporter and events from checkin body. {issue}993[993] -- Fix unintended reset of source URI when downloading components {pull}1252[1252] -- Create separate status reporter for local only events so that degraded fleet-checkins no longer affect health on successful fleet-checkins. 
{issue}1157[1157] {pull}1285[1285] -- Add success log message after previous checkin failures {pull}1327[1327] -- Fix inconsistency between kubernetes pod annotations and labels in autodiscovery templates {pull}1327[1327] -- Add permissions to elastic-agent-managed clusterrole to get, list, watch storageclasses {pull}1470[1470] -==== New features - -- Prepare packaging for endpoint and asc files {pull-beats}[20186] -- Improved version CLI {pull-beats}[20359] -- Enroll CLI now restarts running daemon {pull-beats}[20359] -- Add restart CLI cmd {pull-beats}[20359] -- Add new `synthetics/*` inputs to run Heartbeat {pull-beats}[20387] -- Users of the Docker image can now pass `FLEET_ENROLL_INSECURE=1` to include the `--insecure` flag with the `elastic-agent enroll` command {issue-beats}[20312] {pull-beats}[20713] -- Add `docker` composable dynamic provider. {pull-beats}[20842] -- Add support for dynamic inputs with providers and `{{variable|"default"}}` substitution. {pull-beats}[20839] -- Add support for EQL based condition on inputs {pull-beats}[20994] -- Send `fleet.host.id` to Endpoint Security {pull-beats}[21042] -- Add `install` and `uninstall` subcommands {pull-beats}[21206] -- Use new form of fleet API paths {pull-beats}[21478] -- Add `kubernetes` composable dynamic provider. 
{pull-beats}[21480] -- Send updating state {pull-beats}[21461] -- Add `elastic.agent.id` and `elastic.agent.version` to published events from filebeat and metricbeat {pull-beats}[21543] -- Add `upgrade` subcommand to perform upgrade of installed Elastic Agent {pull-beats}[21425] -- Update `fleet.yml` and Kibana hosts when a policy change updates the Kibana hosts {pull-beats}[21599] -- Update `install` command to perform enroll before starting Elastic Agent {pull-beats}[21772] -- Update `fleet.kibana.path` from a POLICY_CHANGE {pull-beats}[21804] -- Removed `install-service.ps1` and `uninstall-service.ps1` from Windows .zip packaging {pull-beats}[21694] -- Add `priority` to `AddOrUpdate` on dynamic composable input providers communication channel {pull-beats}[22352] -- Ship `endpoint-security` logs to elasticsearch {pull-beats}[22526] -- Log level reloadable from fleet {pull-beats}[22690] -- Push log level downstream {pull-beats}[22815] -- Add metrics collection for Agent {pull-beats}[22793] -- Add support for Fleet Server {pull-beats}[23736] -- Add support for enrollment with local bootstrap of Fleet Server {pull-beats}[23865] -- Add TLS support for Fleet Server {pull-beats}[24142] -- Add support for Fleet Server running under Elastic Agent {pull-beats}[24220] -- Add CA support to Elastic Agent docker image {pull-beats}[24486] -- Add k8s secrets provider for Agent {pull-beats}[24789] -- Add STATE_PATH, CONFIG_PATH, LOGS_PATH to Elastic Agent docker image {pull-beats}[24817] -- Add status subcommand {pull-beats}[24856] -- Add leader_election provider for k8s {pull-beats}[24267] -- Add --fleet-server-service-token and FLEET_SERVER_SERVICE_TOKEN options {pull-beats}[25083] -- Keep http and logging config during enroll {pull-beats}[25132] -- Log output of container to $LOGS_PATH/elastic-agent-start.log when LOGS_PATH set {pull-beats}[25150] -- Use `filestream` input for internal log collection. 
{pull-beats}[25660] -- Enable agent to send custom headers to kibana/ES {pull-beats}[26275] -- Set `agent.id` to the Fleet Agent ID in events published from inputs backed by Beats. {issue-beats}[21121] {pull-beats}[26394] {pull-beats}[26548] -- Add proxy support to artifact downloader and communication with fleet server. {pull-beats}[25219] -- Add proxy support to enroll command. {pull-beats}[26514] -- Enable configuring monitoring namespace {issue-beats}[26439] -- Communicate with Fleet Server over HTTP2. {pull-beats}[26474] -- Pass logging.metrics.enabled to beats to stop beats from adding metrics into their logs. {issue-beats}[26758] {pull-beats}[26828] -- Support Node and Service autodiscovery in kubernetes dynamic provider. {pull-beats}[26801] -- Increase Agent's mem limits in k8s. {pull-beats}[27153] -- Add new --enroll-delay option for install and enroll commands. {pull-beats}[27118] -- Add link to troubleshooting guide on fatal exits. {issue-beats}[26367] {pull-beats}[27236] -- Agent now adapts the beats queue size based on output settings. {issue-beats}[26638] {pull-beats}[27429] -- Support ephemeral containers in Kubernetes dynamic provider. {issue-beats}[#27020] {pull-beats}[27707] -- Add complete k8s metadata through composable provider. {pull-beats}[27691] -- Add diagnostics command to gather beat metadata. {pull-beats}[28265] -- Add diagnostics collect command to gather beat metadata, config, policy, and logs and bundle it into an archive. {pull-beats}[28461] -- Add `KIBANA_FLEET_SERVICE_TOKEN` to Elastic Agent container. {pull-beats}[28096] -- Enable pprof endpoints for beats processes. Allow pprof endpoints for elastic-agent if enabled. {pull-beats}[28983] -- Add `--pprof` flag to `elastic-agent diagnostics` and an `elastic-agent pprof` command to allow operators to gather pprof data from the agent and beats running under it. {pull-beats}[28798] -- Allow pprof endpoints for elastic-agent or beats if enabled. 
{pull-beats}[28983] {pull-beats}[29155] -- Add --fleet-server-es-ca-trusted-fingerprint flag to allow agent/fleet-server to work with elasticsearch clusters using self signed certs. {pull-beats}[29128] -- Discover changes in Kubernetes nodes metadata as soon as they happen. {pull-beats}[23139] -- Add results of inspect output command into archive produced by diagnostics collect. {pull-beats}[29902] -- Add support for loading input configuration from external configuration files in standalone mode. You can load inputs from YAML configuration files under the folder `{path.config}/inputs.d`. {pull-beats}[30087] -- Install command will skip install/uninstall steps when installation via package is detected on Linux distros. {pull-beats}[30289] -- Update docker/distribution dependency library to fix a security issues concerning OCI Manifest Type Confusion Issue. {pull-beats}[30462] -- Add action_input_type for the .fleet-actions-results {pull-beats}[30562] -- Add support for enabling the metrics buffer endpoint in the elastic-agent and beats it runs. diagnostics collect command will gather metrics-buffer data if enabled. {pull-beats}[30471] -- Update ack response schema and processing, add retrier for acks {pull}200[200] -- Enhance error messages and logs for process start {pull}225[225] -- Changed the default policy selection logic. When the agent has no policy id or name defined, it will fall back to defaults (defined by $FLEET_SERVER_POLICY_ID and $FLEET_DEFAULT_TOKEN_POLICY_NAME environment variables respectively). {issue-beats}[29774] {pull}226[226] -- Add Elastic APM instrumentation {pull}180[180] -- Agent can be built for `darwin/arm64`. When it's built for both `darwin/arm64` and `darwin/adm64` a universal binary is also built and packaged. {pull}203[203] -- Add support for Cloudbeat. {pull}179[179] -- Fix download verification in snapshot builds. 
{issue}252[252] -- Add support for kubernetes cronjobs {pull}279[279] -- Increase the download artifact timeout to 10mins and add log download statistics. {pull}308[308] -- Save the agent configuration and the state encrypted on the disk. {issue}535[535] {pull}398[398] -- Bump node.js version for heartbeat/synthetics to 16.15.0 -- Support scheduled actions and cancellation of pending actions. {issue}393[393] {pull}419[419] -- Add `@metadata.input_id` and `@metadata.stream_id` when applying the inject stream processor {pull}527[527] -- Add liveness endpoint, allow fleet-gateway component to report degraded state, add update time and messages to status output. {issue}390[390] {pull}569[569] -- Redact sensitive information on diagnostics collect command. {issue}[241] {pull}[566] -- Fix incorrectly creating a filebeat redis input when a policy contains a packetbeat redis input. {issue}[427] {pull}[700] -- Add `lumberjack` input type to the Filebeat spec. {pull}[959] -- Add support for hints' based autodiscovery in kubernetes provider. {pull}[698] -- Improve logging during upgrades. {pull}[1287] -- Added status message to CheckinRequest {pull}[1369] -- Improve logging of Fleet checkins errors. 
{pull}[1477] diff --git a/README.md b/README.md index b1f581b38bb..5b1f5c01b04 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Prerequisites: - installed [mage](https://github.com/magefile/mage) - [Docker](https://docs.docker.com/get-docker/) - [X-pack](https://github.com/elastic/beats/tree/main/x-pack) to pre-exist in the parent folder of the local Git repository checkout +- [elastic-agent-changelog-tool](https://github.com/elastic/elastic-agent-changelog-tool) to add changelog fragments for changelog generation If you are on a Mac with M1 chip, don't forget to export some docker variable to be able to build for AMD ``` diff --git a/changelog/fragments/1660139385-Fix-a-panic-caused-by-a-race-condition-when-installing-the-Elastic-Agent.yaml b/changelog/fragments/1660139385-Fix-a-panic-caused-by-a-race-condition-when-installing-the-Elastic-Agent.yaml new file mode 100644 index 00000000000..19844fe2dfc --- /dev/null +++ b/changelog/fragments/1660139385-Fix-a-panic-caused-by-a-race-condition-when-installing-the-Elastic-Agent.yaml @@ -0,0 +1,3 @@ +kind: bug-fix +summary: Fix a panic caused by a race condition when installing the Elastic Agent. +pr: https://github.com/elastic/elastic-agent/pull/823 diff --git a/changelog/fragments/1660158319-Upgrade-to-Go-118-Certificates-signed-with-SHA-1-are-now-rejected-See-the-Go-118.yaml b/changelog/fragments/1660158319-Upgrade-to-Go-118-Certificates-signed-with-SHA-1-are-now-rejected-See-the-Go-118.yaml new file mode 100644 index 00000000000..f7b6ce903d3 --- /dev/null +++ b/changelog/fragments/1660158319-Upgrade-to-Go-118-Certificates-signed-with-SHA-1-are-now-rejected-See-the-Go-118.yaml @@ -0,0 +1,3 @@ +kind: breaking-change +summary: Upgrade to Go 1.18. Certificates signed with SHA-1 are now rejected. See the Go 1.18 https://tip.golang.org/doc/go1.18#sha1[release notes] for details.
+pr: https://github.com/elastic/elastic-agent/pull/832 diff --git a/changelog/fragments/1661188787-Add-lumberjack-input-type-to-the-Filebeat-spec.yaml b/changelog/fragments/1661188787-Add-lumberjack-input-type-to-the-Filebeat-spec.yaml new file mode 100644 index 00000000000..9110968e91f --- /dev/null +++ b/changelog/fragments/1661188787-Add-lumberjack-input-type-to-the-Filebeat-spec.yaml @@ -0,0 +1,3 @@ +kind: feature +summary: Add `lumberjack` input type to the Filebeat spec. +pr: https://github.com/elastic/elastic-agent/pull/959 diff --git a/changelog/fragments/1663143487-Add-support-for-hints-based-autodiscovery-in-kubernetes-provider.yaml b/changelog/fragments/1663143487-Add-support-for-hints-based-autodiscovery-in-kubernetes-provider.yaml new file mode 100644 index 00000000000..04e84669955 --- /dev/null +++ b/changelog/fragments/1663143487-Add-support-for-hints-based-autodiscovery-in-kubernetes-provider.yaml @@ -0,0 +1,3 @@ +kind: feature +summary: Add support for hints' based autodiscovery in kubernetes provider. 
+pr: https://github.com/elastic/elastic-agent/pull/698 diff --git a/changelog/fragments/1664177394-Fix-unintended-reset-of-source-URI-when-downloading-components.yaml b/changelog/fragments/1664177394-Fix-unintended-reset-of-source-URI-when-downloading-components.yaml new file mode 100644 index 00000000000..b5712f4c193 --- /dev/null +++ b/changelog/fragments/1664177394-Fix-unintended-reset-of-source-URI-when-downloading-components.yaml @@ -0,0 +1,3 @@ +kind: bug-fix +summary: Fix unintended reset of source URI when downloading components +pr: https://github.com/elastic/elastic-agent/pull/1252 diff --git a/changelog/fragments/1664212969-Create-separate-status-reporter-for-local-only-events-so-that-degraded-fleet-che.yaml b/changelog/fragments/1664212969-Create-separate-status-reporter-for-local-only-events-so-that-degraded-fleet-che.yaml new file mode 100644 index 00000000000..a94f5b66751 --- /dev/null +++ b/changelog/fragments/1664212969-Create-separate-status-reporter-for-local-only-events-so-that-degraded-fleet-che.yaml @@ -0,0 +1,4 @@ +kind: bug-fix +summary: Create separate status reporter for local only events so that degraded fleet-checkins no longer affect health on successful fleet-checkins. +issue: https://github.com/elastic/elastic-agent/issues/1157 +pr: https://github.com/elastic/elastic-agent/pull/1285 diff --git a/changelog/fragments/1664230732-Improve-logging-during-upgrades.yaml b/changelog/fragments/1664230732-Improve-logging-during-upgrades.yaml new file mode 100644 index 00000000000..15f81e7d5ad --- /dev/null +++ b/changelog/fragments/1664230732-Improve-logging-during-upgrades.yaml @@ -0,0 +1,3 @@ +kind: feature +summary: Improve logging during upgrades. 
+pr: https://github.com/elastic/elastic-agent/pull/1287 diff --git a/changelog/fragments/1664360554-Add-success-log-message-after-previous-checkin-failures.yaml b/changelog/fragments/1664360554-Add-success-log-message-after-previous-checkin-failures.yaml new file mode 100644 index 00000000000..3e4ac3d91a5 --- /dev/null +++ b/changelog/fragments/1664360554-Add-success-log-message-after-previous-checkin-failures.yaml @@ -0,0 +1,3 @@ +kind: bug-fix +summary: Add success log message after previous checkin failures +pr: https://github.com/elastic/elastic-agent/pull/1327 From 9c6a43bbb8273467b249dc8c75ae16335f37f535 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Mon, 17 Oct 2022 01:37:30 -0400 Subject: [PATCH 36/63] [Automation] Update elastic stack version to 8.6.0-54a302f0 for testing (#1531) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 41048fde8fc..df54d740103 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-233dc5d4-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-54a302f0-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-233dc5d4-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-54a302f0-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From e64ea9ace444e6ba80d104e90981ea558b215d0f Mon Sep 17 00:00:00 2001 From: Craig MacKenzie Date: Mon, 17 Oct 2022 08:45:06 -0400 Subject: [PATCH 37/63] Update the linter configuration. 
(#1478) Sync the configuration with the one used in Beats, which has disabled the majority of the least useful linters already. --- .github/workflows/golangci-lint.yml | 8 +- .golangci.yml | 110 +++++++++++----------------- 2 files changed, 48 insertions(+), 70 deletions(-) diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 8079fe1c673..62d4006737c 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -18,22 +18,22 @@ jobs: name: lint runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Uses Go version from the repository. - name: Read .go-version file id: goversion run: echo "::set-output name=version::$(cat .go-version)" - - uses: actions/setup-go@v2 + - uses: actions/setup-go@v3 with: go-version: "${{ steps.goversion.outputs.version }}" - name: golangci-lint - uses: golangci/golangci-lint-action@v2 + uses: golangci/golangci-lint-action@v3 with: # Optional: version of golangci-lint to use in form of v1.2 or v1.2.3 or `latest` to use the latest version - version: v1.45.2 + version: v1.47.2 # Give the job more time to execute. # Regarding `--whole-files`, the linter is supposed to support linting of changed a patch only but, diff --git a/.golangci.yml b/.golangci.yml index 956b4b4b573..96e131c8ade 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -12,46 +12,37 @@ issues: # Set to 0 to disable. # Default: 50 max-issues-per-linter: 0 + exclude-rules: + # Exclude package name contains '-' issue because we have at least one package with + # it on its name. 
+ - text: "ST1003:" + linters: + - stylecheck + # From mage we are printing to the console to ourselves + - path: (.*magefile.go|.*dev-tools/mage/.*) + linters: + - forbidigo output: sort-results: true -# Uncomment and add a path if needed to exclude -# skip-dirs: -# - some/path -# skip-files: -# - ".*\\.my\\.go$" -# - lib/bad.go - # Find the whole list here https://golangci-lint.run/usage/linters/ linters: disable-all: true enable: - - deadcode # finds unused code - errcheck # checking for unchecked errors in go programs - errorlint # errorlint is a linter for that can be used to find code that will cause problems with the error wrapping scheme introduced in Go 1.13. - - goconst # finds repeated strings that could be replaced by a constant - - dupl # tool for code clone detection - forbidigo # forbids identifiers matched by reg exps - # 'replace' is used in go.mod for many dependencies that come from libbeat. We should work to remove those, - # so we can re-enable this linter. - # - gomoddirectives # manage the use of 'replace', 'retract', and 'excludes' directives in go.mod.
- - gomodguard - gosimple # linter for Go source code that specializes in simplifying a code - misspell # finds commonly misspelled English words in comments - nakedret # finds naked returns in functions greater than a specified function length - - prealloc # finds slice declarations that could potentially be preallocated - nolintlint # reports ill-formed or insufficient nolint directives - staticcheck # Staticcheck is a go vet on steroids, applying a ton of static analysis checks - stylecheck # a replacement for golint - - unparam # reports unused function parameters - unused # checks Go code for unused constants, variables, functions and types - - govet # Vet examines Go source code and reports suspicious constructs, such as Printf calls whose arguments do not align with the format string - ineffassign # detects when assignments to existing variables are not used - - structcheck # finds unused struct fields - typecheck # Like the front-end of a Go compiler, parses and type-checks Go code - - varcheck # Finds unused global variables and constants - asciicheck # simple linter to check that your code does not contain non-ASCII identifiers - bodyclose # checks whether HTTP response body is closed successfully - durationcheck # check for two durations multiplied together @@ -63,14 +54,20 @@ linters: - noctx # noctx finds sending http request without context.Context - unconvert # Remove unnecessary type conversions - wastedassign # wastedassign finds wasted assignment statements. - # - godox # tool for detection of FIXME, TODO and other comment keywords + - gomodguard # check for blocked dependencies # all available settings of specific linters linters-settings: errcheck: # report about not checking of errors in type assertions: `a := b.(MyStruct)`; - # default is false: such cases aren't reported by default. - check-type-assertions: true + check-type-assertions: false + # report about assignment of errors to blank identifier: `num, _ := strconv.Atoi(numStr)`. 
+ check-blank: false + # List of functions to exclude from checking, where each entry is a single function to exclude. + # See https://github.com/kisielk/errcheck#excluding-functions for details. + exclude-functions: + - (mapstr.M).Delete # Only returns ErrKeyNotFound, can safely be ignored. + - (mapstr.M).Put # Can only fail on type conversions, usually safe to ignore. errorlint: # Check whether fmt.Errorf uses the %w verb for formatting errors. See the readme for caveats @@ -80,16 +77,6 @@ linters-settings: # Check for plain error comparisons comparison: true - goconst: - # minimal length of string constant, 3 by default - min-len: 3 - # minimal occurrences count to trigger, 3 by default - min-occurrences: 2 - - dupl: - # tokens count to trigger issue, 150 by default - threshold: 100 - forbidigo: # Forbid the following identifiers forbid: @@ -97,68 +84,59 @@ linters-settings: # Exclude godoc examples from forbidigo checks. Default is true. exclude_godoc_examples: true - gomoddirectives: - # Allow local `replace` directives. Default is false. - replace-local: false + goimports: + local-prefixes: github.com/elastic gomodguard: blocked: # List of blocked modules. modules: - - github.com/elastic/beats/v7: - reason: "There must be no Beats dependency, use elastic-agent-libs instead." - + # Blocked module. + - github.com/pkg/errors: + # Recommended modules that should be used instead. (Optional) + recommendations: + - errors + - fmt + reason: "This package is deprecated, use `fmt.Errorf` with `%w` instead" gosimple: # Select the Go version to target. The default is '1.13'. - go: "1.17" - - misspell: - # Correct spellings using locale preferences for US or UK. - # Default is to use a neutral variety of English. - # Setting locale to US will correct the British spelling of 'colour' to 'color'. 
- # locale: US - # ignore-words: - # - IdP + go: "1.18.7" nakedret: # make an issue if func has more lines of code than this setting and it has naked returns; default is 30 max-func-lines: 0 - prealloc: - # Report preallocation suggestions only on simple loops that have no returns/breaks/continues/gotos in them. - # True by default. - simple: true - range-loops: true # Report preallocation suggestions on range loops, true by default - for-loops: false # Report preallocation suggestions on for loops, false by default - nolintlint: # Enable to ensure that nolint directives are all used. Default is true. allow-unused: false # Disable to ensure that nolint directives don't have a leading space. Default is true. - allow-leading-space: true + allow-leading-space: false # Exclude following linters from requiring an explanation. Default is []. allow-no-explanation: [] # Enable to require an explanation of nonzero length after each nolint directive. Default is false. require-explanation: true # Enable to require nolint directives to mention the specific linter being suppressed. Default is false. - require-specific: true + require-specific: false staticcheck: # Select the Go version to target. The default is '1.13'. - go: "1.17" + go: "1.18.7" + checks: ["all"] stylecheck: # Select the Go version to target. The default is '1.13'. - go: "1.17" - - unparam: - # Inspect exported functions, default is false. Set to true if no external program/library imports your code. - # XXX: if you enable this setting, unparam will report a lot of false-positives in text editors: - # if it's called for subdir of a project it can't find external interfaces. All text editor integrations - # with golangci-lint call it on a directory with the changed file. - check-exported: false + go: "1.18.7" + checks: ["all"] unused: # Select the Go version to target. The default is '1.13'. 
- go: "1.17" + go: "1.18.7" + + gosec: + excludes: + - G306 # Expect WriteFile permissions to be 0600 or less + - G404 # Use of weak random number generator + - G401 # Detect the usage of DES, RC4, MD5 or SHA1: Used in non-crypto contexts. + - G501 # Import blocklist: crypto/md5: Used in non-crypto contexts. + - G505 # Import blocklist: crypto/sha1: Used in non-crypto contexts. From d2c780b019a812b944593444dc66320f1d0aa5e8 Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Mon, 17 Oct 2022 10:55:17 -0500 Subject: [PATCH 38/63] Elastic agent counterpart of https://github.com/elastic/beats/pull/33362 (#1528) Always use the stack_release label for npm i No changelog necessary since there are no user-visible changes This lets us ensure we've carefully reviewed and labeled the version of the @elastic/synthetics NPM library that's bundled in docker images --- ...4342-use-stack-version-npm-synthetics.yaml | 31 +++++++++++++++++++ .../docker/Dockerfile.elastic-agent.tmpl | 2 +- .../templates/docker/Dockerfile.tmpl | 2 +- 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 changelog/fragments/1665784342-use-stack-version-npm-synthetics.yaml diff --git a/changelog/fragments/1665784342-use-stack-version-npm-synthetics.yaml b/changelog/fragments/1665784342-use-stack-version-npm-synthetics.yaml new file mode 100644 index 00000000000..a928c800d1e --- /dev/null +++ b/changelog/fragments/1665784342-use-stack-version-npm-synthetics.yaml @@ -0,0 +1,31 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. 
+# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: feature + +# Change summary; a 80ish characters long description of the change. +summary: use-stack-version-npm-synthetics + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +description: Always npm i the stack_release version of @elastic/synthetics + +# Affected component; a word indicating the component this changeset affects. +component: synthetics-integration + +# PR number; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: 1528 + +# Issue number; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. 
+#issue: 1234 diff --git a/dev-tools/packaging/templates/docker/Dockerfile.elastic-agent.tmpl b/dev-tools/packaging/templates/docker/Dockerfile.elastic-agent.tmpl index 02358d16d57..760d5e9949a 100644 --- a/dev-tools/packaging/templates/docker/Dockerfile.elastic-agent.tmpl +++ b/dev-tools/packaging/templates/docker/Dockerfile.elastic-agent.tmpl @@ -191,7 +191,7 @@ RUN cd {{$beatHome}}/.node \ RUN chown -R {{ .user }} $NODE_PATH USER {{ .user }} # If this fails dump the NPM logs -RUN npm i -g --loglevel verbose -f @elastic/synthetics || sh -c 'tail -n +1 /root/.npm/_logs/* && exit 1' +RUN npm i -g --loglevel verbose -f @elastic/synthetics@stack_release || sh -c 'tail -n +1 /root/.npm/_logs/* && exit 1' RUN chmod ug+rwX -R $NODE_PATH USER root diff --git a/dev-tools/packaging/templates/docker/Dockerfile.tmpl b/dev-tools/packaging/templates/docker/Dockerfile.tmpl index 06cce5a13b0..d2edf7909cb 100644 --- a/dev-tools/packaging/templates/docker/Dockerfile.tmpl +++ b/dev-tools/packaging/templates/docker/Dockerfile.tmpl @@ -181,7 +181,7 @@ RUN cd /usr/share/heartbeat/.node \ && mkdir -p node \ && curl ${NODE_DOWNLOAD_URL} | tar -xJ --strip 1 -C node \ && chmod ug+rwX -R $NODE_PATH \ - && npm i -g -f @elastic/synthetics && chmod ug+rwX -R $NODE_PATH + && npm i -g -f @elastic/synthetics@stack_release && chmod ug+rwX -R $NODE_PATH {{- end }} {{- range $i, $port := .ExposePorts }} From edc1e8582b4ed4d2ac2f1782dce59a8745909bdc Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Tue, 18 Oct 2022 01:37:20 -0400 Subject: [PATCH 39/63] [Automation] Update elastic stack version to 8.6.0-cae815eb for testing (#1545) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index df54d740103..1c415537ad4 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 
+3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-54a302f0-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-cae815eb-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-54a302f0-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-cae815eb-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From 33a5f7e198cd759711f04758820d91e434df7b79 Mon Sep 17 00:00:00 2001 From: Michal Pristas Date: Tue, 18 Oct 2022 14:00:02 +0200 Subject: [PATCH 40/63] Fix admin permission check on localized windows (#1552) Fix admin permission check on localized windows (#1552) --- ...permission-check-on-localized-windows.yaml | 31 ++++++++++++ .../agent/control/server/listener_windows.go | 47 +++++++++++++++++-- 2 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 changelog/fragments/1666088774-Fix-admin-permission-check-on-localized-windows.yaml diff --git a/changelog/fragments/1666088774-Fix-admin-permission-check-on-localized-windows.yaml b/changelog/fragments/1666088774-Fix-admin-permission-check-on-localized-windows.yaml new file mode 100644 index 00000000000..93d5999f1b0 --- /dev/null +++ b/changelog/fragments/1666088774-Fix-admin-permission-check-on-localized-windows.yaml @@ -0,0 +1,31 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s 
deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: bug-fix + +# Change summary; a 80ish characters long description of the change. +summary: Fix admin permission check on localized windows + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +#description: + +# Affected component; a word indicating the component this changeset affects. +component: + +# PR number; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: 1552 + +# Issue number; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. 
+issue: 857 diff --git a/internal/pkg/agent/control/server/listener_windows.go b/internal/pkg/agent/control/server/listener_windows.go index 69d211502ea..73fd3b97d95 100644 --- a/internal/pkg/agent/control/server/listener_windows.go +++ b/internal/pkg/agent/control/server/listener_windows.go @@ -10,6 +10,7 @@ package server import ( "net" "os/user" + "strings" "github.com/pkg/errors" @@ -18,9 +19,14 @@ import ( "github.com/elastic/elastic-agent/pkg/core/logger" ) +const ( + NTAUTHORITY_SYSTEM = "S-1-5-18" + ADMINISTRATORS_GROUP = "S-1-5-32-544" +) + // createListener creates a named pipe listener on Windows -func createListener(_ *logger.Logger) (net.Listener, error) { - sd, err := securityDescriptor() +func createListener(log *logger.Logger) (net.Listener, error) { + sd, err := securityDescriptor(log) if err != nil { return nil, err } @@ -31,7 +37,7 @@ func cleanupListener(_ *logger.Logger) { // nothing to do on windows } -func securityDescriptor() (string, error) { +func securityDescriptor(log *logger.Logger) (string, error) { u, err := user.Current() if err != nil { return "", errors.Wrap(err, "failed to get current user") @@ -42,11 +48,42 @@ func securityDescriptor() (string, error) { // String definition: https://docs.microsoft.com/en-us/windows/win32/secauthz/ace-strings // Give generic read/write access to the specified user. 
descriptor := "D:P(A;;GA;;;" + u.Uid + ")" - if u.Username == "NT AUTHORITY\\SYSTEM" { + + if isAdmin, err := isWindowsAdmin(u); err != nil { + // do not fail, agent would end up in a loop, continue with limited permissions + log.Warnf("failed to detect admin: %w", err) + } else if isAdmin { // running as SYSTEM, include Administrators group so Administrators can talk over // the named pipe to the running Elastic Agent system process // https://support.microsoft.com/en-us/help/243330/well-known-security-identifiers-in-windows-operating-systems - descriptor += "(A;;GA;;;S-1-5-32-544)" // Administrators group + descriptor += "(A;;GA;;;" + ADMINISTRATORS_GROUP + ")" } return descriptor, nil } + +func isWindowsAdmin(u *user.User) (bool, error) { + if u.Username == "NT AUTHORITY\\SYSTEM" { + return true, nil + } + + if equalsSystemGroup(u.Uid) || equalsSystemGroup(u.Gid) { + return true, nil + } + + groups, err := u.GroupIds() + if err != nil { + return false, errors.Wrap(err, "failed to get current user groups") + } + + for _, groupSid := range groups { + if equalsSystemGroup(groupSid) { + return true, nil + } + } + + return false, nil +} + +func equalsSystemGroup(s string) bool { + return strings.EqualFold(s, NTAUTHORITY_SYSTEM) || strings.EqualFold(s, ADMINISTRATORS_GROUP) +} From 97b8834b3c425a2246503da8b123869c079b5d6c Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Wed, 19 Oct 2022 01:36:59 -0400 Subject: [PATCH 41/63] [Automation] Update elastic stack version to 8.6.0-6545f2df for testing (#1561) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 1c415537ad4..29a6c0453ed 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: 
docker.elastic.co/elasticsearch/elasticsearch:8.6.0-cae815eb-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-6545f2df-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-cae815eb-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-6545f2df-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From f2ee0d33b5892f8151ba5c4351eb4193a1d652a5 Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Wed, 19 Oct 2022 16:05:53 -0500 Subject: [PATCH 42/63] [Heartbeat] Only support elasticsearch output for synthetics integration (#1491) * [Heartbeat] Only support elasticsearch output for synthetics integration Heartbeat should only support the elasticsearch output due to the requirement that we connect back to ES to retrieve prior state, as well as the complex and exacting nature of our schema. 
* Add buildspec * Add changelog * Remove test that configures heartbeat+logstash Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Craig MacKenzie --- .../1665780486-heartbeat-es-output-only.yaml | 31 +++++++++++++++++++ internal/pkg/agent/program/program_test.go | 2 +- internal/pkg/agent/program/supported.go | 2 +- .../testdata/logstash_config-heartbeat.yml | 29 ----------------- internal/spec/heartbeat.yml | 3 +- 5 files changed, 34 insertions(+), 33 deletions(-) create mode 100644 changelog/fragments/1665780486-heartbeat-es-output-only.yaml delete mode 100644 internal/pkg/agent/program/testdata/logstash_config-heartbeat.yml diff --git a/changelog/fragments/1665780486-heartbeat-es-output-only.yaml b/changelog/fragments/1665780486-heartbeat-es-output-only.yaml new file mode 100644 index 00000000000..1e3b4059ddf --- /dev/null +++ b/changelog/fragments/1665780486-heartbeat-es-output-only.yaml @@ -0,0 +1,31 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: feature + +# Change summary; a 80ish characters long description of the change. +summary: Only support elasticsearch as an output for the beta synthetics integration. + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# description: + +# Affected component; a word indicating the component this changeset affects. 
+component: synthetics-integration + +# PR number; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: 1491 + +# Issue number; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +#issue: 1234 diff --git a/internal/pkg/agent/program/program_test.go b/internal/pkg/agent/program/program_test.go index a318d23e02a..cd095d3e481 100644 --- a/internal/pkg/agent/program/program_test.go +++ b/internal/pkg/agent/program/program_test.go @@ -393,7 +393,7 @@ func TestConfiguration(t *testing.T) { }, "logstash_config": { programs: map[string][]string{ - "default": {"filebeat", "fleet-server", "heartbeat", "metricbeat", "endpoint", "packetbeat"}, + "default": {"filebeat", "fleet-server", "metricbeat", "endpoint", "packetbeat"}, "elasticsearch": {"filebeat"}, }, }, diff --git a/internal/pkg/agent/program/supported.go b/internal/pkg/agent/program/supported.go index 52685137b97..ed17610fcb5 100644 --- a/internal/pkg/agent/program/supported.go +++ b/internal/pkg/agent/program/supported.go @@ -27,7 +27,7 @@ func init() { // internal/spec/metricbeat.yml // internal/spec/osquerybeat.yml // internal/spec/packetbeat.yml - unpacked := 
packer.MustUnpack("eJzce1mTo0h39v33M+b2s/2ylGoaR7wXgmo2qagRapGQd2SmCiQlSFMCSeDwf3dksggQqqVnPB77oiK6U5DLybM85zmH//hlk2brtzSk/zge1vgf4SH51+P67bR++7ciob/8+y8o0TP4Yx8tPNWZew7FKaQ4OmwRWDxahn5GS7GEvi1B35oFvi2EAMaBPPpbist9BM77yNKszF1aR0uzswBMYih5GQQTYZ54eQDsIwQLhZi2CJfWUdtMI2sj6tbmHFkJ2fqySnHiUJQuFNvM1NV38Yfr2cD17FdXUMxFub88P6mKFR2IlngP2FAKYng7XxIpMe1DID8/WvpxZmnTTeCr2dyvz7SxjhoVZjj1jtB/fmTrzpfqFsnqxJfdky9dDlhe8HFLm0aWQQUIhEfLgEcIPKEdN93Ty0Y9oFQVifk842PaNELS5DWQlBwml0Mln8kJyVP2e2YZYoyf9u2z2NCF8GkfweRCob+4jnf21ozNl2oBgXgiifcaSt7kJdq3v1V/6hv0d+w+toHklVhUYmxQ/uxPzWPatJIpzeG5+4wQ4cTLkAypL2V0/eN6nuaPz7tR2X3nZLrn78CEPviyI+DEi9GPfbSWhVom8IBMl2KqSAG4iL1zmw5FhrclhlKMybpeR1j7Kr2+A2NkehSXvX1lXE8X7V6OxPCK69nVEoILDWT3hNMbud+sW82niMRUxep8V9l07jKzDJqHibclurKHQN9B3y5fNuqvr4uDHBpe/rJRjxBMUmJEe9vM6nUcZbac/n/raRoFYLKzjDjGQkbXy2i3luo1TeFoaYQiQy+JQbdY8mKcOHu7OEe2bFNo0NIuzmwPaSjpSSh9T+faNEWGkmLZjbEUpbPF/p+//MvQK+Rkk6F1mA2dgu/tQt/dQKALWuJk8GkfBb0xvYAdg5uD582cG971mXniHQPfEULwnAWAMMPO10A8Wptz9c5qxd/BkicQf5qH4JJ1HQVM9COWVpv5tOdYSui7ItYmAgTiGRm6AJcTihJ9gwxv9xtgyuDQ4RoI6DnfA/ByonWe992CgJs1jkgiaQgm6Ty5UJJ4x9+AS4PUS4fzQsl5xYZXBEzxn4TdWtA99ztdLXae6umK+UMgTy/b7+dnUxiuERPgHohh08Bf5JUDoek8EU/Q5M4gh2ASB0y5nsQkAJcSLq2ZvzjEOHUPMNG3hN1J4sXEfD717ia1Kb/01H1lzgnJnoBNT/AlZx+AScrulzvhxb7jFMcNVts8RwtwOVrfdREaVODrabWBaJUh4kLdQEAOaKPKoe/uLY1UZ9O+pfNoP2LwziuRqBDqSgEBoWtzWhtg44jcCTZWlfGZNn3ZTFM/qdafFw9vdu2csOQdIXAEJFuPzPDYGfF5P7MKVYWGW1oaOaDEpeunfcQNoRB/hYYuBJ5SEtOmARBy/n/focQUsrXkxsTQX7HsFhDo2TyZxAh4JTb0LfSFlAUAy/DiQIqiEEzOxF/kTG4hmPzO9uFLNIeG99A4GWLSM5M1X7u7L9OVUaHG0HBfYUIp4mNqgSSHYtlhDok5oRNKVhFMlMIyPKkKonx/JfQXtaNQcrhUEyRbbO5d4Ltxe3/LCf8/s625dvMbv7e5psYoWURDmXAf4DvnADi0ur/WqdXPiK/YtE886EhK61RvAyHNseQVRFdimLoU35fLE5ImEvRtoblHIAp5cw9YzAgbJ8a3Wu5uyX7nOuvHAma2oCvH0Od3wPxC2eypCbjNe7WeHYjhZS8btTlPMy6gsvMsP3sngJtqTIyoq2tjwXtrfa9+r/TYKy0DnvBGVTv2zuw2DcFDxO1Am6aV/S5OdvEQuT6kOKVCyAIx0xMmu40qhIbO5FO265hcPgIEiyjwnyNixNQyav+xVPMAiOz+mqDG5JRj6RITw7tjf32Qwu9p0Ql2N7YY5bNil3bf5fM+7SP76fusAx74XnrAw4AFkoS+rWvXO0BA2RFwoR2ZJpbxPWrtVVMPu
FBLbHjbEMADl4Gk5CTxCuaXallxnbGlxm5cFoyRzQGeSOziod0jAvob9JQYp3bcBzitrrT21Duv8e3RMq7z179lTFYIKBL0FO7Tuu/UIGY432feaePWV9dpbX+x/xgUGhzkcXmyew6Bkrf3oNsU+94BJ6soYHHBcE4ogQdYqAIq1C2SRGqZ7h4nigilqNLJ73rK/JelxTkq1COSnBhpahKCC8XFLv/CWaq4oisF0w1fggdkeLWd3gOVHaC3vfEDNYCubaaWDUn0IwFe2ZEJA5Nc5+G0O8bXfw0BwyKkCH23A7yFCMu0ZLqjRfut9V3fQfM5IiY5WYa+g7qShMA7srGuL8UFB5qNX48a3BSCUZ/fAkJfqmQ8T+huro2Mp04e+GoZGoow/jvzOc4BSQ+DmFb9zmNb8u3RMjOlkrdzwmZzR2qCEyW7AexVwtTFOTM299AfwNQ+MbzRx459H1vZWYuNZjVe6YH6Kx6w27ubL/vrt/ZeY6JW95jO+/xdWt1XHfM0tQXyjT+p/GwNzDWm85MkBETEic5tZRAzB4nAfkakmKIti39eDmV3P9PcX/tnb875hxIDngzghM37zJOBLyUGmO5zMpYYSJcTAYPEoDc2SAz+ViD/QD4PjgdAu3vG6MAdUJ1xckewbsGRUBnJps4kkwNzPsxJ7KAfdTJdHvgeLeNygjJT6P7zDfjx5Q4w3O7fYR3GwZZVg3SWeSKJMGfJgSgyzr27bEDNFbTYFEo0t0xyIEYU2ZJzRLK3qxIAnNf7ZU5cgL792jhNX2bPx6848VLox+dOxn0L7pnRJqtHSyPrrnznm336ESMwlFflKLmcEwYgquSlu0drlGW4ZQ36jufGYUR/SSYf2VIWwySLq38zHXEY8NnbxW52Y6zrlBz2m3Roqy6Y7LChHFC6iFaStyW+fSDmbhZI4q4CE+4ZSTQnmlhC4Ig4ocJ6oPfwOkcGDU/SUs6qHQJp9Wg9BfLLUzQLgMMdHwsS3JbkSodeuCP0cmiqJxYkteRygqJyDnx3X8mdgRNbDsHDo6VZpx8G3eBEL9ZLRW/kMxeu78+5DFw6Z/IolM7+hd/nbO7CYnMeQzARWdJnbZQTNhcnF1xiLLuHoFD06ztKSbhfUY5IwqfuOWebCRvbMH0hPKlTZFzuI2v3/OjrlwVOlBQnemZ9rwCIr1/a/fJ/N2voF8x8DTE87DM7Fy94dJ3E2UPgvHH5yW6MjPNjndzQQFQ4UBomNFbSkYvv0ED2OACx6udqVrYNeBYL8AlN1kvrOrYRMhYIrDZYTjdYdlmwKpoxYtAMAkVkuvBcTmfYUEqis/07AgOA9R0/QOCwZPYIfwySJc0e17NmH0YVL9pArNnt3N19zZdieyf1cyUxXIpTqzNmZXPfO0PZjqGxGozbFEuKiBOH4qIjgzty7D8/eQz96aZJEkMgUuYPXjZT6flpOsOmTX3Zy0MwYTp1RE/72Xyp0rXhbSuQumqAEtf9l81009UDfLXNZo0YJ6QPQDVbREmrH5su0Bze47h8Rvb9IflzBVm+PGBZ32Nq6wSim0AOQTWTT6sXUy6vm1hyC+ysm+R7PDFSh8xyxpJlAlb8TAjo56E99Rh50xaR0dvrh8kSj72JV/SSke2trLo22a8CCIPk5JZx5vvw4QFL9ISicSBZzekO4tCFooQIocbiUC0/WThYTw/Rc00ChYZeLiVvwuZoYt3rksWehsR1Sgj0IpCiEQB5YCCY4Z+Y2RHzlShRBIutJ9siSt0DAqs88O1taArRbz+EyJb0Av0IBLuo1rfNrCBgwnV0nsAYAXpc+/WzPC7GDHdU59HcX3Hq5dw3LSdZAA4nnNbPljidLae3MfJ1Q9cjeNZlPgtUyUEdF7m/5ThneqjscKOiXrUqdSgxvfM8oUf0CVzaYF+WDGLjEhNjNUbuDipiYowSlnyJPAHsPC/g1LvFvoaSMrwGi8kR+pCiJ3EHgS3C4sNKm7FcXfRPENZXuWiTOunBeUPU4
GJEDrxSpxfEoAnDCC8bleM7XChdGRfQZ3HdFnyJYYha53VlGxp6DhnOqG33I391H2/fYOC/gtS0a5K88WsHlOAIJR7lRIDxjdncGRtezrF2ohRwqcYo3UVIdvfQtxqSmeUjMUqdBMl2FvjugdnanCWpukPhUt0T0z3jcn+aSxx7i8GG3Y9wglfc0+LNOagTfomWWPYoLr4Jz1uc/zf7+pjIz1mN8eXAd7fhtP8bLp9bPxj4BxEnq4zrceruCbjG63qOBMksbtuTq690Tih1mf9p72S+VHdIct6gf32/mnNx4ljlSoZRkng1JnQPKLoZF4KqotubgwD3fF2/woJ1Dnh9VnLOc18Vg9QRg+u8/M58qYON23ligZjq71hSrsSYcTgR384DcNl1CAueH1z/fzk1Vdv5Uq0wraFsQ8kres8USgGBe8CiUiJDkZm+Dn4v177D3r+SJcy39OV1JEDcQN/+PQDO2/V98ULMDsmSeAJMLidy3dfpuQzEmuzuEsS70L/eP8OgIXiYdYm03j5Nd4uv+iNC83o3BEwO6+tvcgg4xr4b9weEYDevr/VNOa89JUbG5ZUYyisyaEme9jeEdVus6OaNBvNnnCA9hpX/y5GknLvEJvTjLfRV9huy0tqf+VUxtfX5Gve/Z5woW+g7JZLt0qLCcI8CEqsiSB+f1N0M/QLTz+6fYXOKU/c1SLykJssq4raKp7MeuW+MkXgN/mjPOvuZQs5wv6HvUhYHhuO9Nc8cZ2aBPx3g8SFxeYP/OsWT1eeKJYYYY1OlXc6mGr+cYJ0TdG32qg8d/1THk+56ljbwVdv9wF6adeqioexuMduf4ZzvzMPs59EyvR2e9vfC1p5L7imQMnaOaOBPmnm4r6ls2HnFzD+z/JzpVM+uO+e/+phHy3Qm7J2O7/oQaxPfoS1OmH7+vUGhs+t/7nS3NAWCdwrDXyio3C0+/8X5z59I7ndwXPcM1m3xJe3spYqVFZd6P9d5gz4tGeaAi30f2xleDA2P5VJVnpqSPQQPj/0857pGW0z4gzlPS6bzPZK3AMC3YImPlkY4riSGXoYaPmjRP28J9Ve6XmfjHXhulQdGqyaHrHn1Oj/M2tzyyq1v5kux6rbSxAxJLmXxoN+lV3WvtV1wn+Gso3631A0O/EAX73G8IwW1bv6bdXLnP2d9o60vfLiHphHm3QJkbb9XDrjaZ7MXZj8sztzhlyNQqAkyPEq0SdM1mTdzXZukWl2L/DaOtPHpRHyX5QINp93YFS8+olH58IITavUgbbohJ2cksVxmx4uKI2s1dp4/a+2zV769jv/Q8JLA947EHO/eu+XVb/axR7IjDDrvbuTEOxLHO+7yRm+YfTN9QYl+9GX1hNPFR2uXWDrfdGU22GK+neZDnexix46sqv22+xvix46/6/JBnb+xomX/T2iKwDfv1vnCCcutXnSwzf2Glbv7fKcA/1H8/TiWTrvxarQp4Q/NsZzwojKW1TiQVj91rnfi88/s79pcFP1UF+pAt6ZjHGHz7NA/fDVWcvt4Xe6i3zbTs2XoOdT+7PrWSN0qXodvY92nS8OLcdovMoe9sU4g/GyB+ScIua+0s3+qe/QmSNs0YA5Hiikb/3lCrC50+nELUutug/GAKiq8U9OXbQol76Hboj7eYm3Tl426xjJztDHlF1t8y2fnEUUedNm8B2zf787pGGZzN1dHe9s63iRqqUc/Uxy+55jvO+SPja/vmDsdJaNG9zcApck6e9vgEQv8ATwBJ3Rba2T9UUj9oYVU0+XjH340rRsHZAgf09wNlZ66FPnqkZd3P24P+eLHJSzlJgeU4BzxtPisQMPbEICH86aBqJyhb2/ZvL8t3V9/rLzVakefPkGPZ9B3ixA4dcrR9MVNGDzndMLYubS7HqEva5x47CYLoisnRJVa49zXQIpjlBBmnZXmpy11cact5RP9yvXd13TJ51pS3qOKABRaCrZte2k/2nm3J2xAz/X6cztUzwe0jjPs8/1b0
XwhWPzvovsM74GYdsx0gstJV3iP7LVcMICdHaqvv8fB+OADq9GSxL3INVKG4K1F+kM+K5TGNkt7+v63A//TZeiP6Oi6FMT95P1Wq84d3PZXF8zG/U30j9XThUf53zYPb7PlrYyqeXj716OluV1EcEv5aj2qqI8ehv2bV8osC0BGfUkvcKJPRvW49RMDGF7pSrvnL/Zed95bfIEGHNIKX6cOf7pvu/M9xx+YYyRF+sQZTK+EntJJMf5UCrEXN98vJ/RiYj4WG9py9PkrdGQ/1jZyebdHuCeDmAbApWzO6vyjLYIDfPA36PHdH3/P12/FGPyTnQsBXrHud0acsKyL0Lcnw+6IL3RGfB36faXD91MdEfbPdwHXn9uhoXze7YBUSux7FKe72c91+jXv06pT97MdfqZ7QMA7EX/xaD19P3e7lu8ki92uiObTuQFsa7qVO+rfuBdP6UCW1uQYXOXdIsigXIYh4Owecx0JC+33OzT+wCcdvU8IhywyM+36/gaVhfEu4k9/ajD4bvgv7xr+4Pvf2y6tftcVSypjxdK+K5aGy5enIB1NGg8h3q3HaJuVoW9DyRN6SaPJgk1Gmw8w2qSxwJlbpakfJIzsmZtn3/0ugEulEPWqd/r9Pqr+s3cTxfTeh7e4f+afp27+IEXSrz/fpUfOAXDe4AhP+H+sf/6jL+Fnv/zn//uvAAAA//8jBKIe") + unpacked := packer.MustUnpack("eJzce1mTo0h39v33M+b2s/2ylGoaR7wXgio2qagWapGQd2SmCiQlSFMCSeDwf3dksggQtXWPx2NfVER3CnI5eZbnPOfwH79t0mz9mob0H8fDGv8jPCT/ely/ntav/1Yk9Ld//w0legZ/7KOFpzpzz6E4hRRHhy0Ci3vL0M9oKZbQtyXoW7PAt4UQwDiQR39LcbmPwHkfWZqVuUvraGl2FoBJDCUvg2AizBMvD4B9hGChENMW4dI6aptpZG1E3dqcIyshW19WKU4citKFYpuZunoUf7ieDVzPfnEFxVyU+8vTg6pY0YFoiXeHDaUghrfzJZES0z4E8tO9pR9nljbdBL6azf36TBvrqFFhhlPvCP2ne7bufKlukaxOfNk9+dLlgOUFH7e0aWQZVIBAuLcMeITAE9px0z09b9QDSlWRmE8zPqZNIyRNXgJJyWFyOVTymZyQPGW/Z5Yhxvhh3z6LDV0IH/YRTC4U+ovreGdvzdh8qRYQiCeSeC+h5E2eo337W/WnvkJ/x+5jG0heiUUlxgblz/7UPKZNK5nSHJ67zwgRTrwMyZD6UkbXP67naf74vBuV3XdOpnv+DkzonS87Ak68GP3YR2tZqGUCD8h0KaaKFICL2Du36VBkeFtiKMWYrOt1hLWv0us7MEamR3HZ21fG9XTR7uVIDK+4nl0tIbjQQHZPOL2R+8261XyKSExVrM53lU3nLjPLoHmYeFuiK3sI9B307fJ5o/7+sjjIoeHlzxv1CMEkJUa0t82sXsdRZsvp/7ceplEAJjvLiGMsZHS9jHZrqV7TFI6WRigy9JIYdIslL8aJs7eLc2TLNoUGLe3izPaQhpKehNJjOtemKTKUFMtujKUonS32//ztX4ZeISebDK3DbOgUfG8X+u4GAl3QEieDD/so6I3pBewY3Bw8bebc8K7PzBPvGPiOEIKnLACEGXa+BuLR2pyrd1Yr/g6WPIH40zwEl6zrKGCiH7G02synPcdSQt8VsTYRIBDPyNAFuJxQlOgbZHi774Apg0OHayCg53wPwMuJ1nnedwsCbtY4IomkIZik8+RCSeIdvwOXBqmXDueFkvOCDa8ImOI/CLu1oHvuI10tdp7q6Yr5QyAPz9vH85MpDNeICXAPxLBp4C/yyoHQdJ6IJ2hyZ5BDMIkDplwPYhKASwmX1sxfHGKcugeY6FvC7iTxYmI+nXp3k9qUX3rqvjDnhGRPwKYn+JKzD8AkZffLnfBi33GK4warbZ6iBbgcrUddh
AYV+HpabSBaZYi4UDcQkAPaqHLou3tLI9XZtG/pPNqPGLzzQiQqhLpSQEDo2pzWBtg4IneCjVVlfKZNnzfT1E+q9efF3atdOycseUcIHAHJ1j0zPHZGfN7PrEJVoeGWlkYOKHHp+mEfcUMoxN+hoQuBp5TEtGkAhJz/33coMYVsLbkxMfQXLLsFBHo2TyYxAl6JDX0LfSFlAcAyvDiQoigEkzPxFzmTWwgmf7B9+BLNoeHdNU6GmPTMZM3X7u7LdGVUqDE03BeYUIr4mFogyaFYdphDYk7ohJJVBBOlsAxPqoIo318J/UXtKJQcLtUEyRabexf4btze33LC/89sa67d/Mbvba6pMUoW0VAm3Af4zjkADq3ur3Vq9TPiCzbtEw86ktI61dtASHMseQXRlRimLsVvy+UBSRMJ+rbQ3CMQhby5ByxmhI0T41std7dkv3Od9WMBM1vQlWPo8ztgfqFs9tQE3Oa9Ws8OxPCy543anKcZF1DZeZafvRPATTUmRtTVtbHgvbUeq98rPfZKy4AnvFHVjr0zu01DcBdxO9CmaWW/i5Nd3EWuDylOqRCyQMz0hMluowqhoTP5lO06JpePAMEiCvyniBgxtYzafyzVPAAiu78mqDE55Vi6xMTw3rC/Pkjh97ToBLsbW4zyWbFLu+/yeR/2kf3wOOuAB76XHvAwYIEkoW/r2vUOEFB2BFxoR6aJZTxGrb1q6gEXaokNbxsCeOAykJScJF7B/FItK64zttTYjcuCMbI5wBOJXdy1e0RAf4WeEuPUjvsAp9WV1p565zW+3VvGdf76t4zJCgFFgp7CfVr3nRrEDOf7zDtt3PrqOq3tL/Yfg0KDgzwuT3bPIVDy9h50m2LfO+BkFQUsLhjOCSXwAAtVQIW6RZJILdPd40QRoRRVOvmop8x/WVqco0I9IsmJkaYmIbhQXOzyL5yliiu6UjDd8CV4QIZX2+lboLID9LY3fqAG0LXN1LIhiX4kwCs7MmFgkus8nHbH+PovIWBYhBSh73aAtxBhmZZMd7Rov7Ue9R00nyJikpNl6DuoK0kIvCMb6/pSXHCg2fj1qMFNIRj1+S0g9KVKxvOE7ubayHjq5IGvlqGhCOO/M5/jHJB0N4hp1e88tiXf7i0zUyp5OydsNnekJjhRshvAXiVMXZwzY3MP/QFM7RPDG33s2PexlZ212GhW45UeqL/iAbu9u/myv35r7zUmanWP6bzP36XVfdUxT1NbIN/4k8rP1sBcYzo/SUJARJzo3FYGMXOQCOxnRIop2rL45+VQdvczzf29f/bmnL+UGPBkACds3ieeDHwpMcB0n5OxxEC6nAgYJAa9sUFi8LcC+QfyeXA8ANrdM0YH7oDqjJM7gnULjoTKSDZ1JpkcmPNhTmIH/aiT6fLAd28ZlxOUmUL3n2/Ajy93gOF2/w7rMA62rBqks8wTSYQ5Sw5EkXHu3WUDaq6gxaZQorllkgMxosiWnCOSvV2VAOC83i9z4gL07ZfGafoyez5+wYmXQj8+dzLuW3DPjDZZ3VsaWXflO9/s048YgaG8KkfJ5ZwwAFElL909WqMswy1r0Hc8Nw4j+ksy+ciWshgmWVz9m+mIw4DP3i52sxtjXafksN+kQ1t1wWSHDeWA0kW0krwt8e0DMXezQBJ3FZhwz0iiOdHEEgJHxAkV1gO9h9c5Mmh4kpZyVu0QSKt76yGQnx+iWQAc7vhYkOC2JFc69MwdoZdDUz2xIKkllxMUlXPgu/tK7gyc2HII7u4tzTr9MOgGJ3qxXip6I5+5cH1/zmXg0jmTR6F09i/8MWdzFxab8xiCiciSPmujnLC5OLngEmPZPQSFol/fUUrC/YpyRBI+dc8520zY2IbpC+FJnSLjch9Zu6d7X78scKKkONEz67ECIL5+affL/92soV8w8zXE8LDP7Fy84NF1EmcPgfPK5Se7MTLO93VyQwNR4UBpmNBYSUcuvkMD2eMAxKqfq1nZNuBZLMAnNFkvrevYR
shYILDaYDndYNllwapoxohBMwgUkenCUzmdYUMpic727wgMANZ3fAeBw5LZI/wxSJY0e1zPmn0YVbxoA7Fmt3N39zVfiu2d1M+VxHApTq3OmJXNfe8MZTuGxmowblMsKSJOHIqLjgzekGP/+cl96E83TZIYApEyf/C8mUpPD9MZNm3qy14eggnTqSN62M/mS5WuDW9bgdRVA5S47j9vppuuHuCrbTZrxDghfQCq2SJKWv3YdIHm8B7H5TOy7w/JnyvI8uUBy/oeU1snEN0EcgiqmXxavZhyed3EkltgZ90k3+OJkTpkljOWLBOw4mdCQD8P7anHyJu2iIzeXj9MlnjsTbyil4xsb2XVtcl+FUAYJCe3jDPfhw8PWKInFI0DyWpOdxCHLhQlRAg1Fodq+cnCwXq4i55qEig09HIpeRM2RxPrXpYs9jQkrlNCoBeBFI0AyAMDwQz/xMyOmK9EiSJYbD3ZFlHqHhBY5YFvb0NTiL7/ECJb0gv0IxDsolrfNrOCgAnX0XkCYwToce3Xz/K4GDPcUZ1Hc3/HqZdz37ScZAE4nHBaP1vidLac3sbIlw1dj+BZl/ksUCUHdVzk/pbjnOmhssONinrVqtShxPTO84Qe0SdwaYN9WTKIjUtMjNUYuTuoiIkxSljyJfIEsPO8gFPvFvsaSsrwGiwmR+hDih7EHQS2CIsPK23GcnXRP0FYX+WiTeqkB+cNUYOLETnwSp1eEIMmDCM8b1SO73ChdGVcQJ/FdVvwJYYhap3XlW1o6DlkOKO23Y/81dt4+wYD/xWkpl2T5I1fO6AERyjxKCcCjG/M5s7Y8HKOtROlgEs1RukuQrK7h77VkMwsH4lR6iRItrPAdw/M1uYsSdUdCpfqnpjuGZf701zi2FsMNux+hBO84p4Wb85BnfBLtMSyR3HxTXja4vy/2dfHRH7KaowvB767Daf933D51PrBwD+IOFllXI9Td0/ANV7XcyRIZnHbnlx9pXNCqcv8T3sn86W6Q5LzCv3r+9WcixPHKlcyjJLEqzGhe0DRzbgQVBXd3hwEuOfr+hUWrHPA67OSc577qhikjhhc5+V35ksdbNzOEwvEVP/AknIlxozDifh2HoDLrkNY8Pzg+v/LqanazpdqhWkNZRtKXtF7plAKCNwDFpUSGYrM9HXwe7n2Hfb+lSxhvqUvryMB4gb69h8BcF6v74sXYnZIlsQTYHI5keu+Tk9lINZkd5cg3oX+9f4ZBg3B3axLpPX2abpbfNUfEZrXuyFgclhff5NDwDH2m3F/QAh28/pa35Tz2lNiZFxeiKG8IIOW5GF/Q1i3xYpu3mgwf8YJ0mNY+b8cScq5S2xCP95CX2W/ISut/ZlfFVNbn69x/3vGibKFvlMi2S4tKgz3KCCxKoL08UndzdAvMP3s/hk2pzh1X4LES2qyrCJuq3g665H7xhiJ1+CP9qyznynkDPcb+i5lcWA43lvzzHFmFvjTAR4fEpc3+K9TPFl9rlhiiDE2VdrlbKrxywnWOUHXZq/60PFPdTzprmdpA1+13Q/spVmnLhrK7haz/RnO+Y15mP3cW6a3w9P+Xtjac8k9BVLGzhEN/EkzD/c1lQ07L5j5Z5afM53q2XXn/Fcfc2+ZzoS90/FdH2Jt4ju0xQnTz783KHR2/c8b3S1NgeCdwvAXCipvFp//4vznTyT3OziuewbrtviSdvZSxcqKS30713mFPi0Z5oCLfR/bGV4MDY/lUlWempI9BHf3/TznukZbTPjFnKcl0/keyWsA4GuwxEdLIxxXEkMvQw0ftOift4T6C12vs/EOPLfKA6NVk0PWvHqdH2Ztbnnl1jfzpVh1W2lihiSXsnjQ79KrutfaLrjPcNZRv1vqBgd+oItvcbwjBbVu/pt1cuc/Z32jrS98uIemEebdAmRtv1cOuNpnsxdmPyzOvMEvR6BQE2R4lGiTpmsyb+a6Nkm1uhb5bRxp49OJ+C7LBRpOu7ErXnxEo/LhBSfU6kHad
ENOzkhiucyOFxVH1mrsPH/S2mevfHsd/6HhJYHvHYk53r13y6vf7GOPZEcYdN7dyIl3JI533OWN3jD7ZvqCEv3oy+oJp4uP1i6xdL7pymywxXw7zYc62cWOHVlV+233N8SPHX/X5YM6f2NFy/6f0BSBb96t84UTllu96GCbtxtW3tznOwX4j+Lvx7F02o1Xo00JvzTHcsKLylhW40Ba/dS53onPP7O/a3NR9FNdqAPdmo5xhM2zQ//w1VjJ7eNluYu+b6Zny9BzqP3Z9a2RulW8Dl/Huk+XhhfjtF9kDntjnUD42QLzTxByX2ln/1T36E2QtmnAHI4UUzb+84RYXej04xak1t0G4wFVVHinpi/bFEreXbdFfbzF2qbPG3WNZeZoY8ovtviWz84jijzosnkP2L7fndMxzOZuro72tnW8SdRSj36mOPyWY37bIX9sfH3H3OkoGTW6Xwels+V0az12dEdTTyi5TCzTEbHJAqFbWo/ej1FAmqyz1w0esb4fwBNwQre1NtYfhNQfWUg1VT7+0UfTtnFAhvAxxd3Q6KlLka8eeWn349aQL35YwtJtckAJzhFPic8KNLwNAXg4bxqIyhn69pbN+33p/v5j5a1WO/rwCWo8g75bhMCp042mJ27CoDmnEsbOpb3pDfqyxonHUouC6MoJUaXWNvclkOIYJYRZZqX1aUtbvNGS8ole5frua6rkc+0o79FEAAot/dq2vLQf7LzbDzag5nq9uR2a5wNKxxn2+P6tKL4QLP53UX2Gd0dMO2Y6weWkK7w/9loqGEDODs3X3+NgfPBx1Wg54q2oNVKC4G1F+l0+K5TGNkt7+v53A//TJeiPqOi6DMT95NttVp07uO2tLpiN+5voH6uHC4/w3zd3r7PlrYyqeXjr172luV00cEv3aj2aqI8chr2bV7osC0BGfUkvcKJPRvW49RMDCF7pSrvnL/Zdd95bfIECHFIKX6cNf7pnu/Mtxy/MMZIefeIMpldCT+mkF38qfdiLm++XEnoxMR+LDW0p+vwVKrIfaxu5vNsf3JNBTAPgUjZndf7R9sABPvgb9Pfuj3/k69diDP7JzoUAr1j3uyJOWNZF6NuTYWfEF7oivg79vtLd+6luCPvnO4DrT+3QUD7vdj8qJfY9itPd7Oe6/Jr3adWl+9nuPtM9IOCdiL+4tx4ez92O5TcSxW5HRPPZ3AC2NZ3KHfVv3IundCBLa3IMrvJOEWRQLsMQcGaPuY6Ehfa3uzN+4XOO3ueDQwaZmXZ9f4OqwngH8ac/Mxh8M/yXdwx/8O3vbYdWv+OKJZSxYmmPiqXh8vkhSEeTxkOId+sxymZl6NtQ8oRe0miyYJPR5uOLNmkscOZWdZMPEkb2zM2z734TwKVSiHrVN/1+D1X/2TcTxfStj25x/8w/T9v8Ij3Srz2/SY2cA+C8whGO8P9Y7/xHX8HPfvvP//dfAQAA//81uZ/e") SupportedMap = make(map[string]Spec) for f, v := range unpacked { diff --git a/internal/pkg/agent/program/testdata/logstash_config-heartbeat.yml b/internal/pkg/agent/program/testdata/logstash_config-heartbeat.yml deleted file mode 100644 index fbddfbe022e..00000000000 --- a/internal/pkg/agent/program/testdata/logstash_config-heartbeat.yml +++ /dev/null @@ -1,29 +0,0 @@ -inputs: -- type: synthetics/http - id: unique-http-id - name: my-http - schedule: '*/5 * * * * * *' - host: "http://localhost:80/service/status" - timeout: 
16s - wait: 1s - data_stream.namespace: default - processors: - - add_fields: - target: 'elastic_agent' - fields: - id: agent-id - version: 8.0.0 - snapshot: false - - add_fields: - target: 'agent' - fields: - id: agent-id -output: - logstash: - ssl.certificate: abcert - ssl.key: abckey - hosts: - - 127.0.0.1:5044 - ssl.certificate_authorities: - - abc1 - - abc2 diff --git a/internal/spec/heartbeat.yml b/internal/spec/heartbeat.yml index ecb373cf791..640cbfa1c91 100644 --- a/internal/spec/heartbeat.yml +++ b/internal/spec/heartbeat.yml @@ -20,5 +20,4 @@ rules: - inputs - output - keystore -when: length(${inputs}) > 0 and hasKey(${output}, 'elasticsearch', 'redis', - 'kafka', 'logstash') +when: length(${inputs}) > 0 and hasKey(${output}, 'elasticsearch') # Heartbeat only supports ES From 19f82223b0af9f4e114530956f833060120da667 Mon Sep 17 00:00:00 2001 From: Anderson Queiroz Date: Thu, 20 Oct 2022 19:56:32 +0200 Subject: [PATCH 43/63] make policy change handler try all fleet hosts before failing (#1329) It changes the remote client to: - when creating a new client: - succeed if at least one host is healthy - shuffle the hosts, avoiding all the agents reaching to the same fleet-server on the first request - makes `(remote.*Client).Send` try all the hosts before failing, returning a multi-error if all hosts fail - if debug logs are enabled, `Send` will log each error with debug level - modifies `remote.requestClient`: - now `requestClient` holds its host - remove `requestFunc` - `(remote.requestClient).newRequest` uses the new `host` property to build the final URL for the request --- ...ltiple-Fleet-Server-hosts-are-handled.yaml | 35 +++ .../handlers/handler_action_policy_change.go | 14 +- internal/pkg/fleetapi/client/client.go | 2 +- internal/pkg/remote/client.go | 227 ++++++++++-------- internal/pkg/remote/client_test.go | 140 +++++++---- 5 files changed, 273 insertions(+), 145 deletions(-) create mode 100644
changelog/fragments/1666281194-Fix-how-multiple-Fleet-Server-hosts-are-handled.yaml diff --git a/changelog/fragments/1666281194-Fix-how-multiple-Fleet-Server-hosts-are-handled.yaml b/changelog/fragments/1666281194-Fix-how-multiple-Fleet-Server-hosts-are-handled.yaml new file mode 100644 index 00000000000..c0f13aa3d9c --- /dev/null +++ b/changelog/fragments/1666281194-Fix-how-multiple-Fleet-Server-hosts-are-handled.yaml @@ -0,0 +1,35 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: bug-fix + +# Change summary; a 80ish characters long description of the change. +summary: Fix how multiple Fleet Server hosts are handled + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +description: It fixes the bug when the Elastic Agent would be enrolled using + a valid Fleet Server URL, but the policy would contain more than one, being + the first URL unreachable. In that case the Elastic Agent would enroll with + Fleet Server, but become unhealthy as it'd get stuck trying only the first, + unreachable Fleet Server host. + +# Affected component; a word indicating the component this changeset affects. +#component: + +# PR number; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. 
+# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: 1329 + +# Issue number; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +#issue: 1234 diff --git a/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_policy_change.go b/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_policy_change.go index ad75299e420..5551e9461c7 100644 --- a/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_policy_change.go +++ b/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_policy_change.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "io/ioutil" + "net/http" "sort" "time" @@ -142,14 +143,17 @@ func (h *PolicyChange) handleFleetServerHosts(ctx context.Context, c *config.Con err, "fail to create API client with updated hosts", errors.TypeNetwork, errors.M("hosts", h.config.Fleet.Client.Hosts)) } + ctx, cancel := context.WithTimeout(ctx, apiStatusTimeout) defer cancel() - resp, err := client.Send(ctx, "GET", "/api/status", nil, nil, nil) + + resp, err := client.Send(ctx, http.MethodGet, "/api/status", nil, nil, nil) if err != nil { return errors.New( - err, "fail to communicate with updated API client hosts", + err, "fail to communicate with Fleet Server API client hosts", errors.TypeNetwork, errors.M("hosts", h.config.Fleet.Client.Hosts)) } + // discard body for proper cancellation and connection reuse _, _ = io.Copy(ioutil.Discard, resp.Body) resp.Body.Close() @@ -157,15 +161,17 @@ func (h *PolicyChange) handleFleetServerHosts(ctx context.Context, c *config.Con reader, err := fleetToReader(h.agentInfo, h.config) if err != nil { return errors.New( - err, "fail to persist updated API client hosts", + err, "fail to persist new Fleet Server 
API client hosts", errors.TypeUnexpected, errors.M("hosts", h.config.Fleet.Client.Hosts)) } + err = h.store.Save(reader) if err != nil { return errors.New( - err, "fail to persist updated API client hosts", + err, "fail to persist new Fleet Server API client hosts", errors.TypeFilesystem, errors.M("hosts", h.config.Fleet.Client.Hosts)) } + for _, setter := range h.setters { setter.SetClient(client) } diff --git a/internal/pkg/fleetapi/client/client.go b/internal/pkg/fleetapi/client/client.go index 4470f0259a8..0f478497bb6 100644 --- a/internal/pkg/fleetapi/client/client.go +++ b/internal/pkg/fleetapi/client/client.go @@ -87,7 +87,7 @@ func NewWithConfig(log *logger.Logger, cfg remote.Config) (*remote.Client, error // ExtractError extracts error from a fleet-server response func ExtractError(resp io.Reader) error { - // Lets try to extract a high level fleet-server error. + // Let's try to extract a high level fleet-server error. e := &struct { StatusCode int `json:"statusCode"` Error string `json:"error"` diff --git a/internal/pkg/remote/client.go b/internal/pkg/remote/client.go index 085ab2bfe0e..5c8fd5c9a34 100644 --- a/internal/pkg/remote/client.go +++ b/internal/pkg/remote/client.go @@ -6,14 +6,17 @@ package remote import ( "context" + "fmt" "io" + "math/rand" "net/http" "net/url" + "sort" "strings" "sync" "time" - "github.com/pkg/errors" + "github.com/hashicorp/go-multierror" urlutil "github.com/elastic/elastic-agent-libs/kibana" "github.com/elastic/elastic-agent-libs/transport/httpcommon" @@ -26,33 +29,32 @@ const ( retryOnBadConnTimeout = 5 * time.Minute ) -type requestFunc func(string, string, url.Values, io.Reader) (*http.Request, error) type wrapperFunc func(rt http.RoundTripper) (http.RoundTripper, error) type requestClient struct { - request requestFunc + host string client http.Client lastUsed time.Time lastErr error lastErrOcc time.Time } -// Client wraps an http.Client and takes care of making the raw calls, the client should -// stay simple and 
specificals should be implemented in external action instead of adding new methods -// to the client. For authenticated calls or sending fields on every request, create customer RoundTripper -// implementations that will take care of the boiler plates. +// Client wraps a http.Client and takes care of making the raw calls, the client should +// stay simple and specifics should be implemented in external action instead of adding new methods +// to the client. For authenticated calls or sending fields on every request, create a custom RoundTripper +// implementation that will take care of the boilerplate. type Client struct { - log *logger.Logger - lock sync.Mutex - clients []*requestClient - config Config + log *logger.Logger + clientLock sync.Mutex + clients []*requestClient + config Config } // NewConfigFromURL returns a Config based on a received host. func NewConfigFromURL(URL string) (Config, error) { u, err := url.Parse(URL) if err != nil { - return Config{}, errors.Wrap(err, "could not parse url") + return Config{}, fmt.Errorf("could not parse url: %w", err) } c := DefaultClientConfig() @@ -76,7 +78,7 @@ func NewWithRawConfig(log *logger.Logger, config *config.Config, wrapper wrapper cfg := Config{} if err := config.Unpack(&cfg); err != nil { - return nil, errors.Wrap(err, "invalidate configuration") + return nil, fmt.Errorf("invalidate configuration: %w", err) } return NewWithConfig(l, cfg, wrapper) @@ -97,11 +99,14 @@ func NewWithConfig(log *logger.Logger, cfg Config, wrapper wrapperFunc) (*Client } hosts := cfg.GetHosts() - clients := make([]*requestClient, len(hosts)) - for i, host := range cfg.GetHosts() { - connStr, err := urlutil.MakeURL(string(cfg.Protocol), p, host, 0) + hostCount := len(hosts) + log.With("hosts", hosts).Debugf( + "creating remote client with %d hosts", hostCount) + clients := make([]*requestClient, hostCount) + for i, host := range hosts { + baseURL, err := urlutil.MakeURL(string(cfg.Protocol), p, host, 0) if err != nil { - return 
nil, errors.Wrap(err, "invalid fleet-server endpoint") + return nil, fmt.Errorf("invalid fleet-server endpoint: %w", err) } transport, err := cfg.Transport.RoundTripper( @@ -115,7 +120,7 @@ func NewWithConfig(log *logger.Logger, cfg Config, wrapper wrapperFunc) (*Client if wrapper != nil { transport, err = wrapper(transport) if err != nil { - return nil, errors.Wrap(err, "fail to create transport client") + return nil, fmt.Errorf("fail to create transport client: %w", err) } } @@ -125,17 +130,17 @@ func NewWithConfig(log *logger.Logger, cfg Config, wrapper wrapperFunc) (*Client } clients[i] = &requestClient{ - request: prefixRequestFactory(connStr), - client: httpClient, + host: baseURL, + client: httpClient, } } - return new(log, cfg, clients...) + return newClient(log, cfg, clients...) } -// Send executes a direct calls against the API, the method will takes cares of cloning -// also add necessary headers for likes: "Content-Type", "Accept", and "kbn-xsrf". -// No assumptions is done on the response concerning the received format, this will be the responsibility +// Send executes a direct calls against the API, the method will take care of cloning and +// also adding the necessary headers likes: "Content-Type", "Accept", and "kbn-xsrf". +// No assumptions are done on the response concerning the received format, this will be the responsibility // of the implementation to correctly unpack any received data. // // NOTE: @@ -155,45 +160,62 @@ func (c *Client) Send( } c.log.Debugf("Request method: %s, path: %s, reqID: %s", method, path, reqID) - c.lock.Lock() - defer c.lock.Unlock() - requester := c.nextRequester() + c.clientLock.Lock() + defer c.clientLock.Unlock() - req, err := requester.request(method, path, params, body) - if err != nil { - return nil, errors.Wrapf(err, "fail to create HTTP request using method %s to %s", method, path) - } + var resp *http.Response + var multiErr error - // Add generals headers to the request, we are dealing exclusively with JSON. 
- // Content-Type / Accepted type can be override from the called. - req.Header.Set("Content-Type", "application/json") - req.Header.Add("Accept", "application/json") - // This header should be specific to fleet-server or remove it - req.Header.Set("kbn-xsrf", "1") // Without this Kibana will refuse to answer the request. + c.sortClients() + for i, requester := range c.clients { + req, err := requester.newRequest(method, path, params, body) + if err != nil { + return nil, fmt.Errorf( + "fail to create HTTP request using method %s to %s: %w", + method, path, err) + } - // If available, add the request id as an HTTP header - if reqID != "" { - req.Header.Add("X-Request-ID", reqID) - } + // Add generals headers to the request, we are dealing exclusively with JSON. + // Content-Type / Accepted type can be overridden by the caller. + req.Header.Set("Content-Type", "application/json") + req.Header.Add("Accept", "application/json") + // This header should be specific to fleet-server or remove it + req.Header.Set("kbn-xsrf", "1") // Without this Kibana will refuse to answer the request. - // copy headers. - for header, values := range headers { - for _, v := range values { - req.Header.Add(header, v) + // If available, add the request id as an HTTP header + if reqID != "" { + req.Header.Add("X-Request-ID", reqID) } - } - requester.lastUsed = time.Now().UTC() + // copy headers. + for header, values := range headers { + for _, v := range values { + req.Header.Add(header, v) + } + } + + requester.lastUsed = time.Now().UTC() + + resp, err = requester.client.Do(req.WithContext(ctx)) + if err != nil { + requester.lastErr = err + requester.lastErrOcc = time.Now().UTC() + + msg := fmt.Sprintf("requester %d/%d to host %s errored", + i, len(c.clients), requester.host) + multiErr = multierror.Append(multiErr, fmt.Errorf("%s: %w", msg, err)) + + // Using debug level as the error is only relevant if all clients fail. 
+ c.log.With("error", err).Debugf(msg) + continue + } - resp, err := requester.client.Do(req.WithContext(ctx)) - if err != nil { - requester.lastErr = err - requester.lastErrOcc = time.Now().UTC() - } else { requester.lastErr = nil requester.lastErrOcc = time.Time{} + return resp, nil } - return resp, err + + return nil, fmt.Errorf("all hosts failed: %w", multiErr) } // URI returns the remote URI. @@ -202,67 +224,78 @@ func (c *Client) URI() string { return string(c.config.Protocol) + "://" + host + "/" + c.config.Path } -// new creates new API client. -func new( +// newClient creates a new API client. +func newClient( log *logger.Logger, cfg Config, - httpClients ...*requestClient, + clients ...*requestClient, ) (*Client, error) { + // Shuffle so all the agents don't access the hosts in the same order + rand.Shuffle(len(clients), func(i, j int) { + clients[i], clients[j] = clients[j], clients[i] + }) + c := &Client{ log: log, - clients: httpClients, + clients: clients, config: cfg, } return c, nil } -// nextRequester returns the requester to use. -// -// It excludes clients that have errored in the last 5 minutes. -func (c *Client) nextRequester() *requestClient { - var selected *requestClient - +// sortClients sort the clients according to the following priority: +// - never used +// - without errors, last used first when more than one does not have errors +// - last errored. +// It also removes the last error after retryOnBadConnTimeout has elapsed. 
+func (c *Client) sortClients() { now := time.Now().UTC() - for _, requester := range c.clients { - if requester.lastErr != nil && now.Sub(requester.lastErrOcc) > retryOnBadConnTimeout { - requester.lastErr = nil - requester.lastErrOcc = time.Time{} + + sort.Slice(c.clients, func(i, j int) bool { + // First, set them good if the timout has elapsed + if c.clients[i].lastErr != nil && + now.Sub(c.clients[i].lastErrOcc) > retryOnBadConnTimeout { + c.clients[i].lastErr = nil + c.clients[i].lastErrOcc = time.Time{} } - if requester.lastErr != nil { - continue + if c.clients[j].lastErr != nil && + now.Sub(c.clients[j].lastErrOcc) > retryOnBadConnTimeout { + c.clients[j].lastErr = nil + c.clients[j].lastErrOcc = time.Time{} } - if requester.lastUsed.IsZero() { - // never been used, instant winner! - selected = requester - break + + // Pick not yet used first, but if both haven't been used yet, + // we return false to comply with the sort.Interface definition. + if c.clients[i].lastUsed.IsZero() && + c.clients[j].lastUsed.IsZero() { + return false } - if selected == nil { - selected = requester - continue + + // Pick not yet used first + if c.clients[i].lastUsed.IsZero() { + return true } - if requester.lastUsed.Before(selected.lastUsed) { - selected = requester + + // If none has errors, pick the last used + // Then, the one without errors + if c.clients[i].lastErr == nil && + c.clients[j].lastErr == nil { + return c.clients[i].lastUsed.Before(c.clients[j].lastUsed) } - } - if selected == nil { - // all are erroring; select the oldest one that errored - for _, requester := range c.clients { - if selected == nil { - selected = requester - continue - } - if requester.lastErrOcc.Before(selected.lastErrOcc) { - selected = requester - } + + // Then, the one without error + if c.clients[i].lastErr == nil { + return true } - } - return selected + + // Lastly, the one that errored last + return c.clients[i].lastUsed.Before(c.clients[j].lastUsed) + }) } -func 
prefixRequestFactory(URL string) requestFunc { - return func(method, path string, params url.Values, body io.Reader) (*http.Request, error) { - path = strings.TrimPrefix(path, "/") - newPath := strings.Join([]string{URL, path, "?", params.Encode()}, "") - return http.NewRequest(method, newPath, body) //nolint:noctx // keep old behaviour - } +func (r requestClient) newRequest(method string, path string, params url.Values, body io.Reader) (*http.Request, error) { + path = strings.TrimPrefix(path, "/") + newPath := strings.Join([]string{r.host, path, "?", params.Encode()}, "") + + return http.NewRequest(method, newPath, body) } diff --git a/internal/pkg/remote/client_test.go b/internal/pkg/remote/client_test.go index 6ea546f8128..887bc9817b2 100644 --- a/internal/pkg/remote/client_test.go +++ b/internal/pkg/remote/client_test.go @@ -58,7 +58,8 @@ func TestPortDefaults(t *testing.T) { c, err := NewWithConfig(l, cfg, nil) require.NoError(t, err) - r, err := c.nextRequester().request("GET", "/", nil, strings.NewReader("")) + c.sortClients() + r, err := c.clients[0].newRequest(http.MethodGet, "/", nil, strings.NewReader("")) require.NoError(t, err) if tc.ExpectedPort > 0 { @@ -77,13 +78,13 @@ func TestHTTPClient(t *testing.T) { l, err := logger.New("", false) require.NoError(t, err) + const successResp = `{"message":"hello"}` t.Run("Guard against double slashes on path", withServer( func(t *testing.T) *http.ServeMux { - msg := `{ message: "hello" }` mux := http.NewServeMux() mux.HandleFunc("/nested/echo-hello", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) - fmt.Fprint(w, msg) + fmt.Fprint(w, successResp) }) return addCatchAll(mux, t) }, func(t *testing.T, host string) { @@ -97,23 +98,22 @@ func TestHTTPClient(t *testing.T) { client, err := NewWithConfig(l, c, noopWrapper) require.NoError(t, err) - resp, err := client.Send(ctx, "GET", "/nested/echo-hello", nil, nil, nil) + resp, err := client.Send(ctx, http.MethodGet, "/nested/echo-hello", 
nil, nil, nil) require.NoError(t, err) body, err := ioutil.ReadAll(resp.Body) require.NoError(t, err) defer resp.Body.Close() - assert.Equal(t, `{ message: "hello" }`, string(body)) + assert.Equal(t, successResp, string(body)) }, )) t.Run("Simple call", withServer( func(t *testing.T) *http.ServeMux { - msg := `{ message: "hello" }` mux := http.NewServeMux() mux.HandleFunc("/echo-hello", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) - fmt.Fprint(w, msg) + fmt.Fprint(w, successResp) }) return mux }, func(t *testing.T, host string) { @@ -123,23 +123,22 @@ func TestHTTPClient(t *testing.T) { client, err := NewWithRawConfig(nil, cfg, nil) require.NoError(t, err) - resp, err := client.Send(ctx, "GET", "/echo-hello", nil, nil, nil) + resp, err := client.Send(ctx, http.MethodGet, "/echo-hello", nil, nil, nil) require.NoError(t, err) body, err := ioutil.ReadAll(resp.Body) require.NoError(t, err) defer resp.Body.Close() - assert.Equal(t, `{ message: "hello" }`, string(body)) + assert.Equal(t, successResp, string(body)) }, )) t.Run("Simple call with a prefix path", withServer( func(t *testing.T) *http.ServeMux { - msg := `{ message: "hello" }` mux := http.NewServeMux() mux.HandleFunc("/mycustompath/echo-hello", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) - fmt.Fprint(w, msg) + fmt.Fprint(w, successResp) }) return mux }, func(t *testing.T, host string) { @@ -150,23 +149,62 @@ func TestHTTPClient(t *testing.T) { client, err := NewWithRawConfig(nil, cfg, nil) require.NoError(t, err) - resp, err := client.Send(ctx, "GET", "/echo-hello", nil, nil, nil) + resp, err := client.Send(ctx, http.MethodGet, "/echo-hello", nil, nil, nil) require.NoError(t, err) body, err := ioutil.ReadAll(resp.Body) require.NoError(t, err) defer resp.Body.Close() - assert.Equal(t, `{ message: "hello" }`, string(body)) + assert.Equal(t, successResp, string(body)) }, )) + t.Run("Tries all the hosts", withServer( + func(t *testing.T) *http.ServeMux 
{ + mux := http.NewServeMux() + mux.HandleFunc("/echo-hello", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + fmt.Fprint(w, successResp) + }) + return mux + }, func(t *testing.T, host string) { + one := &requestClient{host: "http://must.fail-1.co/"} + two := &requestClient{host: "http://must.fail-2.co/"} + three := &requestClient{host: fmt.Sprintf("http://%s/", host)} + + c := &Client{clients: []*requestClient{one, two, three}, log: l} + require.NoError(t, err) + resp, err := c.Send(ctx, http.MethodGet, "/echo-hello", nil, nil, nil) + require.NoError(t, err) + + assert.Equal(t, http.StatusOK, resp.StatusCode) + body, err := ioutil.ReadAll(resp.Body) + require.NoError(t, err) + defer resp.Body.Close() + assert.Equal(t, successResp, string(body)) + }, + )) + + t.Run("Return last error", func(t *testing.T) { + client := &Client{ + log: l, + clients: []*requestClient{ + {host: "http://must.fail-1.co/"}, + {host: "http://must.fail-2.co/"}, + {host: "http://must.fail-3.co/"}, + }} + + resp, err := client.Send(ctx, http.MethodGet, "/echo-hello", nil, nil, nil) + assert.Contains(t, err.Error(), "http://must.fail-3.co/") // error contains last host + assert.Nil(t, resp) + }) + t.Run("Custom user agent", withServer( func(t *testing.T) *http.ServeMux { - msg := `{ message: "hello" }` mux := http.NewServeMux() mux.HandleFunc("/echo-hello", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) - fmt.Fprint(w, msg) + fmt.Fprint(w, successResp) require.Equal(t, r.Header.Get("User-Agent"), "custom-agent") }) return mux @@ -180,23 +218,22 @@ func TestHTTPClient(t *testing.T) { }) require.NoError(t, err) - resp, err := client.Send(ctx, "GET", "/echo-hello", nil, nil, nil) + resp, err := client.Send(ctx, http.MethodGet, "/echo-hello", nil, nil, nil) require.NoError(t, err) body, err := ioutil.ReadAll(resp.Body) require.NoError(t, err) defer resp.Body.Close() - assert.Equal(t, `{ message: "hello" }`, string(body)) + assert.Equal(t, 
successResp, string(body)) }, )) t.Run("Allows to debug HTTP request between a client and a server", withServer( func(t *testing.T) *http.ServeMux { - msg := `{ "message": "hello" }` mux := http.NewServeMux() mux.HandleFunc("/echo-hello", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) - fmt.Fprint(w, msg) + fmt.Fprint(w, successResp) }) return mux }, func(t *testing.T, host string) { @@ -212,16 +249,16 @@ func TestHTTPClient(t *testing.T) { }) require.NoError(t, err) - resp, err := client.Send(ctx, "GET", "/echo-hello", nil, nil, bytes.NewBuffer([]byte("hello"))) + resp, err := client.Send(ctx, http.MethodGet, "/echo-hello", nil, nil, bytes.NewBuffer([]byte("hello"))) require.NoError(t, err) body, err := ioutil.ReadAll(resp.Body) require.NoError(t, err) defer resp.Body.Close() - assert.Equal(t, `{ "message": "hello" }`, string(body)) + assert.Equal(t, successResp, string(body)) for _, m := range debugger.messages { - fmt.Println(m) + fmt.Println(m) //nolint:forbidigo // printing debug messages on a test. 
} assert.Equal(t, 1, len(debugger.messages)) @@ -230,11 +267,10 @@ func TestHTTPClient(t *testing.T) { t.Run("RequestId", withServer( func(t *testing.T) *http.ServeMux { - msg := `{ message: "hello" }` mux := http.NewServeMux() mux.HandleFunc("/echo-hello", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) - fmt.Fprint(w, msg) + fmt.Fprint(w, successResp) require.NotEmpty(t, r.Header.Get("X-Request-ID")) }) return mux @@ -245,48 +281,58 @@ func TestHTTPClient(t *testing.T) { client, err := NewWithRawConfig(nil, cfg, nil) require.NoError(t, err) - resp, err := client.Send(ctx, "GET", "/echo-hello", nil, nil, nil) + resp, err := client.Send(ctx, http.MethodGet, "/echo-hello", nil, nil, nil) require.NoError(t, err) body, err := ioutil.ReadAll(resp.Body) require.NoError(t, err) defer resp.Body.Close() - assert.Equal(t, `{ message: "hello" }`, string(body)) + assert.Equal(t, successResp, string(body)) }, )) } -func TestNextRequester(t *testing.T) { +func TestSortClients(t *testing.T) { t.Run("Picks first requester on initial call", func(t *testing.T) { one := &requestClient{} two := &requestClient{} - client, err := new(nil, Config{}, one, two) + client, err := newClient(nil, Config{}, one, two) require.NoError(t, err) - assert.Equal(t, one, client.nextRequester()) + + client.sortClients() + + assert.Equal(t, one, client.clients[0]) }) t.Run("Picks second requester when first has error", func(t *testing.T) { one := &requestClient{ + lastUsed: time.Now().UTC(), lastErr: fmt.Errorf("fake error"), lastErrOcc: time.Now().UTC(), } two := &requestClient{} - client, err := new(nil, Config{}, one, two) + client, err := newClient(nil, Config{}, one, two) require.NoError(t, err) - assert.Equal(t, two, client.nextRequester()) + + client.sortClients() + + assert.Equal(t, two, client.clients[0]) }) - t.Run("Picks second requester when first has used", func(t *testing.T) { + t.Run("Picks second requester when first has been used", func(t *testing.T) { one := 
&requestClient{ lastUsed: time.Now().UTC(), } two := &requestClient{} - client, err := new(nil, Config{}, one, two) + client, err := newClient(nil, Config{}, one, two) require.NoError(t, err) - assert.Equal(t, two, client.nextRequester()) + + client.sortClients() + + assert.Equal(t, two, client.clients[0]) }) - t.Run("Picks second requester when its oldest", func(t *testing.T) { + t.Run("Picks second requester when it's the oldest", func(t *testing.T) { one := &requestClient{ lastUsed: time.Now().UTC().Add(-time.Minute), } @@ -296,12 +342,15 @@ func TestNextRequester(t *testing.T) { three := &requestClient{ lastUsed: time.Now().UTC().Add(-2 * time.Minute), } - client, err := new(nil, Config{}, one, two, three) + client, err := newClient(nil, Config{}, one, two, three) require.NoError(t, err) - assert.Equal(t, two, client.nextRequester()) + + client.sortClients() + + assert.Equal(t, two, client.clients[0]) }) - t.Run("Picks third requester when its second has error and first is last used", func(t *testing.T) { + t.Run("Picks third requester when second has error and first is last used", func(t *testing.T) { one := &requestClient{ lastUsed: time.Now().UTC().Add(-time.Minute), } @@ -313,9 +362,11 @@ func TestNextRequester(t *testing.T) { three := &requestClient{ lastUsed: time.Now().UTC().Add(-2 * time.Minute), } - client, err := new(nil, Config{}, one, two, three) - require.NoError(t, err) - assert.Equal(t, three, client.nextRequester()) + client := &Client{clients: []*requestClient{one, two, three}} + + client.sortClients() + + assert.Equal(t, three, client.clients[0]) }) t.Run("Picks second requester when its oldest and all have old errors", func(t *testing.T) { @@ -334,9 +385,12 @@ func TestNextRequester(t *testing.T) { lastErr: fmt.Errorf("fake error"), lastErrOcc: time.Now().Add(-2 * time.Minute), } - client, err := new(nil, Config{}, one, two, three) + client, err := newClient(nil, Config{}, one, two, three) require.NoError(t, err) - assert.Equal(t, two, 
client.nextRequester()) + + client.sortClients() + + assert.Equal(t, two, client.clients[0]) }) } From 3bb1244421b6a23da81a0a8cd6e6693415001283 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Fri, 21 Oct 2022 01:35:34 -0400 Subject: [PATCH 44/63] [Automation] Update elastic stack version to 8.6.0-baf193e8 for testing (#1579) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 29a6c0453ed..55700ee503c 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-6545f2df-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-baf193e8-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-6545f2df-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-baf193e8-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From b21324e0621152e53658e84c645ce5fb3dc677a1 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Mon, 24 Oct 2022 01:41:18 -0400 Subject: [PATCH 45/63] [Automation] Update elastic stack version to 8.6.0-22d60ec9 for testing (#1587) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 55700ee503c..7f5971c7a24 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: 
docker.elastic.co/elasticsearch/elasticsearch:8.6.0-baf193e8-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-22d60ec9-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-baf193e8-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-22d60ec9-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From e6365919be505f5562f483dadeb8d8a7ad92ab83 Mon Sep 17 00:00:00 2001 From: Elastic Machine Date: Mon, 24 Oct 2022 08:16:36 -0400 Subject: [PATCH 46/63] [automation] Publish kubernetes templates for elastic-agent (#1594) Co-authored-by: apmmachine --- .../templates.d/activemq.yml | 24 +++---- .../templates.d/apache.yml | 4 +- .../templates.d/cassandra.yml | 8 +-- .../templates.d/cockroachdb.yml | 6 +- .../templates.d/elasticsearch.yml | 22 +++---- .../templates.d/endpoint.yml | 2 +- .../templates.d/haproxy.yml | 64 +++++++++--------- .../templates.d/hashicorp_vault.yml | 4 +- .../templates.d/hid_bravura_monitor.yml | 22 +++---- .../templates.d/iis.yml | 8 ++- .../templates.d/iptables.yml | 26 ++++---- .../templates.d/kafka.yml | 10 +-- .../templates.d/kibana.yml | 29 ++------ .../templates.d/logstash.yml | 8 +-- .../templates.d/microsoft_sqlserver.yml | 32 ++++----- .../templates.d/mimecast.yml | 2 +- .../templates.d/mongodb.yml | 20 +++--- .../templates.d/mysql.yml | 23 ++++--- .../templates.d/nats.yml | 24 +++---- .../templates.d/netflow.yml | 2 +- .../templates.d/nginx.yml | 30 ++++----- .../templates.d/oracle.yml | 49 ++++++++++++-- .../templates.d/panw.yml | 66 +++++++++---------- .../templates.d/panw_cortex_xdr.yml | 2 +- .../templates.d/pfsense.yml | 2 +- .../templates.d/postgresql.yml | 16 ++--- .../templates.d/prometheus.yml | 21 +++--- .../templates.d/qnap_nas.yml | 40 +++++------ 
.../templates.d/rabbitmq.yml | 32 ++++----- .../templates.d/redis.yml | 22 +++---- .../templates.d/security_detection_engine.yml | 2 +- .../templates.d/sentinel_one.yml | 2 +- .../templates.d/snyk.yml | 42 ++++++------ .../templates.d/stan.yml | 12 ++-- .../templates.d/symantec_endpoint.yml | 38 +++++------ .../templates.d/synthetics.yml | 46 ++++++------- .../templates.d/tcp.yml | 22 +++---- .../templates.d/traefik.yml | 4 +- .../templates.d/udp.yml | 24 +++---- .../templates.d/zookeeper.yml | 14 ++-- 40 files changed, 425 insertions(+), 401 deletions(-) diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/activemq.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/activemq.yml index 007060a5ac0..cbdaf66311f 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/activemq.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/activemq.yml @@ -53,44 +53,44 @@ inputs: dataset: activemq.broker type: metrics hosts: - - ${kubernetes.hints.activemq.broker.host|'localhost:8161'} + - ${kubernetes.hints.activemq.broker.host|kubernetes.hints.activemq.host|'localhost:8161'} metricsets: - broker - password: ${kubernetes.hints.activemq.broker.password|'admin'} + password: ${kubernetes.hints.activemq.broker.password|kubernetes.hints.activemq.password|'admin'} path: /api/jolokia/?ignoreErrors=true&canonicalNaming=false - period: ${kubernetes.hints.activemq.broker.period|'10s'} + period: ${kubernetes.hints.activemq.broker.period|kubernetes.hints.activemq.period|'10s'} tags: - forwarded - activemq-broker - username: ${kubernetes.hints.activemq.broker.username|'admin'} + username: ${kubernetes.hints.activemq.broker.username|kubernetes.hints.activemq.username|'admin'} - condition: ${kubernetes.hints.activemq.queue.enabled} == true or ${kubernetes.hints.activemq.enabled} == true data_stream: dataset: activemq.queue type: metrics hosts: - - ${kubernetes.hints.activemq.queue.host|'localhost:8161'} + - 
${kubernetes.hints.activemq.queue.host|kubernetes.hints.activemq.host|'localhost:8161'} metricsets: - queue - password: ${kubernetes.hints.activemq.queue.password|'admin'} + password: ${kubernetes.hints.activemq.queue.password|kubernetes.hints.activemq.password|'admin'} path: /api/jolokia/?ignoreErrors=true&canonicalNaming=false - period: ${kubernetes.hints.activemq.queue.period|'10s'} + period: ${kubernetes.hints.activemq.queue.period|kubernetes.hints.activemq.period|'10s'} tags: - forwarded - activemq-queue - username: ${kubernetes.hints.activemq.queue.username|'admin'} + username: ${kubernetes.hints.activemq.queue.username|kubernetes.hints.activemq.username|'admin'} - condition: ${kubernetes.hints.activemq.topic.enabled} == true or ${kubernetes.hints.activemq.enabled} == true data_stream: dataset: activemq.topic type: metrics hosts: - - ${kubernetes.hints.activemq.topic.host|'localhost:8161'} + - ${kubernetes.hints.activemq.topic.host|kubernetes.hints.activemq.host|'localhost:8161'} metricsets: - topic - password: ${kubernetes.hints.activemq.topic.password|'admin'} + password: ${kubernetes.hints.activemq.topic.password|kubernetes.hints.activemq.password|'admin'} path: /api/jolokia/?ignoreErrors=true&canonicalNaming=false - period: ${kubernetes.hints.activemq.topic.period|'10s'} + period: ${kubernetes.hints.activemq.topic.period|kubernetes.hints.activemq.period|'10s'} tags: - forwarded - activemq-topic - username: ${kubernetes.hints.activemq.topic.username|'admin'} + username: ${kubernetes.hints.activemq.topic.username|kubernetes.hints.activemq.username|'admin'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/apache.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/apache.yml index a6e461a5363..8dcb71c6434 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/apache.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/apache.yml @@ -126,9 +126,9 @@ inputs: dataset: 
apache.status type: metrics hosts: - - ${kubernetes.hints.apache.status.host|'http://127.0.0.1'} + - ${kubernetes.hints.apache.status.host|kubernetes.hints.apache.host|'http://127.0.0.1'} metricsets: - status - period: ${kubernetes.hints.apache.status.period|'30s'} + period: ${kubernetes.hints.apache.status.period|kubernetes.hints.apache.period|'30s'} server_status_path: /server-status data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/cassandra.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/cassandra.yml index bce4edf635c..aafef542628 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/cassandra.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/cassandra.yml @@ -35,7 +35,7 @@ inputs: dataset: cassandra.metrics type: metrics hosts: - - ${kubernetes.hints.cassandra.metrics.host|'localhost:8778'} + - ${kubernetes.hints.cassandra.metrics.host|kubernetes.hints.cassandra.host|'localhost:8778'} jmx.mappings: - attributes: - attr: ReleaseVersion @@ -320,8 +320,8 @@ inputs: metricsets: - jmx namespace: metrics - password: ${kubernetes.hints.cassandra.metrics.password|'admin'} + password: ${kubernetes.hints.cassandra.metrics.password|kubernetes.hints.cassandra.password|'admin'} path: /jolokia/?ignoreErrors=true&canonicalNaming=false - period: ${kubernetes.hints.cassandra.metrics.period|'10s'} - username: ${kubernetes.hints.cassandra.metrics.username|'admin'} + period: ${kubernetes.hints.cassandra.metrics.period|kubernetes.hints.cassandra.period|'10s'} + username: ${kubernetes.hints.cassandra.metrics.username|kubernetes.hints.cassandra.username|'admin'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/cockroachdb.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/cockroachdb.yml index 3e55b02794d..ef637384ddc 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/cockroachdb.yml +++ 
b/deploy/kubernetes/elastic-agent-standalone/templates.d/cockroachdb.yml @@ -9,14 +9,14 @@ inputs: dataset: cockroachdb.status type: metrics hosts: - - ${kubernetes.hints.cockroachdb.status.host|'localhost:8080'} + - ${kubernetes.hints.cockroachdb.status.host|kubernetes.hints.cockroachdb.host|'localhost:8080'} metrics_filters.exclude: null metrics_filters.include: null metrics_path: /_status/vars metricsets: - collector password: null - period: ${kubernetes.hints.cockroachdb.status.period|'10s'} + period: ${kubernetes.hints.cockroachdb.status.period|kubernetes.hints.cockroachdb.period|'10s'} ssl.certificate_authorities: null use_types: true username: null @@ -27,7 +27,7 @@ inputs: streams: - condition: ${kubernetes.hints.cockroachdb.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: cockroachdb.container_logs type: logs exclude_files: [] exclude_lines: [] diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/elasticsearch.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/elasticsearch.yml index 49503b63346..82060c4d961 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/elasticsearch.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/elasticsearch.yml @@ -180,7 +180,7 @@ inputs: dataset: elasticsearch.stack_monitoring.ccr type: metrics hosts: - - ${kubernetes.hints.elasticsearch.ccr.host|'http://localhost:9200'} + - ${kubernetes.hints.elasticsearch.ccr.host|kubernetes.hints.elasticsearch.host|'http://localhost:9200'} metricsets: - ccr period: null @@ -190,7 +190,7 @@ inputs: dataset: elasticsearch.stack_monitoring.cluster_stats type: metrics hosts: - - ${kubernetes.hints.elasticsearch.cluster_stats.host|'http://localhost:9200'} + - ${kubernetes.hints.elasticsearch.cluster_stats.host|kubernetes.hints.elasticsearch.host|'http://localhost:9200'} metricsets: - cluster_stats period: null @@ -200,7 +200,7 @@ inputs: dataset: elasticsearch.stack_monitoring.enrich type: metrics 
hosts: - - ${kubernetes.hints.elasticsearch.enrich.host|'http://localhost:9200'} + - ${kubernetes.hints.elasticsearch.enrich.host|kubernetes.hints.elasticsearch.host|'http://localhost:9200'} metricsets: - enrich period: null @@ -210,7 +210,7 @@ inputs: dataset: elasticsearch.stack_monitoring.index type: metrics hosts: - - ${kubernetes.hints.elasticsearch.index.host|'http://localhost:9200'} + - ${kubernetes.hints.elasticsearch.index.host|kubernetes.hints.elasticsearch.host|'http://localhost:9200'} metricsets: - index period: null @@ -220,7 +220,7 @@ inputs: dataset: elasticsearch.stack_monitoring.index_recovery type: metrics hosts: - - ${kubernetes.hints.elasticsearch.index_recovery.host|'http://localhost:9200'} + - ${kubernetes.hints.elasticsearch.index_recovery.host|kubernetes.hints.elasticsearch.host|'http://localhost:9200'} metricsets: - index_recovery period: null @@ -230,7 +230,7 @@ inputs: dataset: elasticsearch.stack_monitoring.index_summary type: metrics hosts: - - ${kubernetes.hints.elasticsearch.index_summary.host|'http://localhost:9200'} + - ${kubernetes.hints.elasticsearch.index_summary.host|kubernetes.hints.elasticsearch.host|'http://localhost:9200'} metricsets: - index_summary period: null @@ -240,7 +240,7 @@ inputs: dataset: elasticsearch.stack_monitoring.ml_job type: metrics hosts: - - ${kubernetes.hints.elasticsearch.ml_job.host|'http://localhost:9200'} + - ${kubernetes.hints.elasticsearch.ml_job.host|kubernetes.hints.elasticsearch.host|'http://localhost:9200'} metricsets: - ml_job period: null @@ -250,7 +250,7 @@ inputs: dataset: elasticsearch.stack_monitoring.node type: metrics hosts: - - ${kubernetes.hints.elasticsearch.node.host|'http://localhost:9200'} + - ${kubernetes.hints.elasticsearch.node.host|kubernetes.hints.elasticsearch.host|'http://localhost:9200'} metricsets: - node period: null @@ -260,7 +260,7 @@ inputs: dataset: elasticsearch.stack_monitoring.node_stats type: metrics hosts: - - 
${kubernetes.hints.elasticsearch.node_stats.host|'http://localhost:9200'} + - ${kubernetes.hints.elasticsearch.node_stats.host|kubernetes.hints.elasticsearch.host|'http://localhost:9200'} metricsets: - node_stats period: null @@ -270,7 +270,7 @@ inputs: dataset: elasticsearch.stack_monitoring.pending_tasks type: metrics hosts: - - ${kubernetes.hints.elasticsearch.pending_tasks.host|'http://localhost:9200'} + - ${kubernetes.hints.elasticsearch.pending_tasks.host|kubernetes.hints.elasticsearch.host|'http://localhost:9200'} metricsets: - pending_tasks period: null @@ -280,7 +280,7 @@ inputs: dataset: elasticsearch.stack_monitoring.shard type: metrics hosts: - - ${kubernetes.hints.elasticsearch.shard.host|'http://localhost:9200'} + - ${kubernetes.hints.elasticsearch.shard.host|kubernetes.hints.elasticsearch.host|'http://localhost:9200'} metricsets: - shard period: null diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/endpoint.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/endpoint.yml index 178a6098f99..81e0684ae0c 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/endpoint.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/endpoint.yml @@ -5,7 +5,7 @@ inputs: streams: - condition: ${kubernetes.hints.endpoint.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: endpoint.container_logs type: logs exclude_files: [] exclude_lines: [] diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/haproxy.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/haproxy.yml index cff5d5821aa..265a6c17863 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/haproxy.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/haproxy.yml @@ -1,4 +1,28 @@ inputs: + - name: filestream-haproxy + type: filestream + use_output: default + streams: + - condition: ${kubernetes.hints.haproxy.log.enabled} == true or ${kubernetes.hints.haproxy.enabled} == 
true + data_stream: + dataset: haproxy.log + type: logs + exclude_files: + - .gz$ + parsers: + - container: + format: auto + stream: ${kubernetes.hints.haproxy.log.stream|'all'} + paths: + - /var/log/containers/*${kubernetes.hints.container_id}.log + processors: + - add_locale: null + prospector: + scanner: + symlinks: true + tags: + - haproxy-log + data_stream.namespace: default - name: syslog-haproxy type: syslog use_output: default @@ -24,45 +48,21 @@ inputs: dataset: haproxy.info type: metrics hosts: - - ${kubernetes.hints.haproxy.info.host|'tcp://127.0.0.1:14567'} + - ${kubernetes.hints.haproxy.info.host|kubernetes.hints.haproxy.host|'tcp://127.0.0.1:14567'} metricsets: - info - password: ${kubernetes.hints.haproxy.info.password|'admin'} - period: ${kubernetes.hints.haproxy.info.period|'10s'} - username: ${kubernetes.hints.haproxy.info.username|'admin'} + password: ${kubernetes.hints.haproxy.info.password|kubernetes.hints.haproxy.password|'admin'} + period: ${kubernetes.hints.haproxy.info.period|kubernetes.hints.haproxy.period|'10s'} + username: ${kubernetes.hints.haproxy.info.username|kubernetes.hints.haproxy.username|'admin'} - condition: ${kubernetes.hints.haproxy.stat.enabled} == true or ${kubernetes.hints.haproxy.enabled} == true data_stream: dataset: haproxy.stat type: metrics hosts: - - ${kubernetes.hints.haproxy.stat.host|'tcp://127.0.0.1:14567'} + - ${kubernetes.hints.haproxy.stat.host|kubernetes.hints.haproxy.host|'tcp://127.0.0.1:14567'} metricsets: - stat - password: ${kubernetes.hints.haproxy.stat.password|'admin'} - period: ${kubernetes.hints.haproxy.stat.period|'10s'} - username: ${kubernetes.hints.haproxy.stat.username|'admin'} - data_stream.namespace: default - - name: filestream-haproxy - type: filestream - use_output: default - streams: - - condition: ${kubernetes.hints.haproxy.log.enabled} == true or ${kubernetes.hints.haproxy.enabled} == true - data_stream: - dataset: haproxy.log - type: logs - exclude_files: - - .gz$ - parsers: - - 
container: - format: auto - stream: ${kubernetes.hints.haproxy.log.stream|'all'} - paths: - - /var/log/containers/*${kubernetes.hints.container_id}.log - processors: - - add_locale: null - prospector: - scanner: - symlinks: true - tags: - - haproxy-log + password: ${kubernetes.hints.haproxy.stat.password|kubernetes.hints.haproxy.password|'admin'} + period: ${kubernetes.hints.haproxy.stat.period|kubernetes.hints.haproxy.period|'10s'} + username: ${kubernetes.hints.haproxy.stat.username|kubernetes.hints.haproxy.username|'admin'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/hashicorp_vault.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/hashicorp_vault.yml index 19892110b74..28bfd77da77 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/hashicorp_vault.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/hashicorp_vault.yml @@ -61,11 +61,11 @@ inputs: dataset: hashicorp_vault.metrics type: metrics hosts: - - ${kubernetes.hints.hashicorp_vault.metrics.host|'http://localhost:8200'} + - ${kubernetes.hints.hashicorp_vault.metrics.host|kubernetes.hints.hashicorp_vault.host|'http://localhost:8200'} metrics_path: /v1/sys/metrics metricsets: - collector - period: ${kubernetes.hints.hashicorp_vault.metrics.period|'30s'} + period: ${kubernetes.hints.hashicorp_vault.metrics.period|kubernetes.hints.hashicorp_vault.period|'30s'} query: format: prometheus rate_counters: true diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/hid_bravura_monitor.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/hid_bravura_monitor.yml index 28d8f782d69..4b8faa04e10 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/hid_bravura_monitor.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/hid_bravura_monitor.yml @@ -1,4 +1,15 @@ inputs: + - name: winlog-hid_bravura_monitor + type: winlog + use_output: default + streams: + - condition: 
${kubernetes.hints.hid_bravura_monitor.winlog.enabled} == true or ${kubernetes.hints.hid_bravura_monitor.enabled} == true + data_stream: + dataset: hid_bravura_monitor.winlog + type: logs + name: Hitachi-Hitachi ID Systems-Hitachi ID Suite/Operational + tags: null + data_stream.namespace: default - name: filestream-hid_bravura_monitor type: filestream use_output: default @@ -29,14 +40,3 @@ inputs: - .gz$ tags: null data_stream.namespace: default - - name: winlog-hid_bravura_monitor - type: winlog - use_output: default - streams: - - condition: ${kubernetes.hints.hid_bravura_monitor.winlog.enabled} == true or ${kubernetes.hints.hid_bravura_monitor.enabled} == true - data_stream: - dataset: hid_bravura_monitor.winlog - type: logs - name: Hitachi-Hitachi ID Systems-Hitachi ID Suite/Operational - tags: null - data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/iis.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/iis.yml index 44162f4ac6b..8ff2f64baf7 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/iis.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/iis.yml @@ -11,6 +11,7 @@ inputs: - .gz$ exclude_lines: - ^# + ignore_older: 72h parsers: - container: format: auto @@ -30,6 +31,7 @@ inputs: - .gz$ exclude_lines: - ^# + ignore_older: 72h parsers: - container: format: auto @@ -53,19 +55,19 @@ inputs: type: metrics metricsets: - application_pool - period: ${kubernetes.hints.iis.application_pool.period|'10s'} + period: ${kubernetes.hints.iis.application_pool.period|kubernetes.hints.iis.period|'10s'} - condition: ${kubernetes.hints.iis.webserver.enabled} == true or ${kubernetes.hints.iis.enabled} == true data_stream: dataset: iis.webserver type: metrics metricsets: - webserver - period: ${kubernetes.hints.iis.webserver.period|'10s'} + period: ${kubernetes.hints.iis.webserver.period|kubernetes.hints.iis.period|'10s'} - condition: ${kubernetes.hints.iis.website.enabled} == true or 
${kubernetes.hints.iis.enabled} == true data_stream: dataset: iis.website type: metrics metricsets: - website - period: ${kubernetes.hints.iis.website.period|'10s'} + period: ${kubernetes.hints.iis.website.period|kubernetes.hints.iis.period|'10s'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/iptables.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/iptables.yml index 02d1d8330d3..25f38c5bf85 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/iptables.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/iptables.yml @@ -1,4 +1,17 @@ inputs: + - name: journald-iptables + type: journald + use_output: default + streams: + - condition: ${kubernetes.hints.iptables.log.enabled} == true or ${kubernetes.hints.iptables.enabled} == true + data_stream: + dataset: iptables.log + type: logs + include_matches: + - _TRANSPORT=kernel + tags: + - iptables-log + data_stream.namespace: default - name: udp-iptables type: udp use_output: default @@ -39,16 +52,3 @@ inputs: - iptables-log - forwarded data_stream.namespace: default - - name: journald-iptables - type: journald - use_output: default - streams: - - condition: ${kubernetes.hints.iptables.log.enabled} == true or ${kubernetes.hints.iptables.enabled} == true - data_stream: - dataset: iptables.log - type: logs - include_matches: - - _TRANSPORT=kernel - tags: - - iptables-log - data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/kafka.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/kafka.yml index b79eebbcfb0..f8e3de9e81a 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/kafka.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/kafka.yml @@ -39,23 +39,23 @@ inputs: - localhost:8778 metricsets: - broker - period: ${kubernetes.hints.kafka.broker.period|'10s'} + period: ${kubernetes.hints.kafka.broker.period|kubernetes.hints.kafka.period|'10s'} - 
condition: ${kubernetes.hints.kafka.consumergroup.enabled} == true or ${kubernetes.hints.kafka.enabled} == true data_stream: dataset: kafka.consumergroup type: metrics hosts: - - ${kubernetes.hints.kafka.consumergroup.host|'localhost:9092'} + - ${kubernetes.hints.kafka.consumergroup.host|kubernetes.hints.kafka.host|'localhost:9092'} metricsets: - consumergroup - period: ${kubernetes.hints.kafka.consumergroup.period|'10s'} + period: ${kubernetes.hints.kafka.consumergroup.period|kubernetes.hints.kafka.period|'10s'} - condition: ${kubernetes.hints.kafka.partition.enabled} == true or ${kubernetes.hints.kafka.enabled} == true data_stream: dataset: kafka.partition type: metrics hosts: - - ${kubernetes.hints.kafka.partition.host|'localhost:9092'} + - ${kubernetes.hints.kafka.partition.host|kubernetes.hints.kafka.host|'localhost:9092'} metricsets: - partition - period: ${kubernetes.hints.kafka.partition.period|'10s'} + period: ${kubernetes.hints.kafka.partition.period|kubernetes.hints.kafka.period|'10s'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/kibana.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/kibana.yml index 1c27b4830ab..78ab5f35128 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/kibana.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/kibana.yml @@ -15,16 +15,6 @@ inputs: stream: ${kubernetes.hints.kibana.audit.stream|'all'} paths: - /var/log/containers/*${kubernetes.hints.container_id}.log - processors: - - add_locale: null - - add_fields: - fields: - ecs.version: 1.10.0 - target: "" - - decode_json_fields: - fields: - - message - target: kibana._audit_temp prospector: scanner: symlinks: true @@ -34,19 +24,12 @@ inputs: type: logs exclude_files: - .gz$ - json.add_error_key: true - json.keys_under_root: false parsers: - container: format: auto stream: ${kubernetes.hints.kibana.log.stream|'all'} paths: - /var/log/containers/*${kubernetes.hints.container_id}.log 
- processors: - - add_fields: - fields: - ecs.version: 1.10.0 - target: "" prospector: scanner: symlinks: true @@ -60,7 +43,7 @@ inputs: dataset: kibana.stack_monitoring.cluster_actions type: metrics hosts: - - ${kubernetes.hints.kibana.cluster_actions.host|'http://localhost:5601'} + - ${kubernetes.hints.kibana.cluster_actions.host|kubernetes.hints.kibana.host|'http://localhost:5601'} metricsets: - cluster_actions period: null @@ -69,7 +52,7 @@ inputs: dataset: kibana.stack_monitoring.cluster_rules type: metrics hosts: - - ${kubernetes.hints.kibana.cluster_rules.host|'http://localhost:5601'} + - ${kubernetes.hints.kibana.cluster_rules.host|kubernetes.hints.kibana.host|'http://localhost:5601'} metricsets: - cluster_rules period: null @@ -78,7 +61,7 @@ inputs: dataset: kibana.stack_monitoring.node_actions type: metrics hosts: - - ${kubernetes.hints.kibana.node_actions.host|'http://localhost:5601'} + - ${kubernetes.hints.kibana.node_actions.host|kubernetes.hints.kibana.host|'http://localhost:5601'} metricsets: - node_actions period: null @@ -87,7 +70,7 @@ inputs: dataset: kibana.stack_monitoring.node_rules type: metrics hosts: - - ${kubernetes.hints.kibana.node_rules.host|'http://localhost:5601'} + - ${kubernetes.hints.kibana.node_rules.host|kubernetes.hints.kibana.host|'http://localhost:5601'} metricsets: - node_rules period: null @@ -96,7 +79,7 @@ inputs: dataset: kibana.stack_monitoring.stats type: metrics hosts: - - ${kubernetes.hints.kibana.stats.host|'http://localhost:5601'} + - ${kubernetes.hints.kibana.stats.host|kubernetes.hints.kibana.host|'http://localhost:5601'} metricsets: - stats period: null @@ -105,7 +88,7 @@ inputs: dataset: kibana.stack_monitoring.status type: metrics hosts: - - ${kubernetes.hints.kibana.status.host|'http://localhost:5601'} + - ${kubernetes.hints.kibana.status.host|kubernetes.hints.kibana.host|'http://localhost:5601'} metricsets: - status period: null diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/logstash.yml 
b/deploy/kubernetes/elastic-agent-standalone/templates.d/logstash.yml index 6ba62de3274..f4b3c2a23b3 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/logstash.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/logstash.yml @@ -59,17 +59,17 @@ inputs: dataset: logstash.stack_monitoring.node type: metrics hosts: - - ${kubernetes.hints.logstash.node.host|'http://localhost:9600'} + - ${kubernetes.hints.logstash.node.host|kubernetes.hints.logstash.host|'http://localhost:9600'} metricsets: - node - period: ${kubernetes.hints.logstash.node.period|'10s'} + period: ${kubernetes.hints.logstash.node.period|kubernetes.hints.logstash.period|'10s'} - condition: ${kubernetes.hints.logstash.node_stats.enabled} == true or ${kubernetes.hints.logstash.enabled} == true data_stream: dataset: logstash.stack_monitoring.node_stats type: metrics hosts: - - ${kubernetes.hints.logstash.node_stats.host|'http://localhost:9600'} + - ${kubernetes.hints.logstash.node_stats.host|kubernetes.hints.logstash.host|'http://localhost:9600'} metricsets: - node_stats - period: ${kubernetes.hints.logstash.node_stats.period|'10s'} + period: ${kubernetes.hints.logstash.node_stats.period|kubernetes.hints.logstash.period|'10s'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/microsoft_sqlserver.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/microsoft_sqlserver.yml index 5ac70293051..b3215c1cec9 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/microsoft_sqlserver.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/microsoft_sqlserver.yml @@ -1,16 +1,4 @@ inputs: - - name: winlog-microsoft_sqlserver - type: winlog - use_output: default - streams: - - condition: ${kubernetes.hints.microsoft_sqlserver.audit.enabled} == true or ${kubernetes.hints.microsoft_sqlserver.enabled} == true - data_stream: - dataset: microsoft_sqlserver.audit - type: logs - event_id: 33205 - ignore_older: 72h - 
name: Security - data_stream.namespace: default - name: filestream-microsoft_sqlserver type: filestream use_output: default @@ -48,10 +36,10 @@ inputs: driver: mssql dynamic_counter_name: Memory Grants Pending hosts: - - sqlserver://${kubernetes.hints.microsoft_sqlserver.performance.username|'domain\username'}:${kubernetes.hints.microsoft_sqlserver.performance.password|'verysecurepassword'}@${kubernetes.hints.microsoft_sqlserver.performance.host|'localhost'}:1433 + - sqlserver://${kubernetes.hints.microsoft_sqlserver.performance.username|kubernetes.hints.microsoft_sqlserver.username|'domain\username'}:${kubernetes.hints.microsoft_sqlserver.performance.password|kubernetes.hints.microsoft_sqlserver.password|'verysecurepassword'}@${kubernetes.hints.microsoft_sqlserver.performance.host|kubernetes.hints.microsoft_sqlserver.host|'localhost'}:1433 metricsets: - query - period: ${kubernetes.hints.microsoft_sqlserver.performance.period|'60s'} + period: ${kubernetes.hints.microsoft_sqlserver.performance.period|kubernetes.hints.microsoft_sqlserver.period|'60s'} raw_data.enabled: true sql_queries: - query: SELECT cntr_value As 'user_connections' FROM sys.dm_os_performance_counters WHERE counter_name= 'User Connections' @@ -94,10 +82,10 @@ inputs: type: metrics driver: mssql hosts: - - sqlserver://${kubernetes.hints.microsoft_sqlserver.transaction_log.username|'domain\username'}:${kubernetes.hints.microsoft_sqlserver.transaction_log.password|'verysecurepassword'}@${kubernetes.hints.microsoft_sqlserver.transaction_log.host|'localhost'}:1433 + - sqlserver://${kubernetes.hints.microsoft_sqlserver.transaction_log.username|kubernetes.hints.microsoft_sqlserver.username|'domain\username'}:${kubernetes.hints.microsoft_sqlserver.transaction_log.password|kubernetes.hints.microsoft_sqlserver.password|'verysecurepassword'}@${kubernetes.hints.microsoft_sqlserver.transaction_log.host|kubernetes.hints.microsoft_sqlserver.host|'localhost'}:1433 metricsets: - query - period: 
${kubernetes.hints.microsoft_sqlserver.transaction_log.period|'60s'} + period: ${kubernetes.hints.microsoft_sqlserver.transaction_log.period|kubernetes.hints.microsoft_sqlserver.period|'60s'} raw_data.enabled: true sql_queries: - query: SELECT name As 'database_name', database_id FROM sys.databases WHERE database_id=1; @@ -125,3 +113,15 @@ inputs: - query: SELECT 'msdb' As 'database_name', total_log_size_in_bytes As total_log_size_bytes, used_log_space_in_bytes As used_log_space_bytes, used_log_space_in_percent As used_log_space_pct, log_space_in_bytes_since_last_backup FROM sys.dm_db_log_space_usage msdb response_format: table data_stream.namespace: default + - name: winlog-microsoft_sqlserver + type: winlog + use_output: default + streams: + - condition: ${kubernetes.hints.microsoft_sqlserver.audit.enabled} == true or ${kubernetes.hints.microsoft_sqlserver.enabled} == true + data_stream: + dataset: microsoft_sqlserver.audit + type: logs + event_id: 33205 + ignore_older: 72h + name: Security + data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/mimecast.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/mimecast.yml index 23139e47852..4d78d740508 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/mimecast.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/mimecast.yml @@ -364,7 +364,7 @@ inputs: streams: - condition: ${kubernetes.hints.mimecast.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: mimecast.container_logs type: logs exclude_files: [] exclude_lines: [] diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/mongodb.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/mongodb.yml index ece2d4439eb..9804e4f6cb7 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/mongodb.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/mongodb.yml @@ -30,44 +30,44 @@ inputs: dataset: 
mongodb.collstats type: metrics hosts: - - ${kubernetes.hints.mongodb.collstats.host|'localhost:27017'} + - ${kubernetes.hints.mongodb.collstats.host|kubernetes.hints.mongodb.host|'localhost:27017'} metricsets: - collstats - period: ${kubernetes.hints.mongodb.collstats.period|'10s'} + period: ${kubernetes.hints.mongodb.collstats.period|kubernetes.hints.mongodb.period|'10s'} - condition: ${kubernetes.hints.mongodb.dbstats.enabled} == true or ${kubernetes.hints.mongodb.enabled} == true data_stream: dataset: mongodb.dbstats type: metrics hosts: - - ${kubernetes.hints.mongodb.dbstats.host|'localhost:27017'} + - ${kubernetes.hints.mongodb.dbstats.host|kubernetes.hints.mongodb.host|'localhost:27017'} metricsets: - dbstats - period: ${kubernetes.hints.mongodb.dbstats.period|'10s'} + period: ${kubernetes.hints.mongodb.dbstats.period|kubernetes.hints.mongodb.period|'10s'} - condition: ${kubernetes.hints.mongodb.metrics.enabled} == true or ${kubernetes.hints.mongodb.enabled} == true data_stream: dataset: mongodb.metrics type: metrics hosts: - - ${kubernetes.hints.mongodb.metrics.host|'localhost:27017'} + - ${kubernetes.hints.mongodb.metrics.host|kubernetes.hints.mongodb.host|'localhost:27017'} metricsets: - metrics - period: ${kubernetes.hints.mongodb.metrics.period|'10s'} + period: ${kubernetes.hints.mongodb.metrics.period|kubernetes.hints.mongodb.period|'10s'} - condition: ${kubernetes.hints.mongodb.replstatus.enabled} == true or ${kubernetes.hints.mongodb.enabled} == true data_stream: dataset: mongodb.replstatus type: metrics hosts: - - ${kubernetes.hints.mongodb.replstatus.host|'localhost:27017'} + - ${kubernetes.hints.mongodb.replstatus.host|kubernetes.hints.mongodb.host|'localhost:27017'} metricsets: - replstatus - period: ${kubernetes.hints.mongodb.replstatus.period|'10s'} + period: ${kubernetes.hints.mongodb.replstatus.period|kubernetes.hints.mongodb.period|'10s'} - condition: ${kubernetes.hints.mongodb.status.enabled} == true or ${kubernetes.hints.mongodb.enabled} 
== true data_stream: dataset: mongodb.status type: metrics hosts: - - ${kubernetes.hints.mongodb.status.host|'localhost:27017'} + - ${kubernetes.hints.mongodb.status.host|kubernetes.hints.mongodb.host|'localhost:27017'} metricsets: - status - period: ${kubernetes.hints.mongodb.status.period|'10s'} + period: ${kubernetes.hints.mongodb.status.period|kubernetes.hints.mongodb.period|'10s'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/mysql.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/mysql.yml index 234caeeb40c..aa8ad8e0a02 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/mysql.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/mysql.yml @@ -56,27 +56,32 @@ inputs: dataset: mysql.galera_status type: metrics hosts: - - ${kubernetes.hints.mysql.galera_status.host|'tcp(127.0.0.1:3306)/'} + - ${kubernetes.hints.mysql.galera_status.host|kubernetes.hints.mysql.host|'tcp(127.0.0.1:3306)/'} metricsets: - galera_status - password: ${kubernetes.hints.mysql.galera_status.password|'test'} - period: ${kubernetes.hints.mysql.galera_status.period|'10s'} - username: ${kubernetes.hints.mysql.galera_status.username|'root'} - - condition: ${kubernetes.hints.mysql.performance.enabled} == true and ${kubernetes.hints.mysql.enabled} == true + password: ${kubernetes.hints.mysql.galera_status.password|kubernetes.hints.mysql.password|'test'} + period: ${kubernetes.hints.mysql.galera_status.period|kubernetes.hints.mysql.period|'10s'} + username: ${kubernetes.hints.mysql.galera_status.username|kubernetes.hints.mysql.username|'root'} + - condition: ${kubernetes.hints.mysql.performance.enabled} == true or ${kubernetes.hints.mysql.enabled} == true data_stream: dataset: mysql.performance type: metrics + hosts: + - ${kubernetes.hints.mysql.performance.host|kubernetes.hints.mysql.host|'tcp(127.0.0.1:3306)/'} metricsets: - performance + password: 
${kubernetes.hints.mysql.performance.password|kubernetes.hints.mysql.password|'test'} + period: ${kubernetes.hints.mysql.performance.period|kubernetes.hints.mysql.period|'10s'} + username: ${kubernetes.hints.mysql.performance.username|kubernetes.hints.mysql.username|'root'} - condition: ${kubernetes.hints.mysql.status.enabled} == true or ${kubernetes.hints.mysql.enabled} == true data_stream: dataset: mysql.status type: metrics hosts: - - ${kubernetes.hints.mysql.status.host|'tcp(127.0.0.1:3306)/'} + - ${kubernetes.hints.mysql.status.host|kubernetes.hints.mysql.host|'tcp(127.0.0.1:3306)/'} metricsets: - status - password: ${kubernetes.hints.mysql.status.password|'test'} - period: ${kubernetes.hints.mysql.status.period|'10s'} - username: ${kubernetes.hints.mysql.status.username|'root'} + password: ${kubernetes.hints.mysql.status.password|kubernetes.hints.mysql.password|'test'} + period: ${kubernetes.hints.mysql.status.period|kubernetes.hints.mysql.period|'10s'} + username: ${kubernetes.hints.mysql.status.username|kubernetes.hints.mysql.username|'root'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/nats.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/nats.yml index 91525210374..af4da4e87ec 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/nats.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/nats.yml @@ -30,53 +30,53 @@ inputs: dataset: nats.connection type: metrics hosts: - - ${kubernetes.hints.nats.connection.host|'localhost:8222'} + - ${kubernetes.hints.nats.connection.host|kubernetes.hints.nats.host|'localhost:8222'} metricsets: - connection - period: ${kubernetes.hints.nats.connection.period|'10s'} + period: ${kubernetes.hints.nats.connection.period|kubernetes.hints.nats.period|'10s'} - condition: ${kubernetes.hints.nats.connections.enabled} == true or ${kubernetes.hints.nats.enabled} == true data_stream: dataset: nats.connections type: metrics hosts: - - 
${kubernetes.hints.nats.connections.host|'localhost:8222'} + - ${kubernetes.hints.nats.connections.host|kubernetes.hints.nats.host|'localhost:8222'} metricsets: - connections - period: ${kubernetes.hints.nats.connections.period|'10s'} + period: ${kubernetes.hints.nats.connections.period|kubernetes.hints.nats.period|'10s'} - condition: ${kubernetes.hints.nats.route.enabled} == true and ${kubernetes.hints.nats.enabled} == true data_stream: dataset: nats.route type: metrics hosts: - - ${kubernetes.hints.nats.route.host|'localhost:8222'} + - ${kubernetes.hints.nats.route.host|kubernetes.hints.nats.host|'localhost:8222'} metricsets: - route - period: ${kubernetes.hints.nats.route.period|'10s'} + period: ${kubernetes.hints.nats.route.period|kubernetes.hints.nats.period|'10s'} - condition: ${kubernetes.hints.nats.routes.enabled} == true or ${kubernetes.hints.nats.enabled} == true data_stream: dataset: nats.routes type: metrics hosts: - - ${kubernetes.hints.nats.routes.host|'localhost:8222'} + - ${kubernetes.hints.nats.routes.host|kubernetes.hints.nats.host|'localhost:8222'} metricsets: - routes - period: ${kubernetes.hints.nats.routes.period|'10s'} + period: ${kubernetes.hints.nats.routes.period|kubernetes.hints.nats.period|'10s'} - condition: ${kubernetes.hints.nats.stats.enabled} == true or ${kubernetes.hints.nats.enabled} == true data_stream: dataset: nats.stats type: metrics hosts: - - ${kubernetes.hints.nats.stats.host|'localhost:8222'} + - ${kubernetes.hints.nats.stats.host|kubernetes.hints.nats.host|'localhost:8222'} metricsets: - stats - period: ${kubernetes.hints.nats.stats.period|'10s'} + period: ${kubernetes.hints.nats.stats.period|kubernetes.hints.nats.period|'10s'} - condition: ${kubernetes.hints.nats.subscriptions.enabled} == true or ${kubernetes.hints.nats.enabled} == true data_stream: dataset: nats.subscriptions type: metrics hosts: - - ${kubernetes.hints.nats.subscriptions.host|'localhost:8222'} + - 
${kubernetes.hints.nats.subscriptions.host|kubernetes.hints.nats.host|'localhost:8222'} metricsets: - subscriptions - period: ${kubernetes.hints.nats.subscriptions.period|'10s'} + period: ${kubernetes.hints.nats.subscriptions.period|kubernetes.hints.nats.period|'10s'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/netflow.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/netflow.yml index d2bb80601df..7976c094a38 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/netflow.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/netflow.yml @@ -30,7 +30,7 @@ inputs: streams: - condition: ${kubernetes.hints.netflow.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: netflow.container_logs type: logs exclude_files: [] exclude_lines: [] diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/nginx.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/nginx.yml index a9b6693e372..c42fff19dd3 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/nginx.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/nginx.yml @@ -1,19 +1,4 @@ inputs: - - name: nginx/metrics-nginx - type: nginx/metrics - use_output: default - streams: - - condition: ${kubernetes.hints.nginx.stubstatus.enabled} == true or ${kubernetes.hints.nginx.enabled} == true - data_stream: - dataset: nginx.stubstatus - type: metrics - hosts: - - ${kubernetes.hints.nginx.stubstatus.host|'http://127.0.0.1:80'} - metricsets: - - stubstatus - period: ${kubernetes.hints.nginx.stubstatus.period|'10s'} - server_status_path: /nginx_status - data_stream.namespace: default - name: filestream-nginx type: filestream use_output: default @@ -140,3 +125,18 @@ inputs: - forwarded - nginx-error data_stream.namespace: default + - name: nginx/metrics-nginx + type: nginx/metrics + use_output: default + streams: + - condition: 
${kubernetes.hints.nginx.stubstatus.enabled} == true or ${kubernetes.hints.nginx.enabled} == true + data_stream: + dataset: nginx.stubstatus + type: metrics + hosts: + - ${kubernetes.hints.nginx.stubstatus.host|kubernetes.hints.nginx.host|'http://127.0.0.1:80'} + metricsets: + - stubstatus + period: ${kubernetes.hints.nginx.stubstatus.period|kubernetes.hints.nginx.period|'10s'} + server_status_path: /nginx_status + data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/oracle.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/oracle.yml index 8e846586d4b..c6a5cb725a3 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/oracle.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/oracle.yml @@ -29,16 +29,33 @@ inputs: type: sql/metrics use_output: default streams: + - condition: ${kubernetes.hints.oracle.memory.enabled} == true and ${kubernetes.hints.oracle.enabled} == true + data_stream: + dataset: oracle.memory + type: metrics + driver: oracle + hosts: + - ${kubernetes.hints.oracle.memory.host|kubernetes.hints.oracle.host|'oracle://sys:Oradoc_db1@0.0.0.0:1521/ORCLCDB.localdomain?sysdba=1'} + merge_results: true + metricsets: + - query + period: ${kubernetes.hints.oracle.memory.period|kubernetes.hints.oracle.period|'60s'} + raw_data.enabled: true + sql_queries: + - query: select name, value from V$PGASTAT where name in ('aggregate PGA auto target','global memory bound', 'total PGA allocated', 'total PGA used for auto workareas', 'total PGA inuse', 'maximum PGA allocated', 'total freeable PGA memory', 'cache hit percentage', 'aggregate PGA target parameter') + response_format: variables + - query: select 'sga free memory' as NAME, sum(decode(name,'free memory',bytes)) as VALUE from v$sgastat where pool = 'shared pool' union select 'sga total memory' as NAME, sum(bytes) as VALUE from v$sgastat where pool = 'shared pool' + response_format: variables - condition: 
${kubernetes.hints.oracle.performance.enabled} == true and ${kubernetes.hints.oracle.enabled} == true data_stream: dataset: oracle.performance type: metrics driver: oracle hosts: - - ${kubernetes.hints.oracle.performance.host|'oracle://sys:Oradoc_db1@0.0.0.0:1521/ORCLCDB.localdomain?sysdba=1'} + - ${kubernetes.hints.oracle.performance.host|kubernetes.hints.oracle.host|'oracle://sys:Oradoc_db1@0.0.0.0:1521/ORCLCDB.localdomain?sysdba=1'} metricsets: - query - period: ${kubernetes.hints.oracle.performance.period|'60s'} + period: ${kubernetes.hints.oracle.performance.period|kubernetes.hints.oracle.period|'60s'} raw_data.enabled: true sql_queries: - query: SELECT name, physical_reads, db_block_gets, consistent_gets, 1 - (physical_reads / (db_block_gets + consistent_gets)) "Hit_Ratio" FROM V$BUFFER_POOL_STATISTICS @@ -49,6 +66,12 @@ inputs: response_format: table - query: SELECT 'lock_requests' "Ratio" , AVG(gethitratio) FROM V$LIBRARYCACHE UNION SELECT 'pin_requests' "Ratio", AVG(pinhitratio) FROM V$LIBRARYCACHE UNION SELECT 'io_reloads' "Ratio", (SUM(reloads) / SUM(pins)) FROM V$LIBRARYCACHE response_format: variables + - query: SELECT COUNT(*) as "failed_db_jobs" FROM dba_jobs WHERE NVL(failures, 0) < > 0 + response_format: table + - query: select 'active_session_count' as name, count(s.status) as value from gv$session s, v$process p where p.addr=s.paddr and s.status='ACTIVE' union select 'inactive_session_count' as name, count(s.status) as value from gv$session s, v$process p where p.addr=s.paddr and s.status='INACTIVE' union select 'inactive_morethan_onehr' as name, count(s.status) as value from gv$session s, v$process p where p.addr=s.paddr and s.last_call_et > 3600 and s.status='INACTIVE' + response_format: variables + - query: select WAIT_CLASS, TOTAL_WAITS, round(100 * (TOTAL_WAITS / SUM_WAITS),2) PCT_WAITS, ROUND((TIME_WAITED / 100),2) TIME_WAITED_SECS, round(100 * (TIME_WAITED / SUM_TIME),2) PCT_TIME from (select WAIT_CLASS, TOTAL_WAITS, TIME_WAITED from 
V$SYSTEM_WAIT_CLASS where WAIT_CLASS != 'Idle'), (select sum(TOTAL_WAITS) SUM_WAITS, sum(TIME_WAITED) SUM_TIME from V$SYSTEM_WAIT_CLASS where WAIT_CLASS != 'Idle') order by 5 desc + response_format: table - condition: ${kubernetes.hints.oracle.sysmetric.enabled} == true and ${kubernetes.hints.oracle.enabled} == true data_stream: dataset: oracle.sysmetric @@ -56,14 +79,28 @@ inputs: driver: oracle dynamic_metric_name_filter: '%' hosts: - - ${kubernetes.hints.oracle.sysmetric.host|'oracle://sys:Oradoc_db1@0.0.0.0:1521/ORCLCDB.localdomain?sysdba=1'} + - ${kubernetes.hints.oracle.sysmetric.host|kubernetes.hints.oracle.host|'oracle://sys:Oradoc_db1@0.0.0.0:1521/ORCLCDB.localdomain?sysdba=1'} metricsets: - query - period: ${kubernetes.hints.oracle.sysmetric.period|'60s'} + period: ${kubernetes.hints.oracle.sysmetric.period|kubernetes.hints.oracle.period|'60s'} raw_data.enabled: true sql_queries: - query: SELECT METRIC_NAME, VALUE FROM V$SYSMETRIC WHERE GROUP_ID = 2 and METRIC_NAME LIKE '%' response_format: variables + - condition: ${kubernetes.hints.oracle.system_statistics.enabled} == true and ${kubernetes.hints.oracle.enabled} == true + data_stream: + dataset: oracle.system_statistics + type: metrics + driver: oracle + hosts: + - ${kubernetes.hints.oracle.system_statistics.host|kubernetes.hints.oracle.host|'oracle://sys:Oradoc_db1@0.0.0.0:1521/ORCLCDB.localdomain?sysdba=1'} + metricsets: + - query + period: ${kubernetes.hints.oracle.system_statistics.period|kubernetes.hints.oracle.period|'60s'} + raw_data.enabled: true + sql_queries: + - query: SELECT NAME, VALUE FROM V$SYSSTAT WHERE NAME IN ( 'bytes received via SQL*Net from client', 'bytes received via SQL*Net from dblink', 'bytes sent via SQL*Net to client', 'bytes sent via SQL*Net to dblink', 'CPU used by this session', 'db block changes', 'db block gets from cache', 'DBWR checkpoint buffers written', 'DBWR checkpoints', 'DML statements parallelized', 'enqueue conversions', 'enqueue deadlocks', 'enqueue releases', 
'enqueue requests', 'enqueue timeouts', 'enqueue waits', 'exchange deadlocks', 'execute count', 'gc current block receive time', 'index fast full scans (direct read)', 'index fast full scans (full)', 'index fast full scans (rowid ranges)', 'lob reads', 'lob writes', 'logons current', 'opened cursors current', 'Parallel operations not downgraded', 'parse count (hard)', 'parse count (total)', 'parse time cpu', 'parse time elapsed', 'physical read bytes', 'physical read IO requests', 'physical read total bytes', 'physical read total IO requests', 'physical reads', 'physical write bytes', 'physical write IO requests', 'physical write total bytes', 'physical write total IO requests', 'physical writes', 'physical writes direct', 'physical writes from cache', 'process last non-idle time', 'queries parallelized', 'recovery blocks read', 'recursive calls', 'recursive cpu usage', 'redo blocks written', 'redo buffer allocation retries', 'redo log space requests', 'redo log space wait time', 'redo size', 'redo synch time', 'redo write time', 'redo writes', 'session cursor cache count', 'session cursor cache hits', 'session logical reads', 'session stored procedure space', 'sorts (disk)', 'sorts (memory)', 'sorts (rows)', 'table scan rows gotten', 'table scans (direct read)', 'table scans (long tables)', 'table scans (rowid ranges)', 'transaction rollbacks', 'user calls', 'user commits', 'user rollbacks', 'DB time', 'OS System time used', 'OS User time used', 'SMON posted for instance recovery', 'SMON posted for txn recovery for other instances', 'java call heap live size', 'java call heap total size', 'java call heap used size') + response_format: variables - condition: ${kubernetes.hints.oracle.tablespace.enabled} == true and ${kubernetes.hints.oracle.enabled} == true data_stream: dataset: oracle.tablespace @@ -71,10 +108,10 @@ inputs: driver: oracle dynamic_metric_name_filter: "" hosts: - - 
${kubernetes.hints.oracle.tablespace.host|'oracle://sys:Oradoc_db1@0.0.0.0:1521/ORCLCDB.localdomain?sysdba=1'} + - ${kubernetes.hints.oracle.tablespace.host|kubernetes.hints.oracle.host|'oracle://sys:Oradoc_db1@0.0.0.0:1521/ORCLCDB.localdomain?sysdba=1'} metricsets: - query - period: ${kubernetes.hints.oracle.tablespace.period|'60s'} + period: ${kubernetes.hints.oracle.tablespace.period|kubernetes.hints.oracle.period|'60s'} raw_data.enabled: true sql_queries: - query: WITH data_files AS (SELECT file_name, file_id, tablespace_name, bytes, status, maxbytes, user_bytes, online_status FROM sys.dba_data_files UNION SELECT file_name, file_id, tablespace_name, bytes, status, maxbytes, user_bytes, status AS ONLINE_STATUS FROM sys.dba_temp_files), spaces AS (SELECT b.tablespace_name TB_NAME, tbs_size TB_SIZE_USED, a.free_space TB_SIZE_FREE FROM (SELECT tablespace_name, SUM(bytes) AS free_space FROM dba_free_space GROUP BY tablespace_name) a, (SELECT tablespace_name, SUM(bytes) AS tbs_size FROM dba_data_files GROUP BY tablespace_name) b WHERE a.tablespace_name(+) = b.tablespace_name AND a.tablespace_name != 'TEMP'), temp_spaces AS (SELECT tablespace_name, tablespace_size, allocated_space, free_space FROM dba_temp_free_space WHERE tablespace_name = 'TEMP'), details AS (SELECT df.file_name, df.file_id, df.tablespace_name, df.bytes, df.status, df.maxbytes, df.user_bytes, df.online_status, sp.tb_size_used, sp.tb_size_free FROM data_files df, spaces sp WHERE df.tablespace_name = sp.tb_name UNION SELECT df.file_name, df.file_id, df.tablespace_name, df.bytes, df.status, df.maxbytes, df.user_bytes, df.online_status, tsp.tablespace_size - tsp.free_space AS TB_SIZE_USED, tsp.free_space AS TB_SIZE_FREE FROM data_files df, temp_spaces tsp WHERE df.tablespace_name = tsp.tablespace_name) SELECT file_name, file_id, tablespace_name, bytes, status, maxbytes, user_bytes, online_status, tb_size_used, tb_size_free, SUM(bytes) over() AS TOTAL_BYTES FROM details diff --git 
a/deploy/kubernetes/elastic-agent-standalone/templates.d/panw.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/panw.yml index 93c07883f03..8cea3505601 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/panw.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/panw.yml @@ -1,4 +1,37 @@ inputs: + - name: filestream-panw + type: filestream + use_output: default + streams: + - condition: ${kubernetes.hints.panw.panos.enabled} == true or ${kubernetes.hints.panw.enabled} == true + data_stream: + dataset: panw.panos + type: logs + exclude_files: + - .gz$ + fields: + _conf: + external_zones: + - untrust + internal_zones: + - trust + tz_offset: local + fields_under_root: true + parsers: + - container: + format: auto + stream: ${kubernetes.hints.panw.panos.stream|'all'} + paths: + - /var/log/containers/*${kubernetes.hints.container_id}.log + processors: + - add_locale: null + prospector: + scanner: + symlinks: true + tags: + - panw-panos + - forwarded + data_stream.namespace: default - name: tcp-panw type: tcp use_output: default @@ -59,36 +92,3 @@ inputs: - panw-panos - forwarded data_stream.namespace: default - - name: filestream-panw - type: filestream - use_output: default - streams: - - condition: ${kubernetes.hints.panw.panos.enabled} == true or ${kubernetes.hints.panw.enabled} == true - data_stream: - dataset: panw.panos - type: logs - exclude_files: - - .gz$ - fields: - _conf: - external_zones: - - untrust - internal_zones: - - trust - tz_offset: local - fields_under_root: true - parsers: - - container: - format: auto - stream: ${kubernetes.hints.panw.panos.stream|'all'} - paths: - - /var/log/containers/*${kubernetes.hints.container_id}.log - processors: - - add_locale: null - prospector: - scanner: - symlinks: true - tags: - - panw-panos - - forwarded - data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/panw_cortex_xdr.yml 
b/deploy/kubernetes/elastic-agent-standalone/templates.d/panw_cortex_xdr.yml index ec6a58fd9b2..bbd2aebfa4b 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/panw_cortex_xdr.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/panw_cortex_xdr.yml @@ -73,7 +73,7 @@ inputs: streams: - condition: ${kubernetes.hints.panw_cortex_xdr.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: panw_cortex_xdr.container_logs type: logs exclude_files: [] exclude_lines: [] diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/pfsense.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/pfsense.yml index e4541f90639..3a52d749ed7 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/pfsense.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/pfsense.yml @@ -45,7 +45,7 @@ inputs: streams: - condition: ${kubernetes.hints.pfsense.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: pfsense.container_logs type: logs exclude_files: [] exclude_lines: [] diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/postgresql.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/postgresql.yml index a9abf518a9a..8b40d2524d2 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/postgresql.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/postgresql.yml @@ -34,35 +34,35 @@ inputs: dataset: postgresql.activity type: metrics hosts: - - ${kubernetes.hints.postgresql.activity.host|'postgres://localhost:5432'} + - ${kubernetes.hints.postgresql.activity.host|kubernetes.hints.postgresql.host|'postgres://localhost:5432'} metricsets: - activity - period: ${kubernetes.hints.postgresql.activity.period|'10s'} + period: ${kubernetes.hints.postgresql.activity.period|kubernetes.hints.postgresql.period|'10s'} - condition: ${kubernetes.hints.postgresql.bgwriter.enabled} == true or 
${kubernetes.hints.postgresql.enabled} == true data_stream: dataset: postgresql.bgwriter type: metrics hosts: - - ${kubernetes.hints.postgresql.bgwriter.host|'postgres://localhost:5432'} + - ${kubernetes.hints.postgresql.bgwriter.host|kubernetes.hints.postgresql.host|'postgres://localhost:5432'} metricsets: - bgwriter - period: ${kubernetes.hints.postgresql.bgwriter.period|'10s'} + period: ${kubernetes.hints.postgresql.bgwriter.period|kubernetes.hints.postgresql.period|'10s'} - condition: ${kubernetes.hints.postgresql.database.enabled} == true or ${kubernetes.hints.postgresql.enabled} == true data_stream: dataset: postgresql.database type: metrics hosts: - - ${kubernetes.hints.postgresql.database.host|'postgres://localhost:5432'} + - ${kubernetes.hints.postgresql.database.host|kubernetes.hints.postgresql.host|'postgres://localhost:5432'} metricsets: - database - period: ${kubernetes.hints.postgresql.database.period|'10s'} + period: ${kubernetes.hints.postgresql.database.period|kubernetes.hints.postgresql.period|'10s'} - condition: ${kubernetes.hints.postgresql.statement.enabled} == true or ${kubernetes.hints.postgresql.enabled} == true data_stream: dataset: postgresql.statement type: metrics hosts: - - ${kubernetes.hints.postgresql.statement.host|'postgres://localhost:5432'} + - ${kubernetes.hints.postgresql.statement.host|kubernetes.hints.postgresql.host|'postgres://localhost:5432'} metricsets: - statement - period: ${kubernetes.hints.postgresql.statement.period|'10s'} + period: ${kubernetes.hints.postgresql.statement.period|kubernetes.hints.postgresql.period|'10s'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/prometheus.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/prometheus.yml index 2a7e630c9cf..1bb26ac4da2 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/prometheus.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/prometheus.yml @@ -3,38 +3,35 @@ inputs: 
type: prometheus/metrics use_output: default streams: - - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - condition: ${kubernetes.hints.prometheus.collector.enabled} == true or ${kubernetes.hints.prometheus.enabled} == true + - condition: ${kubernetes.hints.prometheus.collector.enabled} == true or ${kubernetes.hints.prometheus.enabled} == true data_stream: dataset: prometheus.collector type: metrics hosts: - - ${kubernetes.hints.prometheus.collector.host|'localhost:9090'} + - ${kubernetes.hints.prometheus.collector.host|kubernetes.hints.prometheus.host|'localhost:9090'} metrics_filters.exclude: null metrics_filters.include: null metrics_path: /metrics metricsets: - collector - password: ${kubernetes.hints.prometheus.collector.password|'secret'} - period: ${kubernetes.hints.prometheus.collector.period|'10s'} + password: ${kubernetes.hints.prometheus.collector.password|kubernetes.hints.prometheus.password|'secret'} + period: ${kubernetes.hints.prometheus.collector.period|kubernetes.hints.prometheus.period|'10s'} rate_counters: true - ssl.certificate_authorities: - - /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt use_types: true - username: ${kubernetes.hints.prometheus.collector.username|'user'} + username: ${kubernetes.hints.prometheus.collector.username|kubernetes.hints.prometheus.username|'user'} - condition: ${kubernetes.hints.prometheus.query.enabled} == true and ${kubernetes.hints.prometheus.enabled} == true data_stream: dataset: prometheus.query type: metrics hosts: - - ${kubernetes.hints.prometheus.query.host|'localhost:9090'} + - ${kubernetes.hints.prometheus.query.host|kubernetes.hints.prometheus.host|'localhost:9090'} metricsets: - query - period: ${kubernetes.hints.prometheus.query.period|'10s'} + period: ${kubernetes.hints.prometheus.query.period|kubernetes.hints.prometheus.period|'10s'} queries: - name: instant_vector params: - query: sum(rate(prometheus_http_requests_total[1m])) + query: 
sum(rate(prometheus_http_requests_total[2m])) path: /api/v1/query - name: range_vector params: @@ -73,7 +70,7 @@ inputs: streams: - condition: ${kubernetes.hints.prometheus.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: prometheus.container_logs type: logs exclude_files: [] exclude_lines: [] diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/qnap_nas.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/qnap_nas.yml index 546faa79901..3d517763b74 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/qnap_nas.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/qnap_nas.yml @@ -1,4 +1,23 @@ inputs: + - name: tcp-qnap_nas + type: tcp + use_output: default + streams: + - condition: ${kubernetes.hints.qnap_nas.log.enabled} == true or ${kubernetes.hints.qnap_nas.enabled} == true + data_stream: + dataset: qnap_nas.log + type: logs + host: localhost:9301 + processors: + - add_locale: null + - add_fields: + fields: + tz_offset: local + target: _tmp + tags: + - qnap-nas + - forwarded + data_stream.namespace: default - name: udp-qnap_nas type: udp use_output: default @@ -24,7 +43,7 @@ inputs: streams: - condition: ${kubernetes.hints.qnap_nas.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: qnap_nas.container_logs type: logs exclude_files: [] exclude_lines: [] @@ -39,22 +58,3 @@ inputs: symlinks: true tags: [] data_stream.namespace: default - - name: tcp-qnap_nas - type: tcp - use_output: default - streams: - - condition: ${kubernetes.hints.qnap_nas.log.enabled} == true or ${kubernetes.hints.qnap_nas.enabled} == true - data_stream: - dataset: qnap_nas.log - type: logs - host: localhost:9301 - processors: - - add_locale: null - - add_fields: - fields: - tz_offset: local - target: _tmp - tags: - - qnap-nas - - forwarded - data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/rabbitmq.yml 
b/deploy/kubernetes/elastic-agent-standalone/templates.d/rabbitmq.yml index 942c4fa6911..53701dfa769 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/rabbitmq.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/rabbitmq.yml @@ -36,44 +36,44 @@ inputs: dataset: rabbitmq.connection type: metrics hosts: - - ${kubernetes.hints.rabbitmq.connection.host|'localhost:15672'} + - ${kubernetes.hints.rabbitmq.connection.host|kubernetes.hints.rabbitmq.host|'localhost:15672'} metricsets: - connection - password: ${kubernetes.hints.rabbitmq.connection.password|''} - period: ${kubernetes.hints.rabbitmq.connection.period|'10s'} - username: ${kubernetes.hints.rabbitmq.connection.username|''} + password: ${kubernetes.hints.rabbitmq.connection.password|kubernetes.hints.rabbitmq.password|''} + period: ${kubernetes.hints.rabbitmq.connection.period|kubernetes.hints.rabbitmq.period|'10s'} + username: ${kubernetes.hints.rabbitmq.connection.username|kubernetes.hints.rabbitmq.username|''} - condition: ${kubernetes.hints.rabbitmq.exchange.enabled} == true or ${kubernetes.hints.rabbitmq.enabled} == true data_stream: dataset: rabbitmq.exchange type: metrics hosts: - - ${kubernetes.hints.rabbitmq.exchange.host|'localhost:15672'} + - ${kubernetes.hints.rabbitmq.exchange.host|kubernetes.hints.rabbitmq.host|'localhost:15672'} metricsets: - exchange - password: ${kubernetes.hints.rabbitmq.exchange.password|''} - period: ${kubernetes.hints.rabbitmq.exchange.period|'10s'} - username: ${kubernetes.hints.rabbitmq.exchange.username|''} + password: ${kubernetes.hints.rabbitmq.exchange.password|kubernetes.hints.rabbitmq.password|''} + period: ${kubernetes.hints.rabbitmq.exchange.period|kubernetes.hints.rabbitmq.period|'10s'} + username: ${kubernetes.hints.rabbitmq.exchange.username|kubernetes.hints.rabbitmq.username|''} - condition: ${kubernetes.hints.rabbitmq.node.enabled} == true or ${kubernetes.hints.rabbitmq.enabled} == true data_stream: dataset: rabbitmq.node type: 
metrics hosts: - - ${kubernetes.hints.rabbitmq.node.host|'localhost:15672'} + - ${kubernetes.hints.rabbitmq.node.host|kubernetes.hints.rabbitmq.host|'localhost:15672'} metricsets: - node node.collect: node - password: ${kubernetes.hints.rabbitmq.node.password|''} - period: ${kubernetes.hints.rabbitmq.node.period|'10s'} - username: ${kubernetes.hints.rabbitmq.node.username|''} + password: ${kubernetes.hints.rabbitmq.node.password|kubernetes.hints.rabbitmq.password|''} + period: ${kubernetes.hints.rabbitmq.node.period|kubernetes.hints.rabbitmq.period|'10s'} + username: ${kubernetes.hints.rabbitmq.node.username|kubernetes.hints.rabbitmq.username|''} - condition: ${kubernetes.hints.rabbitmq.queue.enabled} == true or ${kubernetes.hints.rabbitmq.enabled} == true data_stream: dataset: rabbitmq.queue type: metrics hosts: - - ${kubernetes.hints.rabbitmq.queue.host|'localhost:15672'} + - ${kubernetes.hints.rabbitmq.queue.host|kubernetes.hints.rabbitmq.host|'localhost:15672'} metricsets: - queue - password: ${kubernetes.hints.rabbitmq.queue.password|''} - period: ${kubernetes.hints.rabbitmq.queue.period|'10s'} - username: ${kubernetes.hints.rabbitmq.queue.username|''} + password: ${kubernetes.hints.rabbitmq.queue.password|kubernetes.hints.rabbitmq.password|''} + period: ${kubernetes.hints.rabbitmq.queue.period|kubernetes.hints.rabbitmq.period|'10s'} + username: ${kubernetes.hints.rabbitmq.queue.username|kubernetes.hints.rabbitmq.username|''} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/redis.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/redis.yml index 31731f6c1a5..d8db78aee6d 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/redis.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/redis.yml @@ -32,8 +32,8 @@ inputs: dataset: redis.slowlog type: logs hosts: - - ${kubernetes.hints.redis.slowlog.host|'127.0.0.1:6379'} - password: ${kubernetes.hints.redis.slowlog.password|''} + - 
${kubernetes.hints.redis.slowlog.host|kubernetes.hints.redis.host|'127.0.0.1:6379'} + password: ${kubernetes.hints.redis.slowlog.password|kubernetes.hints.redis.password|''} data_stream.namespace: default - name: redis/metrics-redis type: redis/metrics @@ -44,20 +44,20 @@ inputs: dataset: redis.info type: metrics hosts: - - ${kubernetes.hints.redis.info.host|'127.0.0.1:6379'} + - ${kubernetes.hints.redis.info.host|kubernetes.hints.redis.host|'127.0.0.1:6379'} idle_timeout: 20s maxconn: 10 metricsets: - info network: tcp - password: ${kubernetes.hints.redis.info.password|''} - period: ${kubernetes.hints.redis.info.period|'10s'} + password: ${kubernetes.hints.redis.info.password|kubernetes.hints.redis.password|''} + period: ${kubernetes.hints.redis.info.period|kubernetes.hints.redis.period|'10s'} - condition: ${kubernetes.hints.redis.key.enabled} == true or ${kubernetes.hints.redis.enabled} == true data_stream: dataset: redis.key type: metrics hosts: - - ${kubernetes.hints.redis.key.host|'127.0.0.1:6379'} + - ${kubernetes.hints.redis.key.host|kubernetes.hints.redis.host|'127.0.0.1:6379'} idle_timeout: 20s key.patterns: - limit: 20 @@ -66,19 +66,19 @@ inputs: metricsets: - key network: tcp - password: ${kubernetes.hints.redis.key.password|''} - period: ${kubernetes.hints.redis.key.period|'10s'} + password: ${kubernetes.hints.redis.key.password|kubernetes.hints.redis.password|''} + period: ${kubernetes.hints.redis.key.period|kubernetes.hints.redis.period|'10s'} - condition: ${kubernetes.hints.redis.keyspace.enabled} == true or ${kubernetes.hints.redis.enabled} == true data_stream: dataset: redis.keyspace type: metrics hosts: - - ${kubernetes.hints.redis.keyspace.host|'127.0.0.1:6379'} + - ${kubernetes.hints.redis.keyspace.host|kubernetes.hints.redis.host|'127.0.0.1:6379'} idle_timeout: 20s maxconn: 10 metricsets: - keyspace network: tcp - password: ${kubernetes.hints.redis.keyspace.password|''} - period: ${kubernetes.hints.redis.keyspace.period|'10s'} + password: 
${kubernetes.hints.redis.keyspace.password|kubernetes.hints.redis.password|''} + period: ${kubernetes.hints.redis.keyspace.period|kubernetes.hints.redis.period|'10s'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/security_detection_engine.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/security_detection_engine.yml index 990a4372e8b..aee90809a9f 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/security_detection_engine.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/security_detection_engine.yml @@ -5,7 +5,7 @@ inputs: streams: - condition: ${kubernetes.hints.security_detection_engine.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: security_detection_engine.container_logs type: logs exclude_files: [] exclude_lines: [] diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/sentinel_one.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/sentinel_one.yml index 7c06b222d78..5401309fbe1 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/sentinel_one.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/sentinel_one.yml @@ -200,7 +200,7 @@ inputs: streams: - condition: ${kubernetes.hints.sentinel_one.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: sentinel_one.container_logs type: logs exclude_files: [] exclude_lines: [] diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/snyk.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/snyk.yml index aef353751ec..4f857c2233c 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/snyk.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/snyk.yml @@ -1,25 +1,4 @@ inputs: - - name: filestream-snyk - type: filestream - use_output: default - streams: - - condition: ${kubernetes.hints.snyk.container_logs.enabled} == true - data_stream: - dataset: 
kubernetes.container_logs - type: logs - exclude_files: [] - exclude_lines: [] - parsers: - - container: - format: auto - stream: all - paths: - - /var/log/containers/*${kubernetes.hints.container_id}.log - prospector: - scanner: - symlinks: true - tags: [] - data_stream.namespace: default - name: httpjson-snyk type: httpjson use_output: default @@ -137,3 +116,24 @@ inputs: - forwarded - snyk-vulnerabilities data_stream.namespace: default + - name: filestream-snyk + type: filestream + use_output: default + streams: + - condition: ${kubernetes.hints.snyk.container_logs.enabled} == true + data_stream: + dataset: snyk.container_logs + type: logs + exclude_files: [] + exclude_lines: [] + parsers: + - container: + format: auto + stream: all + paths: + - /var/log/containers/*${kubernetes.hints.container_id}.log + prospector: + scanner: + symlinks: true + tags: [] + data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/stan.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/stan.yml index 9fdee28a731..ea77d57ed81 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/stan.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/stan.yml @@ -31,26 +31,26 @@ inputs: dataset: stan.channels type: metrics hosts: - - ${kubernetes.hints.stan.channels.host|'localhost:8222'} + - ${kubernetes.hints.stan.channels.host|kubernetes.hints.stan.host|'localhost:8222'} metricsets: - channels - period: ${kubernetes.hints.stan.channels.period|'60s'} + period: ${kubernetes.hints.stan.channels.period|kubernetes.hints.stan.period|'60s'} - condition: ${kubernetes.hints.stan.stats.enabled} == true or ${kubernetes.hints.stan.enabled} == true data_stream: dataset: stan.stats type: metrics hosts: - - ${kubernetes.hints.stan.stats.host|'localhost:8222'} + - ${kubernetes.hints.stan.stats.host|kubernetes.hints.stan.host|'localhost:8222'} metricsets: - stats - period: ${kubernetes.hints.stan.stats.period|'60s'} + period: 
${kubernetes.hints.stan.stats.period|kubernetes.hints.stan.period|'60s'} - condition: ${kubernetes.hints.stan.subscriptions.enabled} == true or ${kubernetes.hints.stan.enabled} == true data_stream: dataset: stan.subscriptions type: metrics hosts: - - ${kubernetes.hints.stan.subscriptions.host|'localhost:8222'} + - ${kubernetes.hints.stan.subscriptions.host|kubernetes.hints.stan.host|'localhost:8222'} metricsets: - subscriptions - period: ${kubernetes.hints.stan.subscriptions.period|'60s'} + period: ${kubernetes.hints.stan.subscriptions.period|kubernetes.hints.stan.period|'60s'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/symantec_endpoint.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/symantec_endpoint.yml index 8e3ca7ce297..fac3f6cbd93 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/symantec_endpoint.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/symantec_endpoint.yml @@ -1,23 +1,4 @@ inputs: - - name: udp-symantec_endpoint - type: udp - use_output: default - streams: - - condition: ${kubernetes.hints.symantec_endpoint.log.enabled} == true or ${kubernetes.hints.symantec_endpoint.enabled} == true - data_stream: - dataset: symantec_endpoint.log - type: logs - fields: - _conf: - remove_mapped_fields: false - tz_offset: UTC - fields_under_root: true - host: localhost:9008 - max_message_size: 1 MiB - tags: - - symantec-endpoint-log - - forwarded - data_stream.namespace: default - name: filestream-symantec_endpoint type: filestream use_output: default @@ -65,3 +46,22 @@ inputs: - symantec-endpoint-log - forwarded data_stream.namespace: default + - name: udp-symantec_endpoint + type: udp + use_output: default + streams: + - condition: ${kubernetes.hints.symantec_endpoint.log.enabled} == true or ${kubernetes.hints.symantec_endpoint.enabled} == true + data_stream: + dataset: symantec_endpoint.log + type: logs + fields: + _conf: + remove_mapped_fields: false + 
tz_offset: UTC + fields_under_root: true + host: localhost:9008 + max_message_size: 1 MiB + tags: + - symantec-endpoint-log + - forwarded + data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/synthetics.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/synthetics.yml index 2f375b1a3f0..7f754909d1a 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/synthetics.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/synthetics.yml @@ -1,4 +1,25 @@ inputs: + - name: filestream-synthetics + type: filestream + use_output: default + streams: + - condition: ${kubernetes.hints.synthetics.container_logs.enabled} == true + data_stream: + dataset: synthetics.container_logs + type: logs + exclude_files: [] + exclude_lines: [] + parsers: + - container: + format: auto + stream: all + paths: + - /var/log/containers/*${kubernetes.hints.container_id}.log + prospector: + scanner: + symlinks: true + tags: [] + data_stream.namespace: default - name: synthetics/http-synthetics type: synthetics/http use_output: default @@ -37,7 +58,7 @@ inputs: dataset: tcp type: synthetics enabled: true - hosts: ${kubernetes.hints.synthetics.tcp.host|''} + hosts: ${kubernetes.hints.synthetics.tcp.host|kubernetes.hints.synthetics.host|''} name: null processors: - add_observer_metadata: @@ -62,7 +83,7 @@ inputs: dataset: icmp type: synthetics enabled: true - hosts: ${kubernetes.hints.synthetics.icmp.host|''} + hosts: ${kubernetes.hints.synthetics.icmp.host|kubernetes.hints.synthetics.host|''} name: null processors: - add_observer_metadata: @@ -125,24 +146,3 @@ inputs: monitor.fleet_managed: true target: "" data_stream.namespace: default - - name: filestream-synthetics - type: filestream - use_output: default - streams: - - condition: ${kubernetes.hints.synthetics.container_logs.enabled} == true - data_stream: - dataset: kubernetes.container_logs - type: logs - exclude_files: [] - exclude_lines: [] - parsers: - - container: 
- format: auto - stream: all - paths: - - /var/log/containers/*${kubernetes.hints.container_id}.log - prospector: - scanner: - symlinks: true - tags: [] - data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/tcp.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/tcp.yml index 34c8d0d984e..0f20d16dfd1 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/tcp.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/tcp.yml @@ -1,11 +1,21 @@ inputs: + - name: tcp-tcp + type: tcp + use_output: default + streams: + - condition: ${kubernetes.hints.tcp.generic.enabled} == true or ${kubernetes.hints.tcp.enabled} == true + data_stream: + dataset: tcp.generic + type: logs + host: localhost:8080 + data_stream.namespace: default - name: filestream-tcp type: filestream use_output: default streams: - condition: ${kubernetes.hints.tcp.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: tcp.container_logs type: logs exclude_files: [] exclude_lines: [] @@ -20,13 +30,3 @@ inputs: symlinks: true tags: [] data_stream.namespace: default - - name: tcp-tcp - type: tcp - use_output: default - streams: - - condition: ${kubernetes.hints.tcp.generic.enabled} == true or ${kubernetes.hints.tcp.enabled} == true - data_stream: - dataset: tcp.generic - type: logs - host: localhost:8080 - data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/traefik.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/traefik.yml index 4ab26982389..15b7ffbbba9 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/traefik.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/traefik.yml @@ -30,8 +30,8 @@ inputs: dataset: traefik.health type: metrics hosts: - - ${kubernetes.hints.traefik.health.host|'localhost:8080'} + - ${kubernetes.hints.traefik.health.host|kubernetes.hints.traefik.host|'localhost:8080'} metricsets: - 
health - period: ${kubernetes.hints.traefik.health.period|'10s'} + period: ${kubernetes.hints.traefik.health.period|kubernetes.hints.traefik.period|'10s'} data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/udp.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/udp.yml index 60fa5ebf598..10883414be4 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/udp.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/udp.yml @@ -1,22 +1,11 @@ inputs: - - name: udp-udp - type: udp - use_output: default - streams: - - condition: ${kubernetes.hints.udp.generic.enabled} == true or ${kubernetes.hints.udp.enabled} == true - data_stream: - dataset: udp.generic - type: logs - host: localhost:8080 - max_message_size: 10KiB - data_stream.namespace: default - name: filestream-udp type: filestream use_output: default streams: - condition: ${kubernetes.hints.udp.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: udp.container_logs type: logs exclude_files: [] exclude_lines: [] @@ -31,3 +20,14 @@ inputs: symlinks: true tags: [] data_stream.namespace: default + - name: udp-udp + type: udp + use_output: default + streams: + - condition: ${kubernetes.hints.udp.generic.enabled} == true or ${kubernetes.hints.udp.enabled} == true + data_stream: + dataset: udp.generic + type: logs + host: localhost:8080 + max_message_size: 10KiB + data_stream.namespace: default diff --git a/deploy/kubernetes/elastic-agent-standalone/templates.d/zookeeper.yml b/deploy/kubernetes/elastic-agent-standalone/templates.d/zookeeper.yml index 5199734c315..411d454e031 100644 --- a/deploy/kubernetes/elastic-agent-standalone/templates.d/zookeeper.yml +++ b/deploy/kubernetes/elastic-agent-standalone/templates.d/zookeeper.yml @@ -8,28 +8,28 @@ inputs: dataset: zookeeper.connection type: metrics hosts: - - ${kubernetes.hints.zookeeper.connection.host|'localhost:2181'} + - 
${kubernetes.hints.zookeeper.connection.host|kubernetes.hints.zookeeper.host|'localhost:2181'} metricsets: - connection - period: ${kubernetes.hints.zookeeper.connection.period|'10s'} + period: ${kubernetes.hints.zookeeper.connection.period|kubernetes.hints.zookeeper.period|'10s'} - condition: ${kubernetes.hints.zookeeper.mntr.enabled} == true or ${kubernetes.hints.zookeeper.enabled} == true data_stream: dataset: zookeeper.mntr type: metrics hosts: - - ${kubernetes.hints.zookeeper.mntr.host|'localhost:2181'} + - ${kubernetes.hints.zookeeper.mntr.host|kubernetes.hints.zookeeper.host|'localhost:2181'} metricsets: - mntr - period: ${kubernetes.hints.zookeeper.mntr.period|'10s'} + period: ${kubernetes.hints.zookeeper.mntr.period|kubernetes.hints.zookeeper.period|'10s'} - condition: ${kubernetes.hints.zookeeper.server.enabled} == true or ${kubernetes.hints.zookeeper.enabled} == true data_stream: dataset: zookeeper.server type: metrics hosts: - - ${kubernetes.hints.zookeeper.server.host|'localhost:2181'} + - ${kubernetes.hints.zookeeper.server.host|kubernetes.hints.zookeeper.host|'localhost:2181'} metricsets: - server - period: ${kubernetes.hints.zookeeper.server.period|'10s'} + period: ${kubernetes.hints.zookeeper.server.period|kubernetes.hints.zookeeper.period|'10s'} data_stream.namespace: default - name: filestream-zookeeper type: filestream @@ -37,7 +37,7 @@ inputs: streams: - condition: ${kubernetes.hints.zookeeper.container_logs.enabled} == true data_stream: - dataset: kubernetes.container_logs + dataset: zookeeper.container_logs type: logs exclude_files: [] exclude_lines: [] From a8ad2da852d507d6a889eaba3c6483c0dbd7c040 Mon Sep 17 00:00:00 2001 From: Aleksandr Maus Date: Mon, 24 Oct 2022 14:26:13 -0400 Subject: [PATCH 47/63] Fix: Windows Agent Left Unhealthy After Removing Endpoint Integration (#1286) * Add stop timeout to the endpoint spec * Service watcher * Wire in the service watcher * Remove waiting on the service stop since Endpoint should not be stopped --- 
.../1666611696-fix_service_stop_timeout.yaml | 4 ++ internal/pkg/agent/operation/monitoring.go | 3 +- .../pkg/agent/operation/monitoring_test.go | 2 +- internal/pkg/agent/program/spec.go | 16 +++++- internal/pkg/agent/program/spec_test.go | 44 ++++++++++++++++ internal/pkg/agent/program/supported.go | 2 +- internal/pkg/core/plugin/service/app.go | 50 +++++++++++++------ internal/spec/endpoint.yml | 5 ++ 8 files changed, 108 insertions(+), 18 deletions(-) create mode 100644 changelog/fragments/1666611696-fix_service_stop_timeout.yaml diff --git a/changelog/fragments/1666611696-fix_service_stop_timeout.yaml b/changelog/fragments/1666611696-fix_service_stop_timeout.yaml new file mode 100644 index 00000000000..5125282618f --- /dev/null +++ b/changelog/fragments/1666611696-fix_service_stop_timeout.yaml @@ -0,0 +1,4 @@ +kind: bug-fix +summary: "Fix: Windows Agent Left Unhealthy After Removing Endpoint Integration" +pr: 1286 +issue: 1262 diff --git a/internal/pkg/agent/operation/monitoring.go b/internal/pkg/agent/operation/monitoring.go index f28c681e42a..d3f120da677 100644 --- a/internal/pkg/agent/operation/monitoring.go +++ b/internal/pkg/agent/operation/monitoring.go @@ -373,6 +373,7 @@ func (o *Operator) getMonitoringMetricbeatConfig(outputType string, output inter if len(hosts) == 0 { return nil, false } + //nolint:prealloc // false positive var modules []interface{} fixedAgentName := strings.ReplaceAll(agentName, "-", "_") @@ -668,7 +669,7 @@ func normalizeHTTPCopyRules(name string) []map[string]interface{} { return fromToMap } - for _, exportedMetric := range spec.ExprtedMetrics { + for _, exportedMetric := range spec.ExportedMetrics { fromToMap = append(fromToMap, map[string]interface{}{ "from": fmt.Sprintf("http.agent.%s", exportedMetric), "to": exportedMetric, diff --git a/internal/pkg/agent/operation/monitoring_test.go b/internal/pkg/agent/operation/monitoring_test.go index 06a9cfbe23b..55b18741dad 100644 --- a/internal/pkg/agent/operation/monitoring_test.go 
+++ b/internal/pkg/agent/operation/monitoring_test.go @@ -37,7 +37,7 @@ import ( func TestExportedMetrics(t *testing.T) { programName := "testing" expectedMetricsName := "metric_name" - program.SupportedMap[programName] = program.Spec{ExprtedMetrics: []string{expectedMetricsName}} + program.SupportedMap[programName] = program.Spec{ExportedMetrics: []string{expectedMetricsName}} exportedMetrics := normalizeHTTPCopyRules(programName) diff --git a/internal/pkg/agent/program/spec.go b/internal/pkg/agent/program/spec.go index 12f860a1e9a..41cf966aaed 100644 --- a/internal/pkg/agent/program/spec.go +++ b/internal/pkg/agent/program/spec.go @@ -8,6 +8,7 @@ import ( "fmt" "io/ioutil" "path/filepath" + "time" "gopkg.in/yaml.v2" @@ -40,7 +41,20 @@ type Spec struct { When string `yaml:"when"` Constraints string `yaml:"constraints"` RestartOnOutputChange bool `yaml:"restart_on_output_change,omitempty"` - ExprtedMetrics []string `yaml:"exported_metrics,omitempty"` + ExportedMetrics []string `yaml:"exported_metrics,omitempty"` + Process *ProcessSettings `yaml:"process,omitempty"` +} + +// ProcessSettings process specific settings +type ProcessSettings struct { + // Allows to override the agent stop timeout settings and specify a different stop timeout for Endpoint service + StopTimeout time.Duration `yaml:"stop_timeout"` +} + +// Service info +type ServiceInfo struct { + Name string `yaml:"name"` + Label string `yaml:"label"` } // ReadSpecs reads all the specs that match the provided globbing path. 
diff --git a/internal/pkg/agent/program/spec_test.go b/internal/pkg/agent/program/spec_test.go index 110dd92eb36..31985d3d6d7 100644 --- a/internal/pkg/agent/program/spec_test.go +++ b/internal/pkg/agent/program/spec_test.go @@ -5,13 +5,16 @@ package program import ( + "fmt" "io/ioutil" "os" "path/filepath" "regexp" "strings" "testing" + "time" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -147,3 +150,44 @@ func TestExport(t *testing.T) { require.NoError(t, err) } } + +func TestSerializationProcessSettings(t *testing.T) { + ymlTmpl := `name: "Foobar" +process: + stop_timeout: %v` + + tests := []struct { + name string + tonum int + to time.Duration + }{ + {"zero", 0, 0}, + {"180ns", 180, 0}, + {"180s", 0, 120 * time.Second}, + {"3m", 0, 3 * time.Minute}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + var ( + yml string + wantTimeout time.Duration + ) + if tc.to == 0 { + yml = fmt.Sprintf(ymlTmpl, tc.tonum) + wantTimeout = time.Duration(tc.tonum) + } else { + yml = fmt.Sprintf(ymlTmpl, tc.to) + wantTimeout = tc.to + } + var spec Spec + err := yaml.Unmarshal([]byte(yml), &spec) + if err != nil { + t.Fatal(err) + } + diff := cmp.Diff(wantTimeout, spec.Process.StopTimeout) + if diff != "" { + t.Fatal(diff) + } + }) + } +} diff --git a/internal/pkg/agent/program/supported.go b/internal/pkg/agent/program/supported.go index ed17610fcb5..3c6a027b92f 100644 --- a/internal/pkg/agent/program/supported.go +++ b/internal/pkg/agent/program/supported.go @@ -27,7 +27,7 @@ func init() { // internal/spec/metricbeat.yml // internal/spec/osquerybeat.yml // internal/spec/packetbeat.yml - unpacked := 
packer.MustUnpack("eJzce1mTo0h39v33M+b2s/2ylGoaR7wXgio2qagWapGQd2SmCiQlSFMCSeDwf3dksggQtXWPx2NfVER3CnI5eZbnPOfwH79t0mz9mob0H8fDGv8jPCT/ely/ntav/1Yk9Ld//w0legZ/7KOFpzpzz6E4hRRHhy0Ci3vL0M9oKZbQtyXoW7PAt4UQwDiQR39LcbmPwHkfWZqVuUvraGl2FoBJDCUvg2AizBMvD4B9hGChENMW4dI6aptpZG1E3dqcIyshW19WKU4citKFYpuZunoUf7ieDVzPfnEFxVyU+8vTg6pY0YFoiXeHDaUghrfzJZES0z4E8tO9pR9nljbdBL6azf36TBvrqFFhhlPvCP2ne7bufKlukaxOfNk9+dLlgOUFH7e0aWQZVIBAuLcMeITAE9px0z09b9QDSlWRmE8zPqZNIyRNXgJJyWFyOVTymZyQPGW/Z5Yhxvhh3z6LDV0IH/YRTC4U+ovreGdvzdh8qRYQiCeSeC+h5E2eo337W/WnvkJ/x+5jG0heiUUlxgblz/7UPKZNK5nSHJ67zwgRTrwMyZD6UkbXP67naf74vBuV3XdOpnv+DkzonS87Ak68GP3YR2tZqGUCD8h0KaaKFICL2Du36VBkeFtiKMWYrOt1hLWv0us7MEamR3HZ21fG9XTR7uVIDK+4nl0tIbjQQHZPOL2R+8261XyKSExVrM53lU3nLjPLoHmYeFuiK3sI9B307fJ5o/7+sjjIoeHlzxv1CMEkJUa0t82sXsdRZsvp/7ceplEAJjvLiGMsZHS9jHZrqV7TFI6WRigy9JIYdIslL8aJs7eLc2TLNoUGLe3izPaQhpKehNJjOtemKTKUFMtujKUonS32//ztX4ZeISebDK3DbOgUfG8X+u4GAl3QEieDD/so6I3pBewY3Bw8bebc8K7PzBPvGPiOEIKnLACEGXa+BuLR2pyrd1Yr/g6WPIH40zwEl6zrKGCiH7G02synPcdSQt8VsTYRIBDPyNAFuJxQlOgbZHi774Apg0OHayCg53wPwMuJ1nnedwsCbtY4IomkIZik8+RCSeIdvwOXBqmXDueFkvOCDa8ImOI/CLu1oHvuI10tdp7q6Yr5QyAPz9vH85MpDNeICXAPxLBp4C/yyoHQdJ6IJ2hyZ5BDMIkDplwPYhKASwmX1sxfHGKcugeY6FvC7iTxYmI+nXp3k9qUX3rqvjDnhGRPwKYn+JKzD8AkZffLnfBi33GK4warbZ6iBbgcrUddhAYV+HpabSBaZYi4UDcQkAPaqHLou3tLI9XZtG/pPNqPGLzzQiQqhLpSQEDo2pzWBtg4IneCjVVlfKZNnzfT1E+q9efF3atdOycseUcIHAHJ1j0zPHZGfN7PrEJVoeGWlkYOKHHp+mEfcUMoxN+hoQuBp5TEtGkAhJz/33coMYVsLbkxMfQXLLsFBHo2TyYxAl6JDX0LfSFlAcAyvDiQoigEkzPxFzmTWwgmf7B9+BLNoeHdNU6GmPTMZM3X7u7LdGVUqDE03BeYUIr4mFogyaFYdphDYk7ohJJVBBOlsAxPqoIo318J/UXtKJQcLtUEyRabexf4btze33LC/89sa67d/Mbvba6pMUoW0VAm3Af4zjkADq3ur3Vq9TPiCzbtEw86ktI61dtASHMseQXRlRimLsVvy+UBSRMJ+rbQ3CMQhby5ByxmhI0T41std7dkv3Od9WMBM1vQlWPo8ztgfqFs9tQE3Oa9Ws8OxPCy543anKcZF1DZeZafvRPATTUmRtTVtbHgvbUeq98rPfZKy4AnvFHVjr0zu01DcBdxO9CmaWW/i5Nd3EWuDylOqRCyQMz0hMluowqhoTP5lO06JpePAMEiCvyniBgxtYzafyzVPAAiu78mqDE55Vi6xMTw3rC/Pkjh97ToBLsbW4zyWbFLu+/yeR/2kf3wOOuAB76XHvAwYIEkoW/r2vUOEFB2BFxoR6aJZTxGrb1q6gEXa
okNbxsCeOAykJScJF7B/FItK64zttTYjcuCMbI5wBOJXdy1e0RAf4WeEuPUjvsAp9WV1p565zW+3VvGdf76t4zJCgFFgp7CfVr3nRrEDOf7zDtt3PrqOq3tL/Yfg0KDgzwuT3bPIVDy9h50m2LfO+BkFQUsLhjOCSXwAAtVQIW6RZJILdPd40QRoRRVOvmop8x/WVqco0I9IsmJkaYmIbhQXOzyL5yliiu6UjDd8CV4QIZX2+lboLID9LY3fqAG0LXN1LIhiX4kwCs7MmFgkus8nHbH+PovIWBYhBSh73aAtxBhmZZMd7Rov7Ue9R00nyJikpNl6DuoK0kIvCMb6/pSXHCg2fj1qMFNIRj1+S0g9KVKxvOE7ubayHjq5IGvlqGhCOO/M5/jHJB0N4hp1e88tiXf7i0zUyp5OydsNnekJjhRshvAXiVMXZwzY3MP/QFM7RPDG33s2PexlZ212GhW45UeqL/iAbu9u/myv35r7zUmanWP6bzP36XVfdUxT1NbIN/4k8rP1sBcYzo/SUJARJzo3FYGMXOQCOxnRIop2rL45+VQdvczzf29f/bmnL+UGPBkACds3ieeDHwpMcB0n5OxxEC6nAgYJAa9sUFi8LcC+QfyeXA8ANrdM0YH7oDqjJM7gnULjoTKSDZ1JpkcmPNhTmIH/aiT6fLAd28ZlxOUmUL3n2/Ajy93gOF2/w7rMA62rBqks8wTSYQ5Sw5EkXHu3WUDaq6gxaZQorllkgMxosiWnCOSvV2VAOC83i9z4gL07ZfGafoyez5+wYmXQj8+dzLuW3DPjDZZ3VsaWXflO9/s048YgaG8KkfJ5ZwwAFElL909WqMswy1r0Hc8Nw4j+ksy+ciWshgmWVz9m+mIw4DP3i52sxtjXafksN+kQ1t1wWSHDeWA0kW0krwt8e0DMXezQBJ3FZhwz0iiOdHEEgJHxAkV1gO9h9c5Mmh4kpZyVu0QSKt76yGQnx+iWQAc7vhYkOC2JFc69MwdoZdDUz2xIKkllxMUlXPgu/tK7gyc2HII7u4tzTr9MOgGJ3qxXip6I5+5cH1/zmXg0jmTR6F09i/8MWdzFxab8xiCiciSPmujnLC5OLngEmPZPQSFol/fUUrC/YpyRBI+dc8520zY2IbpC+FJnSLjch9Zu6d7X78scKKkONEz67ECIL5+affL/92soV8w8zXE8LDP7Fy84NF1EmcPgfPK5Se7MTLO93VyQwNR4UBpmNBYSUcuvkMD2eMAxKqfq1nZNuBZLMAnNFkvrevYRshYILDaYDndYNllwapoxohBMwgUkenCUzmdYUMpic727wgMANZ3fAeBw5LZI/wxSJY0e1zPmn0YVbxoA7Fmt3N39zVfiu2d1M+VxHApTq3OmJXNfe8MZTuGxmowblMsKSJOHIqLjgzekGP/+cl96E83TZIYApEyf/C8mUpPD9MZNm3qy14eggnTqSN62M/mS5WuDW9bgdRVA5S47j9vppuuHuCrbTZrxDghfQCq2SJKWv3YdIHm8B7H5TOy7w/JnyvI8uUBy/oeU1snEN0EcgiqmXxavZhyed3EkltgZ90k3+OJkTpkljOWLBOw4mdCQD8P7anHyJu2iIzeXj9MlnjsTbyil4xsb2XVtcl+FUAYJCe3jDPfhw8PWKInFI0DyWpOdxCHLhQlRAg1Fodq+cnCwXq4i55qEig09HIpeRM2RxPrXpYs9jQkrlNCoBeBFI0AyAMDwQz/xMyOmK9EiSJYbD3ZFlHqHhBY5YFvb0NTiL7/ECJb0gv0IxDsolrfNrOCgAnX0XkCYwToce3Xz/K4GDPcUZ1Hc3/HqZdz37ScZAE4nHBaP1vidLac3sbIlw1dj+BZl/ksUCUHdVzk/pbjnOmhssONinrVqtShxPTO84Qe0SdwaYN9WTKIjUtMjNUYuTuoiIkxSljyJfIEsPO8gFPvFvsaSsrwGiwmR+hDih7EHQS2CIsPK23GcnXRP0FYX+WiTeqkB+cNUYOLE
TnwSp1eEIMmDCM8b1SO73ChdGVcQJ/FdVvwJYYhap3XlW1o6DlkOKO23Y/81dt4+wYD/xWkpl2T5I1fO6AERyjxKCcCjG/M5s7Y8HKOtROlgEs1RukuQrK7h77VkMwsH4lR6iRItrPAdw/M1uYsSdUdCpfqnpjuGZf701zi2FsMNux+hBO84p4Wb85BnfBLtMSyR3HxTXja4vy/2dfHRH7KaowvB767Daf933D51PrBwD+IOFllXI9Td0/ANV7XcyRIZnHbnlx9pXNCqcv8T3sn86W6Q5LzCv3r+9WcixPHKlcyjJLEqzGhe0DRzbgQVBXd3hwEuOfr+hUWrHPA67OSc577qhikjhhc5+V35ksdbNzOEwvEVP/AknIlxozDifh2HoDLrkNY8Pzg+v/LqanazpdqhWkNZRtKXtF7plAKCNwDFpUSGYrM9HXwe7n2Hfb+lSxhvqUvryMB4gb69h8BcF6v74sXYnZIlsQTYHI5keu+Tk9lINZkd5cg3oX+9f4ZBg3B3axLpPX2abpbfNUfEZrXuyFgclhff5NDwDH2m3F/QAh28/pa35Tz2lNiZFxeiKG8IIOW5GF/Q1i3xYpu3mgwf8YJ0mNY+b8cScq5S2xCP95CX2W/ISut/ZlfFVNbn69x/3vGibKFvlMi2S4tKgz3KCCxKoL08UndzdAvMP3s/hk2pzh1X4LES2qyrCJuq3g665H7xhiJ1+CP9qyznynkDPcb+i5lcWA43lvzzHFmFvjTAR4fEpc3+K9TPFl9rlhiiDE2VdrlbKrxywnWOUHXZq/60PFPdTzprmdpA1+13Q/spVmnLhrK7haz/RnO+Y15mP3cW6a3w9P+Xtjac8k9BVLGzhEN/EkzD/c1lQ07L5j5Z5afM53q2XXn/Fcfc2+ZzoS90/FdH2Jt4ju0xQnTz783KHR2/c8b3S1NgeCdwvAXCipvFp//4vznTyT3OziuewbrtviSdvZSxcqKS30713mFPi0Z5oCLfR/bGV4MDY/lUlWempI9BHf3/TznukZbTPjFnKcl0/keyWsA4GuwxEdLIxxXEkMvQw0ftOift4T6C12vs/EOPLfKA6NVk0PWvHqdH2Ztbnnl1jfzpVh1W2lihiSXsnjQ79KrutfaLrjPcNZRv1vqBgd+oItvcbwjBbVu/pt1cuc/Z32jrS98uIemEebdAmRtv1cOuNpnsxdmPyzOvMEvR6BQE2R4lGiTpmsyb+a6Nkm1uhb5bRxp49OJ+C7LBRpOu7ErXnxEo/LhBSfU6kHadENOzkhiucyOFxVH1mrsPH/S2mevfHsd/6HhJYHvHYk53r13y6vf7GOPZEcYdN7dyIl3JI533OWN3jD7ZvqCEv3oy+oJp4uP1i6xdL7pymywxXw7zYc62cWOHVlV+233N8SPHX/X5YM6f2NFy/6f0BSBb96t84UTllu96GCbtxtW3tznOwX4j+Lvx7F02o1Xo00JvzTHcsKLylhW40Ba/dS53onPP7O/a3NR9FNdqAPdmo5xhM2zQ//w1VjJ7eNluYu+b6Zny9BzqP3Z9a2RulW8Dl/Huk+XhhfjtF9kDntjnUD42QLzTxByX2ln/1T36E2QtmnAHI4UUzb+84RYXej04xak1t0G4wFVVHinpi/bFEreXbdFfbzF2qbPG3WNZeZoY8ovtviWz84jijzosnkP2L7fndMxzOZuro72tnW8SdRSj36mOPyWY37bIX9sfH3H3OkoGTW6Xwels+V0az12dEdTTyi5TCzTEbHJAqFbWo/ej1FAmqyz1w0esb4fwBNwQre1NtYfhNQfWUg1VT7+0UfTtnFAhvAxxd3Q6KlLka8eeWn349aQL35YwtJtckAJzhFPic8KNLwNAXg4bxqIyhn69pbN+33p/v5j5a1WO/rwCWo8g75bhMCp042mJ27CoDmnEsbOpb3pDfqyxonHUouC6MoJUaXWNvclkOIYJYRZZqX1aUtbvNGS8ole5frua6rkc+0o79FEAAot/
dq2vLQf7LzbDzag5nq9uR2a5wNKxxn2+P6tKL4QLP53UX2Gd0dMO2Y6weWkK7w/9loqGEDODs3X3+NgfPBx1Wg54q2oNVKC4G1F+l0+K5TGNkt7+v53A//TJeiPqOi6DMT95NttVp07uO2tLpiN+5voH6uHC4/w3zd3r7PlrYyqeXjr172luV00cEv3aj2aqI8chr2bV7osC0BGfUkvcKJPRvW49RMDCF7pSrvnL/Zdd95bfIECHFIKX6cNf7pnu/Mtxy/MMZIefeIMpldCT+mkF38qfdiLm++XEnoxMR+LDW0p+vwVKrIfaxu5vNsf3JNBTAPgUjZndf7R9sABPvgb9Pfuj3/k69diDP7JzoUAr1j3uyJOWNZF6NuTYWfEF7oivg79vtLd+6luCPvnO4DrT+3QUD7vdj8qJfY9itPd7Oe6/Jr3adWl+9nuPtM9IOCdiL+4tx4ez92O5TcSxW5HRPPZ3AC2NZ3KHfVv3IundCBLa3IMrvJOEWRQLsMQcGaPuY6Ehfa3uzN+4XOO3ueDQwaZmXZ9f4OqwngH8ac/Mxh8M/yXdwx/8O3vbYdWv+OKJZSxYmmPiqXh8vkhSEeTxkOId+sxymZl6NtQ8oRe0miyYJPR5uOLNmkscOZWdZMPEkb2zM2z734TwKVSiHrVN/1+D1X/2TcTxfStj25x/8w/T9v8Ij3Srz2/SY2cA+C8whGO8P9Y7/xHX8HPfvvP//dfAQAA//81uZ/e") + unpacked := packer.MustUnpack("eJzcfFmToziX9v33M/r2W16WdHbxRbwXhkwE2Em2cSUC3SHJCbYFdqeNbZiY/z4hsRgwuVRVT887c5ERXTJoOTrnOc9Z6H/7bZ0dV29ZxP5x2K/IP6J9+n8Pq7fT6u3/FSn77f//hlPziL7v4oWvu3PfZSRDjMT7DYaLexuYZ7yUSxQ4CgrsWRg4UgRREqqjv2Wk3MXwvIttwz56S/tgG84xhJMEKf4RwYk0T/08hM4BwYVGLUdGS/tgrKexvZZNe32O7ZRuAlVnJHUZzhaaYx31l0f5u+c70POdV0/SrEW5uzw96Jod76mR+ncEaAUF/jZQZEYtZx+qT/e2eZjZxnQdBvpxHtRnWtsHg0kzkvkHFDzd83XnS32DVX0SqN4pUC57oi7EuG1MYxswCUHp3gbogKAvteOWd3pe63uc6TK1nmZizJjGWJm8hoqWo/Syr+QzOWF1yn8/2kBOyMOufZYAU4oedjFKLwwFi+t4Z2/N2HypFwjKJ5r6r5HiT57jXftb9ae/oWDL72MTKn5JZC0hgIlnf2oey2GVTFmOzt1npJik/hGriAXKka2+X8/T/Il51zq/75xOd+IdlLK7QHUlkvoJ/r6LV6pUywTtseUxwjQlhBe5d27LZRj4Gwq0YkzW9TrSKtDZ9R2UYMtnpOzt6yj0dNHu5UCBX1zPrpcIXlioeieS3cj9Zt1qPk2mli5X57vKpnOXRxuwPEr9DTW1HYLmFgVO+bzWf39d7NUI+PnzWj8gOMkoiHeOdazXcbXZcvq/7YdpHMLJ1gZJQqQjWy3j7Uqp17Skg21QhoFZUsA2RPETkro7pzjHjuowBFjpFGe+hyxSzDRSHrO5Mc0w0DKieglR4my22P3zt/8zRIWcro94FR2HoBD42yjw1giakpG6R/Swi8PemFmgjsHN4dN6Lgzv+sw89Q9h4EoRfDqGkHLDzldQPtjrc/XOy4t4hyi+RINpHsHLsQsUKDUPRHlZz6c9YClR4MnEmEgIymcMTAktJwyn5hoDf/sH5MrgsuEaGJq52AP0c2p0ng+8gsKbNQ5YoVkEJ9k8vTCa+oc/oMfCzM+G8yLFfSXAL0Ku+A/SdiWZvvfIXhZbX/dNzfou0YfnzeP5yZKGayQUensKHBYGi7wCEJbNU/mELAEGOYKTJOTK9SCnIbyUaGnPgsU+IZm3R6m5ofxOUj+h1tOpdzeZw8SlZ94rByes+hKxf
ClQ3F0IJxm/XwHCi10HFMcN1lg/xQt4OdiPpowAk8R6Rm0gRmWIpNDXCNI9XutqFHg726DV2Yxv2TzejRi8+0oVJkWmViBI2cqa1gbYAJE3IeClMj7LYc/raRak1frz4u7NqcGJKP4BQVfCqn3PDY+fkZx3M7vQdQS80jboHqceWz3sYmEIhfw7AqYU+lpJLYeFUMrFvwOXUUs6rhQvocB8JapXIGge5+kkwdAvCTA3KJAy7gBs4CehEscRnJxpsMi53CI4+ZPvI1BYjoB/14AMtdiZy1qs3d2X5am40BMEvFeUMobFmF5gxWVEdTkgcRA64fQlRqlW2MBXKicq9leiYFEDhZajpZ5i1eZzb8PAS9r7W07Ev7ltzY2b38S9zQ09wekiHspEYEDgnkPosur+WlCrn5FfieWchNNRtBZUbx0hy4niF9TUEpR5jLwvlwesTBQUOFJzj1CW8uYeiHykfJyCb7XcvZL/LnQ2SCTCbcHUDlEg7oDjQtnsqXG4zXu1nu0p8I/Pa705TzMu4bLzrDh7x4FbekJB3NW1Mee9sR+r3ys99ksboBNZ63rH3rndZhG8i4UdGNOsst/FySnuYi9AjGRMirgj5nrCZbfWpQiYXD5lu44l5CMhuIjD4CmmIGE2qPFjqechlPn9NU6NyyknyiWhwH/H/vokRdzTouPsbmwxzmfFNuu+K+Z92MXOw+OsQx7EXnrEA6ACK1Lf1o3rHWCobSm8sI5MUxs8xq29GvqeFHpJgL+JINoLGShaTlO/4LhUy0rojKM0duNxZ4wdQfBk6hR37R4xNN+QryUkc5I+wWl1pbWn3nnBt3sbXOevfztyWWGoKcjXBKZ136lJzHC+r7zT+q0fXae1/cXuc1IIBMkT8uT3HEEtb+/BdBgJ/D1JX+KQ+wXgnnCK9qjQJVzoG6zIzLa8HUk1GSlxpZOPZsbxyzaSHBf6AStugg09jeCFkWKb/8BZKr9iagXXjUBBewz82k7fI5Udore5wYGaQNc2U8uGpuaBQr/syISTSaHzaNodE+u/RpBzEVpEgdch3lJMVFZy3THi3cZ+NLfIeoqpRU82MLfI1NII+gc+1sVSUgii2eB63PCmCI5ifksIA6WS8Txl27kxMp65eRjoZQQ0afx3jjnuHit3A59W/S58W/rt3raOWiVv90Ss5o70lKTa8YawVwFTl+fM+NxDPECZc+J8o88d+xhb2VnLjWY1X+mR+isfcNq7my/767f2XnOiVve4zgfiXVbdV+3zDL0l8g2eVDhbE3OD6/wkjSCVSWoKWxn4zEEgsJtRJWF4w/2fnyPV280M7/f+2Ztz/lJgIIIBkvJ5n0Qw8EOBAWG7nI4FBsrlROEgMOiNDQKDfymSv6dfJ8cDot09Y7wXAFRHnAIIVi05kiojWdeRZLrn4MNBYouCuBPpCsd3b4PLCalcofvPN+QnUDvEcLP7IOswTrbsmqTzyBMrlIOlIKIYnHt32ZCaK2lxGFJYblt0T0EcO4p7wKq/rQIAktf75SAuocB5bUAzUPnzyStJ/QwFybkTcd+Se2606cu9bdBVV77z9S77LCMwlFcFlELOKScQVfDS3aM9mmW4zRr0gecGMOK/JZKPHeWYoPSYVP/NdcTlxGfnFNvZjbGuMrrfrbOhrXpwsiVA2+NsEb8o/oYGzp5a21moyNuKTHhnrLCcGnKJoCuTlEmrgd6j6xxHBHzFyERWbR8qL/f2Q6g+P8Sz1oG1suRBI+Iyj/l5QpU7N3ZqA8RHP0eWfuKO07Y8bjMVqFocgDnYsyOCmtyQPB7wcOB9UqRDS0QAy22gFQj4RRWY0T3lAROcnCikO9vy9hjWAZTl7dBSN6/nFWSlpEA724AWPNBBmX/A1vZKdEAiUUsvn9ffToiTWstfz1P3hJdae3dz5SbBcYoCt+R2MC8fC3dj146Qn0t/bc/1sONnmYXQFQ6DO1eBQWple8/CgVxlZKSXE5K1cxh4u0rGnNQ5agTv7
m3DPn0HbE1Ss1gtNbPdm3R9fy50x2NzrkeF1rl36c85n7vg+9QPEZzIPFi219qJWIuTBy8JUb19WGgd2WklFXisHbBCTl39mK0nfGzNZUhFMKyppNzF9vbpPjAvC5JqGUnNo/1YEbfAvLT7Ff/drGFeCMdoCnwScHyUL2R0ndTdIei+CfmpXoLB+b4OClkoa4JgDgNBO+3IJXBZqPqCuNn1c3U2uyUKNidGKUtXS/s6tpaO3IHaLcmYronqcSdfNGO9uy6nMwK0kpp8/67EiXN9x3cIuq+cyKDvgyDTcMbts9kHqPxsS2AMp527u6/5Um7vpH6upMBjJLM7Y/ZxHvhnpDoJAi+DcYcRRZNJ6jJSdGTwjhz7z0/uo2C6boLrCMqM4+jzeqo8PUw5brBA9fMITrhOHfDDbjZf6mwF/E1F7l8agil0/3k9XXf1gFwxrVkjISntE3fDkXHa6se6S9CH9zgun5F9f5o0u5LTQB1kpz/KcNeBVzfwHgYjXD6tXkyFvG588C0htm+SFuMBpT7MyB9tS3AncSYMzfPQnnqVDMuRMejt9dMgU3CW1C96QdzmVlZdm+xXT6RBUHebqRf7CNBe+KB4nIBXc3oD/31hOKVSZHD/XctPlfb2w138VCfPImCWS8Wf8DkajvC65D678Q1uiaBZhEo8Qrz3PHjgvDHhdsSxEqeaZPP1VEfGmfBheRg4m8iS4j++S7GjmAX+HkpOUa3vWMeCwonQ0XmKEgzZYRXUzwo+kXC+Vp3H8H4nmZ8LbFpOjiHcn0hWP1uSbLac3nKL1zVbjcQBHscsWAVVNZ8QeCv44XRf2eFax70qX+YyavnnecoO+At8vokZeBBNwCWh4GUsKT6oJMoJTnnQKovAufO8RDL/NmYAWsZ5LiomBxQghh/kLYKOjIpPK5Rg+XIxv5Dov8rFmNTBIsmbBBcpRuQgKpxmQQFLOUd4XuuCF5NC68q4QAH3644UKJxD1DpvapsImDni/Ky23c/w6v045SZ2+DuSwU5dXGhwbY9TEuPUZyKBAr5xmzsT4OciRkm1Ai31BGfbGKvejnPOOjnP47gEZ26KVecYBt6e29qcB/emy9BS31HLO5Nyd5orImaRwzW/H+mErrznyvVgnShRWElUn5Him/S0Ifl/MtYnVH061rGRGgbeJpr2fyPlU4uDYbCXSfpyFHqceTsKr/66niPFKvfbzuSKle4JZx7Hn/ZO5kt9ixX3DQXX96s5FyfBVa5JREZTv+aE3h7HN+NSWFXCe3NQ6J2v61dcsI6dr88q7nke6HKYuXJ4nVfcWaB0uHE7j+DrfxJFuyYUwf5EAycP4WXbSfSIuOr678upqXbPl3rFaYG2iRS/6D1TaAWC3p7IWomBpnJ9HfxergKXv39NMnFs6cvrQKG8RoHzZwjdt+v78oVaneRU6ksovZzodV+npzKU6yJBN7G+jYLr/XMOGsG7WTcB2dun5W3IVX9kZF3vhsLJfnX9TY2g4Njv+v1BIrWbD6n1TTuvfC3B4PJKgfaKASvpw+4m0d8WebrxNuB4JhLLh6jCvxwr2rmbEEZBskGBzn/DdlbjWVAVoVvMNwT+nkmqbVDgllh1SptJwz1KWK6KR31+UneB9AtzP7t/zs0ZybzXMPXTOslYJbwrfzrrFUXAWPKz4R/tWWc/UwAb7jcKPMb9wHC8t+ZZ8MxjGEwHfHyY8L3hf52i08vXikxAToils26uqxq/nFAdE3Rt9qoPHXyq/Ul3PdsYYNVmN7CXZp262Kp6G8L3B9zzO/Nw+7m3LX9Lpv298LXnincKlSM/RzzAk2YegTWVDbuvhOMzj8+5TvXsunP+K8bc25Y74e90sOtTrk0Dl7U8Yfr19wYF4i7+vNMV1BRWPiio/0Ah6t2i/d8c//yFRZEOj+uewb4tWmWdvVS+sspBvx/rvKGAlZxzoMWuz+2AnyDg81iqilMzukPw7r4f51zXaIswvxjztEUIsUf6FkL0Fi7Jw
Tao4JUUmGVkkL0R//O2EPHKVqvjeOeiV8WB8UsTQ9b1iDo+PLax5bUmsZ4v5apLzZCPWPEY9wf97saq66/tHvxKrj/ud5nd8MBPdPG93PhIIbIb/x47sfNfsz5o6zKf7qFpIPqwcFvb7zV3Xu2z2Qu3H+5n3snLx7DQUwx8Ro1J022aN3Ndm8taXYuD1o+0/ulEA4/HAk0toLErUbTFo/IRhTrc6kHWdJFOzljhscxWFGNH1mrsPH8y2mevdYra/yPgp2HgH6g13vV4W4+42ccOq6406Fi8kZPo5BzvVMwbveH2zfUFp+YhUPUTyRafrV0S5XzTzdpwi/lmmg91sssdO7Kq9tvub8gfO3jXzQd1/saKvf0/qSme37xbxwsnorZ60eE27zf6vLvPDxoXPvO/n/vSaddfjTZz/NIcy4koxhNVT0Ll5afO9YF//pn9XZuy4p/q3h3o1nQsR9g8O8SHH/WVwj5el9v4j/X0bAMzR8ZfXRccqfclq+htrGt3CfyEZP3ifNQb6zjCrxbmfyIh9yOfAXyp6/bGSTss5ICjJIyP/3xCrC4QB0lLUusujXGHKmuigBmoDkOKf9dt7R9vTXfY81pfEZUDbcLExRbf8tl5RJEH3UkfEduPu5o6htnczRVob1vum0At89lXiurvAfP7gPy58fWBudOJM2p0v05KZ8vpxn7s6I6hn3B6mdiWKxOLO0KvtB/976OENF0d39ZkxPq+Q18iKdvU2lh/SFN/nKLUqfLxj2Wadpc9BtLnKe4mjZ55DAf6QZR2P2+p+cEPcni4Tfc4JTkWIfFZQ8BfU0iG82ahrJ1R4Gz4vH8svd+/v/gvL1v28IXU+BEFXhFBtw43ml7CCafmIpUwdi7jXTToy5qkPg8tCmpqJ8y0Wtu811BJEpxSbpmV1mdt2uKdVp4v9HjXd1+nSr7WxvNRmqhqn2hCxrpVqP3Q6cM+ukFqrtfT3EnzfJLScYe90f9SKb4ILv57pfqAf0ctJ+E6IeRkaqKv+FoqGFDOTpqvv8fB+OCjtNFyxHtea6QEIdqxzLt8VmiNbZbO9OPvLf6rS9CfpaLrMpDAyffb0zp3cNuTXnAbD9bxP14eLsLD/7G+e5stb2VUzSNa5u5tw+uygdt0r9FLE/WZw7Dn9ZouO4bwyALFLEhqTkb1uMWJAQWvdKXd8w/2q3feW/xACnCYUvjxtOFP97p3voH5hTlGwqMvnMHyS+RrnfDiL00f9vzmx6WEnk/Mx3xDW4o+/0gqsu9rG7l82Ffdk0HCQugxPmd1/tG2ygE/+Bfoi94d/sxXb8UY/VPdC4V+sep3RZyIasoocCbDzogf6Ir4cer3I13RX+qGcH6+c7r+RBEP5fNh96NWksBnJNvOfq7Lr3mfVd3NX+3uE12j/okGi3v74fHc7fR+J1DsdkQ0nxsOaFvT4d1R/wZefK1DWVqT43RVdIpgwIQMIygyexw6Uu7a3+/O+IXPYHqfXQ4zyNy06/sbVBXGO6+//HnG4Fvrv73T+pNvpm87tPodVzygTDTbeNRsg5TPD2E2GjTuI7JdjaVsXoC5iRRf6gWNFnc2R9Z8tNIGjQU5elXd5JOAkT9z8+yH31IIqRSyWfWbf9xD1X/23UAxe+9jZdI/88+nbX4xPdKvPb+bGjmH0H1DIznC/2HfHHz2fw+Y/fbv/+s/AgAA//8EnQoK") SupportedMap = make(map[string]Spec) for f, v := range unpacked { diff --git a/internal/pkg/core/plugin/service/app.go b/internal/pkg/core/plugin/service/app.go index 312ac8b592e..af553ac5ac8 100644 --- a/internal/pkg/core/plugin/service/app.go +++ b/internal/pkg/core/plugin/service/app.go @@ 
-165,8 +165,8 @@ func (a *Application) Start(ctx context.Context, _ app.Taggable, cfg map[string] // already started if a.srvState != nil { a.setState(state.Starting, "Starting", nil) - a.srvState.SetStatus(proto.StateObserved_STARTING, a.state.Message, a.state.Payload) - a.srvState.UpdateConfig(a.srvState.Config()) + _ = a.srvState.SetStatus(proto.StateObserved_STARTING, a.state.Message, a.state.Payload) + _ = a.srvState.UpdateConfig(a.srvState.Config()) } else { a.setState(state.Starting, "Starting", nil) a.srvState, err = a.srv.Register(a, string(cfgStr)) @@ -247,6 +247,13 @@ func (a *Application) Configure(ctx context.Context, config map[string]interface return err } +func (a *Application) getStopTimeout() time.Duration { + if a.desc.Spec().Process != nil && a.desc.Spec().Process.StopTimeout > 0 { + return a.desc.Spec().Process.StopTimeout + } + return a.processConfig.StopTimeout +} + // Stop stops the current application. func (a *Application) Stop() { a.appLock.Lock() @@ -257,21 +264,36 @@ func (a *Application) Stop() { return } - if err := srvState.Stop(a.processConfig.StopTimeout); err != nil { - a.appLock.Lock() - a.setState( - state.Failed, - fmt.Errorf("failed to stop after %s: %w", a.processConfig.StopTimeout, err).Error(), - nil) - } else { - a.appLock.Lock() - a.setState(state.Stopped, "Stopped", nil) + name := a.desc.Spec().Name + to := a.getStopTimeout() + + a.logger.Infof("Stop %v service, with %v timeout", name, to) + start := time.Now() + + // Try to stop the service with timeout + // If timed out and the service is still not stopped the runtime is set to STOPPED state anyways. + // This avoids leaving the runtime indefinitely in the failed state. + // + // The Agent is not managing the Endpoint service state by design. + // The service runtime should send STOPPING state to the Endpoint service only before the Endpoint is expected to be uninstalled. 
+ // So if the Agent never receives the STOPPING check-in from the Endpoint after this, it's ok to set the state + // to STOPPED following with the Endpoint service uninstall. + if err := srvState.Stop(to); err != nil { + // Log the error + a.logger.Errorf("Failed to stop %v service after %v timeout", name, to) } - a.srvState = nil + // Cleanup + a.appLock.Lock() + defer a.appLock.Unlock() + + a.srvState = nil a.cleanUp() a.stopCredsListener() - a.appLock.Unlock() + + // Set the service state to "stopped", otherwise the agent is stuck in the failed stop state until restarted + a.logger.Infof("setting %s service status to Stopped, took: %v", name, time.Since(start)) + a.setState(state.Stopped, "Stopped", nil) } // Shutdown disconnects the service, but doesn't signal it to stop. @@ -327,7 +349,7 @@ func (a *Application) setState(s state.Status, msg string, payload map[string]in } func (a *Application) cleanUp() { - a.monitor.Cleanup(a.desc.Spec(), a.pipelineID) + _ = a.monitor.Cleanup(a.desc.Spec(), a.pipelineID) } func (a *Application) startCredsListener() error { diff --git a/internal/spec/endpoint.yml b/internal/spec/endpoint.yml index 5f452ba1943..e3e8c4fbe3e 100644 --- a/internal/spec/endpoint.yml +++ b/internal/spec/endpoint.yml @@ -2,6 +2,11 @@ name: Endpoint Security cmd: endpoint-security artifact: endpoint-dev service: 6788 +process: + # After discussion with Endpoint team the stop timeout is set to 3m, + # in order to give enough time for the Endpoint to stop gracefully. 
+ # https://github.com/elastic/elastic-agent/issues/1262 + stop_timeout: 3m action_input_types: - endpoint log_paths: From 66b98deb35636715311de9bbf8339ba53ae442ce Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Tue, 25 Oct 2022 01:38:21 -0400 Subject: [PATCH 48/63] [Automation] Update elastic stack version to 8.6.0-b8b35931 for testing (#1602) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 7f5971c7a24..1b5b91c18dd 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-22d60ec9-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-b8b35931-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-22d60ec9-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-b8b35931-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From 069d3865d9a300a722eed225bd8783f036ab944c Mon Sep 17 00:00:00 2001 From: Julien Lind Date: Wed, 26 Oct 2022 10:15:40 +0200 Subject: [PATCH 49/63] Create stale config (#1607) Purpose of this is to frequently clean up elastic agent repo --- .github/stale.yml | 91 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 .github/stale.yml diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 00000000000..bb6e00e40a3 --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,91 @@ +# Configuration for probot-stale - https://github.com/probot/stale + +# Number of days of inactivity before an 
Issue or Pull Request becomes stale +daysUntilStale: 90 + +# Number of days of inactivity before an Issue or Pull Request with the stale label is closed. +# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. +daysUntilClose: 90 + +# Only issues or pull requests with all of these labels are checked for staleness. Defaults to `[]` (disabled) +onlyLabels: [] + +# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable +exemptLabels: + - flaky-test + +# Set to true to ignore issues in a project (defaults to false) +exemptProjects: false + +# Set to true to ignore issues in a milestone (defaults to false) +exemptMilestones: true + +# Set to true to ignore issues with an assignee (defaults to false) +exemptAssignees: true + +# Label to use when marking as stale +staleLabel: Stalled + +# Comment to post when marking as stale. Set to `false` to disable +markComment: > + Hi! + + We just realized that we haven't looked into this issue in a while. We're + sorry! + + + We're labeling this issue as `Stalled` to make it hit our filters and + make sure we get back to it as soon as possible. In the meantime, it'd + be extremely helpful if you could take a look at it as well and confirm its + relevance. A simple comment with a nice emoji will be enough `:+1:`. + + Thank you for your contribution! + +# Comment to post when removing the stale label. +# unmarkComment: > +# Your comment here. + +# Comment to post when closing a stale Issue or Pull Request. +# closeComment: > +# Your comment here. + +# Limit the number of actions per hour, from 1-30. Default is 30 +limitPerRun: 30 + +# Limit to only `issues` or `pulls` +# only: issues + +# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': +pulls: + daysUntilStale: 60 + daysUntilClose: 30 + markComment: > + Hi! + + We just realized that we haven't looked into this PR in a while. We're + sorry! 
+ + + We're labeling this PR as `Stalled` to make it hit our filters and + make sure we get back to it as soon as possible. In the meantime, it'd + be extremely helpful if you could take a look at it as well and confirm its + relevance. A simple comment with a nice emoji will be enough `:+1:`. + + Thank you for your contribution! + + closeComment: > + Hi! + + This PR has been stale for a while and we're going to close it as part of + our cleanup procedure. + + We appreciate your contribution and would like to apologize if we have not + been able to review it, due to the current heavy load of the team. + + Feel free to re-open this PR if you think it should stay open and is worth rebasing. + + Thank you for your contribution! + +# issues: +# exemptLabels: +# - confirmed From 4af7f131f66c8ac3c7db1acbab15a567227371df Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Thu, 27 Oct 2022 01:37:25 -0400 Subject: [PATCH 50/63] [Automation] Update elastic stack version to 8.6.0-a892f234 for testing (#1621) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 1b5b91c18dd..2affb05175a 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-b8b35931-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-a892f234-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-b8b35931-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-a892f234-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - 
"ELASTICSEARCH_PASSWORD=testing" From 72a4f61e3db3c098c1bba70f82cdc70d81769cac Mon Sep 17 00:00:00 2001 From: Anderson Queiroz Date: Thu, 27 Oct 2022 16:39:01 +0200 Subject: [PATCH 51/63] Improve shutdown logs (#1618) --- NOTICE.txt | 4 +-- .../1666789812-Improve-shutdown-logs.yaml | 31 +++++++++++++++++++ go.mod | 2 +- go.sum | 4 +-- .../handlers/handler_action_settings.go | 1 + .../pkg/agent/application/upgrade/upgrade.go | 8 +++-- internal/pkg/agent/cmd/common.go | 2 +- internal/pkg/agent/cmd/run.go | 18 +++++++++-- 8 files changed, 59 insertions(+), 11 deletions(-) create mode 100644 changelog/fragments/1666789812-Improve-shutdown-logs.yaml diff --git a/NOTICE.txt b/NOTICE.txt index 79a332f3d45..db830946fca 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1061,11 +1061,11 @@ SOFTWARE -------------------------------------------------------------------------------- Dependency : github.com/elastic/elastic-agent-libs -Version: v0.2.6 +Version: v0.2.14 Licence type (autodetected): Apache-2.0 -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/elastic/elastic-agent-libs@v0.2.6/LICENSE: +Contents of probable licence file $GOMODCACHE/github.com/elastic/elastic-agent-libs@v0.2.14/LICENSE: Apache License Version 2.0, January 2004 diff --git a/changelog/fragments/1666789812-Improve-shutdown-logs.yaml b/changelog/fragments/1666789812-Improve-shutdown-logs.yaml new file mode 100644 index 00000000000..091e2570ae6 --- /dev/null +++ b/changelog/fragments/1666789812-Improve-shutdown-logs.yaml @@ -0,0 +1,31 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given 
version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: feature + +# Change summary; a 80ish characters long description of the change. +summary: Improve shutdown logs + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +#description: + +# Affected component; a word indicating the component this changeset affects. +component: cmd, handler, upgrade + +# PR number; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: 1618 + +# Issue number; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. 
+issue: 1358 diff --git a/go.mod b/go.mod index 2557e2109d4..92b80378a21 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/elastic/e2e-testing v1.99.2-0.20220117192005-d3365c99b9c4 github.com/elastic/elastic-agent-autodiscover v0.2.1 github.com/elastic/elastic-agent-client/v7 v7.0.0-20210727140539-f0905d9377f6 - github.com/elastic/elastic-agent-libs v0.2.6 + github.com/elastic/elastic-agent-libs v0.2.14 github.com/elastic/elastic-agent-system-metrics v0.3.0 github.com/elastic/go-licenser v0.4.0 github.com/elastic/go-sysinfo v1.7.1 diff --git a/go.sum b/go.sum index 0728fa89909..c8a542d6c79 100644 --- a/go.sum +++ b/go.sum @@ -384,8 +384,8 @@ github.com/elastic/elastic-agent-autodiscover v0.2.1/go.mod h1:gPnzzfdYNdgznAb+i github.com/elastic/elastic-agent-client/v7 v7.0.0-20210727140539-f0905d9377f6 h1:nFvXHBjYK3e9+xF0WKDeAKK4aOO51uC28s+L9rBmilo= github.com/elastic/elastic-agent-client/v7 v7.0.0-20210727140539-f0905d9377f6/go.mod h1:uh/Gj9a0XEbYoM4NYz4LvaBVARz3QXLmlNjsrKY9fTc= github.com/elastic/elastic-agent-libs v0.2.5/go.mod h1:chO3rtcLyGlKi9S0iGVZhYCzDfdDsAQYBc+ui588AFE= -github.com/elastic/elastic-agent-libs v0.2.6 h1:DpcUcCVYZ7lNtHLUlyT1u/GtGAh49wpL15DTH7+8O5o= -github.com/elastic/elastic-agent-libs v0.2.6/go.mod h1:chO3rtcLyGlKi9S0iGVZhYCzDfdDsAQYBc+ui588AFE= +github.com/elastic/elastic-agent-libs v0.2.14 h1:o1agY/37TKl5kjhv3ur5M9d127wzQPRxwA4Xoh0jUEo= +github.com/elastic/elastic-agent-libs v0.2.14/go.mod h1:0J9lzJh+BjttIiVjYDLncKYCEWUUHiiqnuI64y6C6ss= github.com/elastic/elastic-agent-system-metrics v0.3.0 h1:W8L0E8lWJmdguH+oIR7OzuFgopvw8ucZAE9w6iqVlpE= github.com/elastic/elastic-agent-system-metrics v0.3.0/go.mod h1:RIYhJOS7mUeyIthfOSqmmbEILYSzaDWLi5zQ70bQo+o= github.com/elastic/elastic-package v0.32.1/go.mod h1:l1fEnF52XRBL6a5h6uAemtdViz2bjtjUtgdQcuRhEAY= diff --git a/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_settings.go b/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_settings.go 
index 5418a0f3eb6..300d153760e 100644 --- a/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_settings.go +++ b/internal/pkg/agent/application/pipeline/actions/handlers/handler_action_settings.go @@ -62,6 +62,7 @@ func (h *Settings) Handle(ctx context.Context, a fleetapi.Action, acker store.Fl h.log.Errorf("failed to commit acker after acknowledging action with id '%s'", action.ActionID) } + h.log.Info("SETTINGS action done, triggering agent restart") h.reexec.ReExec(nil) return nil } diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index d8c55e17806..e6ab611d19f 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -199,8 +199,12 @@ func (u *Upgrader) Upgrade(ctx context.Context, a Action, reexecNow bool) (_ ree if err != nil { u.log.Errorw("Unable to clean downloads after update", "error.message", err, "downloads.path", paths.Downloads()) } - u.log.Infow("Restarting after upgrade", "new_version", release.Version(), "prev_version", a.Version(), - "hash", trimmedNewHash, "home", paths.Home()) + + u.log.Infow("Restarting after upgrade", + "new_version", release.Version(), + "prev_version", a.Version(), + "hash", trimmedNewHash, + "home", paths.Home()) u.reexec.ReExec(cb) return nil, nil } diff --git a/internal/pkg/agent/cmd/common.go b/internal/pkg/agent/cmd/common.go index 35aef4d4339..1e349774e83 100644 --- a/internal/pkg/agent/cmd/common.go +++ b/internal/pkg/agent/cmd/common.go @@ -68,7 +68,7 @@ func NewCommandWithArgs(args []string, streams *cli.IOStreams) *cobra.Command { cmd.AddCommand(newStatusCommand(args, streams)) cmd.AddCommand(newDiagnosticsCommand(args, streams)) - // windows special hidden sub-command (only added on windows) + // windows special hidden sub-command (only added on Windows) reexec := newReExecWindowsCommand(args, streams) if reexec != nil { cmd.AddCommand(reexec) diff --git 
a/internal/pkg/agent/cmd/run.go b/internal/pkg/agent/cmd/run.go index 732831d87d8..101e1403f03 100644 --- a/internal/pkg/agent/cmd/run.go +++ b/internal/pkg/agent/cmd/run.go @@ -63,7 +63,14 @@ func newRunCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command { Short: "Start the elastic-agent.", Run: func(_ *cobra.Command, _ []string) { if err := run(nil); err != nil { + logp.NewLogger("cmd_run"). + Errorw("run command finished with error", + "error.message", err) fmt.Fprintf(streams.Err, "Error: %v\n%s\n", err, troubleshootMessage()) + + // TODO: remove it. os.Exit will be called on main and if it's called + // too early some goroutines with deferred functions related + // to the shutdown process might not run. os.Exit(1) } }, @@ -237,13 +244,16 @@ func run(override cfgOverrider) error { breakout := false select { case <-stop: + logger.Info("service.HandleSignals invoked stop function. Shutting down") breakout = true case <-rex.ShutdownChan(): + logger.Info("reexec Shutdown channel triggered") reexecing = true breakout = true case sig := <-signals: + logger.Infof("signal %q received", sig) if sig == syscall.SIGHUP { - rexLogger.Infof("SIGHUP triggered re-exec") + logger.Infof("signals syscall.SIGHUP received, triggering agent restart") rex.ReExec(nil) } else { breakout = true @@ -252,6 +262,8 @@ func run(override cfgOverrider) error { if breakout { if !reexecing { logger.Info("Shutting down Elastic Agent and sending last events...") + } else { + logger.Info("Restarting Elastic Agent") } break } @@ -407,7 +419,7 @@ func tryDelayEnroll(ctx context.Context, logger *logger.Logger, cfg *configurati enrollPath := paths.AgentEnrollFile() if _, err := os.Stat(enrollPath); err != nil { // no enrollment file exists or failed to stat it; nothing to do - return cfg, nil //nolint:nilerr // there is nothing to do + return cfg, nil } contents, err := ioutil.ReadFile(enrollPath) if err != nil { @@ -461,7 +473,7 @@ func initTracer(agentName, version string, mcfg 
*monitoringCfg.MonitoringConfig) cfg := mcfg.APM - // nolint:godox // the TODO is intentional + //nolint:godox // the TODO is intentional // TODO(stn): Ideally, we'd use apmtransport.NewHTTPTransportOptions() // but it doesn't exist today. Update this code once we have something // available via the APM Go agent. From cecb1cada28b849d44e47718d38011ced1c541b0 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Fri, 28 Oct 2022 01:35:55 -0400 Subject: [PATCH 52/63] [Automation] Update elastic stack version to 8.6.0-89d224d2 for testing (#1633) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 2affb05175a..45a12fe3f1e 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-a892f234-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-89d224d2-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-a892f234-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-89d224d2-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From 5cf5daf5741f7c966d6147a8be89be472114c3fc Mon Sep 17 00:00:00 2001 From: Kyle Pollich Date: Fri, 28 Oct 2022 09:29:40 -0400 Subject: [PATCH 53/63] [Automation] Add GH action to add issues to ingest board (#1629) * Add GH action to add issues to ingest board Issues labeled w/ the control plane or data plane labels in this repository will be added to the ingest project w/ `Area: Elastic Agent` set * Support Elastic Agent label * Update 
add-issues-to-ingest-board.yml * Update add-issues-to-ingest-board.yml * Update add-issues-to-ingest-board.yml --- .../workflows/add-issues-to-ingest-board.yml | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 .github/workflows/add-issues-to-ingest-board.yml diff --git a/.github/workflows/add-issues-to-ingest-board.yml b/.github/workflows/add-issues-to-ingest-board.yml new file mode 100644 index 00000000000..499fa012efc --- /dev/null +++ b/.github/workflows/add-issues-to-ingest-board.yml @@ -0,0 +1,53 @@ +name: Add issue to Platform Ingest project + +on: + issues: + types: + - labeled + +env: + INGEST_PROJECT_ID: 'PVT_kwDOAGc3Zs4AEzn4' + DATA_PLANE_LABEL: 'Team:Elastic-Agent-Data-Plane' + CONTROL_PLANE_LABEL: 'Team:Elastic-Agent-Control-Plane' + ELASTIC_AGENT_LABEL: 'Team:Elastic-Agent' + AREA_FIELD_ID: 'PVTSSF_lADOAGc3Zs4AEzn4zgEgZSo' + ELASTIC_AGENT_OPTION_ID: 'c1e1a30a' + +jobs: + add_to_ingest_project: + runs-on: ubuntu-latest + steps: + - uses: octokit/graphql-action@v2.x + id: add_to_project + if: github.event.label.name == env.DATA_PLANE_LABEL || github.event.label.name == env.CONTROL_PLANE_LABEL || github.event.label.name == env.ELASTIC_AGENT_LABEL + with: + query: | + # Variables have to be snake cased because of https://github.com/octokit/graphql-action/issues/164 + mutation AddToIngestProject($project_id: ID!, $content_id: ID!) 
{ + addProjectV2ItemById(input: { projectId: $project_id, contentId: $content_id }) { + item { + id + } + } + } + project_id: ${{ env.INGEST_PROJECT_ID }} + content_id: ${{ github.event.issue.node_id }} + env: + GITHUB_TOKEN: ${{ secrets.PROJECT_ASSIGNER_TOKEN }} + - uses: octokit/graphql-action@v2.x + id: set_elastic_agent_area + if: github.event.label.name == env.DATA_PLANE_LABEL || github.event.label.name == env.CONTROL_PLANE_LABEL || github.event.label.name == env.ELASTIC_AGENT_LABEL + with: + query: | + mutation updateIngestArea($item_id: ID!, $project_id: ID!, $area_field_id: ID!, $area_id: String) { + updateProjectV2ItemFieldValue( + input: { itemId: $item_id, projectId: $project_id, fieldId: $area_field_id, value: { singleSelectOptionId: $area_id } }) { + clientMutationId + } + } + item_id: ${{ fromJSON(steps.add_to_project.outputs.data).addProjectV2ItemById.item.id }} + project_id: ${{ env.INGEST_PROJECT_ID }} + area_field_id: ${{ env.AREA_FIELD_ID }} + area_id: ${{ env.ELASTIC_AGENT_OPTION_ID }} + env: + GITHUB_TOKEN: ${{ secrets.PROJECT_ASSIGNER_TOKEN }} From 275c1139b966d28b342ecd9c213de4e9d8add124 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Mon, 31 Oct 2022 01:36:53 -0400 Subject: [PATCH 54/63] [Automation] Update elastic stack version to 8.6.0-949a38d2 for testing (#1647) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 45a12fe3f1e..ecee07c0330 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-89d224d2-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-949a38d2-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 
# healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-89d224d2-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-949a38d2-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From 3abaece78c8baf0c0618f2f4e65d9a5361b02e26 Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Mon, 31 Oct 2022 16:18:30 -0500 Subject: [PATCH 55/63] Update node version to 18.12.0 on complete image (#1657) This puts us on to the latest LTS release of node. Test by using a private location with Uptime to run a browser monitor. --- .../1667243040-Upgrade-node-to-18.12.0.yaml | 31 +++++++++++++++++++ .../docker/Dockerfile.elastic-agent.tmpl | 2 +- .../templates/docker/Dockerfile.tmpl | 2 +- 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 changelog/fragments/1667243040-Upgrade-node-to-18.12.0.yaml diff --git a/changelog/fragments/1667243040-Upgrade-node-to-18.12.0.yaml b/changelog/fragments/1667243040-Upgrade-node-to-18.12.0.yaml new file mode 100644 index 00000000000..a670ebab9cb --- /dev/null +++ b/changelog/fragments/1667243040-Upgrade-node-to-18.12.0.yaml @@ -0,0 +1,31 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: feature + +# Change summary; a 80ish characters long description of the change. 
+summary: Upgrade node to 18.12.0 + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +#description: + +# Affected component; a word indicating the component this changeset affects. +component: + +# PR number; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +#pr: 1234 + +# Issue number; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +#issue: 1234 diff --git a/dev-tools/packaging/templates/docker/Dockerfile.elastic-agent.tmpl b/dev-tools/packaging/templates/docker/Dockerfile.elastic-agent.tmpl index 760d5e9949a..53bbe760223 100644 --- a/dev-tools/packaging/templates/docker/Dockerfile.elastic-agent.tmpl +++ b/dev-tools/packaging/templates/docker/Dockerfile.elastic-agent.tmpl @@ -162,7 +162,7 @@ RUN echo \ # Setup synthetics env vars ENV ELASTIC_SYNTHETICS_CAPABLE=true -ENV NODE_VERSION=16.15.0 +ENV NODE_VERSION=18.12.0 ENV PATH="$NODE_PATH/node/bin:$PATH" # Install the latest version of @elastic/synthetics forcefully ignoring the previously # cached node_modules, heartbeat then calls the global executable to run test suites diff --git a/dev-tools/packaging/templates/docker/Dockerfile.tmpl b/dev-tools/packaging/templates/docker/Dockerfile.tmpl index d2edf7909cb..17dab4b5223 100644 --- a/dev-tools/packaging/templates/docker/Dockerfile.tmpl +++ b/dev-tools/packaging/templates/docker/Dockerfile.tmpl @@ -160,7 +160,7 @@ USER {{ .user }} # Setup synthetics env vars ENV ELASTIC_SYNTHETICS_CAPABLE=true ENV SUITES_DIR={{ $beatHome }}/suites -ENV 
NODE_VERSION=16.15.0 +ENV NODE_VERSION=18.12.0 ENV PATH="$NODE_PATH/node/bin:$PATH" # Install the latest version of @elastic/synthetics forcefully ignoring the previously # cached node_modules, heartbeat then calls the global executable to run test suites From 188688f3e52ebed2f914f3481a313a5d870cd647 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Tue, 1 Nov 2022 01:38:24 -0400 Subject: [PATCH 56/63] [Automation] Update elastic stack version to 8.6.0-26dc1164 for testing (#1660) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index ecee07c0330..18f365e5661 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-949a38d2-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-26dc1164-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-949a38d2-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-26dc1164-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From c031be509d001f5be8992475ff762bd8612e3b0b Mon Sep 17 00:00:00 2001 From: Craig MacKenzie Date: Tue, 1 Nov 2022 13:10:50 -0400 Subject: [PATCH 57/63] Generate the consolidated 8.5.0 changelog. (#1642) (#1670) * Consolidate the 8.5.0 changelog fragments. Generate a summarized YAML file of all the changes in 8.5.0 for future reference. The rendered changelog lives in the observability-docs repository currently. * Improve README documentation for the changelog. 
--- README.md | 19 ++- changelog/8.5.0.yaml | 129 ++++++++++++++++++ ...ion-when-installing-the-Elastic-Agent.yaml | 3 - ...SHA-1-are-now-rejected-See-the-Go-118.yaml | 3 - ...rjack-input-type-to-the-Filebeat-spec.yaml | 3 - ...-autodiscovery-in-kubernetes-provider.yaml | 3 - ...ource-URI-when-downloading-components.yaml | 3 - ...nly-events-so-that-degraded-fleet-che.yaml | 4 - ...30732-Improve-logging-during-upgrades.yaml | 3 - ...ssage-after-previous-checkin-failures.yaml | 3 - ...989867-fix-docker-provider-processors.yaml | 31 ----- ...5517984-improve-checkin-error-logging.yaml | 5 - ...permission-check-on-localized-windows.yaml | 31 ----- 13 files changed, 147 insertions(+), 93 deletions(-) create mode 100644 changelog/8.5.0.yaml delete mode 100644 changelog/fragments/1660139385-Fix-a-panic-caused-by-a-race-condition-when-installing-the-Elastic-Agent.yaml delete mode 100644 changelog/fragments/1660158319-Upgrade-to-Go-118-Certificates-signed-with-SHA-1-are-now-rejected-See-the-Go-118.yaml delete mode 100644 changelog/fragments/1661188787-Add-lumberjack-input-type-to-the-Filebeat-spec.yaml delete mode 100644 changelog/fragments/1663143487-Add-support-for-hints-based-autodiscovery-in-kubernetes-provider.yaml delete mode 100644 changelog/fragments/1664177394-Fix-unintended-reset-of-source-URI-when-downloading-components.yaml delete mode 100644 changelog/fragments/1664212969-Create-separate-status-reporter-for-local-only-events-so-that-degraded-fleet-che.yaml delete mode 100644 changelog/fragments/1664230732-Improve-logging-during-upgrades.yaml delete mode 100644 changelog/fragments/1664360554-Add-success-log-message-after-previous-checkin-failures.yaml delete mode 100644 changelog/fragments/1664989867-fix-docker-provider-processors.yaml delete mode 100644 changelog/fragments/1665517984-improve-checkin-error-logging.yaml delete mode 100644 changelog/fragments/1666088774-Fix-admin-permission-check-on-localized-windows.yaml diff --git a/README.md b/README.md index 
5b1f5c01b04..bde0369cc69 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,30 @@ The source files for the general Elastic Agent documentation are currently stored in the [observability-docs](https://github.com/elastic/observability-docs) repo. The following docs are only focused on getting developers started building code for Elastic Agent. +### Changelog + +The changelog for the Elastic Agent is generated and maintained using the [elastic-agent-changelog-tool](https://github.com/elastic/elastic-agent-changelog-tool). Read the [installation](https://github.com/elastic/elastic-agent-changelog-tool/blob/main/docs/install.md) +and [usage](https://github.com/elastic/elastic-agent-changelog-tool/blob/main/docs/usage.md#im-a-developer) instructions to get started. + +The changelog tool produces fragment files that are consolidated to generate a changelog for each release. Each PR containing a change with user +impact (new feature, bug fix, etc.) must contain a changelog fragment describing the change. There is a GitHub action in CI that will fail +if a PR does not contain a changelog fragment. For PRs that should not have a changelog entry, use the "skip-changelog" label to bypass +this check. + +A simple example of a changelog fragment is below for reference: + +```yml +kind: bug-fix +summary: Fix a panic caused by a race condition when installing the Elastic Agent. 
+pr: https://github.com/elastic/elastic-agent/pull/823 +``` + ## Testing Prerequisites: - installed [mage](https://github.com/magefile/mage) - [Docker](https://docs.docker.com/get-docker/) - [X-pack](https://github.com/elastic/beats/tree/main/x-pack) to pre-exist in the parent folder of the local Git repository checkout -- [elastic-agent-changelog-tool](https://github.com/elastic/elastic-agent-changelog-tool) to add changelog fragments for changelog generation If you are on a Mac with M1 chip, don't forget to export some docker variable to be able to build for AMD ``` diff --git a/changelog/8.5.0.yaml b/changelog/8.5.0.yaml new file mode 100644 index 00000000000..dd0502bbf01 --- /dev/null +++ b/changelog/8.5.0.yaml @@ -0,0 +1,129 @@ +version: 8.5.0 +entries: + - kind: bug-fix + summary: Fix a panic caused by a race condition when installing the Elastic Agent. + description: "" + component: "" + pr: + - https://github.com/elastic/elastic-agent/pull/823 + issue: + - https://github.com/elastic/elastic-agent/issues/806 + timestamp: 1660139385 + file: + name: 1660139385-Fix-a-panic-caused-by-a-race-condition-when-installing-the-Elastic-Agent.yaml + checksum: be820e85b12290a895bfd37c2c2245bfae5a70c9 + - kind: breaking-change + summary: Upgrade to Go 1.18. Certificates signed with SHA-1 are now rejected. See the Go 1.18 https//tip.golang.org/doc/go1.18#sha1[release notes] for details. + description: "" + component: "" + pr: + - https://github.com/elastic/elastic-agent/pull/832 + issue: [] + timestamp: 1660158319 + file: + name: 1660158319-Upgrade-to-Go-118-Certificates-signed-with-SHA-1-are-now-rejected-See-the-Go-118.yaml + checksum: 2b304d75a687ec7384f3011a55f243fef66e447b + - kind: feature + summary: Add `lumberjack` input type to the Filebeat spec. 
+ description: "" + component: "" + pr: + - https://github.com/elastic/elastic-agent/pull/959 + issue: [] + timestamp: 1661188787 + file: + name: 1661188787-Add-lumberjack-input-type-to-the-Filebeat-spec.yaml + checksum: ab7e3af045affbda1522c029cc56a9c5403340c3 + - kind: feature + summary: Add support for hints' based autodiscovery in kubernetes provider. + description: "" + component: "" + pr: + - https://github.com/elastic/elastic-agent/pull/698 + issue: [] + timestamp: 1663143487 + file: + name: 1663143487-Add-support-for-hints-based-autodiscovery-in-kubernetes-provider.yaml + checksum: 398470d14a8475e93aaf66f96407570653dde71e + - kind: bug-fix + summary: Fix unintended reset of source URI when downloading components + description: "" + component: "" + pr: + - https://github.com/elastic/elastic-agent/pull/1252 + issue: [] + timestamp: 1664177394 + file: + name: 1664177394-Fix-unintended-reset-of-source-URI-when-downloading-components.yaml + checksum: 1040aceb00b70182c0bba621e15cfe711e32f9fe + - kind: bug-fix + summary: Create separate status reporter for local only events so that degraded fleet-checkins no longer affect health on successful fleet-checkins. + description: "" + component: "" + pr: + - https://github.com/elastic/elastic-agent/pull/1285 + issue: + - https://github.com/elastic/elastic-agent/issues/1157 + timestamp: 1664212969 + file: + name: 1664212969-Create-separate-status-reporter-for-local-only-events-so-that-degraded-fleet-che.yaml + checksum: 929f1ada47aeaaf9c631091c0f2732f631b3539f + - kind: feature + summary: Improve logging during upgrades. 
+ description: "" + component: "" + pr: + - https://github.com/elastic/elastic-agent/pull/1287 + issue: + - https://github.com/elastic/elastic-agent/issues/1253 + timestamp: 1664230732 + file: + name: 1664230732-Improve-logging-during-upgrades.yaml + checksum: 13ab968324d342118c5a257d6c6cc0c5db161b46 + - kind: bug-fix + summary: Add success log message after previous checkin failures + description: "" + component: "" + pr: + - https://github.com/elastic/elastic-agent/pull/1327 + issue: [] + timestamp: 1664360554 + file: + name: 1664360554-Add-success-log-message-after-previous-checkin-failures.yaml + checksum: 819a22a452dddfa3b2976433d5cb4c8354a6ccc5 + - kind: bug-fix + summary: Fix docker provider add_fields processors + description: "" + component: providers + pr: + - https://github.com/elastic/elastic-agent/pull/1420 + issue: + - https://github.com/elastic/elastic-agent/issues/29030 + timestamp: 1664989867 + file: + name: 1664989867-fix-docker-provider-processors.yaml + checksum: 45731791cd2e8f1d747abfc47bb256971a77d015 + - kind: enhancement + summary: Improve logging of Fleet check-in errors. + description: Improve logging of Fleet check-in errors and only report the local state as degraded after two consecutive failed check-ins. 
+ component: "" + pr: + - https://github.com/elastic/elastic-agent/pull/1477 + issue: + - https://github.com/elastic/elastic-agent/issues/1154 + timestamp: 1665517984 + file: + name: 1665517984-improve-checkin-error-logging.yaml + checksum: 2d3dd39309def9a082f794eda815af459596c2e6 + - kind: bug-fix + summary: Fix admin permission check on localized windows + description: "" + component: "" + pr: + - https://github.com/elastic/elastic-agent/pull/1552 + issue: + - https://github.com/elastic/elastic-agent/issues/857 + timestamp: 1666088774 + file: + name: 1666088774-Fix-admin-permission-check-on-localized-windows.yaml + checksum: 16ee5909c319680b8d32045e74c38922eafc29ea diff --git a/changelog/fragments/1660139385-Fix-a-panic-caused-by-a-race-condition-when-installing-the-Elastic-Agent.yaml b/changelog/fragments/1660139385-Fix-a-panic-caused-by-a-race-condition-when-installing-the-Elastic-Agent.yaml deleted file mode 100644 index 19844fe2dfc..00000000000 --- a/changelog/fragments/1660139385-Fix-a-panic-caused-by-a-race-condition-when-installing-the-Elastic-Agent.yaml +++ /dev/null @@ -1,3 +0,0 @@ -kind: bug-fix -summary: Fix a panic caused by a race condition when installing the Elastic Agent. -pr: https://github.com/elastic/elastic-agent/pull/823 diff --git a/changelog/fragments/1660158319-Upgrade-to-Go-118-Certificates-signed-with-SHA-1-are-now-rejected-See-the-Go-118.yaml b/changelog/fragments/1660158319-Upgrade-to-Go-118-Certificates-signed-with-SHA-1-are-now-rejected-See-the-Go-118.yaml deleted file mode 100644 index f7b6ce903d3..00000000000 --- a/changelog/fragments/1660158319-Upgrade-to-Go-118-Certificates-signed-with-SHA-1-are-now-rejected-See-the-Go-118.yaml +++ /dev/null @@ -1,3 +0,0 @@ -kind: breaking-change -summary: Upgrade to Go 1.18. Certificates signed with SHA-1 are now rejected. See the Go 1.18 https//tip.golang.org/doc/go1.18#sha1[release notes] for details. 
-pr: https://github.com/elastic/elastic-agent/pull/832 diff --git a/changelog/fragments/1661188787-Add-lumberjack-input-type-to-the-Filebeat-spec.yaml b/changelog/fragments/1661188787-Add-lumberjack-input-type-to-the-Filebeat-spec.yaml deleted file mode 100644 index 9110968e91f..00000000000 --- a/changelog/fragments/1661188787-Add-lumberjack-input-type-to-the-Filebeat-spec.yaml +++ /dev/null @@ -1,3 +0,0 @@ -kind: feature -summary: Add `lumberjack` input type to the Filebeat spec. -pr: https://github.com/elastic/elastic-agent/pull/959 diff --git a/changelog/fragments/1663143487-Add-support-for-hints-based-autodiscovery-in-kubernetes-provider.yaml b/changelog/fragments/1663143487-Add-support-for-hints-based-autodiscovery-in-kubernetes-provider.yaml deleted file mode 100644 index 04e84669955..00000000000 --- a/changelog/fragments/1663143487-Add-support-for-hints-based-autodiscovery-in-kubernetes-provider.yaml +++ /dev/null @@ -1,3 +0,0 @@ -kind: feature -summary: Add support for hints' based autodiscovery in kubernetes provider. 
-pr: https://github.com/elastic/elastic-agent/pull/698 diff --git a/changelog/fragments/1664177394-Fix-unintended-reset-of-source-URI-when-downloading-components.yaml b/changelog/fragments/1664177394-Fix-unintended-reset-of-source-URI-when-downloading-components.yaml deleted file mode 100644 index b5712f4c193..00000000000 --- a/changelog/fragments/1664177394-Fix-unintended-reset-of-source-URI-when-downloading-components.yaml +++ /dev/null @@ -1,3 +0,0 @@ -kind: bug-fix -summary: Fix unintended reset of source URI when downloading components -pr: https://github.com/elastic/elastic-agent/pull/1252 diff --git a/changelog/fragments/1664212969-Create-separate-status-reporter-for-local-only-events-so-that-degraded-fleet-che.yaml b/changelog/fragments/1664212969-Create-separate-status-reporter-for-local-only-events-so-that-degraded-fleet-che.yaml deleted file mode 100644 index a94f5b66751..00000000000 --- a/changelog/fragments/1664212969-Create-separate-status-reporter-for-local-only-events-so-that-degraded-fleet-che.yaml +++ /dev/null @@ -1,4 +0,0 @@ -kind: bug-fix -summary: Create separate status reporter for local only events so that degraded fleet-checkins no longer affect health on successful fleet-checkins. -issue: https://github.com/elastic/elastic-agent/issues/1157 -pr: https://github.com/elastic/elastic-agent/pull/1285 diff --git a/changelog/fragments/1664230732-Improve-logging-during-upgrades.yaml b/changelog/fragments/1664230732-Improve-logging-during-upgrades.yaml deleted file mode 100644 index 15f81e7d5ad..00000000000 --- a/changelog/fragments/1664230732-Improve-logging-during-upgrades.yaml +++ /dev/null @@ -1,3 +0,0 @@ -kind: feature -summary: Improve logging during upgrades. 
-pr: https://github.com/elastic/elastic-agent/pull/1287 diff --git a/changelog/fragments/1664360554-Add-success-log-message-after-previous-checkin-failures.yaml b/changelog/fragments/1664360554-Add-success-log-message-after-previous-checkin-failures.yaml deleted file mode 100644 index 3e4ac3d91a5..00000000000 --- a/changelog/fragments/1664360554-Add-success-log-message-after-previous-checkin-failures.yaml +++ /dev/null @@ -1,3 +0,0 @@ -kind: bug-fix -summary: Add success log message after previous checkin failures -pr: https://github.com/elastic/elastic-agent/pull/1327 diff --git a/changelog/fragments/1664989867-fix-docker-provider-processors.yaml b/changelog/fragments/1664989867-fix-docker-provider-processors.yaml deleted file mode 100644 index c7c87152479..00000000000 --- a/changelog/fragments/1664989867-fix-docker-provider-processors.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Kind can be one of: -# - breaking-change: a change to previously-documented behavior -# - deprecation: functionality that is being removed in a later release -# - bug-fix: fixes a problem in a previous version -# - enhancement: extends functionality but does not break or fix existing behavior -# - feature: new functionality -# - known-issue: problems that we are aware of in a given version -# - security: impacts on the security of a product or a user’s deployment. -# - upgrade: important information for someone upgrading from a prior version -# - other: does not fit into any of the other categories -kind: bug-fix - -# Change summary; a 80ish characters long description of the change. -summary: Fix docker provider add_fields processors - -# Long description; in case the summary is not enough to describe the change -# this field accommodate a description without length limits. -#description: - -# Affected component; a word indicating the component this changeset affects. -component: providers - -# PR number; optional; the PR number that added the changeset. 
-# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. -# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. -# Please provide it if you are adding a fragment for a different PR. -#pr: 1234 - -# Issue number; optional; the GitHub issue related to this changeset (either closes or is part of). -# If not present is automatically filled by the tooling with the issue linked to the PR number. -#issue: 1234 diff --git a/changelog/fragments/1665517984-improve-checkin-error-logging.yaml b/changelog/fragments/1665517984-improve-checkin-error-logging.yaml deleted file mode 100644 index 7bf2777d9d5..00000000000 --- a/changelog/fragments/1665517984-improve-checkin-error-logging.yaml +++ /dev/null @@ -1,5 +0,0 @@ -kind: enhancement -summary: Improve logging of Fleet check-in errors. -description: Improve logging of Fleet check-in errors and only report the local state as degraded after two consecutive failed check-ins. -pr: 1477 -issue: 1154 diff --git a/changelog/fragments/1666088774-Fix-admin-permission-check-on-localized-windows.yaml b/changelog/fragments/1666088774-Fix-admin-permission-check-on-localized-windows.yaml deleted file mode 100644 index 93d5999f1b0..00000000000 --- a/changelog/fragments/1666088774-Fix-admin-permission-check-on-localized-windows.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Kind can be one of: -# - breaking-change: a change to previously-documented behavior -# - deprecation: functionality that is being removed in a later release -# - bug-fix: fixes a problem in a previous version -# - enhancement: extends functionality but does not break or fix existing behavior -# - feature: new functionality -# - known-issue: problems that we are aware of in a given version -# - security: impacts on the security of a product or a user’s deployment. 
-# - upgrade: important information for someone upgrading from a prior version -# - other: does not fit into any of the other categories -kind: bug-fix - -# Change summary; a 80ish characters long description of the change. -summary: Fix admin permission check on localized windows - -# Long description; in case the summary is not enough to describe the change -# this field accommodate a description without length limits. -#description: - -# Affected component; a word indicating the component this changeset affects. -component: - -# PR number; optional; the PR number that added the changeset. -# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. -# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. -# Please provide it if you are adding a fragment for a different PR. -pr: 1552 - -# Issue number; optional; the GitHub issue related to this changeset (either closes or is part of). -# If not present is automatically filled by the tooling with the issue linked to the PR number. 
-issue: 857 From 887bac79df364c525b1af9dde7c974538a6196ba Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Wed, 2 Nov 2022 01:42:39 -0400 Subject: [PATCH 58/63] [Automation] Update elastic stack version to 8.6.0-4765d2b0 for testing (#1674) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 18f365e5661..0e71261f60d 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-26dc1164-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-4765d2b0-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-26dc1164-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-4765d2b0-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From b394e2f4fd34f8fa909406859991841917b754e8 Mon Sep 17 00:00:00 2001 From: Craig MacKenzie Date: Wed, 2 Nov 2022 13:19:52 -0400 Subject: [PATCH 59/63] Disable creating GH issues for build failures. (#1677) The E2E tests are still unstable and the issues reported for those failures is creating a lot of noise in the issue tracker. --- .ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/Jenkinsfile b/.ci/Jenkinsfile index c374bfeb0ef..61f8c6b9e13 100644 --- a/.ci/Jenkinsfile +++ b/.ci/Jenkinsfile @@ -425,7 +425,7 @@ pipeline { cleanup { notifyBuildResult(prComment: true, analyzeFlakey: !isTag(), jobName: getFlakyJobName(withBranch: (isPR() ? 
env.CHANGE_TARGET : env.BRANCH_NAME)), - githubIssue: isBranch() && currentBuild.currentResult != "SUCCESS", + githubIssue: false, // Disable creating gh issues for build failures while the E2E tests are stabilized. githubLabels: 'Team:Elastic-Agent-Control-Plane') } } From d34020750b06a1d9073d2d35ae20a1e7c162385d Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Thu, 3 Nov 2022 01:37:20 -0400 Subject: [PATCH 60/63] [Automation] Update elastic stack version to 8.6.0-8a615646 for testing (#1682) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 0e71261f60d..89f4ce6f899 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-4765d2b0-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-8a615646-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-4765d2b0-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-8a615646-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From 68fd2dbc7a44200d160534fa38abd60e9f0c02f4 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Fri, 4 Nov 2022 01:37:47 -0400 Subject: [PATCH 61/63] [Automation] Update elastic stack version to 8.6.0-3f5f98b7 for testing (#1685) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 
89f4ce6f899..9cf5799a0ed 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-8a615646-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-3f5f98b7-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-8a615646-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-3f5f98b7-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" From 6130eba21556e0738ed9814f0387d8b504ddc0dd Mon Sep 17 00:00:00 2001 From: Craig MacKenzie Date: Fri, 4 Nov 2022 11:02:43 -0400 Subject: [PATCH 62/63] Remove non-agent Dockerfile templates. (#1539) These are only used for Beats and should have been deleted when the agent migrated to its own repository. --- .../templates/docker/Dockerfile.tmpl | 198 ------------------ .../templates/docker/docker-entrypoint.tmpl | 25 --- 2 files changed, 223 deletions(-) delete mode 100644 dev-tools/packaging/templates/docker/Dockerfile.tmpl delete mode 100644 dev-tools/packaging/templates/docker/docker-entrypoint.tmpl diff --git a/dev-tools/packaging/templates/docker/Dockerfile.tmpl b/dev-tools/packaging/templates/docker/Dockerfile.tmpl deleted file mode 100644 index 17dab4b5223..00000000000 --- a/dev-tools/packaging/templates/docker/Dockerfile.tmpl +++ /dev/null @@ -1,198 +0,0 @@ -{{- $beatHome := printf "%s/%s" "/usr/share" .BeatName }} -{{- $beatBinary := printf "%s/%s" $beatHome .BeatName }} -{{- $repoInfo := repo }} - -# Prepare home in a different stage to avoid creating additional layers on -# the final image because of permission changes. 
-FROM {{ .buildFrom }} AS home - -COPY beat {{ $beatHome }} - -RUN mkdir -p {{ $beatHome }}/data {{ $beatHome }}/logs && \ - chown -R root:root {{ $beatHome }} && \ - find {{ $beatHome }} -type d -exec chmod 0755 {} \; && \ - find {{ $beatHome }} -type f -exec chmod 0644 {} \; && \ - chmod 0755 {{ $beatBinary }} && \ -{{- range $i, $modulesd := .ModulesDirs }} - chmod 0775 {{ $beatHome}}/{{ $modulesd }} && \ -{{- end }} - chmod 0775 {{ $beatHome }}/data {{ $beatHome }}/logs - -FROM {{ .from }} - -{{- if contains .from "ubi-minimal" }} -RUN microdnf -y update && \ - microdnf install findutils shadow-utils && \ - microdnf clean all -{{- else }} -RUN for iter in {1..10}; do \ - apt-get update -y && \ - DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends --yes ca-certificates curl gawk libcap2-bin xz-utils && \ - apt-get clean all && \ - exit_code=0 && break || exit_code=$? && echo "apt-get error: retry $iter in 10s" && sleep 10; \ - done; \ - (exit $exit_code) -{{- end }} - -{{- if (and (eq .BeatName "heartbeat") (not (contains .from "ubi-minimal"))) }} -RUN apt-get update -y && \ - for iter in {1..10}; do \ - DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends --yes \ - libglib2.0-0\ - libnss3\ - libnspr4\ - libatk1.0-0\ - libatk-bridge2.0-0\ - libcups2\ - libdrm2\ - libdbus-1-3\ - libxcb1\ - libxkbcommon0\ - libx11-6\ - libxcomposite1\ - libxdamage1\ - libxext6\ - libxfixes3\ - libxrandr2\ - libgbm1\ - libpango-1.0-0\ - libcairo2\ - libasound2\ - libatspi2.0-0\ - libxshmfence1 \ - fonts-noto \ - fonts-noto-cjk && \ - apt-get clean all && \ - exit_code=0 && break || exit_code=$? 
&& echo "apt-get error: retry $iter in 10s" && sleep 10; \ - done; \ - (exit $exit_code) -ENV NODE_PATH={{ $beatHome }}/.node -RUN echo \ - $NODE_PATH \ - {{ $beatHome }}/.config \ - {{ $beatHome }}/.synthetics \ - {{ $beatHome }}/.npm \ - {{ $beatHome }}/.cache \ - | xargs -IDIR sh -c 'mkdir -p DIR && chmod 0770 DIR' -{{- end }} - -LABEL \ - org.label-schema.build-date="{{ date }}" \ - org.label-schema.schema-version="1.0" \ - org.label-schema.vendor="{{ .BeatVendor }}" \ - org.label-schema.license="{{ .License }}" \ - org.label-schema.name="{{ .BeatName }}" \ - org.label-schema.version="{{ beat_version }}{{if .Snapshot}}-SNAPSHOT{{end}}" \ - org.label-schema.url="{{ .BeatURL }}" \ - org.label-schema.vcs-url="{{ $repoInfo.RootImportPath }}" \ - org.label-schema.vcs-ref="{{ commit }}" \ - io.k8s.description="{{ .BeatDescription }}" \ - io.k8s.display-name="{{ .BeatName | title }} image" \ - org.opencontainers.image.created="{{ date }}" \ - org.opencontainers.image.licenses="{{ .License }}" \ - org.opencontainers.image.title="{{ .BeatName | title }}" \ - org.opencontainers.image.vendor="{{ .BeatVendor }}" \ - name="{{ .BeatName }}" \ - maintainer="infra@elastic.co" \ - vendor="{{ .BeatVendor }}" \ - version="{{ beat_version }}{{if .Snapshot}}-SNAPSHOT{{end}}" \ - release="1" \ - url="{{ .BeatURL }}" \ - summary="{{ .BeatName }}" \ - license="{{ .License }}" \ - description="{{ .BeatDescription }}" - -ENV ELASTIC_CONTAINER "true" -ENV PATH={{ $beatHome }}:$PATH -ENV GODEBUG="madvdontneed=1" - -# Add an init process, check the checksum to make sure it's a match -RUN set -e ; \ - TINI_BIN=""; \ - TINI_SHA256=""; \ - TINI_VERSION="v0.19.0"; \ - echo "The arch value is $(arch)"; \ - case "$(arch)" in \ - x86_64) \ - TINI_BIN="tini-amd64"; \ - TINI_SHA256="93dcc18adc78c65a028a84799ecf8ad40c936fdfc5f2a57b1acda5a8117fa82c"; \ - ;; \ - aarch64) \ - TINI_BIN="tini-arm64"; \ - TINI_SHA256="07952557df20bfd2a95f9bef198b445e006171969499a1d361bd9e6f8e5e0e81"; \ - ;; \ - *) \ - 
echo >&2 ; echo >&2 "Unsupported architecture \$(arch)" ; echo >&2 ; exit 1 ; \ - ;; \ - esac ; \ - curl --retry 8 -S -L -O "https://github.com/krallin/tini/releases/download/${TINI_VERSION}/${TINI_BIN}" ; \ - echo "${TINI_SHA256} ${TINI_BIN}" | sha256sum -c - ; \ - mv "${TINI_BIN}" /usr/bin/tini ; \ - chmod +x /usr/bin/tini - -COPY docker-entrypoint /usr/local/bin/docker-entrypoint -RUN chmod 755 /usr/local/bin/docker-entrypoint - -COPY --from=home {{ $beatHome }} {{ $beatHome }} - -RUN mkdir /licenses -COPY --from=home {{ $beatHome }}/LICENSE.txt /licenses -COPY --from=home {{ $beatHome }}/NOTICE.txt /licenses - -{{- if .linux_capabilities }} -# Since the beat is stored at the other end of a symlink we must follow the symlink first -# For security reasons setcap does not support symlinks. This is smart in the general case -# but in our specific case since we're building a trusted image from trusted binaries this is -# fine. Thus, we use readlink to follow the link and setcap on the actual binary -RUN readlink -f {{ $beatBinary }} | xargs setcap {{ .linux_capabilities }} -{{- end }} - -{{- if ne .user "root" }} -RUN groupadd --gid 1000 {{ .BeatName }} -RUN useradd -M --uid 1000 --gid 1000 --groups 0 --home {{ $beatHome }} {{ .user }} -{{- if (and (eq .BeatName "heartbeat") (not (contains .from "ubi-minimal"))) }} -RUN chown {{ .user }} $NODE_PATH -{{- end }} -{{- end }} -USER {{ .user }} - -{{- if (and (eq .BeatName "heartbeat") (not (contains .from "ubi-minimal"))) }} -# Setup synthetics env vars -ENV ELASTIC_SYNTHETICS_CAPABLE=true -ENV SUITES_DIR={{ $beatHome }}/suites -ENV NODE_VERSION=18.12.0 -ENV PATH="$NODE_PATH/node/bin:$PATH" -# Install the latest version of @elastic/synthetics forcefully ignoring the previously -# cached node_modules, heartbeat then calls the global executable to run test suites -# Setup node -RUN cd /usr/share/heartbeat/.node \ - && NODE_DOWNLOAD_URL="" \ - && case "$(arch)" in \ - x86_64) \ - 
NODE_DOWNLOAD_URL=https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-linux-x64.tar.xz \ - ;; \ - aarch64) \ - NODE_DOWNLOAD_URL=https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-linux-arm64.tar.xz \ - ;; \ - *) \ - echo >&2 ; echo >&2 "Unsupported architecture \$(arch)" ; echo >&2 ; exit 1 ; \ - ;; \ - esac \ - && mkdir -p node \ - && curl ${NODE_DOWNLOAD_URL} | tar -xJ --strip 1 -C node \ - && chmod ug+rwX -R $NODE_PATH \ - && npm i -g -f @elastic/synthetics@stack_release && chmod ug+rwX -R $NODE_PATH -{{- end }} - -{{- range $i, $port := .ExposePorts }} -EXPOSE {{ $port }} -{{- end }} - -# When running under Docker, we must ensure libbeat monitoring pulls cgroup -# metrics from /sys/fs/cgroup//, ignoring any paths found in -# /proc/self/cgroup. -ENV LIBBEAT_MONITORING_CGROUPS_HIERARCHY_OVERRIDE=/ - -WORKDIR {{ $beatHome }} -ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/docker-entrypoint"] -CMD ["-environment", "container"] diff --git a/dev-tools/packaging/templates/docker/docker-entrypoint.tmpl b/dev-tools/packaging/templates/docker/docker-entrypoint.tmpl deleted file mode 100644 index f073e21e318..00000000000 --- a/dev-tools/packaging/templates/docker/docker-entrypoint.tmpl +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -# Check if the the user has invoked the image with flags. -# eg. "{{ .BeatName }} -c {{ .BeatName }}.yml" -if [[ -z $1 ]] || [[ ${1:0:1} == '-' ]] ; then - exec {{ .BeatName }} "$@" -else - # They may be looking for a Beat subcommand, like "{{ .BeatName }} setup". - subcommands=$({{ .BeatName }} help \ - | awk 'BEGIN {RS=""; FS="\n"} /Available Commands:/' \ - | awk '/^\s+/ {print $1}') - - # If we _did_ get a subcommand, pass it to {{ .BeatName }}. - for subcommand in $subcommands; do - if [[ $1 == $subcommand ]]; then - exec {{ .BeatName }} "$@" - fi - done -fi - -# If neither of those worked, then they have specified the binary they want, so -# just do exactly as they say. 
-exec "$@" From 657f66dad4bd7bb4215bc2394f2d78506213de56 Mon Sep 17 00:00:00 2001 From: apmmachine <58790750+apmmachine@users.noreply.github.com> Date: Mon, 7 Nov 2022 00:38:05 -0500 Subject: [PATCH 63/63] [Automation] Update elastic stack version to 8.6.0-f20b7179 for testing (#1692) Co-authored-by: apmmachine --- testing/environments/snapshot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index 9cf5799a0ed..4f4b5225e47 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-3f5f98b7-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.6.0-f20b7179-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -42,7 +42,7 @@ services: - ./docker/logstash/pki:/etc/pki:ro kibana: - image: docker.elastic.co/kibana/kibana:8.6.0-3f5f98b7-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.6.0-f20b7179-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing"