Skip to content

Commit

Permalink
Relax leak test condition from Healthy to not Failed. (#5301)
Browse files Browse the repository at this point in the history
* Relax leak test condition from Healthy to not Failed.

* Invert condition

Co-authored-by: Denis <denis@rdner.de>

* Explicitly allow only healthy or degraded.

Avoids the check succeeding immediately once starting is reported.

* Actually fix condition.

* Fix the health check while the tests are running.

---------

Co-authored-by: Denis <denis@rdner.de>
  • Loading branch information
cmacknz and rdner authored Aug 15, 2024
1 parent ef69b58 commit b832c15
Showing 1 changed file with 34 additions and 2 deletions.
36 changes: 34 additions & 2 deletions testing/integration/agent_long_running_leak_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package integration
import (
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
Expand All @@ -29,7 +30,9 @@ import (
"github.com/elastic/elastic-agent-libs/api/npipe"
"github.com/elastic/elastic-agent-libs/kibana"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/pkg/control/v2/client"
"github.com/elastic/elastic-agent/pkg/control/v2/cproto"
"github.com/elastic/elastic-agent/pkg/core/process"
atesting "github.com/elastic/elastic-agent/pkg/testing"
"github.com/elastic/elastic-agent/pkg/testing/define"
"github.com/elastic/elastic-agent/pkg/testing/tools"
Expand Down Expand Up @@ -160,7 +163,10 @@ func (runner *ExtendedRunner) TestHandleLeak() {
case <-timer.C:
done = true
case <-ticker.C:
err := runner.agentFixture.IsHealthy(ctx)
// https://github.com/elastic/elastic-agent/issues/5300
// Ideally we would require healthy but we currently report as DEGRADED due to unexpected permissions errors
// accessing some process metrics. Ensure the leak tests still run while this is the case.
err := runner.IsHealthyOrDegraded(ctx)
require.NoError(runner.T(), err)
// iterate through our watchers, update them
for _, mon := range runner.resourceWatchers {
Expand Down Expand Up @@ -205,6 +211,8 @@ func (runner *ExtendedRunner) TestHandleLeak() {

// CheckHealthAtStartup ensures all the beats and agent are healthy and working before we continue
func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) {
runner.T().Helper()

// because we need to separately fetch the PIDs, wait until everything is healthy before we look for running beats
compDebugName := ""
require.Eventually(runner.T(), func() bool {
Expand Down Expand Up @@ -233,7 +241,11 @@ func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) {
}
}
runner.T().Logf("component state: %s", comp.Message)
if comp.State != int(cproto.State_HEALTHY) {

// https://github.com/elastic/elastic-agent/issues/5300
// Ideally we would require healthy but we currently report as DEGRADED due to unexpected permissions errors
// accessing some process metrics. Ensure the leak tests still run while this is the case.
if !isHealthyOrDegraded(comp.State) {
compDebugName = comp.Name
allHealthy = false
}
Expand All @@ -242,6 +254,26 @@ func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) {
}, runner.healthCheckTime, runner.healthCheckRefreshTime, "install never became healthy: components did not return a healthy state: %s", compDebugName)
}

// IsHealthyOrDegraded runs the agent status command through the fixture and
// returns nil when the reported agent state is either HEALTHY or DEGRADED.
//
// It returns a wrapped error when the status command itself fails, and a
// descriptive error naming the current state when the agent is in any other
// state. Any opts are forwarded unchanged to the status command.
func (runner *ExtendedRunner) IsHealthyOrDegraded(ctx context.Context, opts ...process.CmdOption) error {
	runner.T().Helper()

	agentStatus, err := runner.agentFixture.ExecStatus(ctx, opts...)

	switch {
	case err != nil:
		// The status command could not be executed or parsed.
		return fmt.Errorf("agent status returned an error: %w", err)
	case isHealthyOrDegraded(agentStatus.State):
		return nil
	default:
		return fmt.Errorf("agent isn't healthy or degraded, current status: %s",
			client.State(agentStatus.State))
	}
}

// isHealthyOrDegraded reports whether state equals the integer value of
// cproto.State_HEALTHY or cproto.State_DEGRADED; every other state yields false.
func isHealthyOrDegraded(state int) bool {
	switch state {
	case int(cproto.State_HEALTHY), int(cproto.State_DEGRADED):
		return true
	default:
		return false
	}
}

/*
=============================================================================
Watchers for checking resource usage
Expand Down

0 comments on commit b832c15

Please sign in to comment.