Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix TestDockerStart flaky test #21681

Merged
merged 7 commits into from
Oct 19, 2020
20 changes: 10 additions & 10 deletions libbeat/autodiscover/providers/docker/docker_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ import (

// Test docker start emits an autodiscover event
func TestDockerStart(t *testing.T) {
t.Skip("#20360 Flaky TestDockerStart skipped")

log := logp.NewLogger("docker")

d, err := dk.NewClient()
Expand Down Expand Up @@ -70,15 +68,17 @@ func TestDockerStart(t *testing.T) {
// Start
cmd := []string{"echo", "Hi!"}
labels := map[string]string{"label": "foo", "label.child": "bar"}
ID, err := d.ContainerStart("busybox", cmd, labels)
ID, err := d.ContainerStart("busybox:latest", cmd, labels)
if err != nil {
t.Fatal(err)
}
checkEvent(t, listener, true)
defer d.ContainerRemove(ID)

checkEvent(t, listener, ID, true)

// Kill
d.ContainerKill(ID)
checkEvent(t, listener, false)
checkEvent(t, listener, ID, false)
}

func getValue(e bus.Event, key string) interface{} {
Expand All @@ -89,12 +89,13 @@ func getValue(e bus.Event, key string) interface{} {
return val
}

func checkEvent(t *testing.T, listener bus.Listener, start bool) {
func checkEvent(t *testing.T, listener bus.Listener, id string, start bool) {
timeout := time.After(60 * time.Second)
for {
select {
case e := <-listener.Events():
// Ignore any other container
if getValue(e, "docker.container.image") != "busybox" {
if getValue(e, "container.id") != id {
continue
}
if start {
Expand All @@ -104,7 +105,7 @@ func checkEvent(t *testing.T, listener bus.Listener, start bool) {
assert.Equal(t, getValue(e, "stop"), true)
assert.Nil(t, getValue(e, "start"))
}
assert.Equal(t, getValue(e, "container.image.name"), "busybox")
assert.Equal(t, getValue(e, "container.image.name"), "busybox:latest")
// labels.dedot=true by default
assert.Equal(t,
common.MapStr{
Expand All @@ -122,8 +123,7 @@ func checkEvent(t *testing.T, listener bus.Listener, start bool) {
assert.Equal(t, getValue(e, "docker.container.name"), getValue(e, "meta.container.name"))
assert.Equal(t, getValue(e, "docker.container.image"), getValue(e, "meta.container.image.name"))
return

case <-time.After(10 * time.Second):
case <-timeout:
t.Fatal("Timeout waiting for provider events")
return
}
Expand Down
41 changes: 36 additions & 5 deletions libbeat/tests/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package docker

import (
"context"
"io"
"io/ioutil"

"github.com/pkg/errors"

Expand All @@ -42,13 +44,12 @@ func NewClient() (Client, error) {

// ContainerStart pulls and starts the given container
func (c Client) ContainerStart(image string, cmd []string, labels map[string]string) (string, error) {
ctx := context.Background()
respBody, err := c.cli.ImagePull(ctx, image, types.ImagePullOptions{})
err := c.imagePull(image)
if err != nil {
return "", errors.Wrapf(err, "pullling image %s", image)
return "", err
}
defer respBody.Close()

ctx := context.Background()
resp, err := c.cli.ContainerCreate(ctx, &container.Config{
Image: image,
Cmd: cmd,
Expand All @@ -65,6 +66,36 @@ func (c Client) ContainerStart(image string, cmd []string, labels map[string]str
return resp.ID, nil
}

// imagePull pulls an image
func (c Client) imagePull(image string) (err error) {
ctx := context.Background()
_, _, err = c.cli.ImageInspectWithRaw(ctx, image)
if err == nil {
// Image already available, do nothing
return nil
}
for retry := 0; retry < 3; retry++ {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we could make it configurable like having this number of retries to be a function variable. Also maybe to add sleep time between each of the retries, however not sure if this is needed at all since not familiar with the nature of the errors that may occur here.

Suggestion would look like:
imagePullWithRetry(image string, retries int, interval int)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good suggestion, but I think this wouldn't be needed by now, and in any case we wouldn't expose these settings in the public methods.
I would prefer to add them if we need it at some moment.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

err = func() error {
respBody, err := c.cli.ImagePull(ctx, image, types.ImagePullOptions{})
if err != nil {
return errors.Wrapf(err, "pullling image %s", image)
}
defer respBody.Close()

// Read all the response, to be sure that the pull has finished before returning.
_, err = io.Copy(ioutil.Discard, respBody)
if err != nil {
return errors.Wrapf(err, "reading response for image %s", image)
}
return nil
}()
if err == nil {
break
}
}
return
}

// ContainerWait waits for a container to finish
func (c Client) ContainerWait(ID string) error {
ctx := context.Background()
Expand All @@ -89,7 +120,7 @@ func (c Client) ContainerKill(ID string) error {
return c.cli.ContainerKill(ctx, ID, "KILL")
}

// ContainerRemove kills and removed the given container
// ContainerRemove kills and removes the given container
func (c Client) ContainerRemove(ID string) error {
ctx := context.Background()
return c.cli.ContainerRemove(ctx, ID, types.ContainerRemoveOptions{
Expand Down