Skip to content

Commit

Permalink
Merge pull request #3856 from hashicorp/f-client-add-health-checks
Browse files Browse the repository at this point in the history
Client driver health checks for Docker
  • Loading branch information
chelseakomlo committed Mar 21, 2018
2 parents 8ea9993 + 89ffc96 commit 48c5f0e
Show file tree
Hide file tree
Showing 15 changed files with 1,246 additions and 367 deletions.
11 changes: 11 additions & 0 deletions api/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"sort"
"strconv"
"time"
)

// Nodes is used to query node-related API endpoints
Expand Down Expand Up @@ -96,6 +97,15 @@ func (n *Nodes) GcAlloc(allocID string, q *QueryOptions) error {
return err
}

// DriverInfo is used to deserialize a DriverInfo entry
type DriverInfo struct {
Attributes map[string]string
Detected bool
Healthy bool
HealthDescription string
UpdateTime time.Time
}

// Node is used to deserialize a node entry.
type Node struct {
ID string
Expand All @@ -114,6 +124,7 @@ type Node struct {
StatusDescription string
StatusUpdatedAt int64
Events []*NodeEvent
Drivers map[string]*DriverInfo
CreateIndex uint64
ModifyIndex uint64
}
Expand Down
83 changes: 81 additions & 2 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,8 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
}

fingerprintManager := NewFingerprintManager(c.GetConfig, c.config.Node,
c.shutdownCh, c.updateNodeFromFingerprint, c.logger)
c.shutdownCh, c.updateNodeFromFingerprint, c.updateNodeFromDriver,
c.logger)

// Fingerprint the node and scan for drivers
if err := fingerprintManager.Run(); err != nil {
Expand Down Expand Up @@ -894,6 +895,9 @@ func (c *Client) setupNode() error {
if node.Links == nil {
node.Links = make(map[string]string)
}
if node.Drivers == nil {
node.Drivers = make(map[string]*structs.DriverInfo)
}
if node.Meta == nil {
node.Meta = make(map[string]string)
}
Expand Down Expand Up @@ -1006,6 +1010,81 @@ func (c *Client) updateNodeFromFingerprint(response *cstructs.FingerprintRespons
if nodeHasChanged {
c.updateNode()
}

return c.config.Node
}

// updateNodeFromDriver receives either a fingerprint of the driver or its
// health and merges this into a single DriverInfo object
func (c *Client) updateNodeFromDriver(name string, fingerprint, health *structs.DriverInfo) *structs.Node {
c.configLock.Lock()
defer c.configLock.Unlock()

var hasChanged bool

if fingerprint != nil {
if c.config.Node.Drivers[name] == nil {
// If the driver info has not yet been set, do that here
hasChanged = true
c.config.Node.Drivers[name] = fingerprint
} else {
// The driver info has already been set, fix it up
if c.config.Node.Drivers[name].Detected != fingerprint.Detected {
hasChanged = true
c.config.Node.Drivers[name].Detected = fingerprint.Detected
}

for attrName, newVal := range fingerprint.Attributes {
oldVal := c.config.Node.Drivers[name].Attributes[attrName]
if oldVal == newVal {
continue
}

hasChanged = true
if newVal == "" {
delete(c.config.Node.Attributes, attrName)
} else {
c.config.Node.Attributes[attrName] = newVal
}
}
}
}

if health != nil {
if c.config.Node.Drivers[name] == nil {
hasChanged = true
c.config.Node.Drivers[name] = health
} else {
oldVal := c.config.Node.Drivers[name]
if health.HealthCheckEquals(oldVal) {
// Make sure we accurately reflect the last time a health check has been
// performed for the driver.
oldVal.UpdateTime = health.UpdateTime
} else {
hasChanged = true

// Only emit an event if the health status has changed, not if we are
// simply updating a node on startup
if health.Healthy != oldVal.Healthy && oldVal.HealthDescription != "" {
event := &structs.NodeEvent{
Subsystem: "Driver",
Message: health.HealthDescription,
Timestamp: time.Now().Unix(),
}
c.triggerNodeEvent(event)
}

// Update the node with the latest information
c.config.Node.Drivers[name].MergeHealthCheck(health)
}
}
}

if hasChanged {
c.config.Node.Drivers[name].UpdateTime = time.Now()
c.updateNode()
}

return c.config.Node
}

Expand Down Expand Up @@ -1190,7 +1269,7 @@ func (c *Client) watchNodeEvents() {
timer.Reset(c.retryIntv(nodeUpdateRetryIntv))
} else {
// Reset the events since we successfully sent them.
batchEvents = nil
batchEvents = []*structs.NodeEvent{}
}
case <-c.shutdownCh:
return
Expand Down
76 changes: 56 additions & 20 deletions client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,27 +142,63 @@ func TestClient_Fingerprint_Periodic(t *testing.T) {
node := c1.config.Node
mockDriverName := "driver.mock_driver"

// Ensure the mock driver is registered on the client
testutil.WaitForResult(func() (bool, error) {
mockDriverStatus := node.Attributes[mockDriverName]
if mockDriverStatus == "" {
return false, fmt.Errorf("mock driver attribute should be set on the client")
}
return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})
{
// Ensure the mock driver is registered on the client
testutil.WaitForResult(func() (bool, error) {
c1.configLock.Lock()
defer c1.configLock.Unlock()
mockDriverStatus := node.Attributes[mockDriverName]
mockDriverInfo := node.Drivers["mock_driver"]
if mockDriverStatus == "" {
return false, fmt.Errorf("mock driver attribute should be set on the client")
}

// Ensure that the client fingerprinter eventually removes this attribute
testutil.WaitForResult(func() (bool, error) {
mockDriverStatus := node.Attributes[mockDriverName]
if mockDriverStatus != "" {
return false, fmt.Errorf("mock driver attribute should not be set on the client")
}
return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})
// assert that the Driver information for the node is also set correctly
if mockDriverInfo == nil {
return false, fmt.Errorf("mock driver is nil when it should be set on node Drivers")
}
if !mockDriverInfo.Detected {
return false, fmt.Errorf("mock driver should be set as detected")
}
if !mockDriverInfo.Healthy {
return false, fmt.Errorf("mock driver should be set as healthy")
}
if mockDriverInfo.HealthDescription == "" {
return false, fmt.Errorf("mock driver description should not be empty")
}
return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})
}

{
testutil.WaitForResult(func() (bool, error) {
c1.configLock.Lock()
defer c1.configLock.Unlock()
mockDriverStatus := node.Attributes[mockDriverName]
mockDriverInfo := node.Drivers["mock_driver"]
if mockDriverStatus != "" {
return false, fmt.Errorf("mock driver attribute should not be set on the client")
}
// assert that the Driver information for the node is also set correctly
if mockDriverInfo == nil {
return false, fmt.Errorf("mock driver is nil when it should be set on node Drivers")
}
if mockDriverInfo.Detected {
return false, fmt.Errorf("mock driver should be set as detected")
}
if mockDriverInfo.Healthy {
return false, fmt.Errorf("mock driver should be set as healthy")
}
if mockDriverInfo.HealthDescription == "" {
return false, fmt.Errorf("mock driver description should not be empty")
}
return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})
}
}

// TestClient_MixedTLS asserts that when a server is running with TLS enabled
Expand Down
41 changes: 40 additions & 1 deletion client/driver/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,6 @@ func (d *DockerDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstru
d.logger.Printf("[INFO] driver.docker: failed to initialize client: %s", err)
}
d.fingerprintSuccess = helper.BoolToPtr(false)
resp.RemoveAttribute(dockerDriverAttr)
return nil
}

Expand Down Expand Up @@ -552,6 +551,46 @@ func (d *DockerDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstru
return nil
}

// HealthCheck implements the interface for the HealthCheck interface. This
// performs a health check on the docker driver, asserting whether the docker
// driver is responsive to a `docker ps` command.
func (d *DockerDriver) HealthCheck(req *cstructs.HealthCheckRequest, resp *cstructs.HealthCheckResponse) error {
dinfo := &structs.DriverInfo{
UpdateTime: time.Now(),
}

client, _, err := d.dockerClients()
if err != nil {
d.logger.Printf("[WARN] driver.docker: failed to retrieve Docker client in the process of a docker health check: %v", err)
dinfo.HealthDescription = fmt.Sprintf("Failed retrieving Docker client: %v", err)
resp.AddDriverInfo("docker", dinfo)
return nil
}

_, err = client.ListContainers(docker.ListContainersOptions{All: false})
if err != nil {
d.logger.Printf("[WARN] driver.docker: failed to list Docker containers in the process of a Docker health check: %v", err)
dinfo.HealthDescription = fmt.Sprintf("Failed to list Docker containers: %v", err)
resp.AddDriverInfo("docker", dinfo)
return nil
}

d.logger.Printf("[TRACE] driver.docker: docker driver is available and is responsive to `docker ps`")
dinfo.Healthy = true
dinfo.HealthDescription = "Docker driver is available and responsive"
resp.AddDriverInfo("docker", dinfo)
return nil
}

// GetHealthChecks implements the interface for the HealthCheck interface. This
// sets whether the driver is eligible for periodic health checks and the
// interval at which to do them.
func (d *DockerDriver) GetHealthCheckInterval(req *cstructs.HealthCheckIntervalRequest, resp *cstructs.HealthCheckIntervalResponse) error {
resp.Eligible = true
resp.Period = 1 * time.Minute
return nil
}

// Validate is used to validate the driver configuration
func (d *DockerDriver) Validate(config map[string]interface{}) error {
fd := &fields.FieldData{
Expand Down
41 changes: 41 additions & 0 deletions client/driver/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/env"
"github.com/hashicorp/nomad/client/fingerprint"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/client/testutil"
"github.com/hashicorp/nomad/helper/uuid"
Expand Down Expand Up @@ -164,6 +165,7 @@ func TestDockerDriver_Fingerprint(t *testing.T) {
if !tu.IsTravis() {
t.Parallel()
}

ctx := testDockerDriverContexts(t, &structs.Task{Name: "foo", Driver: "docker", Resources: basicResources})
//ctx.DriverCtx.config.Options = map[string]string{"docker.cleanup.image": "false"}
defer ctx.AllocDir.Destroy()
Expand Down Expand Up @@ -227,6 +229,7 @@ func TestDockerDriver_Fingerprint_Bridge(t *testing.T) {

request := &cstructs.FingerprintRequest{Config: conf, Node: conf.Node}
var response cstructs.FingerprintResponse

err = dd.Fingerprint(request, &response)
if err != nil {
t.Fatalf("error fingerprinting docker: %v", err)
Expand All @@ -251,6 +254,44 @@ func TestDockerDriver_Fingerprint_Bridge(t *testing.T) {
t.Logf("docker bridge ip: %q", attributes["driver.docker.bridge_ip"])
}

func TestDockerDriver_Check_DockerHealthStatus(t *testing.T) {
if !tu.IsTravis() {
t.Parallel()
}
if !testutil.DockerIsConnected(t) {
t.Skip("requires Docker")
}
if runtime.GOOS != "linux" {
t.Skip("expect only on linux")
}

require := require.New(t)

expectedAddr, err := sockaddr.GetInterfaceIP("docker0")
if err != nil {
t.Fatalf("unable to get ip for docker0: %v", err)
}
if expectedAddr == "" {
t.Fatalf("unable to get ip for docker bridge")
}

conf := testConfig(t)
conf.Node = mock.Node()
dd := NewDockerDriver(NewDriverContext("", "", conf, conf.Node, testLogger(), nil))

request := &cstructs.HealthCheckRequest{}
var response cstructs.HealthCheckResponse

dc, ok := dd.(fingerprint.HealthCheck)
require.True(ok)
err = dc.HealthCheck(request, &response)
require.Nil(err)

driverInfo := response.Drivers["docker"]
require.NotNil(driverInfo)
require.True(driverInfo.Healthy)
}

func TestDockerDriver_StartOpen_Wait(t *testing.T) {
if !tu.IsTravis() {
t.Parallel()
Expand Down
Loading

0 comments on commit 48c5f0e

Please sign in to comment.