Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor client fingerprinters to return a diff of node attributes #3781

Merged
merged 12 commits into from
Feb 2, 2018
98 changes: 75 additions & 23 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/hashicorp/nomad/client/driver"
"github.com/hashicorp/nomad/client/fingerprint"
"github.com/hashicorp/nomad/client/stats"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/client/vaultclient"
"github.com/hashicorp/nomad/command/agent/consul"
"github.com/hashicorp/nomad/helper"
Expand Down Expand Up @@ -931,33 +932,42 @@ func (c *Client) fingerprint() error {

c.logger.Printf("[DEBUG] client: built-in fingerprints: %v", fingerprint.BuiltinFingerprints())

var applied []string
var skipped []string
var appliedFingerprints []string
var skippedFingerprints []string
for _, name := range fingerprint.BuiltinFingerprints() {
// Skip modules that are not in the whitelist if it is enabled.
if _, ok := whitelist[name]; whitelistEnabled && !ok {
skipped = append(skipped, name)
skippedFingerprints = append(skippedFingerprints, name)
continue
}
// Skip modules that are in the blacklist
if _, ok := blacklist[name]; ok {
skipped = append(skipped, name)
skippedFingerprints = append(skippedFingerprints, name)
continue
}

f, err := fingerprint.NewFingerprint(name, c.logger)
if err != nil {
return err
}

request := &cstructs.FingerprintRequest{Config: c.config, Node: c.config.Node}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe you can get rid of the lock yet. This is a shared data structure that is being mutated/read concurrently. Apples to all places you are doing a fingerprint.

var response cstructs.FingerprintResponse
c.configLock.Lock()
applies, err := f.Fingerprint(c.config, c.config.Node)
err = f.Fingerprint(request, &response)
c.configLock.Unlock()
if err != nil {
return err
}
if applies {
applied = append(applied, name)

// log the fingerprinters which have been applied
if response.Detected {
appliedFingerprints = append(appliedFingerprints, name)
}

// add the diff found from each fingerprinter
c.updateNodeFromFingerprint(&response)

p, period := f.Periodic()
if p {
// TODO: If more periodic fingerprinters are added, then
Expand All @@ -966,9 +976,10 @@ func (c *Client) fingerprint() error {
go c.fingerprintPeriodic(name, f, period)
}
}
c.logger.Printf("[DEBUG] client: applied fingerprints %v", applied)
if len(skipped) != 0 {
c.logger.Printf("[DEBUG] client: fingerprint modules skipped due to white/blacklist: %v", skipped)

c.logger.Printf("[DEBUG] client: applied fingerprints %v", appliedFingerprints)
if len(skippedFingerprints) != 0 {
c.logger.Printf("[DEBUG] client: fingerprint modules skipped due to white/blacklist: %v", skippedFingerprints)
}
return nil
}
Expand All @@ -979,11 +990,18 @@ func (c *Client) fingerprintPeriodic(name string, f fingerprint.Fingerprint, d t
for {
select {
case <-time.After(d):
request := &cstructs.FingerprintRequest{Config: c.config, Node: c.config.Node}
var response cstructs.FingerprintResponse
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The lock should wrap creating the request since it is accessing the config. (not just here)

c.configLock.Lock()
if _, err := f.Fingerprint(c.config, c.config.Node); err != nil {
err := f.Fingerprint(request, &response)
c.configLock.Unlock()

if err != nil {
c.logger.Printf("[DEBUG] client: periodic fingerprinting for %v failed: %v", name, err)
} else {
c.updateNodeFromFingerprint(&response)
}
c.configLock.Unlock()

case <-c.shutdownCh:
return
}
Expand All @@ -997,52 +1015,86 @@ func (c *Client) setupDrivers() error {
whitelistEnabled := len(whitelist) > 0
blacklist := c.config.ReadStringListToMap("driver.blacklist")

var avail []string
var skipped []string
var availDrivers []string
var skippedDrivers []string
driverCtx := driver.NewDriverContext("", "", c.config, c.config.Node, c.logger, nil)
for name := range driver.BuiltinDrivers {
// Skip fingerprinting drivers that are not in the whitelist if it is
// enabled.
if _, ok := whitelist[name]; whitelistEnabled && !ok {
skipped = append(skipped, name)
skippedDrivers = append(skippedDrivers, name)
continue
}
// Skip fingerprinting drivers that are in the blacklist
if _, ok := blacklist[name]; ok {
skipped = append(skipped, name)
skippedDrivers = append(skippedDrivers, name)
continue
}

d, err := driver.NewDriver(name, driverCtx)
if err != nil {
return err
}

request := &cstructs.FingerprintRequest{Config: c.config, Node: c.config.Node}
var response cstructs.FingerprintResponse
c.configLock.Lock()
applies, err := d.Fingerprint(c.config, c.config.Node)
err = d.Fingerprint(request, &response)
c.configLock.Unlock()
if err != nil {
return err
}
if applies {
avail = append(avail, name)

// log the fingerprinters which have been applied
if response.Detected {
availDrivers = append(availDrivers, name)
}

c.updateNodeFromFingerprint(&response)

p, period := d.Periodic()
if p {
go c.fingerprintPeriodic(name, d, period)
}

}

c.logger.Printf("[DEBUG] client: available drivers %v", avail)

if len(skipped) != 0 {
c.logger.Printf("[DEBUG] client: drivers skipped due to white/blacklist: %v", skipped)
c.logger.Printf("[DEBUG] client: available drivers %v", availDrivers)
if len(skippedDrivers) > 0 {
c.logger.Printf("[DEBUG] client: drivers skipped due to white/blacklist: %v", skippedDrivers)
}

return nil
}

// updateNodeFromFingerprint updates the node with the result of
// fingerprinting the node from the diff that was created
func (c *Client) updateNodeFromFingerprint(response *cstructs.FingerprintResponse) {
c.configLock.Lock()
defer c.configLock.Unlock()
for name, val := range response.Attributes {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How are you handling removing Attributes/Links?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As in fingerprinter_foo adds attribute = {"foo": "bar", "bam": "baz"} and then the next time it is called only returns {"foo": "bar"} because "bam" should be removed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. One way we can handle this is if the attribute is equal to empty in the fingerprinter's response, we can delete this attribute from the node's attributes. That way we don't need to rely on the fingerprinter modifying the node directly.

if val == "" {
delete(c.config.Node.Attributes, name)
} else {
c.config.Node.Attributes[name] = val
}
}

// update node links and resources from the diff created from
// fingerprinting
for name, val := range response.Links {
if val == "" {
delete(c.config.Node.Links, name)
} else {
c.config.Node.Links[name] = val
}
}

if response.Resources != nil {
c.config.Node.Resources.Merge(response.Resources)
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should change the node watcher from checking periodically to check via a channel trigger and send it when this function is called: https://github.com/hashicorp/nomad/blob/master/client/client.go#L1588

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be in a follow up PR.


// retryIntv calculates a retry interval value given the base
func (c *Client) retryIntv(base time.Duration) time.Duration {
if c.config.DevMode {
Expand Down
43 changes: 43 additions & 0 deletions client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/hashicorp/consul/lib/freeport"
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver"
"github.com/hashicorp/nomad/client/fingerprint"
"github.com/hashicorp/nomad/command/agent/consul"
"github.com/hashicorp/nomad/helper"
Expand Down Expand Up @@ -252,6 +253,48 @@ func TestClient_HasNodeChanged(t *testing.T) {
}
}

func TestClient_Fingerprint_Periodic(t *testing.T) {
if _, ok := driver.BuiltinDrivers["mock_driver"]; !ok {
t.Skip(`test requires mock_driver; run with "-tags nomad_test"`)
}
t.Parallel()

// these constants are only defined when nomad_test is enabled, so these fail
// our linter without explicit disabling.
c1 := testClient(t, func(c *config.Config) {
c.Options = map[string]string{
driver.ShutdownPeriodicAfter: "true", // nolint: varcheck
driver.ShutdownPeriodicDuration: "3", // nolint: varcheck
}
})
defer c1.Shutdown()

node := c1.config.Node
mockDriverName := "driver.mock_driver"

// Ensure the mock driver is registered on the client
testutil.WaitForResult(func() (bool, error) {
mockDriverStatus := node.Attributes[mockDriverName]
if mockDriverStatus == "" {
return false, fmt.Errorf("mock driver attribute should be set on the client")
}
return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})

// Ensure that the client fingerprinter eventually removes this attribute
testutil.WaitForResult(func() (bool, error) {
mockDriverStatus := node.Attributes[mockDriverName]
if mockDriverStatus != "" {
return false, fmt.Errorf("mock driver attribute should not be set on the client")
}
return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})
}

func TestClient_Fingerprint_InWhitelist(t *testing.T) {
t.Parallel()
c := testClient(t, func(c *config.Config) {
Expand Down
25 changes: 12 additions & 13 deletions client/driver/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/env"
"github.com/hashicorp/nomad/client/driver/executor"
dstructs "github.com/hashicorp/nomad/client/driver/structs"
Expand Down Expand Up @@ -476,42 +475,42 @@ func NewDockerDriver(ctx *DriverContext) Driver {
return &DockerDriver{DriverContext: *ctx}
}

func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
// Initialize docker API clients
func (d *DockerDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstructs.FingerprintResponse) error {
client, _, err := d.dockerClients()
if err != nil {
if d.fingerprintSuccess == nil || *d.fingerprintSuccess {
d.logger.Printf("[INFO] driver.docker: failed to initialize client: %s", err)
}
delete(node.Attributes, dockerDriverAttr)
d.fingerprintSuccess = helper.BoolToPtr(false)
return false, nil
resp.RemoveAttribute(dockerDriverAttr)
return nil
}

// This is the first operation taken on the client so we'll try to
// establish a connection to the Docker daemon. If this fails it means
// Docker isn't available so we'll simply disable the docker driver.
env, err := client.Version()
if err != nil {
delete(node.Attributes, dockerDriverAttr)
if d.fingerprintSuccess == nil || *d.fingerprintSuccess {
d.logger.Printf("[DEBUG] driver.docker: could not connect to docker daemon at %s: %s", client.Endpoint(), err)
}
d.fingerprintSuccess = helper.BoolToPtr(false)
return false, nil
resp.RemoveAttribute(dockerDriverAttr)
return nil
}

node.Attributes[dockerDriverAttr] = "1"
node.Attributes["driver.docker.version"] = env.Get("Version")
resp.AddAttribute(dockerDriverAttr, "1")
resp.AddAttribute("driver.docker.version", env.Get("Version"))
resp.Detected = true

privileged := d.config.ReadBoolDefault(dockerPrivilegedConfigOption, false)
if privileged {
node.Attributes[dockerPrivilegedConfigOption] = "1"
resp.AddAttribute(dockerPrivilegedConfigOption, "1")
}

// Advertise if this node supports Docker volumes
if d.config.ReadBoolDefault(dockerVolumesConfigOption, dockerVolumesConfigDefault) {
node.Attributes["driver."+dockerVolumesConfigOption] = "1"
resp.AddAttribute("driver."+dockerVolumesConfigOption, "1")
}

// Detect bridge IP address - #2785
Expand All @@ -529,7 +528,7 @@ func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool
}

if n.IPAM.Config[0].Gateway != "" {
node.Attributes["driver.docker.bridge_ip"] = n.IPAM.Config[0].Gateway
resp.AddAttribute("driver.docker.bridge_ip", n.IPAM.Config[0].Gateway)
} else if d.fingerprintSuccess == nil {
// Docker 17.09.0-ce dropped the Gateway IP from the bridge network
// See https://github.com/moby/moby/issues/32648
Expand All @@ -540,7 +539,7 @@ func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool
}

d.fingerprintSuccess = helper.BoolToPtr(true)
return true, nil
return nil
}

// Validate is used to validate the driver configuration
Expand Down
43 changes: 35 additions & 8 deletions client/driver/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,17 +171,31 @@ func TestDockerDriver_Fingerprint(t *testing.T) {
node := &structs.Node{
Attributes: make(map[string]string),
}
apply, err := d.Fingerprint(&config.Config{}, node)

request := &cstructs.FingerprintRequest{Config: &config.Config{}, Node: node}
var response cstructs.FingerprintResponse
err := d.Fingerprint(request, &response)
if err != nil {
t.Fatalf("err: %v", err)
}
if apply != testutil.DockerIsConnected(t) {

attributes := response.Attributes
if testutil.DockerIsConnected(t) && attributes["driver.docker"] == "" {
t.Fatalf("Fingerprinter should detect when docker is available")
}
if node.Attributes["driver.docker"] != "1" {

if attributes["driver.docker"] != "1" {
t.Log("Docker daemon not available. The remainder of the docker tests will be skipped.")
} else {

// if docker is available, make sure that the response is tagged as
// applicable
if !response.Detected {
t.Fatalf("expected response to be applicable")
}
}
t.Logf("Found docker version %s", node.Attributes["driver.docker.version"])

t.Logf("Found docker version %s", attributes["driver.docker.version"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be require.NotEmpty(attributes["driver.docker.version"])

}

// TestDockerDriver_Fingerprint_Bridge asserts that if Docker is running we set
Expand Down Expand Up @@ -210,18 +224,31 @@ func TestDockerDriver_Fingerprint_Bridge(t *testing.T) {
conf := testConfig(t)
conf.Node = mock.Node()
dd := NewDockerDriver(NewDriverContext("", "", conf, conf.Node, testLogger(), nil))
ok, err := dd.Fingerprint(conf, conf.Node)

request := &cstructs.FingerprintRequest{Config: conf, Node: conf.Node}
var response cstructs.FingerprintResponse
err = dd.Fingerprint(request, &response)
if err != nil {
t.Fatalf("error fingerprinting docker: %v", err)
}
if !ok {

if !response.Detected {
t.Fatalf("expected response to be applicable")
}

attributes := response.Attributes
if attributes == nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use require

t.Fatalf("expected attributes to be set")
}

if attributes["driver.docker"] == "" {
t.Fatalf("expected Docker to be enabled but false was returned")
}

if found := conf.Node.Attributes["driver.docker.bridge_ip"]; found != expectedAddr {
if found := attributes["driver.docker.bridge_ip"]; found != expectedAddr {
t.Fatalf("expected bridge ip %q but found: %q", expectedAddr, found)
}
t.Logf("docker bridge ip: %q", conf.Node.Attributes["driver.docker.bridge_ip"])
t.Logf("docker bridge ip: %q", attributes["driver.docker.bridge_ip"])
}

func TestDockerDriver_StartOpen_Wait(t *testing.T) {
Expand Down
Loading