Non-locked accessors to common Node fields
This PR removes locking around commonly accessed node attributes that do
not need to be locked. The locking could cause nodes to miss their
heartbeat TTL because the heartbeat code path was acquiring a lock that
could be held for an excessively long time. An example is when Vault is
inaccessible: fingerprinting runs with the lock held, but the Vault
fingerprinter makes its API calls with a large timeout.

Fixes #2689
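
For context, the sketch below illustrates the accessor pattern this commit introduces. It is a minimal, hypothetical example rather than the actual Nomad client code, and it assumes (as the diff implies) that the node's ID, SecretID, and Datacenter are set once when the client is constructed and never mutated afterwards, so they can be read without taking the config lock; only the mutable node copy that fingerprinting updates still needs it.

package main

import (
    "fmt"
    "sync"
)

// Node holds the fields this sketch treats as immutable after construction.
type Node struct {
    ID         string
    SecretID   string
    Datacenter string
}

type config struct {
    Node *Node
}

// Client mirrors the shape of the pattern: the original config is never
// mutated, while configCopy is what fingerprinting rewrites under the lock.
type Client struct {
    config     *config
    configLock sync.RWMutex
    configCopy *config
}

// NodeID reads an immutable field, so no lock is required.
func (c *Client) NodeID() string {
    return c.config.Node.ID
}

// Datacenter is likewise safe to read without the lock.
func (c *Client) Datacenter() string {
    return c.config.Node.Datacenter
}

// Node returns the mutable copy and therefore still takes the lock.
func (c *Client) Node() *Node {
    c.configLock.RLock()
    defer c.configLock.RUnlock()
    return c.configCopy.Node
}

func main() {
    n := &Node{ID: "node-1", SecretID: "secret", Datacenter: "dc1"}
    c := &Client{config: &config{Node: n}, configCopy: &config{Node: n}}
    fmt.Println(c.NodeID(), c.Datacenter())
}
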
dadgar committed Sep 14, 2017
1 parent 2a3b65b commit f23ac5f
Showing 2 changed files with 25 additions and 18 deletions.
38 changes: 22 additions & 16 deletions client/client.go
@@ -296,7 +296,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
     // Start collecting stats
     go c.emitStats()
 
-    c.logger.Printf("[INFO] client: Node ID %q", c.Node().ID)
+    c.logger.Printf("[INFO] client: Node ID %q", c.NodeID())
     return c, nil
 }

@@ -369,9 +369,7 @@ func (c *Client) Leave() error {
 
 // Datacenter returns the datacenter for the given client
 func (c *Client) Datacenter() string {
-    c.configLock.RLock()
-    dc := c.configCopy.Node.Datacenter
-    c.configLock.RUnlock()
+    dc := c.config.Node.Datacenter
     return dc
 }

@@ -380,6 +378,16 @@ func (c *Client) Region() string {
     return c.config.Region
 }
 
+// NodeID returns the node ID for the given client
+func (c *Client) NodeID() string {
+    return c.config.Node.ID
+}
+
+// secretNodeID returns the secret node ID for the given client
+func (c *Client) secretNodeID() string {
+    return c.config.Node.SecretID
+}
+
 // RPCMajorVersion returns the structs.ApiMajorVersion supported by the
 // client.
 func (c *Client) RPCMajorVersion() int {
@@ -467,7 +475,7 @@ func (c *Client) Stats() map[string]map[string]string {
     defer c.heartbeatLock.Unlock()
     stats := map[string]map[string]string{
         "client": map[string]string{
-            "node_id": c.Node().ID,
+            "node_id": c.NodeID(),
             "known_servers": c.servers.all().String(),
             "num_allocations": strconv.Itoa(c.NumAllocs()),
             "last_heartbeat": fmt.Sprintf("%v", time.Since(c.lastHeartbeat)),
@@ -1159,9 +1167,8 @@ func (c *Client) updateNodeStatus() error {
     c.heartbeatLock.Lock()
     defer c.heartbeatLock.Unlock()
 
-    node := c.Node()
     req := structs.NodeUpdateStatusRequest{
-        NodeID: node.ID,
+        NodeID: c.NodeID(),
         Status: structs.NodeStatusReady,
         WriteRequest: structs.WriteRequest{Region: c.Region()},
     }
@@ -1226,7 +1233,7 @@ func (c *Client) updateAllocStatus(alloc *structs.Allocation) {
     // send the fields that are updatable by the client.
     stripped := new(structs.Allocation)
     stripped.ID = alloc.ID
-    stripped.NodeID = c.Node().ID
+    stripped.NodeID = c.NodeID()
     stripped.TaskStates = alloc.TaskStates
     stripped.ClientStatus = alloc.ClientStatus
     stripped.ClientDescription = alloc.ClientDescription
@@ -1303,10 +1310,9 @@ func (c *Client) watchAllocations(updates chan *allocUpdates) {
     // The request and response for getting the map of allocations that should
     // be running on the Node to their AllocModifyIndex which is incremented
     // when the allocation is updated by the servers.
-    n := c.Node()
     req := structs.NodeSpecificRequest{
-        NodeID: n.ID,
-        SecretID: n.SecretID,
+        NodeID: c.NodeID(),
+        SecretID: c.secretNodeID(),
         QueryOptions: structs.QueryOptions{
             Region: c.Region(),
             AllowStale: true,
@@ -1664,8 +1670,8 @@ func (c *Client) deriveToken(alloc *structs.Allocation, taskNames []string, vcli
     // DeriveVaultToken of nomad server can take in a set of tasks and
     // creates tokens for all the tasks.
     req := &structs.DeriveVaultTokenRequest{
-        NodeID: c.Node().ID,
-        SecretID: c.Node().SecretID,
+        NodeID: c.NodeID(),
+        SecretID: c.secretNodeID(),
         AllocID: alloc.ID,
         Tasks: verifiedTasks,
         QueryOptions: structs.QueryOptions{
@@ -1849,7 +1855,7 @@ DISCOLOOP:
 func (c *Client) emitStats() {
     // Assign labels directly before emitting stats so the information expected
     // is ready
-    c.baseLabels = []metrics.Label{{Name: "node_id", Value: c.Node().ID}, {Name: "datacenter", Value: c.Node().Datacenter}}
+    c.baseLabels = []metrics.Label{{Name: "node_id", Value: c.NodeID()}, {Name: "datacenter", Value: c.Datacenter()}}
 
     // Start collecting host stats right away and then keep collecting every
     // collection interval
@@ -2027,7 +2033,7 @@ func (c *Client) setGaugeForUptime(hStats *stats.HostStats) {
 
 // emitHostStats pushes host resource usage stats to remote metrics collection sinks
 func (c *Client) emitHostStats() {
-    nodeID := c.Node().ID
+    nodeID := c.NodeID()
     hStats := c.hostStatsCollector.Stats()
 
     c.setGaugeForMemoryStats(nodeID, hStats)
@@ -2041,7 +2047,7 @@ func (c *Client) emitHostStats() {
 
 // emitClientMetrics emits lower volume client metrics
 func (c *Client) emitClientMetrics() {
-    nodeID := c.Node().ID
+    nodeID := c.NodeID()
 
     // Emit allocation metrics
     blocked, migrating, pending, running, terminal := 0, 0, 0, 0, 0
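
The client.go hunks above replace c.Node() with the new lock-free accessors on the heartbeat, allocation, and metrics paths. As a hypothetical, self-contained illustration of the failure mode the commit message describes, the sketch below shows how a slow operation holding the config lock (standing in for a Vault fingerprint with a large timeout) delays any caller that still reads the node through the lock:

package main

import (
    "fmt"
    "sync"
    "time"
)

func main() {
    var configLock sync.RWMutex

    // A slow "fingerprint" that holds the write lock, standing in for a
    // Vault API call with a large timeout.
    go func() {
        configLock.Lock()
        defer configLock.Unlock()
        time.Sleep(2 * time.Second)
    }()

    time.Sleep(100 * time.Millisecond) // let the writer grab the lock first

    // Old-style heartbeat path: reading the node under the lock blocks
    // until the slow fingerprint finishes.
    start := time.Now()
    configLock.RLock()
    nodeID := "node-1" // stand-in for configCopy.Node.ID
    configLock.RUnlock()
    fmt.Printf("heartbeat for %s was delayed %v by the lock\n", nodeID, time.Since(start))
}
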
5 changes: 3 additions & 2 deletions client/stats/host.go
@@ -92,6 +92,9 @@ func NewHostStatsCollector(logger *log.Logger, allocDir string) *HostStatsCollec
 
 // Collect collects stats related to resource usage of a host
 func (h *HostStatsCollector) Collect() error {
+    h.hostStatsLock.Lock()
+    defer h.hostStatsLock.Unlock()
+
     hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
 
     // Determine up-time
@@ -131,9 +134,7 @@ func (h *HostStatsCollector) Collect() error {
     hs.AllocDirStats = h.toDiskStats(usage, nil)
 
     // Update the collected status object.
-    h.hostStatsLock.Lock()
     h.hostStats = hs
-    h.hostStatsLock.Unlock()
 
     return nil
 }
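
The stats/host.go change above widens the critical section: Collect now holds hostStatsLock for the entire collection rather than only around the final assignment. The sketch below is a simplified, hypothetical stand-in for that pattern (the real collector gathers memory, CPU, disk, and uptime statistics); it assumes a Stats getter that reads the snapshot under the same lock, so concurrent collections serialize and readers never observe one in progress.

package main

import (
    "fmt"
    "sync"
    "time"
)

// HostStats is a minimal stand-in for the collected snapshot.
type HostStats struct {
    Timestamp int64
    Uptime    uint64
}

type HostStatsCollector struct {
    hostStatsLock sync.RWMutex
    hostStats     *HostStats
}

// Collect holds the lock for the whole collection, so concurrent Collect
// calls serialize and Stats never runs mid-collection.
func (h *HostStatsCollector) Collect() error {
    h.hostStatsLock.Lock()
    defer h.hostStatsLock.Unlock()

    hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
    hs.Uptime = 42 // the real collector fills in host measurements here
    h.hostStats = hs
    return nil
}

// Stats returns the latest snapshot under the same lock.
func (h *HostStatsCollector) Stats() *HostStats {
    h.hostStatsLock.RLock()
    defer h.hostStatsLock.RUnlock()
    return h.hostStats
}

func main() {
    c := &HostStatsCollector{}
    _ = c.Collect()
    fmt.Println(c.Stats().Timestamp)
}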