[WIP] Do not merge - collecting performance patches for scale testing #9436

Draft · wants to merge 6 commits into base: main
29 changes: 16 additions & 13 deletions client/client.go
@@ -91,7 +91,7 @@ const (

// allocSyncIntv is the batching period of allocation updates before they
// are synced with the server.
allocSyncIntv = 200 * time.Millisecond
allocSyncIntv = time.Second

// allocSyncRetryIntv is the interval on which we retry updating
// the status of the allocation
@@ -1904,7 +1904,6 @@ func (c *Client) AllocStateUpdated(alloc *structs.Allocation) {
// allocSync is a long lived function that batches allocation updates to the
// server.
func (c *Client) allocSync() {
staggered := false
syncTicker := time.NewTicker(allocSyncIntv)
updates := make(map[string]*structs.Allocation)
for {
@@ -1933,19 +1932,23 @@
}

var resp structs.GenericResponse
if err := c.RPC("Node.UpdateAlloc", &args, &resp); err != nil {
err := c.RPC("Node.UpdateAlloc", &args, &resp)
if err != nil {
// Error updating allocations, do *not* clear
// updates and retry after backoff
c.logger.Error("error updating allocations", "error", err)
syncTicker.Stop()
syncTicker = time.NewTicker(c.retryIntv(allocSyncRetryIntv))
staggered = true
} else {
updates = make(map[string]*structs.Allocation)
if staggered {
syncTicker.Stop()
syncTicker = time.NewTicker(allocSyncIntv)
staggered = false
}
syncTicker.Reset(c.retryIntv(allocSyncRetryIntv))
continue
}

// Successfully updated allocs, reset map and ticker.
// Always reset ticker to give loop time to receive
// alloc updates. If the RPC took the ticker interval
// we may call it in a tight loop before draining
// buffered updates.
updates = make(map[string]*structs.Allocation, len(updates))
syncTicker.Stop()
syncTicker = time.NewTicker(allocSyncIntv)
}
}
}
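The client/client.go hunks above widen the allocation-update batching window from 200ms to one second and drop the `staggered` bookkeeping: on an RPC failure the loop now resets the ticker to the retry interval and continues, and on success it rebuilds the update map with a size hint and restarts the ticker. Below is a minimal, self-contained sketch of that batch-and-retry pattern; the channel, `send` callback, and interval values are illustrative stand-ins rather than the Nomad client's actual fields, and `Reset` is used on both paths for brevity.

```go
package main

import (
	"log"
	"time"
)

const (
	syncIntv      = time.Second     // normal batching period
	syncRetryIntv = 5 * time.Second // backoff after a failed sync (illustrative value)
)

// syncLoop coalesces updates and flushes them on a ticker, keeping the
// pending set and backing off when the flush fails.
func syncLoop(updatesCh <-chan string, send func([]string) error, shutdownCh <-chan struct{}) {
	ticker := time.NewTicker(syncIntv)
	defer ticker.Stop()

	pending := make(map[string]string)
	for {
		select {
		case <-shutdownCh:
			return
		case u := <-updatesCh:
			// Coalesce: only the latest update per key is kept.
			pending[u] = u
		case <-ticker.C:
			if len(pending) == 0 {
				continue
			}
			batch := make([]string, 0, len(pending))
			for _, v := range pending {
				batch = append(batch, v)
			}
			if err := send(batch); err != nil {
				// Keep the pending updates and slow the ticker down before retrying.
				log.Println("sync failed:", err)
				ticker.Reset(syncRetryIntv)
				continue
			}
			// Success: reset the map (with a size hint) and the ticker so a
			// slow RPC doesn't cause the timer to fire again immediately.
			pending = make(map[string]string, len(pending))
			ticker.Reset(syncIntv)
		}
	}
}

func main() {
	updates := make(chan string, 8)
	shutdown := make(chan struct{})
	go syncLoop(updates, func(batch []string) error {
		log.Println("synced", len(batch), "updates")
		return nil
	}, shutdown)

	updates <- "alloc-1"
	updates <- "alloc-2"
	time.Sleep(2 * syncIntv)
	close(shutdown)
}
```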
8 changes: 4 additions & 4 deletions nomad/deploymentwatcher/deployment_watcher.go
@@ -873,6 +873,10 @@ func (w *deploymentWatcher) getAllocsCh(index uint64) <-chan *allocUpdates {
// getAllocs retrieves the allocations that are part of the deployment blocking
// at the given index.
func (w *deploymentWatcher) getAllocs(index uint64) ([]*structs.AllocListStub, uint64, error) {
if err := w.queryLimiter.Wait(w.ctx); err != nil {
return nil, 0, err
}

resp, index, err := w.state.BlockingQuery(w.getAllocsImpl, index, w.ctx)
if err != nil {
return nil, 0, err
@@ -886,10 +890,6 @@ func (w *deploymentWatcher) getAllocs(index uint64) ([]*structs.AllocListStub, u

// getAllocsImpl retrieves the allocations for the deployment from the passed state store.
func (w *deploymentWatcher) getAllocsImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {
if err := w.queryLimiter.Wait(w.ctx); err != nil {
return nil, 0, err
}

// Capture all the allocations
allocs, err := state.AllocsByDeployment(ws, w.deploymentID)
if err != nil {
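This change moves the `queryLimiter.Wait` call out of `getAllocsImpl`, which the blocking-query machinery may re-evaluate repeatedly while waiting for the state index to advance, and into `getAllocs`, so each caller-visible query is charged against the limiter exactly once. A rough sketch of the pattern, assuming a `golang.org/x/time/rate`-style limiter and a stand-in `blockingQuery` helper (not Nomad's real API):

```go
package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

// blockingQuery stands in for the state store's blocking-query machinery,
// which may invoke the callback several times before returning.
func blockingQuery(ctx context.Context, retries int, run func() (uint64, error)) (uint64, error) {
	var (
		idx uint64
		err error
	)
	for i := 0; i <= retries; i++ {
		if idx, err = run(); err != nil {
			return 0, err
		}
	}
	return idx, nil
}

// getAllocs charges the rate limiter once per caller-visible query, instead
// of inside the callback where it would be charged on every re-evaluation.
func getAllocs(ctx context.Context, limiter *rate.Limiter, index uint64) (uint64, error) {
	if err := limiter.Wait(ctx); err != nil {
		return 0, err
	}
	return blockingQuery(ctx, 3, func() (uint64, error) {
		return index + 1, nil
	})
}

func main() {
	limiter := rate.NewLimiter(rate.Every(100*time.Millisecond), 1)
	idx, err := getAllocs(context.Background(), limiter, 7)
	fmt.Println(idx, err)
}
```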
9 changes: 7 additions & 2 deletions nomad/node_endpoint.go
@@ -1144,8 +1144,13 @@ func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.Gene
updates := n.updates
evals := n.evals
future := n.updateFuture
n.updates = nil
n.evals = nil

// Assume future update patterns will be similar to
// current batch and set cap appropriately to avoid
// slice resizing.
n.updates = make([]*structs.Allocation, 0, len(updates))
n.evals = make([]*structs.Evaluation, 0, len(evals))

n.updateFuture = nil
n.updateTimer = nil
n.updatesLock.Unlock()
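Rather than nil-ing out the batched `updates` and `evals` slices after handing them off, the endpoint now pre-allocates their replacements with the previous batch's length as a capacity hint, on the stated assumption that consecutive batches are similarly sized, so appends for the next batch rarely force the backing array to regrow. A small illustrative sketch of the same pre-sizing idea (the types and names here are hypothetical):

```go
package main

import (
	"fmt"
	"sync"
)

type allocUpdate struct{ ID string }

// batcher accumulates updates under a lock and hands them off in batches.
type batcher struct {
	mu      sync.Mutex
	updates []*allocUpdate
}

func (b *batcher) add(u *allocUpdate) {
	b.mu.Lock()
	defer b.mu.Unlock()
	b.updates = append(b.updates, u)
}

// drain returns the pending batch and pre-sizes the replacement slice,
// assuming the next batch will be roughly as large as the one just taken.
func (b *batcher) drain() []*allocUpdate {
	b.mu.Lock()
	defer b.mu.Unlock()
	taken := b.updates
	b.updates = make([]*allocUpdate, 0, len(taken))
	return taken
}

func main() {
	b := &batcher{}
	b.add(&allocUpdate{ID: "a"})
	b.add(&allocUpdate{ID: "b"})
	fmt.Println("drained", len(b.drain()), "updates")
}
```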
3 changes: 3 additions & 0 deletions nomad/structs/network.go
@@ -70,6 +70,9 @@ func (idx *NetworkIndex) getUsedPortsFor(ip string) Bitmap {
// Release is called when the network index is no longer needed
// to attempt to re-use some of the memory it has allocated
func (idx *NetworkIndex) Release() {
if idx == nil {
return
}
for _, b := range idx.UsedPorts {
bitmapPool.Put(b)
}
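The nil-receiver guard makes `Release` a no-op when the index was never built, which appears to pair with the lazy `NetworkIndex` construction in scheduler/rank.go below, where a deferred `Release` may now run against a nil index. A tiny sketch of the idiom (the `index` type is hypothetical):

```go
package main

import "fmt"

type index struct{ buffers [][]byte }

// Release is safe on a nil receiver, so callers can unconditionally defer it
// even when the index is only built lazily and may never exist.
func (i *index) Release() {
	if i == nil {
		return
	}
	i.buffers = nil
}

func main() {
	var idx *index      // never initialized
	defer idx.Release() // no panic: the method checks for a nil receiver
	fmt.Println("done")
}
```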
22 changes: 17 additions & 5 deletions scheduler/rank.go
@@ -3,6 +3,7 @@ package scheduler
import (
"fmt"
"math"
"sync"

"github.com/hashicorp/nomad/nomad/structs"
)
@@ -202,13 +203,21 @@
}

// Index the existing network usage
netIdx := structs.NewNetworkIndex()
netIdx.SetNode(option.Node)
netIdx.AddAllocs(proposed)
var netIdx *structs.NetworkIndex
initNetIdx := func() {
netIdx = structs.NewNetworkIndex()
netIdx.SetNode(option.Node)
netIdx.AddAllocs(proposed)
}
var initNetIdxOnce sync.Once

// Create a device allocator
devAllocator := newDeviceAllocator(iter.ctx, option.Node)
devAllocator.AddAllocs(proposed)
var devAllocator *deviceAllocator
initDevAllocator := func() {
devAllocator = newDeviceAllocator(iter.ctx, option.Node)
devAllocator.AddAllocs(proposed)
}
var initDevAllocatorOnce sync.Once

// Track the affinities of the devices
totalDeviceAffinityWeight := 0.0
@@ -241,6 +250,7 @@

// Check if we need task group network resource
if len(iter.taskGroup.Networks) > 0 {
initNetIdxOnce.Do(initNetIdx)
ask := iter.taskGroup.Networks[0].Copy()
offer, err := netIdx.AssignPorts(ask)
if err != nil {
@@ -308,6 +318,7 @@

// Check if we need a network resource
if len(task.Resources.Networks) > 0 {
initNetIdxOnce.Do(initNetIdx)
ask := task.Resources.Networks[0].Copy()
offer, err := netIdx.AssignNetwork(ask)
if offer == nil {
@@ -356,6 +367,7 @@

// Check if we need to assign devices
for _, req := range task.Resources.Devices {
initDevAllocatorOnce.Do(initDevAllocator)
offer, sumAffinities, err := devAllocator.AssignDevice(req)
if offer == nil {
// If eviction is not enabled, mark this node as exhausted and continue
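The rank.go changes defer building the network index and device allocator until a task group actually asks for network ports or devices, using `sync.Once` so the first user pays the construction cost and later checks reuse the same instance. A self-contained sketch of that lazy-initialization pattern (the `expensiveIndex` type and node name are illustrative):

```go
package main

import (
	"fmt"
	"sync"
)

// expensiveIndex stands in for something costly to build, like a network
// index or device allocator, that many scheduling passes never need.
type expensiveIndex struct{ node string }

func buildIndex(node string) *expensiveIndex {
	fmt.Println("building index for", node)
	return &expensiveIndex{node: node}
}

func main() {
	var (
		idx  *expensiveIndex
		once sync.Once
	)
	initIdx := func() { idx = buildIndex("node-1") }

	needsIndex := []bool{false, false, true, true}
	for i, needs := range needsIndex {
		if !needs {
			continue // this task never pays the construction cost
		}
		once.Do(initIdx) // built at most once, on first use
		fmt.Println("task", i, "uses index for", idx.node)
	}
}
```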