Skip to content

Commit

Permalink
fix node routing in slotClosestNode (#3043)
Browse files Browse the repository at this point in the history
* fix node routing when all nodes are failing

* fix minlatency zero value
  • Loading branch information
srikar-jilugu committed Jul 10, 2024
1 parent 26e0c49 commit 9c1f4f0
Showing 1 changed file with 32 additions and 10 deletions.
42 changes: 32 additions & 10 deletions osscluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ func (n *clusterNode) Close() error {
return n.Client.Close()
}

// maximumNodeLatency is the latency assigned to a node when none of its
// probe pings succeed, so that the node gets the lowest routing priority.
const maximumNodeLatency = 1 * time.Minute

func (n *clusterNode) updateLatency() {
const numProbe = 10
var dur uint64
Expand All @@ -361,7 +363,7 @@ func (n *clusterNode) updateLatency() {
if successes == 0 {
// If none of the pings worked, set latency to some arbitrarily high value so this node gets
// least priority.
latency = float64((1 * time.Minute) / time.Microsecond)
latency = float64((maximumNodeLatency) / time.Microsecond)
} else {
latency = float64(dur) / float64(successes)
}
Expand Down Expand Up @@ -735,20 +737,40 @@ func (c *clusterState) slotClosestNode(slot int) (*clusterNode, error) {
return c.nodes.Random()
}

var node *clusterNode
var allNodesFailing = true
var (
closestNonFailingNode *clusterNode
closestNode *clusterNode
minLatency time.Duration
)

// start minLatency at the maximum possible duration so that any node's latency compares lower
minLatency = time.Duration(math.MaxInt64)

for _, n := range nodes {
if n.Failing() {
continue
}
if node == nil || n.Latency() < node.Latency() {
node = n
if closestNode == nil || n.Latency() < minLatency {
closestNode = n
minLatency = n.Latency()
if !n.Failing() {
closestNonFailingNode = n
allNodesFailing = false
}
}
}
if node != nil {
return node, nil

// pick the healthy node with the lowest latency
if !allNodesFailing && closestNonFailingNode != nil {
return closestNonFailingNode, nil
}

// if all nodes are failing, we will pick the temporarily failing node with lowest latency
if minLatency < maximumNodeLatency && closestNode != nil {
internal.Logger.Printf(context.TODO(), "redis: all nodes are marked as failed, picking the temporarily failing node with lowest latency")
return closestNode, nil
}

// If all nodes are failing - return random node
// If all nodes have the maximum latency (all pings are failing), return a random node across the cluster
internal.Logger.Printf(context.TODO(), "redis: pings to all nodes are failing, picking a random node across the cluster")
return c.nodes.Random()
}

Expand Down

0 comments on commit 9c1f4f0

Please sign in to comment.