Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix node routing in slotClosestNode #3043

Merged
merged 2 commits into from
Jul 10, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 32 additions & 10 deletions osscluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ func (n *clusterNode) Close() error {
return n.Client.Close()
}

const maximumNodeLatency = 1 * time.Minute

func (n *clusterNode) updateLatency() {
const numProbe = 10
var dur uint64
Expand All @@ -361,7 +363,7 @@ func (n *clusterNode) updateLatency() {
if successes == 0 {
// If none of the pings worked, set latency to some arbitrarily high value so this node gets
// least priority.
latency = float64((1 * time.Minute) / time.Microsecond)
latency = float64((maximumNodeLatency) / time.Microsecond)
} else {
latency = float64(dur) / float64(successes)
}
Expand Down Expand Up @@ -735,20 +737,40 @@ func (c *clusterState) slotClosestNode(slot int) (*clusterNode, error) {
return c.nodes.Random()
}

var node *clusterNode
var allNodesFailing = true
var (
closestNonFailingNode *clusterNode
closestNode *clusterNode
minLatency time.Duration
)

// initialize minLatency to the maximum possible duration so that any measured latency compares lower
minLatency = time.Duration(math.MaxInt64)

for _, n := range nodes {
if n.Failing() {
continue
}
if node == nil || n.Latency() < node.Latency() {
node = n
if closestNode == nil || n.Latency() < minLatency {
closestNode = n
minLatency = n.Latency()
if !n.Failing() {
closestNonFailingNode = n
allNodesFailing = false
}
}
}
if node != nil {
return node, nil

// pick the healthy node with the lowest latency
if !allNodesFailing && closestNonFailingNode != nil {
return closestNonFailingNode, nil
}

// if all nodes are failing, we will pick the temporarily failing node with lowest latency
if minLatency < maximumNodeLatency && closestNode != nil {
internal.Logger.Printf(context.TODO(), "redis: all nodes are marked as failed, picking the temporarily failing node with lowest latency")
return closestNode, nil
}

// If all nodes are failing - return random node
// If all nodes have the maximum latency (all pings are failing), return a random node from across the cluster
internal.Logger.Printf(context.TODO(), "redis: pings to all nodes are failing, picking a random node across the cluster")
return c.nodes.Random()
}

Expand Down
Loading