-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Allow to rename nodes with IDs, will fix #3974 and #4413 #4415
Changes from all commits
5aca4e5
6161266
1a837dc
b030880
bbf5822
6f52314
e323155
55d3380
a58a977
c717608
05af3a1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ import ( | |
"github.com/hashicorp/consul/api" | ||
"github.com/hashicorp/consul/types" | ||
"github.com/hashicorp/go-memdb" | ||
uuid "github.com/hashicorp/go-uuid" | ||
) | ||
|
||
const ( | ||
|
@@ -350,6 +351,25 @@ func (s *Store) EnsureNode(idx uint64, node *structs.Node) error { | |
return nil | ||
} | ||
|
||
// ensureNoNodeWithSimilarNameTxn checks that no other node has conflict in its name | ||
// If allowClashWithoutID then, getting a conflict on another node without ID will be allowed | ||
func (s *Store) ensureNoNodeWithSimilarNameTxn(tx *memdb.Txn, node *structs.Node, allowClashWithoutID bool) error { | ||
// Retrieve all of the nodes | ||
enodes, err := tx.Get("nodes", "id") | ||
if err != nil { | ||
return fmt.Errorf("Cannot lookup all nodes: %s", err) | ||
} | ||
for nodeIt := enodes.Next(); nodeIt != nil; nodeIt = enodes.Next() { | ||
enode := nodeIt.(*structs.Node) | ||
if strings.EqualFold(node.Node, enode.Node) && node.ID != enode.ID { | ||
if !(enode.ID == "" && allowClashWithoutID) { | ||
return fmt.Errorf("Node name %s is reserved by node %s with name %s", node.Node, enode.ID, enode.Node) | ||
} | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// ensureNodeTxn is the inner function called to actually create a node | ||
// registration or modify an existing one in the state store. It allows | ||
// passing in a memdb transaction so it may be part of a larger txn. | ||
|
@@ -358,28 +378,50 @@ func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) err | |
// name is the same. | ||
var n *structs.Node | ||
if node.ID != "" { | ||
existing, err := tx.First("nodes", "uuid", string(node.ID)) | ||
existing, err := getNodeIDTxn(tx, node.ID) | ||
if err != nil { | ||
return fmt.Errorf("node lookup failed: %s", err) | ||
} | ||
if existing != nil { | ||
n = existing.(*structs.Node) | ||
n = existing | ||
if n.Node != node.Node { | ||
return fmt.Errorf("node ID %q for node %q aliases existing node %q", | ||
node.ID, node.Node, n.Node) | ||
// Lets first get all nodes and check whether name do match, we do not allow clash on nodes without ID | ||
dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node, false) | ||
if dupNameError != nil { | ||
return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError) | ||
} | ||
// We are actually renaming a node, remove its reference first | ||
err := s.deleteNodeTxn(tx, idx, n.Node) | ||
if err != nil { | ||
return fmt.Errorf("Error while renaming Node ID: %q from %s to %s", | ||
node.ID, n.Node, node.Node) | ||
} | ||
} | ||
} else { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The else statement here is one of the problematic ones. I would recommend replacing with this statement.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mkeeler I think the else statement is now SAFE since In all other cases, the behaviour is unchanged as shown in tests What works:When request has an ID:
When request has not ID (more permissive regarding case insensitivity)
What does not work anymore due to this change:
Previously, given the following configurationid42 - node1 Registering same name with different ID is blocked Conclusion
So, basically, the ONLY new possible issue we have is:
What do you think of this ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I understand now. We will error out if both if an existing node with a uuid != new uuid where there is a name match but we will still allow adding a uuid to a node that doesn't have one. You just moved the logic to only enforce it when they both have uuids into that function. |
||
// We allow to "steal" another node name that would have no ID | ||
// It basically means that we allow upgrading a node without ID and add the ID | ||
dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node, true) | ||
if dupNameError != nil { | ||
return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError) | ||
} | ||
} | ||
} | ||
// TODO: else Node.ID == "" should be forbidden in future Consul releases | ||
// See https://github.com/hashicorp/consul/pull/3983 for context | ||
|
||
// Check for an existing node by name to support nodes with no IDs. | ||
if n == nil { | ||
existing, err := tx.First("nodes", "id", node.Node) | ||
if err != nil { | ||
return fmt.Errorf("node name lookup failed: %s", err) | ||
} | ||
|
||
if existing != nil { | ||
n = existing.(*structs.Node) | ||
} | ||
// WARNING, for compatibility reasons with tests, we do not check | ||
// for case insensitive matches, which may lead to DB corruption | ||
// See https://github.com/hashicorp/consul/pull/3983 for context | ||
} | ||
|
||
// Get the indexes. | ||
|
@@ -421,6 +463,23 @@ func (s *Store) GetNode(id string) (uint64, *structs.Node, error) { | |
return idx, nil, nil | ||
} | ||
|
||
func getNodeIDTxn(tx *memdb.Txn, id types.NodeID) (*structs.Node, error) { | ||
strnode := string(id) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The uuid parsing should be done with the ParseUUID function from github.com/hashicorp/go-uuid. An example of this is in the Catalog Register RPC endpoint: https://github.com/hashicorp/consul/blob/master/agent/consul/catalog_endpoint.go#L37 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DONE |
||
uuidValue, err := uuid.ParseUUID(strnode) | ||
if err != nil { | ||
return nil, fmt.Errorf("node lookup by ID failed, wrong UUID: %v for '%s'", err, strnode) | ||
} | ||
|
||
node, err := tx.First("nodes", "uuid", uuidValue) | ||
if err != nil { | ||
return nil, fmt.Errorf("node lookup by ID failed: %s", err) | ||
} | ||
if node != nil { | ||
return node.(*structs.Node), nil | ||
} | ||
return nil, nil | ||
} | ||
|
||
// GetNodeID is used to retrieve a node registration by node ID. | ||
func (s *Store) GetNodeID(id types.NodeID) (uint64, *structs.Node, error) { | ||
tx := s.db.Txn(false) | ||
|
@@ -430,14 +489,8 @@ func (s *Store) GetNodeID(id types.NodeID) (uint64, *structs.Node, error) { | |
idx := maxIndexTxn(tx, "nodes") | ||
|
||
// Retrieve the node from the state store | ||
node, err := tx.First("nodes", "uuid", string(id)) | ||
if err != nil { | ||
return 0, nil, fmt.Errorf("node lookup failed: %s", err) | ||
} | ||
if node != nil { | ||
return idx, node.(*structs.Node), nil | ||
} | ||
return idx, nil, nil | ||
node, err := getNodeIDTxn(tx, id) | ||
return idx, node, err | ||
} | ||
|
||
// Nodes is used to return all of the known nodes. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this cover the case where the new node (node.ID == "") but clashing node isn't? That is specifically what broke tests before.
I'll read on and may well find a reason why that can't happen here but just noting. Even if that is a true invariant might be worth commenting because we want to allow either the existing or the new node to have blank ID without changing behaviour.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function does not, but basically yes since this function is called ONLY when node.ID != ""
In my next and (hopefuly) last patch, I did not change anything but added tests to this function.
To summarize
When ID is provided:
When ID is not provided:
So, it means this PR is fixing #4413 (not being able to have two different agents with with that differs only by their case) ONLY in clusters where everybody is playing nice and has an ID (which is your case in our clusters :-)
As long as you have agents that register without any ID, you are in danger, you may have
node2
andNoDe2
. But we keep all of this for the ascending compatibility with old clusters as well as your tests