-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
state: ensure that identical manual virtual IP updates result in not bumping the modify indexes #21909
base: main
Are you sure you want to change the base?
state: ensure that identical manual virtual IP updates result in not bumping the modify indexes #21909
Changes from all commits
36d5891
0754411
7e55ff3
90f8e8e
e86a5d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
```release-note:bug | ||
state: ensure that identical manual virtual IP updates do not bump the modify indexes | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,15 +7,18 @@ import ( | |
"fmt" | ||
"net" | ||
|
||
hashstructure_v2 "github.com/mitchellh/hashstructure/v2" | ||
"golang.org/x/exp/maps" | ||
|
||
"github.com/hashicorp/go-bexpr" | ||
"github.com/hashicorp/go-hclog" | ||
"github.com/hashicorp/go-memdb" | ||
"github.com/hashicorp/serf/serf" | ||
hashstructure_v2 "github.com/mitchellh/hashstructure/v2" | ||
|
||
"github.com/hashicorp/consul/acl" | ||
"github.com/hashicorp/consul/agent/consul/state" | ||
"github.com/hashicorp/consul/agent/structs" | ||
"github.com/hashicorp/consul/lib/stringslice" | ||
) | ||
|
||
const MaximumManualVIPsPerService = 8 | ||
|
@@ -770,17 +773,38 @@ func (m *Internal) AssignManualServiceVIPs(args *structs.AssignServiceManualVIPs | |
return fmt.Errorf("cannot associate more than %d manual virtual IPs with the same service", MaximumManualVIPsPerService) | ||
} | ||
|
||
vipMap := make(map[string]struct{}) | ||
for _, ip := range args.ManualVIPs { | ||
parsedIP := net.ParseIP(ip) | ||
if parsedIP == nil || parsedIP.To4() == nil { | ||
return fmt.Errorf("%q is not a valid IPv4 address", parsedIP.String()) | ||
} | ||
vipMap[ip] = struct{}{} | ||
} | ||
// Silently ignore duplicates. | ||
args.ManualVIPs = maps.Keys(vipMap) | ||
|
||
psn := structs.PeeredServiceName{ | ||
ServiceName: structs.NewServiceName(args.Service, &args.EnterpriseMeta), | ||
} | ||
|
||
// Check to see if we can skip the raft apply entirely. | ||
{ | ||
existingIPs, err := m.srv.fsm.State().ServiceManualVIPs(psn) | ||
if err != nil { | ||
return fmt.Errorf("error checking for existing manual ips for service: %w", err) | ||
} | ||
if existingIPs != nil && stringslice.EqualMapKeys(existingIPs.ManualIPs, vipMap) { | ||
Comment on lines
+782
to
+797
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I know we do a similar thing for writing service nodes, but thinking about this, isn't it racy? Another request could be writing this piece of data right after we read it from the state store. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Logically each peered-service-name (PSN) should only be manipulated by one entity at a time externally. In the case of […]. Ideally we'd update the EC code to do a read-before-write check like this to avoid a duplicate write as you'd expect with a controller-type workflow. There is also a lot of prior art about this sort of thing, like for all config entry writes and the catalog as you pointed out. |
||
*reply = structs.AssignServiceManualVIPsResponse{ | ||
Found: true, | ||
UnassignedFrom: nil, | ||
} | ||
return nil | ||
} | ||
} | ||
|
||
req := state.ServiceVirtualIP{ | ||
Service: structs.PeeredServiceName{ | ||
ServiceName: structs.NewServiceName(args.Service, &args.EnterpriseMeta), | ||
}, | ||
Service: psn, | ||
ManualIPs: args.ManualVIPs, | ||
} | ||
resp, err := m.srv.raftApplyMsgpack(structs.UpdateVirtualIPRequestType, req) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,11 +12,11 @@ import ( | |
"testing" | ||
"time" | ||
|
||
"github.com/hashicorp/consul-net-rpc/net/rpc" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
|
||
msgpackrpc "github.com/hashicorp/consul-net-rpc/net-rpc-msgpackrpc" | ||
"github.com/hashicorp/consul-net-rpc/net/rpc" | ||
|
||
"github.com/hashicorp/consul/acl" | ||
"github.com/hashicorp/consul/agent/structs" | ||
|
@@ -3716,36 +3716,69 @@ func TestInternal_AssignManualServiceVIPs(t *testing.T) { | |
require.NoError(t, msgpackrpc.CallWithCodec(codec, "Internal.AssignManualServiceVIPs", req, &resp)) | ||
|
||
type testcase struct { | ||
name string | ||
req structs.AssignServiceManualVIPsRequest | ||
expect structs.AssignServiceManualVIPsResponse | ||
expectErr string | ||
name string | ||
req structs.AssignServiceManualVIPsRequest | ||
expect structs.AssignServiceManualVIPsResponse | ||
expectAgain structs.AssignServiceManualVIPsResponse | ||
expectErr string | ||
expectIPs []string | ||
} | ||
run := func(t *testing.T, tc testcase) { | ||
|
||
run := func(t *testing.T, tc testcase, again bool) { | ||
if tc.expectErr != "" && again { | ||
return // we don't retest known errors | ||
} | ||
|
||
var resp structs.AssignServiceManualVIPsResponse | ||
idx1 := s1.raft.CommitIndex() | ||
err := msgpackrpc.CallWithCodec(codec, "Internal.AssignManualServiceVIPs", tc.req, &resp) | ||
idx2 := s1.raft.CommitIndex() | ||
if tc.expectErr != "" { | ||
require.Error(t, err) | ||
require.Contains(t, err.Error(), tc.expectErr) | ||
return | ||
testutil.RequireErrorContains(t, err, tc.expectErr) | ||
} else { | ||
if again { | ||
require.Equal(t, tc.expectAgain, resp) | ||
require.Equal(t, idx1, idx2, "no raft operations occurred") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was the cheapest hack I could do to verify the "skip raft" behavior without crazy refactoring of the Server behavior. |
||
} else { | ||
require.Equal(t, tc.expect, resp) | ||
} | ||
|
||
psn := structs.PeeredServiceName{ServiceName: structs.NewServiceName(tc.req.Service, nil)} | ||
got, err := s1.fsm.State().ServiceManualVIPs(psn) | ||
require.NoError(t, err) | ||
require.NotNil(t, got) | ||
require.Equal(t, tc.expectIPs, got.ManualIPs) | ||
} | ||
require.Equal(t, tc.expect, resp) | ||
} | ||
|
||
tcs := []testcase{ | ||
{ | ||
name: "successful manual ip assignment", | ||
req: structs.AssignServiceManualVIPsRequest{ | ||
Service: "web", | ||
ManualVIPs: []string{"1.1.1.1", "2.2.2.2"}, | ||
}, | ||
expect: structs.AssignServiceManualVIPsResponse{Found: true}, | ||
expectIPs: []string{"1.1.1.1", "2.2.2.2"}, | ||
expect: structs.AssignServiceManualVIPsResponse{Found: true}, | ||
expectAgain: structs.AssignServiceManualVIPsResponse{Found: true}, | ||
}, | ||
{ | ||
name: "successfully ignoring duplicates", | ||
req: structs.AssignServiceManualVIPsRequest{ | ||
Service: "web", | ||
ManualVIPs: []string{"1.2.3.4", "5.6.7.8", "1.2.3.4", "5.6.7.8"}, | ||
}, | ||
expectIPs: []string{"1.2.3.4", "5.6.7.8"}, | ||
expect: structs.AssignServiceManualVIPsResponse{Found: true}, | ||
expectAgain: structs.AssignServiceManualVIPsResponse{Found: true}, | ||
}, | ||
{ | ||
name: "reassign existing ip", | ||
req: structs.AssignServiceManualVIPsRequest{ | ||
Service: "web", | ||
ManualVIPs: []string{"8.8.8.8"}, | ||
}, | ||
expectIPs: []string{"8.8.8.8"}, | ||
expect: structs.AssignServiceManualVIPsResponse{ | ||
Found: true, | ||
UnassignedFrom: []structs.PeeredServiceName{ | ||
|
@@ -3754,20 +3787,28 @@ func TestInternal_AssignManualServiceVIPs(t *testing.T) { | |
}, | ||
}, | ||
}, | ||
// When we repeat this operation the second time it's a no-op. | ||
expectAgain: structs.AssignServiceManualVIPsResponse{Found: true}, | ||
}, | ||
{ | ||
name: "invalid ip", | ||
req: structs.AssignServiceManualVIPsRequest{ | ||
Service: "web", | ||
ManualVIPs: []string{"3.3.3.3", "invalid"}, | ||
}, | ||
expect: structs.AssignServiceManualVIPsResponse{}, | ||
expectErr: "not a valid", | ||
}, | ||
} | ||
for _, tc := range tcs { | ||
t.Run(tc.name, func(t *testing.T) { | ||
run(t, tc) | ||
t.Run("initial", func(t *testing.T) { | ||
run(t, tc, false) | ||
}) | ||
if tc.expectErr == "" { | ||
t.Run("repeat", func(t *testing.T) { | ||
run(t, tc, true) // only repeat a write if it isn't an known error | ||
}) | ||
} | ||
}) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,8 @@ import ( | |
"fmt" | ||
"net" | ||
"reflect" | ||
"slices" | ||
"sort" | ||
"strings" | ||
|
||
"github.com/hashicorp/go-memdb" | ||
|
@@ -18,6 +20,7 @@ import ( | |
"github.com/hashicorp/consul/api" | ||
"github.com/hashicorp/consul/lib" | ||
"github.com/hashicorp/consul/lib/maps" | ||
"github.com/hashicorp/consul/lib/stringslice" | ||
"github.com/hashicorp/consul/types" | ||
) | ||
|
||
|
@@ -1106,6 +1109,9 @@ func (s *Store) AssignManualServiceVIPs(idx uint64, psn structs.PeeredServiceNam | |
for _, ip := range ips { | ||
assignedIPs[ip] = struct{}{} | ||
} | ||
|
||
txnNeedsCommit := false | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this is practically an issue, but I did notice that the logic was:
and with this change I fixed it to
|
||
|
||
modifiedEntries := make(map[structs.PeeredServiceName]struct{}) | ||
for ip := range assignedIPs { | ||
entry, err := tx.First(tableServiceVirtualIPs, indexManualVIPs, psn.ServiceName.PartitionOrDefault(), ip) | ||
|
@@ -1118,7 +1124,13 @@ func (s *Store) AssignManualServiceVIPs(idx uint64, psn structs.PeeredServiceNam | |
} | ||
|
||
newEntry := entry.(ServiceVirtualIP) | ||
if newEntry.Service.ServiceName.Matches(psn.ServiceName) { | ||
|
||
var ( | ||
thisServiceName = newEntry.Service.ServiceName | ||
thisPeer = newEntry.Service.Peer | ||
) | ||
|
||
if thisServiceName.Matches(psn.ServiceName) && thisPeer == psn.Peer { | ||
continue | ||
} | ||
|
||
|
@@ -1130,13 +1142,20 @@ func (s *Store) AssignManualServiceVIPs(idx uint64, psn structs.PeeredServiceNam | |
filteredIPs = append(filteredIPs, existingIP) | ||
} | ||
} | ||
sort.Strings(filteredIPs) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously we were storing VIPs in whatever order they happened to be in. It seemed silly to not be sorting them. |
||
|
||
newEntry.ManualIPs = filteredIPs | ||
newEntry.ModifyIndex = idx | ||
if err := tx.Insert(tableServiceVirtualIPs, newEntry); err != nil { | ||
return false, nil, fmt.Errorf("failed inserting service virtual IP entry: %s", err) | ||
} | ||
modifiedEntries[newEntry.Service] = struct{}{} | ||
|
||
if err := updateVirtualIPMaxIndexes(tx, idx, thisServiceName.PartitionOrDefault(), thisPeer); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously we were not updating the max index table for the entries that had VIPs stolen from them. |
||
return false, nil, err | ||
} | ||
|
||
txnNeedsCommit = true | ||
} | ||
|
||
entry, err := tx.First(tableServiceVirtualIPs, indexID, psn) | ||
|
@@ -1149,23 +1168,37 @@ func (s *Store) AssignManualServiceVIPs(idx uint64, psn structs.PeeredServiceNam | |
} | ||
|
||
newEntry := entry.(ServiceVirtualIP) | ||
newEntry.ManualIPs = ips | ||
newEntry.ModifyIndex = idx | ||
|
||
if err := tx.Insert(tableServiceVirtualIPs, newEntry); err != nil { | ||
return false, nil, fmt.Errorf("failed inserting service virtual IP entry: %s", err) | ||
} | ||
if err := updateVirtualIPMaxIndexes(tx, idx, psn.ServiceName.PartitionOrDefault(), psn.Peer); err != nil { | ||
return false, nil, err | ||
// Check to see if the slice already contains the same ips. | ||
if !stringslice.EqualMapKeys(newEntry.ManualIPs, assignedIPs) { | ||
newEntry.ManualIPs = slices.Clone(ips) | ||
newEntry.ModifyIndex = idx | ||
|
||
sort.Strings(newEntry.ManualIPs) | ||
|
||
if err := tx.Insert(tableServiceVirtualIPs, newEntry); err != nil { | ||
return false, nil, fmt.Errorf("failed inserting service virtual IP entry: %s", err) | ||
} | ||
if err := updateVirtualIPMaxIndexes(tx, idx, psn.ServiceName.PartitionOrDefault(), psn.Peer); err != nil { | ||
return false, nil, err | ||
} | ||
txnNeedsCommit = true | ||
} | ||
if err = tx.Commit(); err != nil { | ||
return false, nil, err | ||
|
||
if txnNeedsCommit { | ||
if err = tx.Commit(); err != nil { | ||
return false, nil, err | ||
} | ||
} | ||
|
||
return true, maps.SliceOfKeys(modifiedEntries), nil | ||
} | ||
|
||
func updateVirtualIPMaxIndexes(txn WriteTxn, idx uint64, partition, peerName string) error { | ||
// update global max index (for snapshots) | ||
if err := indexUpdateMaxTxn(txn, idx, tableServiceVirtualIPs); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The snapshot logic grabs the max index from this table without peering/partition prefixes, so in order for that to be more correct we update the un-prefixed index here too. |
||
return fmt.Errorf("failed while updating index: %w", err) | ||
} | ||
// update per-partition max index | ||
if err := indexUpdateMaxTxn(txn, idx, partitionedIndexEntryName(tableServiceVirtualIPs, partition)); err != nil { | ||
return fmt.Errorf("failed while updating partitioned index: %w", err) | ||
|
@@ -3086,6 +3119,7 @@ func servicesVirtualIPsTxn(tx ReadTxn, ws memdb.WatchSet) (uint64, []ServiceVirt | |
vips = append(vips, vip) | ||
} | ||
|
||
// Pull from the global one | ||
idx := maxIndexWatchTxn(tx, nil, tableServiceVirtualIPs) | ||
|
||
return idx, vips, nil | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here we just return the same positive response that the FSM would have generated in this no-op case without all of the raft expense.