Skip to content

Commit

Permalink
implement dual write on Service ClusterIP allocator
Browse files Browse the repository at this point in the history
MultiCIDRServiceAllocator implements a new ClusterIP allocator based on
IPAddress objects to solve the problems and limitations of the
existing bitmap allocators.

However, during the rollout of new versions, deployments need to support
a skew of one version between kube-apiservers. To avoid the possible
problem where concurrent Service requests handled by skewed apiservers
each allocate the same IP to a different Service, the new allocator
implements a dual-write strategy that can be turned off with the
feature gate DisableAllocatorDualWrite.

After the MultiCIDRServiceAllocator is GA, the DisableAllocatorDualWrite
can be enabled safely as all apiservers will run with the new
allocators. The graduation of DisableAllocatorDualWrite can also
be used to clean up the opaque API object that contains the old bitmaps.

If MultiCIDRServiceAllocator is enabled, DisableAllocatorDualWrite is disabled,
and this is a new environment, no bitmap object has been created yet; hence, the
apiserver will initialize it so that it is able to write to it.
  • Loading branch information
aojea committed Jun 27, 2024
1 parent 7c4726d commit 9b1bad4
Show file tree
Hide file tree
Showing 11 changed files with 634 additions and 164 deletions.
6 changes: 4 additions & 2 deletions cmd/kube-apiserver/app/options/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ func validateClusterIPFlags(options Extra) []error {
}

// Complete() expected to have set Primary* and Secondary
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) {
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) ||
!utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
// primary CIDR validation
if err := validateMaxCIDRRange(options.PrimaryServiceClusterIPRange, maxCIDRBits, "--service-cluster-ip-range"); err != nil {
errs = append(errs, err)
Expand All @@ -72,7 +73,8 @@ func validateClusterIPFlags(options Extra) []error {
if !dualstack {
errs = append(errs, errors.New("--service-cluster-ip-range[0] and --service-cluster-ip-range[1] must be of different IP family"))
}
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) {
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) ||
!utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
if err := validateMaxCIDRRange(options.SecondaryServiceClusterIPRange, maxCIDRBits, "--service-cluster-ip-range[1]"); err != nil {
errs = append(errs, err)
}
Expand Down
76 changes: 55 additions & 21 deletions cmd/kube-apiserver/app/options/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,11 @@ func makeOptionsWithCIDRs(serviceCIDR string, secondaryServiceCIDR string) *Serv

func TestClusterServiceIPRange(t *testing.T) {
testCases := []struct {
name string
options *ServerRunOptions
expectErrors bool
gate bool
name string
options *ServerRunOptions
expectErrors bool
ipAllocatorGate bool
disableDualWriteGate bool
}{
{
name: "no service cidr",
Expand Down Expand Up @@ -91,33 +92,65 @@ func TestClusterServiceIPRange(t *testing.T) {
options: makeOptionsWithCIDRs("10.0.0.0/8", ""),
},
{
name: "service cidr IPv4 is too big but gate enbled",
expectErrors: false,
options: makeOptionsWithCIDRs("10.0.0.0/8", ""),
gate: true,
name: "service cidr IPv4 is too big but gate enbled",
expectErrors: true,
options: makeOptionsWithCIDRs("10.0.0.0/8", ""),
ipAllocatorGate: true,
disableDualWriteGate: false,
},
{
name: "service cidr IPv6 is too big but gate enbled",
expectErrors: false,
options: makeOptionsWithCIDRs("2001:db8::/64", ""),
gate: true,
name: "service cidr IPv6 is too big but only ipallocator gate enabled",
expectErrors: true,
options: makeOptionsWithCIDRs("2001:db8::/64", ""),
ipAllocatorGate: true,
disableDualWriteGate: false,
},
{
name: "service cidr IPv6 is too big and gate enbled",
expectErrors: false,
options: makeOptionsWithCIDRs("2001:db8::/12", ""),
gate: true,
name: "service cidr IPv6 is too big but only ipallocator gate enabled",
expectErrors: true,
options: makeOptionsWithCIDRs("2001:db8::/12", ""),
ipAllocatorGate: true,
disableDualWriteGate: false,
},
{
name: "service cidr IPv4 is too big but gate enabled",
expectErrors: false,
options: makeOptionsWithCIDRs("10.0.0.0/8", ""),
ipAllocatorGate: true,
disableDualWriteGate: true,
},
{
name: "service cidr IPv6 is too big but gate enabled",
expectErrors: false,
options: makeOptionsWithCIDRs("2001:db8::/64", ""),
ipAllocatorGate: true,
disableDualWriteGate: true,
},
{
name: "service cidr IPv6 is too big and gate enabled",
expectErrors: false,
options: makeOptionsWithCIDRs("2001:db8::/12", ""),
ipAllocatorGate: true,
disableDualWriteGate: true,
},
{
name: "dual-stack secondary cidr too big",
expectErrors: true,
options: makeOptionsWithCIDRs("10.0.0.0/16", "3000::/64"),
},
{
name: "dual-stack secondary cidr too big gate enabled",
expectErrors: false,
options: makeOptionsWithCIDRs("10.0.0.0/16", "3000::/48"),
gate: true,
name: "dual-stack secondary cidr too big but only ipallocator gate enabled",
expectErrors: true,
options: makeOptionsWithCIDRs("10.0.0.0/16", "3000::/48"),
ipAllocatorGate: true,
disableDualWriteGate: false,
},
{
name: "dual-stack secondary cidr too big gate enabled",
expectErrors: false,
options: makeOptionsWithCIDRs("10.0.0.0/16", "3000::/48"),
ipAllocatorGate: true,
disableDualWriteGate: true,
},
{
name: "more than two entries",
Expand Down Expand Up @@ -149,7 +182,8 @@ func TestClusterServiceIPRange(t *testing.T) {

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.MultiCIDRServiceAllocator, tc.gate)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.MultiCIDRServiceAllocator, tc.ipAllocatorGate)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DisableAllocatorDualWrite, tc.disableDualWriteGate)

errs := validateClusterIPFlags(tc.options.Extra)
if len(errs) > 0 && !tc.expectErrors {
Expand Down
10 changes: 10 additions & 0 deletions pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,14 @@ const (
// Add support for CDI Device IDs in the Device Plugin API.
DevicePluginCDIDevices featuregate.Feature = "DevicePluginCDIDevices"

// owner: @aojea
// alpha: v1.31
//
// In order to support live migration from the old bitmap ClusterIP allocators to the new IPAddress allocators
// introduced by the MultiCIDRServiceAllocator feature, apiservers with MultiCIDRServiceAllocator enabled perform
// a dual write on both allocators. This feature gate disables the dual write on the new Cluster IP allocators.
DisableAllocatorDualWrite featuregate.Feature = "DisableAllocatorDualWrite"

// owner: @andrewsykim
// alpha: v1.22
// beta: v1.29
Expand Down Expand Up @@ -1003,6 +1011,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

CronJobsScheduledAnnotation: {Default: true, PreRelease: featuregate.Beta},

DisableAllocatorDualWrite: {Default: false, PreRelease: featuregate.Alpha}, // remove after MultiCIDRServiceAllocator is GA

DisableCloudProviders: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},

DisableKubeletCloudCredentialProviders: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
Expand Down
65 changes: 65 additions & 0 deletions pkg/registry/core/rest/storage_core.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"k8s.io/client-go/kubernetes"
networkingv1alpha1client "k8s.io/client-go/kubernetes/typed/networking/v1alpha1"
policyclient "k8s.io/client-go/kubernetes/typed/policy/v1"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/api/legacyscheme"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/cluster/ports"
Expand Down Expand Up @@ -351,6 +352,37 @@ func (c *Config) newServiceIPAllocators() (registries rangeRegistries, primaryCl
if err != nil {
return rangeRegistries{}, nil, nil, nil, err
}
// bitmapAllocator is the bitmap-based ClusterIP allocator for the primary
// Service CIDR. It is only created while dual write is active, i.e. while the
// DisableAllocatorDualWrite feature gate is off; otherwise it stays nil.
var bitmapAllocator ipallocator.Interface
if !utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
bitmapAllocator, err = ipallocator.New(&serviceClusterIPRange, func(max int, rangeSpec string, offset int) (allocator.Interface, error) {
// In-memory allocation map wrapped by the storage-backed allocator stored under "/ranges/serviceips".
mem := allocator.NewAllocationMapWithOffset(max, rangeSpec, offset)
etcd, err := serviceallocator.NewEtcd(mem, "/ranges/serviceips", serviceStorageConfig.ForResource(api.Resource("serviceipallocations")))
if err != nil {
return nil, err
}
// It is possible to start apiserver clusters with the new allocator and dual write enabled on new environments.
// If this is the case we need to initialize the bitmap or it will fail to allocate IP addresses because
// the ResourceVersion of the opaque API object is zero.
rangeRegistry, err := etcd.Get()
if err != nil {
return nil, err
}
// Record the primary Service CIDR on the range object before it is (possibly) persisted below.
rangeRegistry.Range = serviceClusterIPRange.String()
// An empty ResourceVersion means the object has not been stored yet, so create it now.
if len(rangeRegistry.ResourceVersion) == 0 {
klog.Infof("kube-apiserver started with IP allocator and dual write enabled but bitmap allocator does not exist, recreating it ...")
err := etcd.CreateOrUpdate(rangeRegistry)
if err != nil {
return nil, err
}
}
// Expose the storage-backed allocator through the returned registries.
registries.clusterIP = etcd
return etcd, nil
})
if err != nil {
return rangeRegistries{}, nil, nil, nil, fmt.Errorf("cannot create cluster IP allocator: %w", err)
}

}
// TODO(aojea) Revisit the initialization of the allocators
// since right now it depends on the service-cidr flags and
// sets the default IPFamily that may not be coherent with the
Expand All @@ -360,6 +392,7 @@ func (c *Config) newServiceIPAllocators() (registries rangeRegistries, primaryCl
c.Informers.Networking().V1alpha1().ServiceCIDRs(),
c.Informers.Networking().V1alpha1().IPAddresses(),
netutils.IsIPv6CIDR(&serviceClusterIPRange),
bitmapAllocator,
)
if err != nil {
return rangeRegistries{}, nil, nil, nil, fmt.Errorf("cannot create cluster IP allocator: %v", err)
Expand Down Expand Up @@ -391,6 +424,37 @@ func (c *Config) newServiceIPAllocators() (registries rangeRegistries, primaryCl
if err != nil {
return rangeRegistries{}, nil, nil, nil, err
}
// bitmapAllocator is the bitmap-based ClusterIP allocator for the secondary
// Service CIDR. It is only created while dual write is active, i.e. while the
// DisableAllocatorDualWrite feature gate is off; otherwise it stays nil.
var bitmapAllocator ipallocator.Interface
if !utilfeature.DefaultFeatureGate.Enabled(features.DisableAllocatorDualWrite) {
	bitmapAllocator, err = ipallocator.New(&c.Services.SecondaryClusterIPRange, func(max int, rangeSpec string, offset int) (allocator.Interface, error) {
		mem := allocator.NewAllocationMapWithOffset(max, rangeSpec, offset)
		// TODO etcdallocator package to return a storage interface via the storageFactory
		etcd, err := serviceallocator.NewEtcd(mem, "/ranges/secondaryserviceips", serviceStorageConfig.ForResource(api.Resource("serviceipallocations")))
		if err != nil {
			return nil, err
		}
		// It is possible to start apiserver clusters with the new allocator and dual write enabled on new environments.
		// If this is the case we need to initialize the bitmap or it will fail to allocate IP addresses because
		// the ResourceVersion of the opaque API object is zero.
		rangeRegistry, err := etcd.Get()
		if err != nil {
			return nil, err
		}
		// The object stored under "/ranges/secondaryserviceips" must describe the
		// secondary Service CIDR, not the primary one, so that the persisted range
		// matches the range this allocator was created for.
		rangeRegistry.Range = c.Services.SecondaryClusterIPRange.String()
		// An empty ResourceVersion means the object has not been stored yet, so create it now.
		if len(rangeRegistry.ResourceVersion) == 0 {
			klog.Infof("kube-apiserver started with IP allocator and dual write enabled but bitmap allocator does not exist, recreating it ...")
			if err := etcd.CreateOrUpdate(rangeRegistry); err != nil {
				return nil, err
			}
		}
		// Expose the storage-backed allocator through the returned registries.
		registries.secondaryClusterIP = etcd
		return etcd, nil
	})
	if err != nil {
		return rangeRegistries{}, nil, nil, nil, fmt.Errorf("cannot create cluster secondary IP allocator: %w", err)
	}
}
// TODO(aojea) Revisit the initialization of the allocators
// since right now it depends on the service-cidr flags and
// sets the default IPFamily that may not be coherent with the
Expand All @@ -400,6 +464,7 @@ func (c *Config) newServiceIPAllocators() (registries rangeRegistries, primaryCl
c.Informers.Networking().V1alpha1().ServiceCIDRs(),
c.Informers.Networking().V1alpha1().IPAddresses(),
netutils.IsIPv6CIDR(&c.Services.SecondaryClusterIPRange),
bitmapAllocator,
)
if err != nil {
return rangeRegistries{}, nil, nil, nil, fmt.Errorf("cannot create cluster secondary IP allocator: %v", err)
Expand Down
Loading

0 comments on commit 9b1bad4

Please sign in to comment.