api-gateway: fix cache and service deletion issue #2377

Merged · Jun 15, 2023 · 13 commits
3 changes: 2 additions & 1 deletion acceptance/go.mod
@@ -16,6 +16,7 @@ require (
k8s.io/api v0.26.3
k8s.io/apimachinery v0.26.3
k8s.io/client-go v0.26.3
k8s.io/utils v0.0.0-20230209194617-a36077c30491
sigs.k8s.io/controller-runtime v0.14.6
sigs.k8s.io/gateway-api v0.7.0
)
@@ -30,6 +31,7 @@ require (
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/deckarep/golang-set v1.7.1 // indirect
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
github.com/evanphx/json-patch v5.6.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.6.0 // indirect
@@ -120,7 +122,6 @@ require (
k8s.io/component-base v0.26.3 // indirect
k8s.io/klog/v2 v2.100.1 // indirect
k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f // indirect
k8s.io/utils v0.0.0-20230209194617-a36077c30491 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
2 changes: 2 additions & 0 deletions acceptance/go.sum
@@ -144,6 +144,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deckarep/golang-set v1.7.1 h1:SCQV0S6gTtp6itiFrTqI+pfmJ4LN85S1YzhDf9rTHJQ=
github.com/deckarep/golang-set v1.7.1/go.mod h1:93vsz/8Wt4joVM7c2AVqh+YRMiUSc14yDtF28KmMOgQ=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8=
github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E=
207 changes: 207 additions & 0 deletions acceptance/tests/api-gateway/api_gateway_gatewayclassconfig_test.go
@@ -0,0 +1,207 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package apigateway

import (
"context"
"testing"

"github.com/hashicorp/consul-k8s/acceptance/framework/consul"
"github.com/hashicorp/consul-k8s/acceptance/framework/helpers"
"github.com/hashicorp/consul-k8s/acceptance/framework/logger"
"github.com/hashicorp/consul-k8s/control-plane/api-gateway/common"
"github.com/hashicorp/consul-k8s/control-plane/api/v1alpha1"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/stretchr/testify/require"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
gwv1beta1 "sigs.k8s.io/gateway-api/apis/v1beta1"
)

// TestAPIGateway_GatewayClassConfig tests the creation of a GatewayClassConfig object and makes sure that its configuration
// is properly applied to any child gateway objects, namely that the number of gateway instances matches the defined
// minInstances, maxInstances, and defaultInstances parameters, and that changing the parent GatewayClassConfig does not affect
// gateways that have already been created.
func TestAPIGateway_GatewayClassConfig(t *testing.T) {
ctx := suite.Environment().DefaultContext(t)
cfg := suite.Config()
helmValues := map[string]string{
"global.logLevel": "trace",
"connectInject.enabled": "true",
}
releaseName := helpers.RandomName()
consulCluster := consul.NewHelmCluster(t, helmValues, ctx, cfg, releaseName)
consulCluster.Create(t)
// Override the default proxy config settings for this test.
consulClient, _ := consulCluster.SetupConsulClient(t, false)
_, _, err := consulClient.ConfigEntries().Set(&api.ProxyConfigEntry{
Kind: api.ProxyDefaults,
Name: api.ProxyConfigGlobal,
Config: map[string]interface{}{
"protocol": "http",
},
}, nil)
require.NoError(t, err)

k8sClient := ctx.ControllerRuntimeClient(t)
namespace := "gateway-namespace"

//create clean namespace
err = k8sClient.Create(context.Background(), &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: namespace,
},
})
require.NoError(t, err)
helpers.Cleanup(t, cfg.NoCleanupOnFailure, func() {
logger.Log(t, "deleting gateway namesapce")
k8sClient.Delete(context.Background(), &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: namespace,
},
})
})

defaultInstances := pointer.Int32(2)
maxInstances := pointer.Int32(8)
minInstances := pointer.Int32(1)
// create a GatewayClassConfig with configuration set
gatewayClassConfigName := "gateway-class-config"
gatewayClassConfig := &v1alpha1.GatewayClassConfig{
ObjectMeta: metav1.ObjectMeta{
Name: gatewayClassConfigName,
},
Spec: v1alpha1.GatewayClassConfigSpec{
DeploymentSpec: v1alpha1.DeploymentSpec{
DefaultInstances: defaultInstances,
MaxInstances: maxInstances,
MinInstances: minInstances,
},
},
}
logger.Log(t, "creating gateway class config")
err = k8sClient.Create(context.Background(), gatewayClassConfig)
require.NoError(t, err)
helpers.Cleanup(t, cfg.NoCleanupOnFailure, func() {
logger.Log(t, "deleting all gateway class configs")
k8sClient.DeleteAllOf(context.Background(), &v1alpha1.GatewayClassConfig{})
})

gatewayParametersRef := &gwv1beta1.ParametersReference{
Group: gwv1beta1.Group(v1alpha1.ConsulHashicorpGroup),
Kind: gwv1beta1.Kind(v1alpha1.GatewayClassConfigKind),
Name: gatewayClassConfigName,
}

// create gateway class referencing gateway-class-config
gatewayClassName := "gateway-class"
logger.Log(t, "creating controlled gateway class")
createGatewayClass(t, k8sClient, gatewayClassName, gatewayClassControllerName, gatewayParametersRef)
helpers.Cleanup(t, cfg.NoCleanupOnFailure, func() {
logger.Log(t, "deleting all gateway classes")
k8sClient.DeleteAllOf(context.Background(), &gwv1beta1.GatewayClass{})
})

// Create a certificate to reference in listeners
certificateInfo := generateCertificate(t, nil, "certificate.consul.local")
certificateName := "certificate"
certificate := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: certificateName,
Namespace: namespace,
Labels: map[string]string{
"test-certificate": "true",
},
},
Type: corev1.SecretTypeTLS,
Data: map[string][]byte{
corev1.TLSCertKey: certificateInfo.CertPEM,
corev1.TLSPrivateKeyKey: certificateInfo.PrivateKeyPEM,
},
}
logger.Log(t, "creating certificate")
err = k8sClient.Create(context.Background(), certificate)
require.NoError(t, err)
helpers.Cleanup(t, cfg.NoCleanupOnFailure, func() {
k8sClient.Delete(context.Background(), certificate)
})

// Create gateway referencing gateway class config
gatewayName := "gateway"
logger.Log(t, "creating controlled gateway")
gateway := createGateway(t, k8sClient, gatewayName, namespace, gatewayClassName, certificateName)
// make sure it exists
logger.Log(t, "checking that gateway one is synchronized to Consul")
checkConsulExists(t, consulClient, api.APIGateway, gatewayName)

helpers.Cleanup(t, cfg.NoCleanupOnFailure, func() {
logger.Log(t, "deleting all gateways")
k8sClient.DeleteAllOf(context.Background(), &gwv1beta1.Gateway{}, client.InNamespace(namespace))
})

// Scenario: Gateway deployment should match the default instances defined on the gateway class config
logger.Log(t, "checking that gateway instances match defined gateway class config")
checkNumberOfInstances(t, k8sClient, consulClient, gateway.Name, gateway.Namespace, defaultInstances, gateway)

//Scenario: Updating the GatewayClassConfig should not affect gateways that have already been created
logger.Log(t, "updating gatewayclassconfig values")
err = k8sClient.Get(context.Background(), types.NamespacedName{Name: gatewayClassConfigName, Namespace: namespace}, gatewayClassConfig)
require.NoError(t, err)
gatewayClassConfig.Spec.DeploymentSpec.DefaultInstances = pointer.Int32(8)
gatewayClassConfig.Spec.DeploymentSpec.MinInstances = pointer.Int32(5)
err = k8sClient.Update(context.Background(), gatewayClassConfig)
require.NoError(t, err)
checkNumberOfInstances(t, k8sClient, consulClient, gateway.Name, gateway.Namespace, defaultInstances, gateway)

//Scenario: gateways should be able to scale independently and not get overridden by the controller unless it's above the max
scale(t, k8sClient, gateway.Name, gateway.Namespace, maxInstances)
checkNumberOfInstances(t, k8sClient, consulClient, gateway.Name, gateway.Namespace, maxInstances, gateway)
scale(t, k8sClient, gateway.Name, gateway.Namespace, pointer.Int32(10))
checkNumberOfInstances(t, k8sClient, consulClient, gateway.Name, gateway.Namespace, maxInstances, gateway)
scale(t, k8sClient, gateway.Name, gateway.Namespace, pointer.Int32(0))
checkNumberOfInstances(t, k8sClient, consulClient, gateway.Name, gateway.Namespace, minInstances, gateway)

}

func scale(t *testing.T, client client.Client, name, namespace string, scaleTo *int32) {
t.Helper()

retryCheck(t, 30, func(r *retry.R) {
var deployment appsv1.Deployment
err := client.Get(context.Background(), types.NamespacedName{Name: name, Namespace: namespace}, &deployment)
require.NoError(r, err)

deployment.Spec.Replicas = scaleTo
err = client.Update(context.Background(), &deployment)
require.NoError(r, err)
})
}

func checkNumberOfInstances(t *testing.T, k8client client.Client, consulClient *api.Client, name, namespace string, wantNumber *int32, gateway *gwv1beta1.Gateway) {
t.Helper()

retryCheck(t, 30, func(r *retry.R) {
//first check to make sure the number of replicas has been set properly
var deployment appsv1.Deployment
err := k8client.Get(context.Background(), types.NamespacedName{Name: name, Namespace: namespace}, &deployment)
require.NoError(r, err)
require.EqualValues(r, *wantNumber, *deployment.Spec.Replicas)

//then check to make sure the number of gateway pods matches the replicas generated
podList := corev1.PodList{}
labels := common.LabelsForGateway(gateway)
err = k8client.List(context.Background(), &podList, client.InNamespace(namespace), client.MatchingLabels(labels))
require.NoError(r, err)
require.EqualValues(r, *wantNumber, len(podList.Items))

services, _, err := consulClient.Catalog().Service(name, "", nil)
require.NoError(r, err)
require.EqualValues(r, *wantNumber, len(services))
})
}
1 change: 1 addition & 0 deletions control-plane/api-gateway/binding/binder.go
@@ -197,6 +197,7 @@ func (b *Binder) Snapshot() *Snapshot {
for _, registration := range registrations {
if service.ServiceID == registration.Service.ID {
found = true
break
}
}
if !found {
25 changes: 24 additions & 1 deletion control-plane/api-gateway/binding/binder_test.go
@@ -15,6 +15,7 @@ import (

logrtest "github.com/go-logr/logr/testing"
"github.com/google/go-cmp/cmp"
"github.com/hashicorp/consul/api"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -25,7 +26,6 @@ import (

"github.com/hashicorp/consul-k8s/control-plane/api-gateway/common"
"github.com/hashicorp/consul-k8s/control-plane/api/v1alpha1"
"github.com/hashicorp/consul/api"
)

func init() {
@@ -809,6 +809,29 @@ func TestBinder_Registrations(t *testing.T) {
{Node: "test", ServiceID: "pod2", Namespace: "namespace1"},
{Node: "test", ServiceID: "pod3", Namespace: "namespace1"},
},
Pods: []corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{Name: "pod1"},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pod2"},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pod3"},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}},
},
},
},
Comment on lines +812 to +834

@nathancoleman (Member) commented on Jun 15, 2023:

Adding the pods here as we would expect them to be present when a Gateway is deleted. Their presence should be ignored and they should be deregistered from Consul anyways, which is what this test is asserting.

}),
expectedDeregistrations: []api.CatalogDeregistration{
{Node: "test", ServiceID: "pod1", Namespace: "namespace1"},
17 changes: 0 additions & 17 deletions control-plane/api-gateway/cache/gateway.go
@@ -14,16 +14,13 @@ import (
"github.com/hashicorp/consul-k8s/control-plane/consul"
"github.com/hashicorp/consul/api"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/event"
)

type GatewayCache struct {
config Config
serverMgr consul.ServerConnectionManager
logger logr.Logger

events chan event.GenericEvent
Review comment (Member):

Confirmed that this channel isn't being used anywhere. It looks like it was a copy-paste leftover from a previous PR.


data map[api.ResourceReference][]api.CatalogService
dataMutex sync.RWMutex

@@ -38,7 +35,6 @@ func NewGatewayCache(ctx context.Context, config Config) *GatewayCache {
config: config,
serverMgr: config.ConsulServerConnMgr,
logger: config.Logger,
events: make(chan event.GenericEvent),
data: make(map[api.ResourceReference][]api.CatalogService),
subscribedGateways: make(map[api.ResourceReference]context.CancelFunc),
ctx: ctx,
@@ -140,18 +136,5 @@ func (r *GatewayCache) subscribeToGateway(ctx context.Context, ref api.ResourceR
r.dataMutex.Lock()
r.data[common.NormalizeMeta(ref)] = derefed
r.dataMutex.Unlock()

event := event.GenericEvent{
Object: newConfigEntryObject(resource),
}

select {
case <-ctx.Done():
r.dataMutex.Lock()
delete(r.data, ref)
r.dataMutex.Unlock()
return
case r.events <- event:
}
Comment on lines -148 to -155

Review comment (Member):

Solidifying my own understanding by writing it out:

Since the r.events channel is not buffered and nothing was reading from the channel, the first run through this code would block on the write. The context also was still open, so ctx.Done() wasn't happening either.

As a result, we would never move past this select to the next iteration of our for loop to refresh our cache, so it would very quickly become stale. Removing this code allows our loop to continue executing just as intended so that our cache is updated regularly.

Reply (Contributor):

Commenting here so it's less likely to get lost. Removing the channel-writing code seems fine to me; I believe it was originally there to re-trigger reconciliation if a derived Consul service changed (for example, if someone manually deregistered the service, in which case we'd just re-register it), but clearly I forgot to hook it into our Watch routines 😬

It's probably "more correct" to do it with the Watch, but that seems like a small enough edge case that dropping this here is fine. If we need to add it back later, we can.

}
}
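
For illustration only (not part of the diff): a minimal, self-contained Go sketch of the failure mode described in the review thread above. The names (`refreshLoop`, `events`, `refreshed`) are placeholders, not the actual cache code; the point is that a send on an unbuffered channel with no reader blocks forever, so the refresh loop never reaches its next iteration.

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// refreshLoop mimics the shape of the removed code: each iteration refreshes a
// cache entry, then tries to publish an event on an unbuffered channel. With no
// reader on events, the send blocks forever and the loop stalls after the first
// iteration, so the "cache" goes stale.
func refreshLoop(ctx context.Context, events chan struct{}, refreshed chan<- int) {
	for i := 1; ; i++ {
		refreshed <- i // stand-in for "update r.data under the mutex"

		select {
		case <-ctx.Done():
			return
		case events <- struct{}{}: // blocks: nothing ever receives from events
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	events := make(chan struct{})   // unbuffered, and deliberately never read
	refreshed := make(chan int, 10) // records how many refresh passes completed

	go refreshLoop(ctx, events, refreshed)

	time.Sleep(500 * time.Millisecond)
	fmt.Println("refresh iterations completed:", len(refreshed)) // prints 1
}
```

Running the sketch prints `refresh iterations completed: 1`, matching the behavior described above: the loop stalls on the unread events channel instead of continuing to refresh, which is why removing the send fixes the stale-cache issue.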