From 124d83ef6d200148ae9e139193714e1bd39c8f27 Mon Sep 17 00:00:00 2001
From: Jess Yuen
Date: Wed, 8 Apr 2020 02:21:11 -0700
Subject: [PATCH 01/29] Return Resource on cache Fetch

* Return `Resource` rather than `Response` on fetch. Orchestrator needs the
  Resource object after all in order to identify and notify watches.
* Export `Resource` and `Response` members that will be used by calling
  classes.

Signed-off-by: Jess Yuen
---
 internal/app/cache/cache.go      | 58 ++++++++++++-------------
 internal/app/cache/cache_test.go | 72 ++++++++++++++++----------------
 2 files changed, 64 insertions(+), 66 deletions(-)

diff --git a/internal/app/cache/cache.go b/internal/app/cache/cache.go
index 8f06fe16..e5abff3b 100644
--- a/internal/app/cache/cache.go
+++ b/internal/app/cache/cache.go
@@ -7,16 +7,16 @@ import (
     "sync"
     "time"

+    v2 "github.com/envoyproxy/go-control-plane/envoy/api/v2"
+    gcp_types "github.com/envoyproxy/go-control-plane/pkg/cache/types"
     gcp "github.com/envoyproxy/go-control-plane/pkg/cache/v2"
     "github.com/golang/groupcache/lru"
     "github.com/golang/protobuf/ptypes/any"
-
-    v2 "github.com/envoyproxy/go-control-plane/envoy/api/v2"
 )

 type Cache interface {
-    // Fetch returns the cached response if it exists.
-    Fetch(key string) (*Response, error)
+    // Fetch returns the cached resource if it exists.
+    Fetch(key string) (*Resource, error)

     // SetResponse sets the cache response and returns the list of requests.
     SetResponse(key string, resp v2.DiscoveryResponse) ([]*v2.DiscoveryRequest, error)
@@ -33,14 +33,14 @@ type cache struct {

 // Response stores the raw Discovery Response alongside its marshaled resources.
 type Response struct {
-    raw                v2.DiscoveryResponse
-    marshaledResources []*any.Any
+    Raw                v2.DiscoveryResponse
+    MarshaledResources []gcp_types.MarshaledResource
 }

 type Resource struct {
-    resp           *Response
-    requests       []*v2.DiscoveryRequest
-    expirationTime time.Time
+    Resp           *Response
+    Requests       []*v2.DiscoveryRequest
+    ExpirationTime time.Time
 }

 // OnEvictFunc is a callback function for each eviction. Receives the key and cache value when called.
@@ -72,7 +72,7 @@ func NewCache(maxEntries int, onEvicted OnEvictFunc, ttl time.Duration) (Cache,
     }, nil
 }

-func (c *cache) Fetch(key string) (*Response, error) {
+func (c *cache) Fetch(key string) (*Resource, error) {
     c.cacheMu.RLock()
     value, found := c.cache.Get(key)
     c.cacheMu.RUnlock()
@@ -104,7 +104,7 @@ func (c *cache) Fetch(key string) (*Response, error) {
             return nil, nil
         }
     }
-    return resource.resp, nil
+    return &resource, nil
 }

 func (c *cache) SetResponse(key string, resp v2.DiscoveryResponse) ([]*v2.DiscoveryRequest, error) {
@@ -115,14 +115,14 @@ func (c *cache) SetResponse(key string, resp v2.DiscoveryResponse) ([]*v2.Discov
         return nil, fmt.Errorf("failed to marshal resources for key: %s, err %v", key, err)
     }
     response := &Response{
-        raw:                resp,
-        marshaledResources: marshaledResources,
+        Raw:                resp,
+        MarshaledResources: marshaledResources,
     }
     value, found := c.cache.Get(key)
     if !found {
         resource := Resource{
-            resp:           response,
-            expirationTime: c.getExpirationTime(time.Now()),
+            Resp:           response,
+            ExpirationTime: c.getExpirationTime(time.Now()),
         }
         c.cache.Add(key, resource)
         return nil, nil
@@ -131,11 +131,10 @@ func (c *cache) SetResponse(key string, resp v2.DiscoveryResponse) ([]*v2.Discov
     if !ok {
         return nil, fmt.Errorf("unable to cast cache value to type resource for key: %s", key)
     }
-    resource.resp = response
-    resource.expirationTime = c.getExpirationTime(time.Now())
+    resource.Resp = response
+    resource.ExpirationTime = c.getExpirationTime(time.Now())
     c.cache.Add(key, resource)
-    // TODO: Add logic that allows for notifying of watches.
-    return resource.requests, nil
+    return resource.Requests, nil
 }

 func (c *cache) AddRequest(key string, req v2.DiscoveryRequest) error {
@@ -144,8 +143,8 @@ func (c *cache) AddRequest(key string, req v2.DiscoveryRequest) error {
     value, found := c.cache.Get(key)
     if !found {
         resource := Resource{
-            requests:       []*v2.DiscoveryRequest{&req},
-            expirationTime: c.getExpirationTime(time.Now()),
+            Requests:       []*v2.DiscoveryRequest{&req},
+            ExpirationTime: c.getExpirationTime(time.Now()),
         }
         c.cache.Add(key, resource)
         return nil
@@ -154,17 +153,17 @@ func (c *cache) AddRequest(key string, req v2.DiscoveryRequest) error {
     if !ok {
         return fmt.Errorf("unable to cast cache value to type resource for key: %s", key)
     }
-    resource.requests = append(resource.requests, &req)
-    resource.expirationTime = c.getExpirationTime(time.Now())
+    resource.Requests = append(resource.Requests, &req)
+    resource.ExpirationTime = c.getExpirationTime(time.Now())
     c.cache.Add(key, resource)
     return nil
 }

 func (r *Resource) isExpired(currentTime time.Time) bool {
-    if r.expirationTime.IsZero() {
+    if r.ExpirationTime.IsZero() {
         return false
     }
-    return r.expirationTime.Before(currentTime)
+    return r.ExpirationTime.Before(currentTime)
 }

 func (c *cache) getExpirationTime(currentTime time.Time) time.Time {
@@ -174,18 +173,15 @@ func (c *cache) getExpirationTime(currentTime time.Time) time.Time {
     return time.Time{}
 }

-func marshalResources(resources []*any.Any) ([]*any.Any, error) {
-    var marshaledResources []*any.Any
+func marshalResources(resources []*any.Any) ([]gcp_types.MarshaledResource, error) {
+    var marshaledResources []gcp_types.MarshaledResource
     for _, resource := range resources {
         marshaledResource, err := gcp.MarshalResource(resource)
         if err != nil {
             return nil, err
         }
-        marshaledResources = append(marshaledResources, &any.Any{
-            TypeUrl: resource.TypeUrl,
-            Value:   marshaledResource,
-        })
+        marshaledResources = append(marshaledResources, marshaledResource)
     }
     return marshaledResources, nil
 }
diff --git a/internal/app/cache/cache_test.go b/internal/app/cache/cache_test.go
index 6b12c3f0..a090488c 100644
--- a/internal/app/cache/cache_test.go
+++ b/internal/app/cache/cache_test.go
@@ -56,28 +56,30 @@ var testDiscoveryResponse = v2.DiscoveryResponse{
 }

 var testResponse = Response{
-    raw: testDiscoveryResponse,
-    marshaledResources: []*any.Any{
-        {
-            Value: []byte("\x12\x04test"),
-        },
+    Raw: testDiscoveryResponse,
+    MarshaledResources: [][]byte{
+        []byte("\x12\x04test"),
     },
 }

+var testResource = Resource{
+    Resp: &testResponse,
+}
+
 func TestAddRequestAndFetch(t *testing.T) {
     cache, err := NewCache(1, testOnEvict, time.Second*60)
     assert.NoError(t, err)

-    response, err := cache.Fetch(testKeyA)
+    resource, err := cache.Fetch(testKeyA)
     assert.EqualError(t, err, "no value found for key: key_A")
-    assert.Nil(t, response)
+    assert.Nil(t, resource)

     err = cache.AddRequest(testKeyA, testRequest)
     assert.NoError(t, err)

-    response, err = cache.Fetch(testKeyA)
+    resource, err = cache.Fetch(testKeyA)
     assert.NoError(t, err)
-    assert.Nil(t, response)
+    assert.Nil(t, resource.Resp)
 }

 func TestSetResponseAndFetch(t *testing.T) {
@@ -85,17 +87,17 @@ func TestSetResponseAndFetch(t *testing.T) {
     assert.NoError(t, err)

     // Simulate cache miss and setting of new response.
-    response, err := cache.Fetch(testKeyA)
+    resource, err := cache.Fetch(testKeyA)
     assert.EqualError(t, err, "no value found for key: key_A")
-    assert.Nil(t, response)
+    assert.Nil(t, resource)

     requests, err := cache.SetResponse(testKeyA, testDiscoveryResponse)
     assert.NoError(t, err)
     assert.Nil(t, requests)

-    response, err = cache.Fetch(testKeyA)
+    resource, err = cache.Fetch(testKeyA)
     assert.NoError(t, err)
-    assert.Equal(t, testResponse, *response)
+    assert.Equal(t, testResponse, *resource.Resp)
 }

 func TestAddRequestAndSetResponse(t *testing.T) {
@@ -114,9 +116,9 @@ func TestAddRequestAndSetResponse(t *testing.T) {
     assert.Equal(t, testRequest, *requests[0])
     assert.Equal(t, testRequest, *requests[1])

-    response, err := cache.Fetch(testKeyA)
+    resource, err := cache.Fetch(testKeyA)
     assert.NoError(t, err)
-    assert.Equal(t, testResponse, *response)
+    assert.Equal(t, testResponse, *resource.Resp)
 }

 func TestMaxEntries(t *testing.T) {
@@ -126,9 +128,9 @@ func TestMaxEntries(t *testing.T) {
     _, err = cache.SetResponse(testKeyA, testDiscoveryResponse)
     assert.NoError(t, err)

-    response, err := cache.Fetch(testKeyA)
+    resource, err := cache.Fetch(testKeyA)
     assert.NoError(t, err)
-    assert.Equal(t, testResponse, *response)
+    assert.Equal(t, testResponse, *resource.Resp)

     assert.PanicsWithValue(t, panicValues{
         key:    testKeyA,
@@ -138,13 +140,13 @@ func TestMaxEntries(t *testing.T) {
         assert.NoError(t, err)
     })

-    response, err = cache.Fetch(testKeyA)
+    resource, err = cache.Fetch(testKeyA)
     assert.EqualError(t, err, "no value found for key: key_A")
-    assert.Nil(t, response)
+    assert.Nil(t, resource)

-    response, err = cache.Fetch(testKeyB)
+    resource, err = cache.Fetch(testKeyB)
     assert.NoError(t, err)
-    assert.Nil(t, response)
+    assert.Nil(t, resource.Resp)
 }

 func TestTTL_Enabled(t *testing.T) {
@@ -154,23 +156,23 @@ func TestTTL_Enabled(t *testing.T) {
     _, err = cache.SetResponse(testKeyA, testDiscoveryResponse)
     assert.NoError(t, err)

-    response, err := cache.Fetch(testKeyA)
+    resource, err := cache.Fetch(testKeyA)
     assert.NoError(t, err)
-    assert.Equal(t, testResponse, *response)
+    assert.Equal(t, testResponse, *resource.Resp)

     time.Sleep(time.Millisecond * 10)

     assert.PanicsWithValue(t, panicValues{
         key:    testKeyA,
         reason: "testOnEvict called",
     }, func() {
-        response, err = cache.Fetch(testKeyA)
+        resource, err = cache.Fetch(testKeyA)
         assert.NoError(t, err)
-        assert.Nil(t, response)
+        assert.Nil(t, resource)
     })

-    response, err = cache.Fetch(testKeyA)
+    resource, err = cache.Fetch(testKeyA)
     assert.EqualError(t, err, "no value found for key: key_A")
-    assert.Nil(t, response)
+    assert.Nil(t, resource)
 }

 func TestTTL_Disabled(t *testing.T) {
@@ -181,13 +183,13 @@ func TestTTL_Disabled(t *testing.T) {
     _, err = cache.SetResponse(testKeyA, testDiscoveryResponse)
     assert.NoError(t, err)

-    response, err := cache.Fetch(testKeyA)
+    resource, err := cache.Fetch(testKeyA)
     assert.NoError(t, err)
-    assert.Equal(t, testResponse, *response)
+    assert.Equal(t, testResponse, *resource.Resp)

-    gomega.Consistently(func() (*Response, error) {
+    gomega.Consistently(func() (*Resource, error) {
         return cache.Fetch(testKeyA)
-    }).Should(gomega.Equal(&testResponse))
+    }).Should(gomega.Equal(&testResource))
 }

 func TestTTL_Negative(t *testing.T) {
@@ -200,14 +202,14 @@ func TestIsExpired(t *testing.T) {
     var resource Resource

     // The expiration time is 0, meaning TTL is disabled, so the resource is not considered expired.
-    resource.expirationTime = time.Time{}
+    resource.ExpirationTime = time.Time{}
     assert.False(t, resource.isExpired(time.Now()))

-    resource.expirationTime = time.Now()
+    resource.ExpirationTime = time.Now()
     assert.False(t, resource.isExpired(time.Time{}))

-    resource.expirationTime = time.Now()
-    assert.True(t, resource.isExpired(resource.expirationTime.Add(1)))
+    resource.ExpirationTime = time.Now()
+    assert.True(t, resource.isExpired(resource.ExpirationTime.Add(1)))
 }

 func TestGetExpirationTime(t *testing.T) {

From 8c076031ba8f58fdb8e6f3f89523208021a9d991 Mon Sep 17 00:00:00 2001
From: Jess Yuen
Date: Wed, 8 Apr 2020 02:28:07 -0700
Subject: [PATCH 02/29] Orchestrator implementation

Signed-off-by: Jess Yuen
---
 internal/app/orchestrator/orchestrator.go | 189 ++++++++++++++++++++--
 internal/app/upstream/client.go           |   8 +-
 2 files changed, 184 insertions(+), 13 deletions(-)

diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go
index feca848b..51526ca6 100644
--- a/internal/app/orchestrator/orchestrator.go
+++ b/internal/app/orchestrator/orchestrator.go
@@ -9,6 +9,7 @@ package orchestrator
 import (
     "context"
     "fmt"
+    "sync"
     "time"

     "github.com/envoyproxy/xds-relay/internal/app/cache"
@@ -26,6 +27,10 @@ const (
     // TODO (https://github.com/envoyproxy/xds-relay/issues/41) load from configured defaults.
     cacheMaxEntries = 1000
     cacheTTL        = 60 * time.Minute
+
+    // unaggregatedPrefix is the prefix used to label discovery requests that
+    // could not be successfully mapped to an aggregation rule.
+    unaggregatedPrefix = "unaggregated_"
 )

 // Orchestrator has the following responsibilities:
@@ -57,6 +62,14 @@ type orchestrator struct {
     cache          cache.Cache
     upstreamClient upstream.Client

+    // Map of downstream xDS client requests to response channels.
+    downstreamResponseChannels   map[*gcp.Request]chan gcp.Response
+    downstreamResponseChannelsMu sync.Mutex
+    // Map of aggregate key to the receive-only upstream origin server response
+    // channels.
+    upstreamResponseChannels    map[string]<-chan *upstream.Response
+    upstreamResponseChannelseMu sync.Mutex
+
     logger log.Logger
 }

 // New instantiates the mapper, cache, upstream client components necessary for
 // the orchestrator to operate and returns an instance of the said
 // orchestrator.
 func New(ctx context.Context, l log.Logger, mapper mapper.Mapper, upstreamClient upstream.Client) Orchestrator {
     orchestrator := &orchestrator{
-        logger:         l.Named(component),
-        mapper:         mapper,
-        upstreamClient: upstreamClient,
+        logger:                     l.Named(component),
+        mapper:                     mapper,
+        upstreamClient:             upstreamClient,
+        downstreamResponseChannels: make(map[*gcp.Request]chan gcp.Response),
+        upstreamResponseChannels:   make(map[string]<-chan *upstream.Response),
     }

     cache, err := cache.NewCache(cacheMaxEntries, orchestrator.onCacheEvicted, cacheTTL)
@@ -90,16 +105,172 @@ func New(ctx context.Context, l log.Logger, mapper mapper.Mapper, upstreamClient
 //
 // Cancel is an optional function to release resources in the producer. If
 // provided, the consumer may call this function multiple times.
-func (c *orchestrator) CreateWatch(req gcp.Request) (chan gcp.Response, func()) {
-    // TODO implement.
-    return nil, nil
+func (o *orchestrator) CreateWatch(req gcp.Request) (chan gcp.Response, func()) {
+    ctx := context.Background()
+
+    o.downstreamResponseChannelsMu.Lock()
+    if o.downstreamResponseChannels[&req] == nil {
+        // If this is the first time we're seeing the request from the
+        // downstream client, initialize a channel to feed future responses.
+        o.downstreamResponseChannels[&req] = make(chan gcp.Response)
+    }
+    o.downstreamResponseChannelsMu.Unlock()
+
+    aggregatedKey, err := o.mapper.GetKey(req)
+    if err != nil {
+        // Can't map the request to an aggregated key. Log and continue to
+        // propagate the response upstream without aggregation.
+        o.logger.With("err", err).With("req", req).Error(ctx, "failed to get aggregated key")
+        // Mimic the aggregated key.
+        // TODO (insert issue) This key needs to be made more granular to
+        // uniquely identify a request. User-specified defaults?
+        aggregatedKey = fmt.Sprintf("%s%s_%s", unaggregatedPrefix, req.GetNode().GetId(), req.GetTypeUrl())
+    }
+
+    // Register the watch for future responses.
+    err = o.cache.AddRequest(aggregatedKey, req)
+    if err != nil {
+        // If we fail to register the watch, we need to kill this stream by
+        // closing the response channel.
+        o.logger.With("err", err).With("key", aggregatedKey).With(
+            "req", req).Error(ctx, "failed to add watch")
+        close(o.downstreamResponseChannels[&req])
+        // Clean up.
+        o.downstreamResponseChannelsMu.Lock()
+        delete(o.downstreamResponseChannels, &req)
+        o.downstreamResponseChannelsMu.Unlock()
+        return nil, nil
+    }
+
+    // Check if we have a cached response first.
+    cached, err := o.cache.Fetch(aggregatedKey)
+    if err != nil {
+        // Log, and continue to propagate the response upstream.
+        o.logger.With("err", err).With("key", aggregatedKey).Error(ctx, "failed to fetch aggregated key")
+    }
+
+    if cached != nil && cached.Resp != nil {
+        // If we have a cached response, immediately return the result.
+        o.downstreamResponseChannels[&req] <- gcp.Response{
+            Request:            req,
+            Version:            cached.Resp.Raw.GetVersionInfo(),
+            ResourceMarshaled:  true,
+            MarshaledResources: cached.Resp.MarshaledResources,
+        }
+    } else {
+        // Otherwise, check if we have a upstream stream open for this
+        // aggregated key. If not, open a stream with the representative
+        // request.
+        //
+        // Locking is necessary here so that a simultaneous downstream request
+        // that maps to the same aggregated key doesn't result in two upstream
+        // streams.
+        o.upstreamResponseChannelseMu.Lock()
+        if o.upstreamResponseChannels[aggregatedKey] == nil {
+            upstreamResponseChan := o.upstreamClient.OpenStream(ctx, &req)
+            // Spin up a go routine to watch for upstream responses.
+            // One routine is opened per aggregate key.
+            go o.watchUpstream(ctx, aggregatedKey, upstreamResponseChan)
+            o.upstreamResponseChannels[aggregatedKey] = upstreamResponseChan
+        }
+        o.upstreamResponseChannelseMu.Unlock()
+    }
+
+    return o.downstreamResponseChannels[&req], nil
 }

 // Fetch implements the polling method of the config cache using a non-empty request.
-func (c *orchestrator) Fetch(context.Context, discovery.DiscoveryRequest) (*gcp.Response, error) {
+func (o *orchestrator) Fetch(context.Context, discovery.DiscoveryRequest) (*gcp.Response, error) {
     return nil, fmt.Errorf("Not implemented")
 }

-func (c *orchestrator) onCacheEvicted(key string, resource cache.Resource) {
-    // TODO implement.
+// watchUpstream is intended to be called in a goroutine, to receive incoming
+// responses and fan out to downstream clients.
+func (o *orchestrator) watchUpstream(
+    ctx context.Context,
+    aggregatedKey string,
+    responseChannel <-chan *upstream.Response,
+) {
+    for {
+        select {
+        case x := <-responseChannel:
+            if x.Err != nil {
+                // A problem occurred fetching the response upstream, log and
+                // return the most recent cached response, so that the
+                // downstream will reissue the discovery request.
+                o.logger.With("err", x.Err).With("key", aggregatedKey).Error(ctx, "upstream error")
+            } else {
+                // Cache the response.
+                _, err := o.cache.SetResponse(aggregatedKey, x.Response)
+                if err != nil {
+                    // If we fail to cache the new response, log and return the old one.
+                    o.logger.With("err", err).With("key", aggregatedKey).
+                        With("resp", x.Response).Error(ctx, "Failed to cache the response")
+                }
+            }
+
+            // Get downstream watches and fan out.
+            // We retrieve from cache rather than directly fanning out the
+            // newly received response because the cache does additional
+            // resource serialization.
+            cached, err := o.cache.Fetch(aggregatedKey)
+            if err != nil {
+                o.logger.With("err", err).With("key", aggregatedKey).Error(ctx, "cache fetch failed")
+                // Can't do anything because we don't know who the watches
+                // are. Drop the response.
+            } else {
+                if cached == nil || cached.Resp == nil {
+                    // If cache is empty, there is nothing to fan out.
+                    if x.Err != nil {
+                        // Warn. Benefit of the doubt that this is the first request.
+                        o.logger.With("key", aggregatedKey).
+                            Warn(ctx, "attempted to fan out with no cached response")
+                    } else {
+                        // Error. Sanity check. Shouldn't ever reach this.
+                        o.logger.With("key", aggregatedKey).
+                            Error(ctx, "attempted to fan out with no cached response")
+                    }
+                } else {
+                    // Golden path.
+                    o.fanout(cached.Resp, cached.Requests)
+                }
+            }
+        default:
+            // Save some processing power.
+            time.Sleep(1 * time.Second)
+        }
+    }
+}
+
+// fanout pushes the response to the response channels of all open downstream
+// watches.
+func (o *orchestrator) fanout(resp *cache.Response, watchers []*gcp.Request) {
+    for _, watch := range watchers {
+        channel := o.downstreamResponseChannels[watch]
+        if channel != nil {
+            // Construct the go-control-plane response from the cached response.
+            gcpResponse := gcp.Response{
+                Request:            *watch,
+                Version:            resp.Raw.GetVersionInfo(),
+                ResourceMarshaled:  true,
+                MarshaledResources: resp.MarshaledResources,
+            }
+            channel <- gcpResponse
+        }
+    }
+}
+
+// onCacheEvicted is called when the cache evicts a response due to TTL or
+// other reasons. When this happens, we need to clean up open streams.
+func (o *orchestrator) onCacheEvicted(key string, resource cache.Resource) {
+    o.downstreamResponseChannelsMu.Lock()
+    defer o.downstreamResponseChannelsMu.Unlock()
+    for _, watch := range resource.Requests {
+        if o.downstreamResponseChannels[watch] != nil {
+            close(o.downstreamResponseChannels[watch])
+            delete(o.downstreamResponseChannels, watch)
+        }
+    }
+    // TODO close the upstream receiver channel? Need a way to notify the
+    // upstream client to do so since these are receive only channels.
 }
diff --git a/internal/app/upstream/client.go b/internal/app/upstream/client.go
index ea4533bf..a15e9fec 100644
--- a/internal/app/upstream/client.go
+++ b/internal/app/upstream/client.go
@@ -25,7 +25,7 @@ type Client interface {
     // If the timeouts are exhausted, receive fails or a irrecoverable error occurs, the error is sent back to the caller.
     // It is the caller's responsibility to send a new request from the last known DiscoveryRequest.
     // Cancellation of the context cleans up all outstanding streams and releases all resources.
-    OpenStream(context.Context, *v2.DiscoveryRequest, string) chan *Response
+    OpenStream(context.Context, *v2.DiscoveryRequest) <-chan *Response
 }

 type client struct {
@@ -44,9 +44,9 @@ type client struct {
 // Only one of the fields is valid at any time. If the error is set, the response will be ignored.
 type Response struct {
     //nolint
-    response v2.DiscoveryResponse
+    Response v2.DiscoveryResponse
     //nolint
-    err error
+    Err error
 }

 // NewClient creates a grpc connection with an upstream origin server.
@@ -60,6 +60,6 @@ func NewClient(ctx context.Context, url string) (Client, error) {
     return &client{}, nil
 }

-func (m *client) OpenStream(ctx context.Context, request *v2.DiscoveryRequest, typeURL string) chan *Response {
+func (m *client) OpenStream(ctx context.Context, request *v2.DiscoveryRequest) <-chan *Response {
     return nil
 }

From 8fbde42800611f1af3ff74b56f7ee559669353d2 Mon Sep 17 00:00:00 2001
From: Jess Yuen
Date: Sun, 12 Apr 2020 23:07:33 -0700
Subject: [PATCH 03/29] Pass by reference for cache watch, export MarshalResources

Signed-off-by: Jess Yuen
---
 internal/app/cache/cache.go      | 14 ++++++++------
 internal/app/cache/cache_test.go |  8 ++++----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/internal/app/cache/cache.go b/internal/app/cache/cache.go
index e5abff3b..ea0ae37c 100644
--- a/internal/app/cache/cache.go
+++ b/internal/app/cache/cache.go
@@ -22,7 +22,7 @@ type Cache interface {
     // SetResponse sets the cache response and returns the list of requests.
     SetResponse(key string, resp v2.DiscoveryResponse) ([]*v2.DiscoveryRequest, error)

     // AddRequest adds the request to the cache.
-    AddRequest(key string, req v2.DiscoveryRequest) error
+    AddRequest(key string, req *v2.DiscoveryRequest) error
 }

 type cache struct {
@@ -110,7 +110,7 @@ func (c *cache) Fetch(key string) (*Resource, error) {
 func (c *cache) SetResponse(key string, resp v2.DiscoveryResponse) ([]*v2.DiscoveryRequest, error) {
     c.cacheMu.Lock()
     defer c.cacheMu.Unlock()
-    marshaledResources, err := marshalResources(resp.Resources)
+    marshaledResources, err := MarshalResources(resp.Resources)
     if err != nil {
         return nil, fmt.Errorf("failed to marshal resources for key: %s, err %v", key, err)
     }
@@ -137,13 +137,13 @@ func (c *cache) SetResponse(key string, resp v2.DiscoveryResponse) ([]*v2.Discov
     return resource.Requests, nil
 }

-func (c *cache) AddRequest(key string, req v2.DiscoveryRequest) error {
+func (c *cache) AddRequest(key string, req *v2.DiscoveryRequest) error {
     c.cacheMu.Lock()
     defer c.cacheMu.Unlock()
     value, found := c.cache.Get(key)
     if !found {
         resource := Resource{
-            Requests:       []*v2.DiscoveryRequest{&req},
+            Requests:       []*v2.DiscoveryRequest{req},
             ExpirationTime: c.getExpirationTime(time.Now()),
         }
         c.cache.Add(key, resource)
@@ -153,7 +153,7 @@ func (c *cache) AddRequest(key string, req *v2.DiscoveryRequest) error {
     if !ok {
         return fmt.Errorf("unable to cast cache value to type resource for key: %s", key)
     }
-    resource.Requests = append(resource.Requests, &req)
+    resource.Requests = append(resource.Requests, req)
     resource.ExpirationTime = c.getExpirationTime(time.Now())
     c.cache.Add(key, resource)
     return nil
@@ -173,7 +173,9 @@ func (c *cache) getExpirationTime(currentTime time.Time) time.Time {
     return time.Time{}
 }

-func marshalResources(resources []*any.Any) ([]gcp_types.MarshaledResource, error) {
+// MarshalResources converts the raw xDS discovery resources into a serialized
+// form accepted by go-control-plane.
+func MarshalResources(resources []*any.Any) ([]gcp_types.MarshaledResource, error) {
     var marshaledResources []gcp_types.MarshaledResource
     for _, resource := range resources {
         marshaledResource, err := gcp.MarshalResource(resource)
diff --git a/internal/app/cache/cache_test.go b/internal/app/cache/cache_test.go
index a090488c..ab670bd0 100644
--- a/internal/app/cache/cache_test.go
+++ b/internal/app/cache/cache_test.go
@@ -74,7 +74,7 @@ func TestAddRequestAndFetch(t *testing.T) {
     assert.EqualError(t, err, "no value found for key: key_A")
     assert.Nil(t, resource)

-    err = cache.AddRequest(testKeyA, testRequest)
+    err = cache.AddRequest(testKeyA, &testRequest)
     assert.NoError(t, err)

     resource, err = cache.Fetch(testKeyA)
@@ -104,10 +104,10 @@ func TestAddRequestAndSetResponse(t *testing.T) {
     cache, err := NewCache(1, testOnEvict, time.Second*60)
     assert.NoError(t, err)

-    err = cache.AddRequest(testKeyA, testRequest)
+    err = cache.AddRequest(testKeyA, &testRequest)
     assert.NoError(t, err)

-    err = cache.AddRequest(testKeyA, testRequest)
+    err = cache.AddRequest(testKeyA, &testRequest)
     assert.NoError(t, err)

     requests, err := cache.SetResponse(testKeyA, testDiscoveryResponse)
@@ -136,7 +136,7 @@ func TestMaxEntries(t *testing.T) {
         key:    testKeyA,
         reason: "testOnEvict called",
     }, func() {
-        err = cache.AddRequest(testKeyB, testRequest)
+        err = cache.AddRequest(testKeyB, &testRequest)
         assert.NoError(t, err)
     })

From ee54b654498d28d23497125513fdc71ad4c2e404 Mon Sep 17 00:00:00 2001
From: Jess Yuen
Date: Sun, 12 Apr 2020 23:09:22 -0700
Subject: [PATCH 04/29] Add downstream, upstream response map wrappers

Signed-off-by: Jess Yuen
---
 internal/app/orchestrator/downstream.go | 58 +++++++++++++++++++++++++
 internal/app/orchestrator/upstream.go   | 46 ++++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100644 internal/app/orchestrator/downstream.go
 create mode 100644 internal/app/orchestrator/upstream.go

diff --git a/internal/app/orchestrator/downstream.go b/internal/app/orchestrator/downstream.go
new file mode 100644
index 00000000..d3a10a11
--- /dev/null
+++ b/internal/app/orchestrator/downstream.go
@@ -0,0 +1,58 @@
+package orchestrator
+
+import (
+    "sync"
+
+    gcp "github.com/envoyproxy/go-control-plane/pkg/cache/v2"
+)
+
+// downstreamResponseMap is a map of downstream xDS client requests to response
+// channels.
+type downstreamResponseMap struct {
+    mu              sync.RWMutex
+    responseChannel map[*gcp.Request]chan gcp.Response
+}
+
+// createChannel initializes a new channel for a request if it doesn't already
+// exist.
+func (d *downstreamResponseMap) createChannel(req *gcp.Request) chan gcp.Response {
+    d.mu.Lock()
+    defer d.mu.Unlock()
+    if _, ok := d.responseChannel[req]; !ok {
+        d.responseChannel[req] = make(chan gcp.Response)
+    }
+    return d.responseChannel[req]
+}
+
+// get retrieves the channel where responses are set for the specified request.
+func (d *downstreamResponseMap) get(req *gcp.Request) (chan gcp.Response, bool) {
+    d.mu.RLock()
+    defer d.mu.RUnlock()
+    channel, ok := d.responseChannel[req]
+    return channel, ok
+}
+
+// delete closes the response channel and removes it from the map.
+func (d *downstreamResponseMap) delete(req *gcp.Request) chan gcp.Response {
+    d.mu.Lock()
+    defer d.mu.Unlock()
+    if channel, ok := d.responseChannel[req]; ok {
+        close(channel)
+        delete(d.responseChannel, req)
+        return channel
+    }
+    return nil
+}
+
+// deleteAll closes all the response channels for the provided requests and
+// removes them from the map.
+func (d *downstreamResponseMap) deleteAll(req []*gcp.Request) {
+    d.mu.Lock()
+    defer d.mu.Unlock()
+    for _, watch := range req {
+        if d.responseChannel[watch] != nil {
+            close(d.responseChannel[watch])
+            delete(d.responseChannel, watch)
+        }
+    }
+}
diff --git a/internal/app/orchestrator/upstream.go b/internal/app/orchestrator/upstream.go
new file mode 100644
index 00000000..eabd9a27
--- /dev/null
+++ b/internal/app/orchestrator/upstream.go
@@ -0,0 +1,46 @@
+package orchestrator
+
+import (
+    "sync"
+
+    "github.com/envoyproxy/xds-relay/internal/app/upstream"
+)
+
+// upstreamResponseMap is the map of aggregate key to the receive-only upstream
+// origin server response channels.
+type upstreamResponseMap struct {
+    mu              sync.RWMutex
+    responseChannel map[string]upstreamResponseChannel
+}
+
+type upstreamResponseChannel struct {
+    response <-chan *upstream.Response
+    done     chan bool
+}
+
+// add sets the response channel for the provided aggregated key. It also
+// initializes a done channel to be used during cleanup.
+//
+// Expects orchestrator to manage the lock since this is called simultaneously
+// with stream open.
+func (u *upstreamResponseMap) add(aggregatedKey string,
+    responseChannel <-chan *upstream.Response) upstreamResponseChannel {
+    channel := upstreamResponseChannel{
+        response: responseChannel,
+        done:     make(chan bool, 1),
+    }
+    u.responseChannel[aggregatedKey] = channel
+    return channel
+}
+
+// delete closes the done channel and removes both channels from the map.
+// TODO We can't close the upstream receiver only channel, need to signify to
+// the upstream client to do so.
+func (u *upstreamResponseMap) delete(aggregatedKey string) {
+    u.mu.Lock()
+    defer u.mu.Unlock()
+    if channel, ok := u.responseChannel[aggregatedKey]; ok {
+        close(channel.done)
+        delete(u.responseChannel, aggregatedKey)
+    }
+}

From 3b505db4e0450c5422c151a81be73a3393c5c9cf Mon Sep 17 00:00:00 2001
From: Jess Yuen
Date: Sun, 12 Apr 2020 23:09:52 -0700
Subject: [PATCH 05/29] Changes and fixes to orchestrator workflow

Signed-off-by: Jess Yuen
---
 internal/app/orchestrator/orchestrator.go | 155 +++++++++++-----------
 1 file changed, 80 insertions(+), 75 deletions(-)

diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go
index 51526ca6..4c3b17af 100644
--- a/internal/app/orchestrator/orchestrator.go
+++ b/internal/app/orchestrator/orchestrator.go
@@ -9,7 +9,6 @@ package orchestrator
 import (
     "context"
     "fmt"
-    "sync"
     "time"

     "github.com/envoyproxy/xds-relay/internal/app/cache"
@@ -55,6 +54,13 @@ const (
 // more details.
 type Orchestrator interface {
     gcp.Cache
+
+    // shutdown takes an aggregated key and shuts down the go routine watching
+    // for upstream responses.
+    //
+    // This is currently used by tests to clean up channels, but can also be
+    // used by the main shutdown handler.
+    shutdown(string)
 }

 type orchestrator struct {
@@ -62,15 +68,10 @@ type orchestrator struct {
     cache          cache.Cache
     upstreamClient upstream.Client

-    // Map of downstream xDS client requests to response channels.
-    downstreamResponseChannels   map[*gcp.Request]chan gcp.Response
-    downstreamResponseChannelsMu sync.Mutex
-    // Map of aggregate key to the receive-only upstream origin server response
-    // channels.
-    upstreamResponseChannels    map[string]<-chan *upstream.Response
-    upstreamResponseChannelseMu sync.Mutex
-
     logger log.Logger
+
+    downstreamResponseMap downstreamResponseMap
+    upstreamResponseMap   upstreamResponseMap
 }

 // New instantiates the mapper, cache, upstream client components necessary for
 // the orchestrator to operate and returns an instance of the said
 // orchestrator.
 func New(ctx context.Context, l log.Logger, mapper mapper.Mapper, upstreamClient upstream.Client) Orchestrator {
     orchestrator := &orchestrator{
-        logger:                     l.Named(component),
-        mapper:                     mapper,
-        upstreamClient:             upstreamClient,
-        downstreamResponseChannels: make(map[*gcp.Request]chan gcp.Response),
-        upstreamResponseChannels:   make(map[string]<-chan *upstream.Response),
+        logger:         l.Named(component),
+        mapper:         mapper,
+        upstreamClient: upstreamClient,
+        downstreamResponseMap: downstreamResponseMap{
+            responseChannel: make(map[*gcp.Request]chan gcp.Response),
+        },
+        upstreamResponseMap: upstreamResponseMap{
+            responseChannel: make(map[string]upstreamResponseChannel),
+        },
     }

     cache, err := cache.NewCache(cacheMaxEntries, orchestrator.onCacheEvicted, cacheTTL)
@@ -108,19 +113,15 @@ func New(ctx context.Context, l log.Logger, mapper mapper.Mapper, upstreamClient
 func (o *orchestrator) CreateWatch(req gcp.Request) (chan gcp.Response, func()) {
     ctx := context.Background()

-    o.downstreamResponseChannelsMu.Lock()
-    if o.downstreamResponseChannels[&req] == nil {
-        // If this is the first time we're seeing the request from the
-        // downstream client, initialize a channel to feed future responses.
-        o.downstreamResponseChannels[&req] = make(chan gcp.Response)
-    }
-    o.downstreamResponseChannelsMu.Unlock()
+    // If this is the first time we're seeing the request from the
+    // downstream client, initialize a channel to feed future responses.
+    responseChannel := o.downstreamResponseMap.createChannel(&req)

     aggregatedKey, err := o.mapper.GetKey(req)
     if err != nil {
         // Can't map the request to an aggregated key. Log and continue to
         // propagate the response upstream without aggregation.
-        o.logger.With("err", err).With("req", req).Error(ctx, "failed to get aggregated key")
+        o.logger.With("err", err).With("req node", req.GetNode()).Warn(ctx, "failed to map to aggregated key")
         // Mimic the aggregated key.
         // TODO (insert issue) This key needs to be made more granular to
         // uniquely identify a request. User-specified defaults?
@@ -128,55 +129,47 @@ func (o *orchestrator) CreateWatch(req gcp.Request) (chan gcp.Response, func())
     }

     // Register the watch for future responses.
-    err = o.cache.AddRequest(aggregatedKey, req)
+    err = o.cache.AddRequest(aggregatedKey, &req)
     if err != nil {
         // If we fail to register the watch, we need to kill this stream by
         // closing the response channel.
         o.logger.With("err", err).With("key", aggregatedKey).With(
-            "req", req).Error(ctx, "failed to add watch")
-        close(o.downstreamResponseChannels[&req])
-        // Clean up.
-        o.downstreamResponseChannelsMu.Lock()
-        delete(o.downstreamResponseChannels, &req)
-        o.downstreamResponseChannelsMu.Unlock()
-        return nil, nil
+            "req node", req.GetNode()).Error(ctx, "failed to add watch")
+        closedChannel := o.downstreamResponseMap.delete(&req)
+        return closedChannel, nil
     }

     // Check if we have a cached response first.
     cached, err := o.cache.Fetch(aggregatedKey)
     if err != nil {
         // Log, and continue to propagate the response upstream.
-        o.logger.With("err", err).With("key", aggregatedKey).Error(ctx, "failed to fetch aggregated key")
+        o.logger.With("err", err).With("key", aggregatedKey).Warn(ctx, "failed to fetch aggregated key")
     }

-    if cached != nil && cached.Resp != nil {
-        // If we have a cached response, immediately return the result.
-        o.downstreamResponseChannels[&req] <- gcp.Response{
-            Request:            req,
-            Version:            cached.Resp.Raw.GetVersionInfo(),
-            ResourceMarshaled:  true,
-            MarshaledResources: cached.Resp.MarshaledResources,
-        }
-    } else {
-        // Otherwise, check if we have a upstream stream open for this
-        // aggregated key. If not, open a stream with the representative
-        // request.
-        //
-        // Locking is necessary here so that a simultaneous downstream request
-        // that maps to the same aggregated key doesn't result in two upstream
-        // streams.
-        o.upstreamResponseChannelseMu.Lock()
-        if o.upstreamResponseChannels[aggregatedKey] == nil {
-            upstreamResponseChan := o.upstreamClient.OpenStream(ctx, &req)
-            // Spin up a go routine to watch for upstream responses.
-            // One routine is opened per aggregate key.
-            go o.watchUpstream(ctx, aggregatedKey, upstreamResponseChan)
-            o.upstreamResponseChannels[aggregatedKey] = upstreamResponseChan
-        }
-        o.upstreamResponseChannelseMu.Unlock()
+    if cached != nil && cached.Resp != nil {
+        // If we have a cached response, immediately push the result to the
+        // response channel.
+        go func() { responseChannel <- convertToGcpResponse(cached.Resp, req) }()
+    }
+
+    // Check if we have an upstream stream open for this aggregated key. If not,
+    // open a stream with the representative request.
+    //
+    // Locking is necessary here so that a simultaneous downstream request
+    // that maps to the same aggregated key doesn't result in two upstream
+    // streams.
+    o.upstreamResponseMap.mu.Lock()
+    if _, ok := o.upstreamResponseMap.responseChannel[aggregatedKey]; !ok {
+        upstreamResponseChan := o.upstreamClient.OpenStream(ctx, &req)
+        respChannel := o.upstreamResponseMap.add(aggregatedKey, upstreamResponseChan)
+        // Spin up a go routine to watch for upstream responses.
+        // One routine is opened per aggregate key.
+        go o.watchUpstream(ctx, aggregatedKey, respChannel.response, respChannel.done)
     }
+    o.upstreamResponseMap.mu.Unlock()

-    return o.downstreamResponseChannels[&req], nil
+    return responseChannel, o.onCancel(&req)
 }

 // Fetch implements the polling method of the config cache using a non-empty request.
@@ -190,6 +183,7 @@ func (o *orchestrator) watchUpstream(
     ctx context.Context,
     aggregatedKey string,
     responseChannel <-chan *upstream.Response,
+    done <-chan bool,
 ) {
     for {
         select {
@@ -235,6 +229,9 @@ func (o *orchestrator) watchUpstream(
                     o.fanout(cached.Resp, cached.Requests)
                 }
             }
+        case <-done:
+            // Exit when signaled that the stream has closed.
+            return
         default:
             // Save some processing power.
             time.Sleep(1 * time.Second)
@@ -246,31 +243,39 @@ func (o *orchestrator) watchUpstream(
 // watches.
 func (o *orchestrator) fanout(resp *cache.Response, watchers []*gcp.Request) {
     for _, watch := range watchers {
-        channel := o.downstreamResponseChannels[watch]
-        if channel != nil {
-            // Construct the go-control-plane response from the cached response.
-            gcpResponse := gcp.Response{
-                Request:            *watch,
-                Version:            resp.Raw.GetVersionInfo(),
-                ResourceMarshaled:  true,
-                MarshaledResources: resp.MarshaledResources,
-            }
-            channel <- gcpResponse
+        if channel, ok := o.downstreamResponseMap.get(watch); ok {
+            channel <- convertToGcpResponse(resp, *watch)
         }
     }
 }

 // onCacheEvicted is called when the cache evicts a response due to TTL or
 // other reasons. When this happens, we need to clean up open streams.
+// We shut down both the downstream watches and the upstream stream.
 func (o *orchestrator) onCacheEvicted(key string, resource cache.Resource) {
-    o.downstreamResponseChannelsMu.Lock()
-    defer o.downstreamResponseChannelsMu.Unlock()
-    for _, watch := range resource.Requests {
-        if o.downstreamResponseChannels[watch] != nil {
-            close(o.downstreamResponseChannels[watch])
-            delete(o.downstreamResponseChannels, watch)
-        }
+    o.downstreamResponseMap.deleteAll(resource.Requests)
+    o.upstreamResponseMap.delete(key)
+}
+
+// onCancel cleans up the cached watch when called.
+func (o *orchestrator) onCancel(req *gcp.Request) func() {
+    return func() {
+        o.downstreamResponseMap.delete(req)
+        // TODO clean up watch from cache. Cache needs to expose a function to do so.
+    }
+}
+
+func (o *orchestrator) shutdown(aggregatedKey string) {
+    o.upstreamResponseMap.delete(aggregatedKey)
+}
+
+// convertToGcpResponse constructs the go-control-plane response from the
+// cached response.
+func convertToGcpResponse(resp *cache.Response, req gcp.Request) gcp.Response {
+    return gcp.Response{
+        Request:            req,
+        Version:            resp.Raw.GetVersionInfo(),
+        ResourceMarshaled:  true,
+        MarshaledResources: resp.MarshaledResources,
     }
-    // TODO close the upstream receiver channel? Need a way to notify the
-    // upstream client to do so since these are receive only channels.
 }

From 071e8e5ca8e6037c76843ee51662083193b39888 Mon Sep 17 00:00:00 2001
From: Jess Yuen
Date: Sun, 12 Apr 2020 23:10:16 -0700
Subject: [PATCH 06/29] Add unit tests

Signed-off-by: Jess Yuen
---
 .../app/orchestrator/orchestrator_test.go | 277 +++++++++++++++++-
 1 file changed, 274 insertions(+), 3 deletions(-)

diff --git a/internal/app/orchestrator/orchestrator_test.go b/internal/app/orchestrator/orchestrator_test.go
index a0cd3e84..8671fa95 100644
--- a/internal/app/orchestrator/orchestrator_test.go
+++ b/internal/app/orchestrator/orchestrator_test.go
@@ -2,17 +2,95 @@ package orchestrator

 import (
     "context"
+    "io/ioutil"
     "testing"

+    v2 "github.com/envoyproxy/go-control-plane/envoy/api/v2"
+    gcp "github.com/envoyproxy/go-control-plane/pkg/cache/v2"
+    "github.com/golang/protobuf/ptypes/any"
+    "github.com/stretchr/testify/assert"
+    "google.golang.org/protobuf/types/known/anypb"
+
+    "github.com/envoyproxy/xds-relay/internal/app/cache"
     "github.com/envoyproxy/xds-relay/internal/app/mapper"
     "github.com/envoyproxy/xds-relay/internal/app/upstream"
     "github.com/envoyproxy/xds-relay/internal/pkg/log"
-
+    yamlproto "github.com/envoyproxy/xds-relay/internal/pkg/util"
     aggregationv1 "github.com/envoyproxy/xds-relay/pkg/api/aggregation/v1"
-
-    "github.com/stretchr/testify/assert"
 )

+type mockSimpleUpstreamClient struct {
+    responseChan <-chan *upstream.Response
+}
+
+func (m mockSimpleUpstreamClient) OpenStream(ctx context.Context, req *v2.DiscoveryRequest) <-chan *upstream.Response {
+    return m.responseChan
+}
+
+type mockMultiStreamUpstreamClient struct {
+    ldsResponseChan <-chan *upstream.Response
+    cdsResponseChan <-chan *upstream.Response
+
+    t      *testing.T
+    mapper mapper.Mapper
+}
+
+func (m mockMultiStreamUpstreamClient) OpenStream(ctx context.Context,
+    req *v2.DiscoveryRequest) <-chan *upstream.Response {
+    aggregatedKey, err := m.mapper.GetKey(*req)
+    assert.NoError(m.t, err)
+
+    if aggregatedKey == "lds" {
+        return m.ldsResponseChan
+    } else if aggregatedKey == "cds" {
+        return m.cdsResponseChan
+    }
+
+    m.t.Errorf("Unsupported aggregated key, %s", aggregatedKey)
+    return nil
+}
+
+func newMockOrchestrator(t *testing.T, mapper mapper.Mapper, upstreamClient upstream.Client) *orchestrator {
+    orchestrator := &orchestrator{
+        logger:         log.New("info"),
+        mapper:         mapper,
+        upstreamClient: upstreamClient,
+        downstreamResponseMap: downstreamResponseMap{
+            responseChannel: make(map[*gcp.Request]chan gcp.Response),
+        },
+        upstreamResponseMap: upstreamResponseMap{
+            responseChannel: make(map[string]upstreamResponseChannel),
+        },
+    }
+
+    cache, err := cache.NewCache(cacheMaxEntries, orchestrator.onCacheEvicted, cacheTTL)
+    assert.NoError(t, err)
+    orchestrator.cache = cache
+
+    return orchestrator
+}
+
+func newMockMapper(t *testing.T) mapper.Mapper {
+    bytes, err := ioutil.ReadFile("testdata/aggregation_rules.yaml") // key on request type
+    assert.NoError(t, err)
+
+    var config aggregationv1.KeyerConfiguration
+    err = yamlproto.FromYAMLToKeyerConfiguration(string(bytes), &config)
+    assert.NoError(t, err)
+
+    return mapper.NewMapper(&config)
+}
+
+func assertEqualResources(t *testing.T, got gcp.Response, expected v2.DiscoveryResponse, req gcp.Request) {
+    expectedResources, err := cache.MarshalResources(expected.Resources)
+    assert.NoError(t, err)
+    expectedResponse := cache.Response{
+        Raw:                expected,
+        MarshaledResources: expectedResources,
+    }
+    assert.Equal(t, convertToGcpResponse(&expectedResponse, req), got)
+}
+
 func TestNew(t *testing.T) {
     // Trivial test to ensure orchestrator instantiates.
     upstreamClient, err := upstream.NewClient(context.Background(), "example.com")
@@ -30,3 +108,196 @@ func TestNew(t *testing.T) {
     orchestrator := New(context.Background(), log.New("info"), requestMapper, upstreamClient)
     assert.NotNil(t, orchestrator)
 }
+
+func TestGoldenPath(t *testing.T) {
+    upstreamResponseChannel := make(chan *upstream.Response)
+    mapper := newMockMapper(t)
+    orchestrator := newMockOrchestrator(
+        t,
+        mapper,
+        mockSimpleUpstreamClient{
+            responseChan: upstreamResponseChannel,
+        },
+    )
+    assert.NotNil(t, orchestrator)
+
+    req := gcp.Request{
+        TypeUrl: "type.googleapis.com/envoy.api.v2.Listener",
+    }
+
+    respChannel, cancelWatch := orchestrator.CreateWatch(req)
+    assert.NotNil(t, respChannel)
+    assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannel))
+    assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannel))
+
+    upstreamResponse := upstream.Response{
+        Response: v2.DiscoveryResponse{
+            VersionInfo: "1",
+            TypeUrl:     "type.googleapis.com/envoy.api.v2.Listener",
+            Resources: []*any.Any{
+                &anypb.Any{
+                    Value: []byte("lds resource"),
+                },
+            },
+        },
+    }
+    upstreamResponseChannel <- &upstreamResponse
+
+    gotResponse := <-respChannel
+    assertEqualResources(t, gotResponse, upstreamResponse.Response, req)
+
+    aggregatedKey, err := mapper.GetKey(req)
+    assert.NoError(t, err)
+    orchestrator.shutdown(aggregatedKey)
+    assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannel))
+
+    cancelWatch()
+    assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannel))
+}
+
+func TestCachedResponse(t *testing.T) {
+    upstreamResponseChannel := make(chan *upstream.Response)
+    mapper := newMockMapper(t)
+    orchestrator := newMockOrchestrator(
+        t,
+        mapper,
+        mockSimpleUpstreamClient{
+            responseChan: upstreamResponseChannel,
+        },
+    )
+    assert.NotNil(t, orchestrator)
+
+    req := gcp.Request{
+        TypeUrl: "type.googleapis.com/envoy.api.v2.Listener",
+    }
+
+    aggregatedKey, err := mapper.GetKey(req)
+    assert.NoError(t, err)
+    mockResponse := v2.DiscoveryResponse{
+        VersionInfo: "1",
+        TypeUrl:     "type.googleapis.com/envoy.api.v2.Listener",
+        Resources: []*any.Any{
+            &anypb.Any{
+                Value: []byte("lds resource"),
+            },
+        },
+    }
+    watches, err := orchestrator.cache.SetResponse(aggregatedKey, mockResponse)
+    assert.NoError(t, err)
+    assert.Equal(t, 0, len(watches))
+
+    respChannel, cancelWatch := orchestrator.CreateWatch(req)
+    assert.NotNil(t, respChannel)
+    assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannel))
+    assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannel))
+
+    gotResponse := <-respChannel
+    assertEqualResources(t, gotResponse, mockResponse, req)
+
+    // Attempt pushing a more recent response from upstream.
+    upstreamResponse := upstream.Response{
+        Response: v2.DiscoveryResponse{
+            VersionInfo: "2",
+            TypeUrl:     "type.googleapis.com/envoy.api.v2.Listener",
+            Resources: []*any.Any{
+                &anypb.Any{
+                    Value: []byte("some other lds resource"),
+                },
+            },
+        },
+    }
+    upstreamResponseChannel <- &upstreamResponse
+    gotResponse = <-respChannel
+    assertEqualResources(t, gotResponse, upstreamResponse.Response, req)
+    assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannel))
+
+    orchestrator.shutdown(aggregatedKey)
+    assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannel))
+    cancelWatch()
+    assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannel))
+}
+
+func TestMultipleWatchesAndUpstreams(t *testing.T) {
+    upstreamResponseChannelLDS := make(chan *upstream.Response)
+    upstreamResponseChannelCDS := make(chan *upstream.Response)
+    mapper := newMockMapper(t)
+    orchestrator := newMockOrchestrator(
+        t,
+        mapper,
+        mockMultiStreamUpstreamClient{
+            ldsResponseChan: upstreamResponseChannelLDS,
+            cdsResponseChan: upstreamResponseChannelCDS,
+            mapper:          mapper,
+            t:               t,
+        },
+    )
+    assert.NotNil(t, orchestrator)
+
+    req1 := gcp.Request{
+        TypeUrl: "type.googleapis.com/envoy.api.v2.Listener",
+    }
+    req2 := gcp.Request{
+        TypeUrl: "type.googleapis.com/envoy.api.v2.Listener",
+    }
+    req3 := gcp.Request{
+        TypeUrl: "type.googleapis.com/envoy.api.v2.Cluster",
+    }
+
+    respChannel1, cancelWatch1 := orchestrator.CreateWatch(req1)
+    assert.NotNil(t, respChannel1)
+    respChannel2, cancelWatch2 := orchestrator.CreateWatch(req2)
+    assert.NotNil(t, respChannel2)
+    respChannel3, cancelWatch3 := orchestrator.CreateWatch(req3)
+    assert.NotNil(t, respChannel3)
+
+    upstreamResponseLDS := upstream.Response{
+        Response: v2.DiscoveryResponse{
+            VersionInfo: "1",
+            TypeUrl:     "type.googleapis.com/envoy.api.v2.Listener",
+            Resources: []*any.Any{
+                &anypb.Any{
+                    Value: []byte("lds resource"),
+                },
+            },
+        },
+    }
+    upstreamResponseCDS := upstream.Response{
+        Response: v2.DiscoveryResponse{
+            VersionInfo: "1",
+            TypeUrl:     "type.googleapis.com/envoy.api.v2.Cluster",
+            Resources: []*any.Any{
+                &anypb.Any{
+                    Value: []byte("cds resource"),
+                },
+            },
+        },
+    }
+
+    upstreamResponseChannelLDS <- &upstreamResponseLDS
+    upstreamResponseChannelCDS <- &upstreamResponseCDS
+
+    gotResponseFromChannel1 := <-respChannel1
+    gotResponseFromChannel2 := <-respChannel2
+    gotResponseFromChannel3 := <-respChannel3
+
+    aggregatedKeyLDS, err := mapper.GetKey(req1)
+    assert.NoError(t, err)
+    aggregatedKeyCDS, err := mapper.GetKey(req3)
+    assert.NoError(t, err)
+
+    assert.Equal(t, 3, len(orchestrator.downstreamResponseMap.responseChannel))
+    assert.Equal(t, 2, len(orchestrator.upstreamResponseMap.responseChannel))
+
+    assertEqualResources(t, gotResponseFromChannel1, upstreamResponseLDS.Response, req1)
+    assertEqualResources(t, gotResponseFromChannel2, upstreamResponseLDS.Response, req2)
+    assertEqualResources(t, gotResponseFromChannel3, upstreamResponseCDS.Response, req3)
+
+    orchestrator.shutdown(aggregatedKeyLDS)
+    orchestrator.shutdown(aggregatedKeyCDS)
+    assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannel))
+
+    cancelWatch1()
+    cancelWatch2()
+    cancelWatch3()
+    assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannel))
+}

From 0e853a8f1fdb492504f6fb4a5aa3dab59b7c99e5 Mon Sep 17 00:00:00 2001
From: Jess Yuen
Date: Sun, 12 Apr 2020 23:32:42 -0700
Subject: [PATCH 07/29] Add missing testdata

Signed-off-by: Jess Yuen
---
 .../testdata/aggregation_rules.yaml | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 internal/app/orchestrator/testdata/aggregation_rules.yaml

diff --git a/internal/app/orchestrator/testdata/aggregation_rules.yaml b/internal/app/orchestrator/testdata/aggregation_rules.yaml
new file mode 100644
index 00000000..0bcfe5fe
--- /dev/null
+++ b/internal/app/orchestrator/testdata/aggregation_rules.yaml
@@ -0,0 +1,26 @@
+fragments:
+  - rules:
+      - match:
+          request_type_match:
+            types:
+              - "type.googleapis.com/envoy.api.v2.Listener"
+        result:
+          string_fragment: "lds"
+      - match:
+          request_type_match:
+            types:
+              - "type.googleapis.com/envoy.api.v2.Cluster"
+        result:
+          string_fragment: "cds"
+      - match:
+          request_type_match:
+            types:
+              - "type.googleapis.com/envoy.api.v2.Route"
+        result:
+          string_fragment: "rds"
+      - match:
+          request_type_match:
+            types:
+              - "type.googleapis.com/envoy.api.v2.Endpoint"
+        result:
+          string_fragment: "eds"

From 6234b8af47e797ed0bb81eca26eaf4bf15886d83 Mon Sep 17 00:00:00 2001
From: Jess Yuen
Date: Mon, 13 Apr 2020 11:40:27 -0700
Subject: [PATCH 08/29] Link TODO with issues

Signed-off-by: Jess Yuen
---
 internal/app/orchestrator/orchestrator.go | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go
index 4c3b17af..e22d42e0 100644
--- a/internal/app/orchestrator/orchestrator.go
+++ b/internal/app/orchestrator/orchestrator.go
@@ -123,8 +123,8 @@ func (o *orchestrator) CreateWatch(req gcp.Request) (chan gcp.Response, func())
         // propagate the response upstream without aggregation.
         o.logger.With("err", err).With("req node", req.GetNode()).Warn(ctx, "failed to map to aggregated key")
         // Mimic the aggregated key.
-        // TODO (insert issue) This key needs to be made more granular to
-        // uniquely identify a request. User-specified defaults?
+        // TODO (https://github.com/envoyproxy/xds-relay/issues/56). This key
+        // needs to be made more granular to uniquely identify a request.
         aggregatedKey = fmt.Sprintf("%s%s_%s", unaggregatedPrefix, req.GetNode().GetId(), req.GetTypeUrl())
     }

@@ -261,7 +261,8 @@ func (o *orchestrator) onCacheEvicted(key string, resource cache.Resource) {
 func (o *orchestrator) onCancel(req *gcp.Request) func() {
     return func() {
         o.downstreamResponseMap.delete(req)
-        // TODO clean up watch from cache. Cache needs to expose a function to do so.
+        // TODO (https://github.com/envoyproxy/xds-relay/issues/57). Clean up
+        // watch from cache. Cache needs to expose a function to do so.
     }
 }

From fad0c13f6382dc822747263ed8a779e6b2e730ac Mon Sep 17 00:00:00 2001
From: Jess Yuen
Date: Mon, 13 Apr 2020 12:07:27 -0700
Subject: [PATCH 09/29] Fanout response to watches in parallel

Signed-off-by: Jess Yuen
---
 internal/app/orchestrator/orchestrator.go | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go
index e22d42e0..17286b1d 100644
--- a/internal/app/orchestrator/orchestrator.go
+++ b/internal/app/orchestrator/orchestrator.go
@@ -240,12 +240,14 @@ func (o *orchestrator) watchUpstream(
 }

 // fanout pushes the response to the response channels of all open downstream
-// watches.
+// watches in parallel.
 func (o *orchestrator) fanout(resp *cache.Response, watchers []*gcp.Request) {
     for _, watch := range watchers {
-        if channel, ok := o.downstreamResponseMap.get(watch); ok {
-            channel <- convertToGcpResponse(resp, *watch)
-        }
+        go func(watch *gcp.Request) {
+            if channel, ok := o.downstreamResponseMap.get(watch); ok {
+                channel <- convertToGcpResponse(resp, *watch)
+            }
+        }(watch)
     }
 }

From 7723d2c96abd0e93e1445b11e676361513d22de4 Mon Sep 17 00:00:00 2001
From: Jess Yuen
Date: Mon, 13 Apr 2020 23:17:31 -0700
Subject: [PATCH 10/29] Send response only if req/resp versions differ

Signed-off-by: Jess Yuen
---
 internal/app/orchestrator/orchestrator.go      | 13 ++++++------
 .../app/orchestrator/orchestrator_test.go      | 21 ++++++++++++++++++-
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go
index 17286b1d..aa845b73 100644
--- a/internal/app/orchestrator/orchestrator.go
+++ b/internal/app/orchestrator/orchestrator.go
@@ -146,9 +146,9 @@ func (o *orchestrator) CreateWatch(req gcp.Request) (chan gcp.Response, func())
         o.logger.With("err", err).With("key", aggregatedKey).Warn(ctx, "failed to fetch aggregated key")
     }

-    if cached != nil && cached.Resp != nil {
-        // If we have a cached response, immediately push the result to the
-        // response channel.
+    if cached != nil && cached.Resp != nil && cached.Resp.Raw.GetVersionInfo() != req.GetVersionInfo() {
+        // If we have a cached response and the version is different,
+        // immediately push the result to the response channel.
         go func() { responseChannel <- convertToGcpResponse(cached.Resp, req) }()
     }

@@ -169,7 +169,7 @@ func (o *orchestrator) CreateWatch(req gcp.Request) (chan gcp.Response, func())
     }
     o.upstreamResponseMap.mu.Unlock()

-    return responseChannel, o.onCancel(&req)
+    return responseChannel, o.onCancelWatch(&req)
 }

 // Fetch implements the polling method of the config cache using a non-empty request.
@@ -259,8 +259,8 @@ func (o *orchestrator) onCacheEvicted(key string, resource cache.Resource) {
     o.upstreamResponseMap.delete(key)
 }

-// onCancel cleans up the cached watch when called.
-func (o *orchestrator) onCancel(req *gcp.Request) func() {
+// onCancelWatch cleans up the cached watch when called.
+func (o *orchestrator) onCancelWatch(req *gcp.Request) func() {
     return func() {
         o.downstreamResponseMap.delete(req)
         // TODO (https://github.com/envoyproxy/xds-relay/issues/57). Clean up
@@ -268,6 +268,7 @@ func (o *orchestrator) onCancel(req *gcp.Request) func() {
     }
 }

+// shutdown closes the upstream connection for the specified aggregated key.
 func (o *orchestrator) shutdown(aggregatedKey string) {
     o.upstreamResponseMap.delete(aggregatedKey)
 }
diff --git a/internal/app/orchestrator/orchestrator_test.go b/internal/app/orchestrator/orchestrator_test.go
index 8671fa95..d0b95fb3 100644
--- a/internal/app/orchestrator/orchestrator_test.go
+++ b/internal/app/orchestrator/orchestrator_test.go
@@ -167,8 +167,11 @@ func TestCachedResponse(t *testing.T) {
     )
     assert.NotNil(t, orchestrator)

+    // Test scenario with different request and response versions.
+    // Version is different, so we expect a response.
     req := gcp.Request{
-        TypeUrl: "type.googleapis.com/envoy.api.v2.Listener",
+        VersionInfo: "0",
+        TypeUrl:     "type.googleapis.com/envoy.api.v2.Listener",
     }

     aggregatedKey, err := mapper.GetKey(req)
@@ -211,9 +214,25 @@ func TestCachedResponse(t *testing.T) {
     assertEqualResources(t, gotResponse, upstreamResponse.Response, req)
     assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannel))

+    // Test scenario with same request and response version.
+    // We expect a watch to be open but no response.
+    req2 := gcp.Request{
+        VersionInfo: "2",
+        TypeUrl:     "type.googleapis.com/envoy.api.v2.Listener",
+    }
+
+    respChannel2, cancelWatch2 := orchestrator.CreateWatch(req2)
+    assert.NotNil(t, respChannel2)
+    assert.Equal(t, 2, len(orchestrator.downstreamResponseMap.responseChannel))
+    assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannel))
+
+    // If we pass this point, it's safe to assume the respChannel2 is empty,
+    // otherwise the test would block and not complete.
     orchestrator.shutdown(aggregatedKey)
     assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannel))
     cancelWatch()
+    assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannel))
+    cancelWatch2()
     assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannel))
 }

From 78ecf040e78e19101a31ca63e21c7db0906ed43b Mon Sep 17 00:00:00 2001
From: Jess Yuen
Date: Tue, 14 Apr 2020 18:28:42 -0700
Subject: [PATCH 11/29] Remove sleep

Signed-off-by: Jess Yuen
---
 internal/app/orchestrator/orchestrator.go | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go
index aa845b73..ff873692 100644
--- a/internal/app/orchestrator/orchestrator.go
+++ b/internal/app/orchestrator/orchestrator.go
@@ -232,9 +232,6 @@ func (o *orchestrator) watchUpstream(
         case <-done:
             // Exit when signaled that the stream has closed.
             return
-        default:
-            // Save some processing power.
- time.Sleep(1 * time.Second) } } } From cc6284ebea28503075241747e63fda50087fbfa8 Mon Sep 17 00:00:00 2001 From: Jyoti Mahapatra <49211422+jyotimahapatra@users.noreply.github.com> Date: Tue, 21 Apr 2020 13:15:49 -0700 Subject: [PATCH 12/29] Merge master (#2) * Don't extend TTL for AddWatch (#60) Signed-off-by: Jyoti Mahapatra Signed-off-by: Jess Yuen --- go.sum | 13 - internal/app/cache/cache.go | 1 - internal/app/orchestrator/orchestrator.go | 16 +- .../app/orchestrator/orchestrator_test.go | 109 ++++---- internal/app/orchestrator/upstream.go | 6 +- internal/app/server/server.go | 10 +- internal/app/upstream/client.go | 236 ++++++++++++++++-- internal/app/upstream/client_mock.go | 26 ++ internal/app/upstream/client_test.go | 207 +++++++++++++++ internal/app/upstream/mock/client.go | 202 +++++++++++++++ 10 files changed, 720 insertions(+), 106 deletions(-) create mode 100644 internal/app/upstream/client_mock.go create mode 100644 internal/app/upstream/client_test.go create mode 100644 internal/app/upstream/mock/client.go diff --git a/go.sum b/go.sum index 75edbe3a..1b3e83e0 100644 --- a/go.sum +++ b/go.sum @@ -80,7 +80,6 @@ github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/iancoleman/strcase v0.0.0-20191112232945-16388991a334 h1:VHgatEHNcBFEB7inlalqfNqw65aNkM1lGX2yt3NmbS8= github.com/iancoleman/strcase v0.0.0-20191112232945-16388991a334/go.mod h1:SK73tn/9oHe+/Y0h39VT4UCxmurVJkR5NA7kMEAOgSE= -github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= @@ -88,12 +87,9 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= -github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/lyft/protoc-gen-star v0.4.14 h1:HUkD4H4dYFIgu3Bns/3N6J5GmKHCEGnhYBwNu3fvXgA= github.com/lyft/protoc-gen-star v0.4.14/go.mod h1:mE8fbna26u7aEA2QCVvvfBU/ZrPgocG1206xAFPcs94= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= @@ -109,7 +105,6 @@ github.com/onsi/gomega v1.9.0 h1:R1uwffexN6Pr340GtYRIdZmAiN4J+iw6WG4wog1DUXg= github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1 
h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -131,7 +126,6 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= -github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v0.0.6 h1:breEStsVwemnKh2/s6gMvSdMEkwW0sK8vGStnlVBMCs= @@ -157,7 +151,6 @@ go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0 h1:sFPn2GLc3poCkfrpIXGhBD2X0CMIo4Q/zSULXrj/+uc= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= -go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee h1:0mgffUl7nfd+FpvXMVz4IDEaUSmT1ysygQC7qYo7sG4= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.14.0 h1:/pduUoebOeeJzTDFuoMgC6nRkiasr1sBCIEorly7m4o= @@ -169,7 +162,6 @@ golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -207,9 +199,7 @@ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5 h1:hKsoRgsbwY1NafxrwTs+k64bikrLBkAgPir1TNCj3Zs= golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 h1:9zdDQZ7Thm29KFXgAX/+yaf3eVbP7djjWp/dXAppNCc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -221,7 +211,6 @@ google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98 google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.25.1 h1:wdKvqQk7IttEw92GoRyKG2IDrUIpgpj6H6m81yfeMW0= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.1 h1:zvIju4sqAGvwKspUQOhwnpcqSbzi7/H6QomNNjTL4sk= google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= @@ -233,7 +222,6 @@ google.golang.org/protobuf v1.20.1 h1:ESRXHgpUBG5D2I5mmsQIyYxB/tQIZfSZ8wLyFDf/N/ google.golang.org/protobuf v1.20.1/go.mod h1:KqelGeouBkcbcuB3HCk4/YH2tmNLk6YSWA5LIWeI/lY= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= @@ -248,5 +236,4 @@ gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= diff --git a/internal/app/cache/cache.go b/internal/app/cache/cache.go index ea0ae37c..aee1647f 100644 --- a/internal/app/cache/cache.go +++ b/internal/app/cache/cache.go @@ -154,7 +154,6 @@ func (c *cache) AddRequest(key string, req *v2.DiscoveryRequest) error { return fmt.Errorf("unable to cast cache value to type resource for key: %s", key) } resource.Requests = append(resource.Requests, req) - resource.ExpirationTime = c.getExpirationTime(time.Now()) c.cache.Add(key, resource) return nil } diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index 02b57ab5..a2612b6b 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -165,7 +165,7 @@ func (o *orchestrator) CreateWatch(req gcp.Request) (chan gcp.Response, func()) // streams. o.upstreamResponseMap.mu.Lock() if _, ok := o.upstreamResponseMap.responseChannel[aggregatedKey]; !ok { - upstreamResponseChan := o.upstreamClient.OpenStream(ctx, &req) + upstreamResponseChan, _, _ := o.upstreamClient.OpenStream(req) respChannel := o.upstreamResponseMap.add(aggregatedKey, upstreamResponseChan) // Spin up a go routine to watch for upstream responses. // One routine is opened per aggregate key. @@ -187,24 +187,24 @@ func (o *orchestrator) Fetch(context.Context, discovery.DiscoveryRequest) (*gcp. 
func (o *orchestrator) watchUpstream( ctx context.Context, aggregatedKey string, - responseChannel <-chan *upstream.Response, + responseChannel <-chan *discovery.DiscoveryResponse, done <-chan bool, ) { for { select { - case x := <-responseChannel: - if x.Err != nil { + case x, more := <-responseChannel: + if !more { // A problem occurred fetching the response upstream, log and // return the most recent cached response, so that the // downstream will reissue the discovery request. - o.logger.With("err", x.Err).With("key", aggregatedKey).Error(ctx, "upstream error") + o.logger.With("key", aggregatedKey).Error(ctx, "upstream error") } else { // Cache the response. - _, err := o.cache.SetResponse(aggregatedKey, x.Response) + _, err := o.cache.SetResponse(aggregatedKey, *x) if err != nil { // If we fail to cache the new response, log and return the old one. o.logger.With("err", err).With("key", aggregatedKey). - With("resp", x.Response).Error(ctx, "Failed to cache the response") + Error(ctx, "Failed to cache the response") } } @@ -220,7 +220,7 @@ func (o *orchestrator) watchUpstream( } else { if cached == nil || cached.Resp == nil { // If cache is empty, there is nothing to fan out. - if x.Err != nil { + if !more { // Warn. Benefit of the doubt that this is the first request. o.logger.With("key", aggregatedKey). Warn(ctx, "attempted to fan out with no cached response") diff --git a/internal/app/orchestrator/orchestrator_test.go b/internal/app/orchestrator/orchestrator_test.go index ae69e470..59f99860 100644 --- a/internal/app/orchestrator/orchestrator_test.go +++ b/internal/app/orchestrator/orchestrator_test.go @@ -16,6 +16,7 @@ import ( "github.com/envoyproxy/xds-relay/internal/app/cache" "github.com/envoyproxy/xds-relay/internal/app/mapper" "github.com/envoyproxy/xds-relay/internal/app/upstream" + upstream_mock "github.com/envoyproxy/xds-relay/internal/app/upstream/mock" "github.com/envoyproxy/xds-relay/internal/pkg/log" yamlproto "github.com/envoyproxy/xds-relay/internal/pkg/util" aggregationv1 "github.com/envoyproxy/xds-relay/pkg/api/aggregation/v1" @@ -23,34 +24,33 @@ import ( ) type mockSimpleUpstreamClient struct { - responseChan <-chan *upstream.Response + responseChan <-chan *v2.DiscoveryResponse } -func (m mockSimpleUpstreamClient) OpenStream(ctx context.Context, req *v2.DiscoveryRequest) <-chan *upstream.Response { - return m.responseChan +func (m mockSimpleUpstreamClient) OpenStream(req v2.DiscoveryRequest) (<-chan *v2.DiscoveryResponse, func(), error) { + return m.responseChan, func() {}, nil } type mockMultiStreamUpstreamClient struct { - ldsResponseChan <-chan *upstream.Response - cdsResponseChan <-chan *upstream.Response + ldsResponseChan <-chan *v2.DiscoveryResponse + cdsResponseChan <-chan *v2.DiscoveryResponse t *testing.T mapper mapper.Mapper } -func (m mockMultiStreamUpstreamClient) OpenStream(ctx context.Context, - req *v2.DiscoveryRequest) <-chan *upstream.Response { - aggregatedKey, err := m.mapper.GetKey(*req) +func (m mockMultiStreamUpstreamClient) OpenStream(req v2.DiscoveryRequest) (<-chan *v2.DiscoveryResponse, func(), error) { + aggregatedKey, err := m.mapper.GetKey(req) assert.NoError(m.t, err) if aggregatedKey == "lds" { - return m.ldsResponseChan + return m.ldsResponseChan, func() {}, nil } else if aggregatedKey == "cds" { - return m.cdsResponseChan + return m.cdsResponseChan, func() {}, nil } m.t.Errorf("Unsupported aggregated key, %s", aggregatedKey) - return nil + return nil, func() {}, nil } func newMockOrchestrator(t *testing.T, mapper mapper.Mapper, 
upstreamClient upstream.Client) *orchestrator { @@ -96,8 +96,12 @@ func assertEqualResources(t *testing.T, got gcp.Response, expected v2.DiscoveryR func TestNew(t *testing.T) { // Trivial test to ensure orchestrator instantiates. - upstreamClient, err := upstream.NewClient(context.Background(), "example.com") - assert.NoError(t, err) + upstreamClient := upstream_mock.NewClient( + context.Background(), + upstream.CallOptions{}, + nil, + nil, + func(m interface{}) error { return nil }) config := aggregationv1.KeyerConfiguration{ Fragments: []*aggregationv1.KeyerConfiguration_Fragment{ @@ -120,7 +124,7 @@ func TestNew(t *testing.T) { } func TestGoldenPath(t *testing.T) { - upstreamResponseChannel := make(chan *upstream.Response) + upstreamResponseChannel := make(chan *v2.DiscoveryResponse) mapper := newMockMapper(t) orchestrator := newMockOrchestrator( t, @@ -140,21 +144,19 @@ func TestGoldenPath(t *testing.T) { assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannel)) assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannel)) - upstreamResponse := upstream.Response{ - Response: v2.DiscoveryResponse{ - VersionInfo: "1", - TypeUrl: "type.googleapis.com/envoy.api.v2.Listener", - Resources: []*any.Any{ - &anypb.Any{ - Value: []byte("lds resource"), - }, + resp := v2.DiscoveryResponse{ + VersionInfo: "1", + TypeUrl: "type.googleapis.com/envoy.api.v2.Listener", + Resources: []*any.Any{ + &anypb.Any{ + Value: []byte("lds resource"), }, }, } - upstreamResponseChannel <- &upstreamResponse + upstreamResponseChannel <- &resp gotResponse := <-respChannel - assertEqualResources(t, gotResponse, upstreamResponse.Response, req) + assertEqualResources(t, gotResponse, resp, req) aggregatedKey, err := mapper.GetKey(req) assert.NoError(t, err) @@ -166,7 +168,7 @@ func TestGoldenPath(t *testing.T) { } func TestCachedResponse(t *testing.T) { - upstreamResponseChannel := make(chan *upstream.Response) + upstreamResponseChannel := make(chan *v2.DiscoveryResponse) mapper := newMockMapper(t) orchestrator := newMockOrchestrator( t, @@ -208,20 +210,19 @@ func TestCachedResponse(t *testing.T) { assertEqualResources(t, gotResponse, mockResponse, req) // Attempt pushing a more recent response from upstream. - upstreamResponse := upstream.Response{ - Response: v2.DiscoveryResponse{ - VersionInfo: "2", - TypeUrl: "type.googleapis.com/envoy.api.v2.Listener", - Resources: []*any.Any{ - &anypb.Any{ - Value: []byte("some other lds resource"), - }, + resp := v2.DiscoveryResponse{ + VersionInfo: "2", + TypeUrl: "type.googleapis.com/envoy.api.v2.Listener", + Resources: []*any.Any{ + &anypb.Any{ + Value: []byte("some other lds resource"), }, }, } - upstreamResponseChannel <- &upstreamResponse + + upstreamResponseChannel <- &resp gotResponse = <-respChannel - assertEqualResources(t, gotResponse, upstreamResponse.Response, req) + assertEqualResources(t, gotResponse, resp, req) assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannel)) // Test scenario with same request and response version. 
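The two scenarios in this test exercise the version guard added to CreateWatch: a cached response is replayed to a new watch only when its VersionInfo differs from the version carried by the request, because a matching version means the client already received and ACKed that configuration, and replaying it would only churn the stream. A simplified sketch of the decision, with shouldReplay and its plain-string arguments standing in for the real cached resource and discovery request:

package main

import "fmt"

// shouldReplay reports whether a cached response should be pushed straight
// onto a newly created watch. An empty cached version means nothing is
// cached yet; equal versions mean the client already holds this config.
func shouldReplay(cachedVersion, requestVersion string) bool {
	return cachedVersion != "" && cachedVersion != requestVersion
}

func main() {
	fmt.Println(shouldReplay("2", "0")) // true: stale client, replay from cache
	fmt.Println(shouldReplay("2", "2")) // false: leave the watch open for upstream
	fmt.Println(shouldReplay("", "0"))  // false: cache is empty
}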
@@ -247,8 +248,8 @@ func TestCachedResponse(t *testing.T) { } func TestMultipleWatchesAndUpstreams(t *testing.T) { - upstreamResponseChannelLDS := make(chan *upstream.Response) - upstreamResponseChannelCDS := make(chan *upstream.Response) + upstreamResponseChannelLDS := make(chan *v2.DiscoveryResponse) + upstreamResponseChannelCDS := make(chan *v2.DiscoveryResponse) mapper := newMockMapper(t) orchestrator := newMockOrchestrator( t, @@ -279,25 +280,21 @@ func TestMultipleWatchesAndUpstreams(t *testing.T) { respChannel3, cancelWatch3 := orchestrator.CreateWatch(req3) assert.NotNil(t, respChannel3) - upstreamResponseLDS := upstream.Response{ - Response: v2.DiscoveryResponse{ - VersionInfo: "1", - TypeUrl: "type.googleapis.com/envoy.api.v2.Listener", - Resources: []*any.Any{ - &anypb.Any{ - Value: []byte("lds resource"), - }, + upstreamResponseLDS := v2.DiscoveryResponse{ + VersionInfo: "1", + TypeUrl: "type.googleapis.com/envoy.api.v2.Listener", + Resources: []*any.Any{ + &anypb.Any{ + Value: []byte("lds resource"), }, }, } - upstreamResponseCDS := upstream.Response{ - Response: v2.DiscoveryResponse{ - VersionInfo: "1", - TypeUrl: "type.googleapis.com/envoy.api.v2.Cluster", - Resources: []*any.Any{ - &anypb.Any{ - Value: []byte("cds resource"), - }, + upstreamResponseCDS := v2.DiscoveryResponse{ + VersionInfo: "1", + TypeUrl: "type.googleapis.com/envoy.api.v2.Cluster", + Resources: []*any.Any{ + &anypb.Any{ + Value: []byte("cds resource"), }, }, } @@ -317,9 +314,9 @@ func TestMultipleWatchesAndUpstreams(t *testing.T) { assert.Equal(t, 3, len(orchestrator.downstreamResponseMap.responseChannel)) assert.Equal(t, 2, len(orchestrator.upstreamResponseMap.responseChannel)) - assertEqualResources(t, gotResponseFromChannel1, upstreamResponseLDS.Response, req1) - assertEqualResources(t, gotResponseFromChannel2, upstreamResponseLDS.Response, req2) - assertEqualResources(t, gotResponseFromChannel3, upstreamResponseCDS.Response, req3) + assertEqualResources(t, gotResponseFromChannel1, upstreamResponseLDS, req1) + assertEqualResources(t, gotResponseFromChannel2, upstreamResponseLDS, req2) + assertEqualResources(t, gotResponseFromChannel3, upstreamResponseCDS, req3) orchestrator.shutdown(aggregatedKeyLDS) orchestrator.shutdown(aggregatedKeyCDS) diff --git a/internal/app/orchestrator/upstream.go b/internal/app/orchestrator/upstream.go index eabd9a27..eb4590ef 100644 --- a/internal/app/orchestrator/upstream.go +++ b/internal/app/orchestrator/upstream.go @@ -3,7 +3,7 @@ package orchestrator import ( "sync" - "github.com/envoyproxy/xds-relay/internal/app/upstream" + discovery "github.com/envoyproxy/go-control-plane/envoy/api/v2" ) // upstream is the map of aggregate key to the receive-only upstream @@ -14,7 +14,7 @@ type upstreamResponseMap struct { } type upstreamResponseChannel struct { - response <-chan *upstream.Response + response <-chan *discovery.DiscoveryResponse done chan bool } @@ -24,7 +24,7 @@ type upstreamResponseChannel struct { // Expects orchestrator to manage the lock since this is called simultaneously // with stream open. 
func (u *upstreamResponseMap) add(aggregatedKey string, - responseChannel <-chan *upstream.Response) upstreamResponseChannel { + responseChannel <-chan *discovery.DiscoveryResponse) upstreamResponseChannel { channel := upstreamResponseChannel{ response: responseChannel, done: make(chan bool, 1), diff --git a/internal/app/server/server.go b/internal/app/server/server.go index 2d345413..6b94c405 100644 --- a/internal/app/server/server.go +++ b/internal/app/server/server.go @@ -4,6 +4,7 @@ import ( "context" "net" "strconv" + "time" "github.com/envoyproxy/xds-relay/internal/app/mapper" "github.com/envoyproxy/xds-relay/internal/app/orchestrator" @@ -38,7 +39,14 @@ func Run(bootstrapConfig *bootstrapv1.Bootstrap, // Initialize upstream client. upstreamPort := strconv.FormatUint(uint64(bootstrapConfig.OriginServer.Address.PortValue), 10) upstreamAddress := net.JoinHostPort(bootstrapConfig.OriginServer.Address.Address, upstreamPort) - upstreamClient, err := upstream.NewClient(ctx, upstreamAddress) + // TODO: configure timeout param from bootstrap config. + // https://github.com/envoyproxy/xds-relay/issues/55 + upstreamClient, err := upstream.NewClient( + ctx, + upstreamAddress, + upstream.CallOptions{Timeout: time.Minute}, + logger.Named("xdsclient"), + ) if err != nil { logger.With("error", err).Panic(ctx, "failed to initialize upstream client") } diff --git a/internal/app/upstream/client.go b/internal/app/upstream/client.go index a15e9fec..2a9a1d6c 100644 --- a/internal/app/upstream/client.go +++ b/internal/app/upstream/client.go @@ -2,10 +2,30 @@ package upstream import ( "context" + "fmt" + "time" v2 "github.com/envoyproxy/go-control-plane/envoy/api/v2" + "github.com/envoyproxy/xds-relay/internal/pkg/log" + "google.golang.org/grpc" ) +const ( + // ListenerTypeURL is the resource url for listener + ListenerTypeURL = "type.googleapis.com/envoy.api.v2.Listener" + // ClusterTypeURL is the resource url for cluster + ClusterTypeURL = "type.googleapis.com/envoy.api.v2.Cluster" + // EndpointTypeURL is the resource url for endpoints + EndpointTypeURL = "type.googleapis.com/envoy.api.v2.ClusterLoadAssignment" + // RouteTypeURL is the resource url for route + RouteTypeURL = "type.googleapis.com/envoy.api.v2.RouteConfiguration" +) + +// UnsupportedResourceError is a custom error for unsupported typeURL +type UnsupportedResourceError struct { + TypeURL string +} + // Client handles the requests and responses from the origin server. // The xds client handles each xds request on a separate stream, // e.g. 2 different cds requests happen on 2 separate streams. @@ -22,31 +42,33 @@ type Client interface { // All responses from the origin server are sent back through the callback function. // // OpenStream uses the retry and timeout configurations to make best effort to get the responses from origin server. - // If the timeouts are exhausted, receive fails or a irrecoverable error occurs, the error is sent back to the caller. + // If the timeouts are exhausted, receive fails or a irrecoverable error occurs, the response channel is closed. // It is the caller's responsibility to send a new request from the last known DiscoveryRequest. - // Cancellation of the context cleans up all outstanding streams and releases all resources. - OpenStream(context.Context, *v2.DiscoveryRequest) <-chan *Response + // The shutdown function should be invoked to signal stream closure. + // The shutdown function represents the intent that a stream is supposed to be closed. 
+ // All goroutines that depend on the ctx object should consider ctx.Done to be related to shutdown. + // All such scenarios need to exit cleanly and are not considered an erroneous situation. + OpenStream(v2.DiscoveryRequest) (<-chan *v2.DiscoveryResponse, func(), error) } type client struct { - //nolint - ldsClient v2.ListenerDiscoveryServiceClient - //nolint - rdsClient v2.RouteDiscoveryServiceClient - //nolint - edsClient v2.EndpointDiscoveryServiceClient - //nolint - cdsClient v2.ClusterDiscoveryServiceClient + ldsClient v2.ListenerDiscoveryServiceClient + rdsClient v2.RouteDiscoveryServiceClient + edsClient v2.EndpointDiscoveryServiceClient + cdsClient v2.ClusterDiscoveryServiceClient + callOptions CallOptions + logger log.Logger } -// Response struct is a holder for the result from a single request. -// A request can result in a response from origin server or an error -// Only one of the fields is valid at any time. If the error is set, the response will be ignored. -type Response struct { - //nolint - Response v2.DiscoveryResponse - //nolint - Err error +// CallOptions contains grpc client call options +type CallOptions struct { + // Timeout is the time to wait on a blocking grpc SendMsg. + Timeout time.Duration +} + +type version struct { + version string + nonce string } // NewClient creates a grpc connection with an upstream origin server. @@ -55,11 +77,177 @@ type Response struct { // // The method does not block until the underlying connection is up. // Returns immediately and connecting the server happens in background -// TODO: pass retry/timeout configurations -func NewClient(ctx context.Context, url string) (Client, error) { - return &client{}, nil +func NewClient(ctx context.Context, url string, callOptions CallOptions, logger log.Logger) (Client, error) { + // TODO: configure grpc options.https://github.com/envoyproxy/xds-relay/issues/55 + conn, err := grpc.Dial(url, grpc.WithInsecure()) + if err != nil { + return nil, err + } + + ldsClient := v2.NewListenerDiscoveryServiceClient(conn) + rdsClient := v2.NewRouteDiscoveryServiceClient(conn) + edsClient := v2.NewEndpointDiscoveryServiceClient(conn) + cdsClient := v2.NewClusterDiscoveryServiceClient(conn) + + go shutDown(ctx, conn) + + return &client{ + ldsClient: ldsClient, + rdsClient: rdsClient, + edsClient: edsClient, + cdsClient: cdsClient, + callOptions: callOptions, + logger: logger, + }, nil +} + +func (m *client) OpenStream(request v2.DiscoveryRequest) (<-chan *v2.DiscoveryResponse, func(), error) { + ctx, cancel := context.WithCancel(context.Background()) + var stream grpc.ClientStream + var err error + switch request.GetTypeUrl() { + case ListenerTypeURL: + stream, err = m.ldsClient.StreamListeners(ctx) + case ClusterTypeURL: + stream, err = m.cdsClient.StreamClusters(ctx) + case RouteTypeURL: + stream, err = m.rdsClient.StreamRoutes(ctx) + case EndpointTypeURL: + stream, err = m.edsClient.StreamEndpoints(ctx) + default: + defer cancel() + m.logger.Error(ctx, "Unsupported Type Url %s", request.GetTypeUrl()) + return nil, nil, &UnsupportedResourceError{TypeURL: request.GetTypeUrl()} + } + + if err != nil { + defer cancel() + return nil, nil, err + } + + signal := make(chan *version, 1) + // The xds protocol https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol#ack + // specifies that the first request be empty nonce and empty version. + // The origin server will respond with the latest version. 
+ signal <- &version{nonce: "", version: ""} + + response := make(chan *v2.DiscoveryResponse) + + go send(ctx, m.logger, cancel, &request, stream, signal, m.callOptions) + go recv(ctx, cancel, m.logger, response, stream, signal) + + // We use context cancellation over using a separate channel for signalling stream shutdown. + // The reason is cancelling a context tied with the stream is straightforward to signal closure. + // Also, the shutdown function could potentially be called more than once by a caller. + // Closing channels is not idempotent while cancelling context is idempotent. + return response, func() { cancel() }, nil +} + +// It is safe to assume send goroutine will not leak as long as these conditions are true: +// - SendMsg is performed with timeout. +// - send is a receiver for signal and exits when signal is closed by the owner. +// - send also exits on context cancellations. +func send( + ctx context.Context, + logger log.Logger, + cancelFunc context.CancelFunc, + request *v2.DiscoveryRequest, + stream grpc.ClientStream, + signal chan *version, + callOptions CallOptions) { + for { + select { + case sig, ok := <-signal: + if !ok { + return + } + request.ResponseNonce = sig.nonce + request.VersionInfo = sig.version + err := doWithTimeout(ctx, func() error { + return stream.SendMsg(request) + }, callOptions.Timeout) + if err != nil { + handleError(ctx, logger, "Error in SendMsg", cancelFunc, err) + return + } + case <-ctx.Done(): + _ = stream.CloseSend() + return + } + } +} + +// recv is an infinite loop which blocks on RecvMsg. +// The only ways to exit the goroutine is by cancelling the context or when an error occurs. +func recv( + ctx context.Context, + cancelFunc context.CancelFunc, + logger log.Logger, + response chan *v2.DiscoveryResponse, + stream grpc.ClientStream, + signal chan *version) { + for { + resp := new(v2.DiscoveryResponse) + if err := stream.RecvMsg(resp); err != nil { + handleError(ctx, logger, "Error in RecvMsg", cancelFunc, err) + break + } + select { + case <-ctx.Done(): + break + default: + response <- resp + signal <- &version{version: resp.GetVersionInfo(), nonce: resp.GetNonce()} + } + } + closeChannels(signal, response) +} + +func handleError(ctx context.Context, logger log.Logger, errMsg string, cancelFunc context.CancelFunc, err error) { + defer cancelFunc() + select { + case <-ctx.Done(): + // Context was cancelled, hence this is not an erroneous scenario. + // Context is cancelled only when shutdown is called or any of the send/recv goroutines error out. + // The shutdown can be called by the caller in many cases, during app shutdown/ttl expiry, etc + default: + logger.Error(ctx, "%s: %s", errMsg, err.Error()) + } +} + +// closeChannels is called whenever the context is cancelled (ctx.Done) in Send and Recv goroutines. +// It is also called when an irrecoverable error occurs and the error is passed to the caller. +func closeChannels(versionChan chan *version, responseChan chan *v2.DiscoveryResponse) { + close(versionChan) + close(responseChan) +} + +// shutDown should be called in a separate goroutine. +// This is a blocking function that closes the upstream connection on context completion. +func shutDown(ctx context.Context, conn *grpc.ClientConn) { + <-ctx.Done() + conn.Close() +} + +// DoWithTimeout runs f and returns its error. If the deadline d elapses first, +// it returns a DeadlineExceeded error instead. 
+// Ref: https://github.com/grpc/grpc-go/issues/1229#issuecomment-302755717 +func doWithTimeout(ctx context.Context, f func() error, d time.Duration) error { + timeoutCtx, cancel := context.WithTimeout(ctx, d) + defer cancel() + errChan := make(chan error, 1) + go func() { + errChan <- f() + close(errChan) + }() + select { + case <-timeoutCtx.Done(): + return timeoutCtx.Err() + case err := <-errChan: + return err + } } -func (m *client) OpenStream(ctx context.Context, request *v2.DiscoveryRequest) <-chan *Response { - return nil +func (e *UnsupportedResourceError) Error() string { + return fmt.Sprintf("Unsupported resource typeUrl: %s", e.TypeURL) } diff --git a/internal/app/upstream/client_mock.go b/internal/app/upstream/client_mock.go new file mode 100644 index 00000000..b52bf327 --- /dev/null +++ b/internal/app/upstream/client_mock.go @@ -0,0 +1,26 @@ +package upstream + +import ( + "context" + + v2 "github.com/envoyproxy/go-control-plane/envoy/api/v2" + "github.com/envoyproxy/xds-relay/internal/pkg/log" +) + +// NewMockClient creates a mock implementation for testing +func NewMockClient( + ctx context.Context, + ldsClient v2.ListenerDiscoveryServiceClient, + rdsClient v2.RouteDiscoveryServiceClient, + edsClient v2.EndpointDiscoveryServiceClient, + cdsClient v2.ClusterDiscoveryServiceClient, + callOptions CallOptions) Client { + return &client{ + ldsClient: ldsClient, + rdsClient: rdsClient, + edsClient: edsClient, + cdsClient: cdsClient, + callOptions: callOptions, + logger: log.New("panic"), + } +} diff --git a/internal/app/upstream/client_test.go b/internal/app/upstream/client_test.go new file mode 100644 index 00000000..152cabd9 --- /dev/null +++ b/internal/app/upstream/client_test.go @@ -0,0 +1,207 @@ +package upstream_test + +import ( + "context" + "fmt" + "strconv" + "testing" + "time" + + v2 "github.com/envoyproxy/go-control-plane/envoy/api/v2" + core "github.com/envoyproxy/go-control-plane/envoy/api/v2/core" + "github.com/envoyproxy/xds-relay/internal/app/upstream" + mock "github.com/envoyproxy/xds-relay/internal/app/upstream/mock" + "github.com/stretchr/testify/assert" +) + +type CallOptions = upstream.CallOptions + +func TestOpenStreamShouldReturnErrorForInvalidTypeUrl(t *testing.T) { + client := createMockClient() + + respCh, _, err := client.OpenStream(v2.DiscoveryRequest{}) + assert.NotNil(t, err) + _, ok := err.(*upstream.UnsupportedResourceError) + assert.True(t, ok) + assert.Nil(t, respCh) +} + +func TestOpenStreamShouldResturnErrorOnStreamCreationFailure(t *testing.T) { + client := createMockClientWithError() + + typeURLs := []string{ + upstream.ListenerTypeURL, + upstream.ClusterTypeURL, + upstream.RouteTypeURL, + upstream.EndpointTypeURL, + } + for _, typeURL := range typeURLs { + t.Run(typeURL, func(t *testing.T) { + respCh, _, err := client.OpenStream(v2.DiscoveryRequest{ + TypeUrl: typeURL, + Node: &core.Node{}, + }) + assert.Nil(t, respCh) + assert.NotNil(t, err) + }) + } +} + +func TestOpenStreamShouldReturnNonEmptyResponseChannel(t *testing.T) { + client := createMockClient() + + respCh, done, err := client.OpenStream(v2.DiscoveryRequest{ + TypeUrl: upstream.ListenerTypeURL, + Node: &core.Node{}, + }) + assert.NotNil(t, respCh) + assert.Nil(t, err) + done() +} + +func TestOpenStreamShouldSendTheFirstRequestToOriginServer(t *testing.T) { + var message *v2.DiscoveryRequest + responseChan := make(chan *v2.DiscoveryResponse) + wait := make(chan bool) + client := mock.NewClient( + context.Background(), + CallOptions{Timeout: time.Nanosecond}, + nil, + responseChan, 
+ func(m interface{}) error { + message = m.(*v2.DiscoveryRequest) + wait <- true + return nil + }, + ) + + node := &core.Node{} + _, done, _ := client.OpenStream(v2.DiscoveryRequest{ + TypeUrl: upstream.ListenerTypeURL, + Node: node, + }) + <-wait + assert.NotNil(t, message) + assert.Equal(t, message.GetNode(), node) + assert.Equal(t, message.TypeUrl, upstream.ListenerTypeURL) + done() +} + +func TestOpenStreamShouldSendErrorIfSendFails(t *testing.T) { + responseChan := make(chan *v2.DiscoveryResponse) + sendError := fmt.Errorf("") + client := createMockClientWithReponse(time.Second, responseChan, func(m interface{}) error { + return sendError + }) + + resp, done, _ := client.OpenStream(v2.DiscoveryRequest{ + TypeUrl: upstream.ListenerTypeURL, + Node: &core.Node{}, + }) + _, more := <-resp + assert.False(t, more) + done() +} + +func TestOpenStreamShouldSendTheResponseOnTheChannel(t *testing.T) { + responseChan := make(chan *v2.DiscoveryResponse) + response := &v2.DiscoveryResponse{} + client := createMockClientWithReponse(time.Second, responseChan, func(m interface{}) error { + responseChan <- response + return nil + }) + + resp, done, err := client.OpenStream(v2.DiscoveryRequest{ + TypeUrl: upstream.ListenerTypeURL, + Node: &core.Node{}, + }) + assert.Nil(t, err) + assert.NotNil(t, resp) + val := <-resp + assert.Equal(t, val, response) + done() +} + +func TestOpenStreamShouldSendTheNextRequestWithUpdatedVersionAndNonce(t *testing.T) { + responseChan := make(chan *v2.DiscoveryResponse) + lastAppliedVersion := "" + index := 0 + client := createMockClientWithReponse(time.Second, responseChan, func(m interface{}) error { + message := m.(*v2.DiscoveryRequest) + + assert.Equal(t, message.GetVersionInfo(), lastAppliedVersion) + assert.Equal(t, message.GetResponseNonce(), lastAppliedVersion) + + response := &v2.DiscoveryResponse{ + VersionInfo: strconv.Itoa(index), + Nonce: strconv.Itoa(index), + TypeUrl: upstream.ListenerTypeURL, + } + lastAppliedVersion = strconv.Itoa(index) + index++ + responseChan <- response + return nil + }) + + resp, done, err := client.OpenStream(v2.DiscoveryRequest{ + TypeUrl: upstream.ListenerTypeURL, + Node: &core.Node{}, + }) + assert.Nil(t, err) + assert.NotNil(t, resp) + for i := 0; i < 5; i++ { + val := <-resp + assert.Equal(t, val.GetVersionInfo(), strconv.Itoa(i)) + assert.Equal(t, val.GetNonce(), strconv.Itoa(i)) + } + + done() +} + +func TestOpenStreamShouldSendErrorWhenSendMsgBlocks(t *testing.T) { + responseChan := make(chan *v2.DiscoveryResponse) + blockedCtx, cancel := context.WithCancel(context.Background()) + client := createMockClientWithReponse(time.Nanosecond, responseChan, func(m interface{}) error { + // TODO: When stats are available, strengthen the test + // https://github.com/envoyproxy/xds-relay/issues/61 + <-blockedCtx.Done() + return nil + }) + + resp, done, err := client.OpenStream(v2.DiscoveryRequest{ + TypeUrl: upstream.ListenerTypeURL, + Node: &core.Node{}, + }) + assert.Nil(t, err) + assert.NotNil(t, resp) + _, more := <-resp + assert.False(t, more) + + done() + cancel() +} + +func createMockClient() upstream.Client { + return mock.NewClient( + context.Background(), + CallOptions{Timeout: time.Nanosecond}, + nil, + make(chan *v2.DiscoveryResponse), + func(m interface{}) error { return nil }) +} + +func createMockClientWithError() upstream.Client { + return mock.NewClient( + context.Background(), + CallOptions{Timeout: time.Nanosecond}, + fmt.Errorf("error"), + make(chan *v2.DiscoveryResponse), + func(m interface{}) error { return nil }) 
+} + +func createMockClientWithReponse( + t time.Duration, + r chan *v2.DiscoveryResponse, + sendCb func(m interface{}) error) upstream.Client { + return mock.NewClient(context.Background(), CallOptions{Timeout: t}, nil, r, sendCb) +} diff --git a/internal/app/upstream/mock/client.go b/internal/app/upstream/mock/client.go new file mode 100644 index 00000000..17343ffa --- /dev/null +++ b/internal/app/upstream/mock/client.go @@ -0,0 +1,202 @@ +package upstream + +import ( + "context" + "fmt" + "io" + + v2 "github.com/envoyproxy/go-control-plane/envoy/api/v2" + "github.com/envoyproxy/xds-relay/internal/app/upstream" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" +) + +// NewClient creates a mock implementation for testing +func NewClient( + ctx context.Context, + callOptions upstream.CallOptions, + errorOnCreate error, + receiveChan chan *v2.DiscoveryResponse, + sendCb func(m interface{}) error) upstream.Client { + return upstream.NewMockClient( + ctx, + createMockLdsClient(errorOnCreate, receiveChan, sendCb), + createMockRdsClient(errorOnCreate, receiveChan, sendCb), + createMockEdsClient(errorOnCreate, receiveChan, sendCb), + createMockCdsClient(errorOnCreate, receiveChan, sendCb), + callOptions, + ) +} + +func createMockLdsClient( + errorOnCreate error, + receiveChan chan *v2.DiscoveryResponse, + sendCb func(m interface{}) error) v2.ListenerDiscoveryServiceClient { + return &mockClient{errorOnStreamCreate: errorOnCreate, receiveChan: receiveChan, sendCb: sendCb} +} + +func createMockCdsClient( + errorOnCreate error, + receiveChan chan *v2.DiscoveryResponse, + sendCb func(m interface{}) error) v2.ClusterDiscoveryServiceClient { + return &mockClient{errorOnStreamCreate: errorOnCreate, receiveChan: receiveChan, sendCb: sendCb} +} + +func createMockRdsClient( + errorOnCreate error, + receiveChan chan *v2.DiscoveryResponse, + sendCb func(m interface{}) error) v2.RouteDiscoveryServiceClient { + return &mockClient{errorOnStreamCreate: errorOnCreate, receiveChan: receiveChan, sendCb: sendCb} +} + +func createMockEdsClient( + errorOnCreate error, + receiveChan chan *v2.DiscoveryResponse, + sendCb func(m interface{}) error) v2.EndpointDiscoveryServiceClient { + return &mockClient{errorOnStreamCreate: errorOnCreate, receiveChan: receiveChan, sendCb: sendCb} +} + +type mockClient struct { + errorOnStreamCreate error + receiveChan chan *v2.DiscoveryResponse + sendCb func(m interface{}) error +} + +type mockGrpcStream struct { + ctx context.Context + receiveChan chan *v2.DiscoveryResponse + sendCb func(m interface{}) error +} + +func (stream *mockGrpcStream) SendMsg(m interface{}) error { + return stream.sendCb(m) +} + +func (stream *mockGrpcStream) RecvMsg(m interface{}) error { + for { + select { + // https://github.com/grpc/grpc-go/issues/1894#issuecomment-370487012 + case <-stream.ctx.Done(): + return io.EOF + case resp := <-stream.receiveChan: + message := m.(*v2.DiscoveryResponse) + message.VersionInfo = resp.GetVersionInfo() + message.Nonce = resp.GetNonce() + message.TypeUrl = resp.GetTypeUrl() + message.Resources = resp.GetResources() + return nil + } + } +} + +func (stream *mockGrpcStream) Send(*v2.DiscoveryRequest) error { + return fmt.Errorf("Not implemented") +} + +func (stream *mockGrpcStream) Recv() (*v2.DiscoveryResponse, error) { + return nil, fmt.Errorf("Not implemented") +} + +func (stream *mockGrpcStream) Header() (metadata.MD, error) { + return nil, fmt.Errorf("Not implemented") +} + +func (stream *mockGrpcStream) Trailer() metadata.MD { + return nil +} + +func 
(stream *mockGrpcStream) CloseSend() error { + return nil +} + +func (stream *mockGrpcStream) Context() context.Context { + return stream.ctx +} + +func (c *mockClient) StreamListeners( + ctx context.Context, + opts ...grpc.CallOption) (v2.ListenerDiscoveryService_StreamListenersClient, error) { + if c.errorOnStreamCreate != nil { + return nil, c.errorOnStreamCreate + } + return &mockGrpcStream{ctx: ctx, receiveChan: c.receiveChan, sendCb: c.sendCb}, nil +} + +func (c *mockClient) StreamClusters( + ctx context.Context, + opts ...grpc.CallOption) (v2.ClusterDiscoveryService_StreamClustersClient, error) { + if c.errorOnStreamCreate != nil { + return nil, c.errorOnStreamCreate + } + return &mockGrpcStream{ctx: ctx, receiveChan: c.receiveChan, sendCb: c.sendCb}, nil +} + +func (c *mockClient) StreamRoutes( + ctx context.Context, + opts ...grpc.CallOption) (v2.RouteDiscoveryService_StreamRoutesClient, error) { + if c.errorOnStreamCreate != nil { + return nil, c.errorOnStreamCreate + } + return &mockGrpcStream{ctx: ctx, receiveChan: c.receiveChan, sendCb: c.sendCb}, nil +} + +func (c *mockClient) StreamEndpoints( + ctx context.Context, + opts ...grpc.CallOption) (v2.EndpointDiscoveryService_StreamEndpointsClient, error) { + if c.errorOnStreamCreate != nil { + return nil, c.errorOnStreamCreate + } + return &mockGrpcStream{ctx: ctx, receiveChan: c.receiveChan, sendCb: c.sendCb}, nil +} + +func (c *mockClient) DeltaListeners( + ctx context.Context, + opts ...grpc.CallOption) (v2.ListenerDiscoveryService_DeltaListenersClient, error) { + return nil, fmt.Errorf("Not implemented") +} + +func (c *mockClient) DeltaClusters( + ctx context.Context, + opts ...grpc.CallOption) (v2.ClusterDiscoveryService_DeltaClustersClient, error) { + return nil, fmt.Errorf("Not implemented") +} + +func (c *mockClient) DeltaRoutes( + ctx context.Context, + opts ...grpc.CallOption) (v2.RouteDiscoveryService_DeltaRoutesClient, error) { + return nil, fmt.Errorf("Not implemented") +} + +func (c *mockClient) DeltaEndpoints( + ctx context.Context, + opts ...grpc.CallOption) (v2.EndpointDiscoveryService_DeltaEndpointsClient, error) { + return nil, fmt.Errorf("Not implemented") +} + +func (c *mockClient) FetchListeners( + ctx context.Context, + in *v2.DiscoveryRequest, + opts ...grpc.CallOption) (*v2.DiscoveryResponse, error) { + return nil, fmt.Errorf("Not implemented") +} + +func (c *mockClient) FetchClusters( + ctx context.Context, + in *v2.DiscoveryRequest, + opts ...grpc.CallOption) (*v2.DiscoveryResponse, error) { + return nil, fmt.Errorf("Not implemented") +} + +func (c *mockClient) FetchRoutes( + ctx context.Context, + in *v2.DiscoveryRequest, + opts ...grpc.CallOption) (*v2.DiscoveryResponse, error) { + return nil, fmt.Errorf("Not implemented") +} + +func (c *mockClient) FetchEndpoints( + ctx context.Context, + in *v2.DiscoveryRequest, + opts ...grpc.CallOption) (*v2.DiscoveryResponse, error) { + return nil, fmt.Errorf("Not implemented") +} From 24d87a62a5ab77ad3525a749924541279ce2805e Mon Sep 17 00:00:00 2001 From: Lisa Lu Date: Thu, 23 Apr 2020 11:03:34 -0700 Subject: [PATCH 13/29] DeleteRequest API (#59) Fixes #57 This change adds an API to the cache to support deleting a request when given the key and the (pointer to the) request, which the orchestrator can use upon cancellation. If the key is not in the cache, nothing happens. The input type to AddRequest was also changed to take a pointer to a request. 
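Replacing the per-key slice of requests with a map keyed by the request pointer is what makes this deletion cheap and idempotent: the pointer identifies an open watch, so removing the same watch twice, or one that was never added, is a harmless no-op, and SetResponse can still hand the whole set back to the orchestrator for fanout. A small self-contained sketch of that bookkeeping; requestSet and its methods are illustrative stand-ins rather than the cache's actual API:

package main

import "fmt"

type request struct{ typeURL string }

// requestSet tracks open watches by request pointer, mirroring the
// map[*v2.DiscoveryRequest]bool stored in each cache entry.
type requestSet map[*request]bool

func (s requestSet) add(r *request)    { s[r] = true }
func (s requestSet) remove(r *request) { delete(s, r) } // no-op when absent

func main() {
	a := &request{typeURL: "type.googleapis.com/envoy.api.v2.Listener"}
	b := &request{typeURL: "type.googleapis.com/envoy.api.v2.Cluster"}

	s := requestSet{}
	s.add(a)
	s.add(b)
	s.remove(a)
	s.remove(a)         // deleting twice is safe
	fmt.Println(len(s)) // 1: only b remains
}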
Signed-off-by: Lisa Lu --- internal/app/cache/cache.go | 31 +++++++++++++++++---- internal/app/cache/cache_test.go | 48 ++++++++++++++++++++++++++------ 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/internal/app/cache/cache.go b/internal/app/cache/cache.go index aee1647f..8e869f61 100644 --- a/internal/app/cache/cache.go +++ b/internal/app/cache/cache.go @@ -19,10 +19,13 @@ type Cache interface { Fetch(key string) (*Resource, error) // SetResponse sets the cache response and returns the list of requests. - SetResponse(key string, resp v2.DiscoveryResponse) ([]*v2.DiscoveryRequest, error) + SetResponse(key string, resp v2.DiscoveryResponse) (map[*v2.DiscoveryRequest]bool, error) // AddRequest adds the request to the cache. AddRequest(key string, req *v2.DiscoveryRequest) error + + // DeleteRequest removes the given request from any cache entries it's present in. + DeleteRequest(key string, req *v2.DiscoveryRequest) error } type cache struct { @@ -39,7 +42,7 @@ type Response struct { type Resource struct { Resp *Response - Requests []*v2.DiscoveryRequest + Requests map[*v2.DiscoveryRequest]bool ExpirationTime time.Time } @@ -107,7 +110,7 @@ func (c *cache) Fetch(key string) (*Resource, error) { return &resource, nil } -func (c *cache) SetResponse(key string, resp v2.DiscoveryResponse) ([]*v2.DiscoveryRequest, error) { +func (c *cache) SetResponse(key string, resp v2.DiscoveryResponse) (map[*v2.DiscoveryRequest]bool, error) { c.cacheMu.Lock() defer c.cacheMu.Unlock() marshaledResources, err := MarshalResources(resp.Resources) @@ -142,8 +145,10 @@ func (c *cache) AddRequest(key string, req *v2.DiscoveryRequest) error { defer c.cacheMu.Unlock() value, found := c.cache.Get(key) if !found { + requests := make(map[*v2.DiscoveryRequest]bool) + requests[req] = true resource := Resource{ - Requests: []*v2.DiscoveryRequest{req}, + Requests: requests, ExpirationTime: c.getExpirationTime(time.Now()), } c.cache.Add(key, resource) @@ -153,7 +158,23 @@ func (c *cache) AddRequest(key string, req *v2.DiscoveryRequest) error { if !ok { return fmt.Errorf("unable to cast cache value to type resource for key: %s", key) } - resource.Requests = append(resource.Requests, req) + resource.Requests[req] = true + c.cache.Add(key, resource) + return nil +} + +func (c *cache) DeleteRequest(key string, req *v2.DiscoveryRequest) error { + c.cacheMu.Lock() + defer c.cacheMu.Unlock() + value, found := c.cache.Get(key) + if !found { + return nil + } + resource, ok := value.(Resource) + if !ok { + return fmt.Errorf("unable to cast cache value to type resource for key: %s", key) + } + delete(resource.Requests, req) c.cache.Add(key, resource) return nil } diff --git a/internal/app/cache/cache_test.go b/internal/app/cache/cache_test.go index ab670bd0..65395a09 100644 --- a/internal/app/cache/cache_test.go +++ b/internal/app/cache/cache_test.go @@ -29,7 +29,7 @@ func testOnEvict(key string, value Resource) { }) } -var testRequest = v2.DiscoveryRequest{ +var testRequestA = v2.DiscoveryRequest{ VersionInfo: "version_A", Node: &core.Node{ Id: "id_A", @@ -40,6 +40,17 @@ var testRequest = v2.DiscoveryRequest{ ResponseNonce: "nonce_A", } +var testRequestB = v2.DiscoveryRequest{ + VersionInfo: "version_B", + Node: &core.Node{ + Id: "id_B", + Cluster: "cluster_B", + }, + ResourceNames: []string{"resource_B"}, + TypeUrl: "typeURL_B", + ResponseNonce: "nonce_B", +} + var testDiscoveryResponse = v2.DiscoveryResponse{ VersionInfo: "version_A", Resources: []*any.Any{ @@ -74,7 +85,7 @@ func TestAddRequestAndFetch(t *testing.T) 
{ assert.EqualError(t, err, "no value found for key: key_A") assert.Nil(t, resource) - err = cache.AddRequest(testKeyA, &testRequest) + err = cache.AddRequest(testKeyA, &testRequestA) assert.NoError(t, err) resource, err = cache.Fetch(testKeyA) @@ -101,20 +112,20 @@ func TestSetResponseAndFetch(t *testing.T) { } func TestAddRequestAndSetResponse(t *testing.T) { - cache, err := NewCache(1, testOnEvict, time.Second*60) + cache, err := NewCache(2, testOnEvict, time.Second*60) assert.NoError(t, err) - err = cache.AddRequest(testKeyA, &testRequest) + err = cache.AddRequest(testKeyA, &testRequestA) assert.NoError(t, err) - err = cache.AddRequest(testKeyA, &testRequest) + err = cache.AddRequest(testKeyA, &testRequestB) assert.NoError(t, err) requests, err := cache.SetResponse(testKeyA, testDiscoveryResponse) assert.NoError(t, err) assert.Equal(t, 2, len(requests)) - assert.Equal(t, testRequest, *requests[0]) - assert.Equal(t, testRequest, *requests[1]) + assert.Equal(t, true, requests[&testRequestA]) + assert.Equal(t, true, requests[&testRequestB]) resource, err := cache.Fetch(testKeyA) assert.NoError(t, err) @@ -136,7 +147,7 @@ func TestMaxEntries(t *testing.T) { key: testKeyA, reason: "testOnEvict called", }, func() { - err = cache.AddRequest(testKeyB, &testRequest) + err = cache.AddRequest(testKeyB, &testRequestB) assert.NoError(t, err) }) @@ -223,3 +234,24 @@ func TestGetExpirationTime(t *testing.T) { expirationTime := time.Date(0, 0, 0, 0, 0, 2, 0, time.UTC) assert.Equal(t, expirationTime, c.getExpirationTime(currentTime)) } + +func TestDeleteRequest(t *testing.T) { + cache, err := NewCache(1, testOnEvict, time.Second*60) + assert.NoError(t, err) + + err = cache.AddRequest(testKeyA, &testRequestA) + assert.NoError(t, err) + + err = cache.AddRequest(testKeyA, &testRequestA) + assert.NoError(t, err) + + err = cache.DeleteRequest(testKeyA, &testRequestA) + assert.NoError(t, err) + + requests, err := cache.SetResponse(testKeyA, testDiscoveryResponse) + assert.NoError(t, err) + assert.Equal(t, 0, len(requests)) + + err = cache.DeleteRequest(testKeyB, &testRequestB) + assert.NoError(t, err) +} From aefa63b8d11cafa8d2900602297f3dcad963cc74 Mon Sep 17 00:00:00 2001 From: Jyoti Mahapatra <49211422+jyotimahapatra@users.noreply.github.com> Date: Thu, 23 Apr 2020 17:48:01 -0700 Subject: [PATCH 14/29] grpc graceful shutdown (#64) Signed-off-by: Jyoti Mahapatra --- .github/workflows/integration-tests.yml | 8 --- integration/server_test.go | 36 ++++++++++ internal/app/server/server.go | 44 ++++++++++-- internal/app/server/server_test.go | 69 +++++++++++++++++++ internal/app/upstream/client.go | 25 ++----- internal/pkg/util/execute.go | 24 +++++++ internal/pkg/util/execute_test.go | 65 +++++++++++++++++ .../{ => yamlproto}/testdata/and_result.yaml | 0 .../util/{ => yamlproto}/testdata/empty.yaml | 0 .../testdata/error_in_second_rule.yaml | 0 .../testdata/inexistent_field.yaml | 0 .../{ => yamlproto}/testdata/invalid.yaml | 0 ...onfiguration_empty_request_type_match.yaml | 0 ...mpty_request_type_math_in_second_rule.yaml | 0 ...guration_match_predicate_invalid_bool.yaml | 0 ...configuration_missing_match_predicate.yaml | 0 ...onfiguration_missing_result_predicate.yaml | 0 ...on_not_match_empty_request_type_match.yaml | 0 ...guration_not_match_request_type_match.yaml | 0 ...n_or_match_invalid_request_type_match.yaml | 0 ...iguration_or_match_request_type_match.yaml | 0 ...ation_request_node_match_invalid_enum.yaml | 0 ...on_request_node_match_string_fragment.yaml | 0 
...on_request_type_match_string_fragment.yaml | 0 .../testdata/request_node_fragment.yaml | 0 .../testdata/resource_names_fragment.yaml | 0 .../testdata/string_fragment.yaml | 0 .../testdata/typo_in_fragments.yaml | 0 .../pkg/util/{ => yamlproto}/yamlproto.go | 0 .../{ => yamlproto}/yamlproto_suite_test.go | 0 .../util/{ => yamlproto}/yamlproto_test.go | 0 main.go | 2 +- tools/configuration-validator/main.go | 2 +- 33 files changed, 238 insertions(+), 37 deletions(-) create mode 100644 integration/server_test.go create mode 100644 internal/app/server/server_test.go create mode 100644 internal/pkg/util/execute.go create mode 100644 internal/pkg/util/execute_test.go rename internal/pkg/util/{ => yamlproto}/testdata/and_result.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/empty.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/error_in_second_rule.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/inexistent_field.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/invalid.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_empty_request_type_match.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_empty_request_type_math_in_second_rule.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_match_predicate_invalid_bool.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_missing_match_predicate.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_missing_result_predicate.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_not_match_empty_request_type_match.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_not_match_request_type_match.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_or_match_invalid_request_type_match.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_or_match_request_type_match.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_request_node_match_invalid_enum.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_request_node_match_string_fragment.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/keyer_configuration_request_type_match_string_fragment.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/request_node_fragment.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/resource_names_fragment.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/string_fragment.yaml (100%) rename internal/pkg/util/{ => yamlproto}/testdata/typo_in_fragments.yaml (100%) rename internal/pkg/util/{ => yamlproto}/yamlproto.go (100%) rename internal/pkg/util/{ => yamlproto}/yamlproto_suite_test.go (100%) rename internal/pkg/util/{ => yamlproto}/yamlproto_test.go (100%) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index d01550aa..5efdc101 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -3,17 +3,9 @@ on: push: branches: - master - paths: - - 'api/**' - - 'tools/configuration-validator/**' - - .github/workflows/integration-tests.yml pull_request: branches: - master - paths: - - 'api/**' - - 'tools/configuration-validator/**' - - .github/workflows/integration-tests.yml env: PACKAGEPATH: ${{ github.workspace }}/go/src/github.com/${{ github.repository }} jobs: diff --git 
a/integration/server_test.go b/integration/server_test.go new file mode 100644 index 00000000..fcc97a01 --- /dev/null +++ b/integration/server_test.go @@ -0,0 +1,36 @@ +// +build integration + +package integration + +import ( + "os" + "os/exec" + "path" + "syscall" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +const ( + binaryName = "xds-relay" +) + +func TestServerShutdown(t *testing.T) { + dir, err := os.Getwd() + assert.Nil(t, err) + cmd := exec.Command(path.Join(dir, "bin", binaryName), + "-c", "./integration/testdata/bootstrap_configuration_complete_tech_spec.yaml", + "-a", "./integration/testdata/keyer_configuration_complete_tech_spec.yaml", + "-l", "debug", + "-m", "serve") + err = cmd.Start() + assert.Nil(t, err) + <-time.After(5 * time.Second) + assert.Equal(t, -1, cmd.ProcessState.ExitCode()) + e := cmd.Process.Signal(syscall.SIGINT) + assert.Nil(t, e) + err = cmd.Wait() + assert.Nil(t, e) +} diff --git a/internal/app/server/server.go b/internal/app/server/server.go index 6b94c405..5c2b132f 100644 --- a/internal/app/server/server.go +++ b/internal/app/server/server.go @@ -3,13 +3,17 @@ package server import ( "context" "net" + "os" + "os/signal" "strconv" + "syscall" "time" "github.com/envoyproxy/xds-relay/internal/app/mapper" "github.com/envoyproxy/xds-relay/internal/app/orchestrator" "github.com/envoyproxy/xds-relay/internal/app/upstream" "github.com/envoyproxy/xds-relay/internal/pkg/log" + "github.com/envoyproxy/xds-relay/internal/pkg/util" aggregationv1 "github.com/envoyproxy/xds-relay/pkg/api/aggregation/v1" bootstrapv1 "github.com/envoyproxy/xds-relay/pkg/api/bootstrap/v1" @@ -32,7 +36,6 @@ func Run(bootstrapConfig *bootstrapv1.Bootstrap, logger = log.New(bootstrapConfig.Logging.Level.String()) } - // TODO cancel should be invoked by shutdown handlers. 
ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -45,7 +48,7 @@ func Run(bootstrapConfig *bootstrapv1.Bootstrap, ctx, upstreamAddress, upstream.CallOptions{Timeout: time.Minute}, - logger.Named("xdsclient"), + logger, ) if err != nil { logger.With("error", err).Panic(ctx, "failed to initialize upstream client") @@ -72,10 +75,37 @@ func Run(bootstrapConfig *bootstrapv1.Bootstrap, api.RegisterRouteDiscoveryServiceServer(server, gcpServer) api.RegisterListenerDiscoveryServiceServer(server, gcpServer) - if mode == "serve" { - logger.With("address", listener.Addr()).Info(ctx, "Initializing server") - if err := server.Serve(listener); err != nil { - logger.With("err", err).Fatal(ctx, "failed to initialize server") - } + if mode != "serve" { + return + } + + registerShutdownHandler(ctx, cancel, server.GracefulStop, logger, time.Second*30) + logger.With("address", listener.Addr()).Info(ctx, "Initializing server") + if err := server.Serve(listener); err != nil { + logger.With("err", err).Fatal(ctx, "failed to initialize server") } } + +func registerShutdownHandler( + ctx context.Context, + cancel context.CancelFunc, + gracefulStop func(), + logger log.Logger, + waitTime time.Duration) { + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) + go func() { + sig := <-sigs + logger.Info(ctx, "received interrupt signal:", sig.String()) + err := util.DoWithTimeout(ctx, func() error { + logger.Info(ctx, "initiating grpc graceful stop") + gracefulStop() + _ = logger.Sync() + cancel() + return nil + }, waitTime) + if err != nil { + logger.Error(ctx, "shutdown error: ", err.Error()) + } + }() +} diff --git a/internal/app/server/server_test.go b/internal/app/server/server_test.go new file mode 100644 index 00000000..98f6b759 --- /dev/null +++ b/internal/app/server/server_test.go @@ -0,0 +1,69 @@ +package server + +import ( + "context" + "fmt" + "syscall" + "testing" + "time" + + "github.com/envoyproxy/xds-relay/internal/pkg/log" + "github.com/stretchr/testify/assert" +) + +func TestShutdown(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + blockedCh := make(chan bool, 1) + l := &logger{} + registerShutdownHandler(ctx, cancel, func() { + blockedCh <- true + }, l, time.Second*5) + _ = syscall.Kill(syscall.Getpid(), syscall.SIGINT) + <-blockedCh +} + +func TestShutdownTimeout(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + l := &logger{blockedCh: make(chan bool, 1)} + registerShutdownHandler(ctx, cancel, func() { + <-time.After(time.Minute) + }, l, time.Millisecond) + _ = syscall.Kill(syscall.Getpid(), syscall.SIGINT) + <-l.blockedCh + assert.Equal(t, "shutdown error: context deadline exceeded", l.lastErr) +} + +type logger struct { + blockedCh chan bool + lastErr string +} + +func (l *logger) Named(name string) log.Logger { + return l +} + +func (l *logger) With(args ...interface{}) log.Logger { + return l +} + +func (l *logger) Sync() error { return nil } + +func (l *logger) Debug(ctx context.Context, args ...interface{}) { +} + +func (l *logger) Info(ctx context.Context, args ...interface{}) { +} + +func (l *logger) Warn(ctx context.Context, args ...interface{}) { +} + +func (l *logger) Error(ctx context.Context, args ...interface{}) { + l.lastErr = fmt.Sprint(args...) 
+ l.blockedCh <- true +} + +func (l *logger) Fatal(ctx context.Context, args ...interface{}) { +} + +func (l *logger) Panic(ctx context.Context, args ...interface{}) { +} diff --git a/internal/app/upstream/client.go b/internal/app/upstream/client.go index 2a9a1d6c..eb85ac0f 100644 --- a/internal/app/upstream/client.go +++ b/internal/app/upstream/client.go @@ -7,6 +7,7 @@ import ( v2 "github.com/envoyproxy/go-control-plane/envoy/api/v2" "github.com/envoyproxy/xds-relay/internal/pkg/log" + "github.com/envoyproxy/xds-relay/internal/pkg/util" "google.golang.org/grpc" ) @@ -78,6 +79,7 @@ type version struct { // The method does not block until the underlying connection is up. // Returns immediately and connecting the server happens in background func NewClient(ctx context.Context, url string, callOptions CallOptions, logger log.Logger) (Client, error) { + logger.Info(ctx, "Initiating upstream connection.") // TODO: configure grpc options.https://github.com/envoyproxy/xds-relay/issues/55 conn, err := grpc.Dial(url, grpc.WithInsecure()) if err != nil { @@ -163,7 +165,9 @@ func send( } request.ResponseNonce = sig.nonce request.VersionInfo = sig.version - err := doWithTimeout(ctx, func() error { + // Ref: https://github.com/grpc/grpc-go/issues/1229#issuecomment-302755717 + // Call SendMsg in a timeout because it can block in some cases. + err := util.DoWithTimeout(ctx, func() error { return stream.SendMsg(request) }, callOptions.Timeout) if err != nil { @@ -229,25 +233,6 @@ func shutDown(ctx context.Context, conn *grpc.ClientConn) { conn.Close() } -// DoWithTimeout runs f and returns its error. If the deadline d elapses first, -// it returns a DeadlineExceeded error instead. -// Ref: https://github.com/grpc/grpc-go/issues/1229#issuecomment-302755717 -func doWithTimeout(ctx context.Context, f func() error, d time.Duration) error { - timeoutCtx, cancel := context.WithTimeout(ctx, d) - defer cancel() - errChan := make(chan error, 1) - go func() { - errChan <- f() - close(errChan) - }() - select { - case <-timeoutCtx.Done(): - return timeoutCtx.Err() - case err := <-errChan: - return err - } -} - func (e *UnsupportedResourceError) Error() string { return fmt.Sprintf("Unsupported resource typeUrl: %s", e.TypeURL) } diff --git a/internal/pkg/util/execute.go b/internal/pkg/util/execute.go new file mode 100644 index 00000000..da80a70d --- /dev/null +++ b/internal/pkg/util/execute.go @@ -0,0 +1,24 @@ +package util + +import ( + "context" + "time" +) + +// DoWithTimeout runs f and returns its error. +// If the timeout elapses first, returns a ctx timeout error instead. 
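+//
+// A minimal usage sketch (editorial example, not part of the original patch;
+// the send call mirrors the call site in internal/app/upstream/client.go from
+// this same commit):
+//
+//   err := util.DoWithTimeout(ctx, func() error {
+//       return stream.SendMsg(request) // may block; bounded by the timeout
+//   }, callOptions.Timeout)
+//   if err != nil {
+//       // err is a context error (deadline exceeded or canceled) or the
+//       // error returned by f
+//   }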
+func DoWithTimeout(ctx context.Context, f func() error, t time.Duration) error { + timeoutCtx, cancel := context.WithTimeout(ctx, t) + defer cancel() + errChan := make(chan error, 1) + go func() { + errChan <- f() + close(errChan) + }() + select { + case <-timeoutCtx.Done(): + return timeoutCtx.Err() + case err := <-errChan: + return err + } +} diff --git a/internal/pkg/util/execute_test.go b/internal/pkg/util/execute_test.go new file mode 100644 index 00000000..2aacaf14 --- /dev/null +++ b/internal/pkg/util/execute_test.go @@ -0,0 +1,65 @@ +package util_test + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/envoyproxy/xds-relay/internal/pkg/util" + "github.com/stretchr/testify/assert" +) + +func TestExecuteWithinTimeout(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + methodExecuted := false + err := util.DoWithTimeout(ctx, func() error { + methodExecuted = true + return nil + }, time.Second) + + assert.Nil(t, err) + assert.Equal(t, methodExecuted, true) +} + +func TestExecuteWithinTimeoutReturnsError(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + returnErr := fmt.Errorf("error") + err := util.DoWithTimeout(ctx, func() error { + return returnErr + }, time.Second) + + assert.NotNil(t, err) + assert.Equal(t, err, returnErr) +} + +func TestExecuteWithinExceedsTimeout(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + err := util.DoWithTimeout(ctx, func() error { + <-time.After(time.Millisecond) + return nil + }, time.Nanosecond) + + assert.NotNil(t, err) + assert.Equal(t, err.Error(), "context deadline exceeded") +} + +func TestExecuteCancel(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + methodCalled := make(chan bool, 1) + go func() { + <-methodCalled + cancel() + }() + err := util.DoWithTimeout(ctx, func() error { + methodCalled <- true + <-time.After(time.Minute) + return nil + }, time.Minute) + + assert.NotNil(t, err) + assert.Equal(t, err.Error(), "context canceled") +} diff --git a/internal/pkg/util/testdata/and_result.yaml b/internal/pkg/util/yamlproto/testdata/and_result.yaml similarity index 100% rename from internal/pkg/util/testdata/and_result.yaml rename to internal/pkg/util/yamlproto/testdata/and_result.yaml diff --git a/internal/pkg/util/testdata/empty.yaml b/internal/pkg/util/yamlproto/testdata/empty.yaml similarity index 100% rename from internal/pkg/util/testdata/empty.yaml rename to internal/pkg/util/yamlproto/testdata/empty.yaml diff --git a/internal/pkg/util/testdata/error_in_second_rule.yaml b/internal/pkg/util/yamlproto/testdata/error_in_second_rule.yaml similarity index 100% rename from internal/pkg/util/testdata/error_in_second_rule.yaml rename to internal/pkg/util/yamlproto/testdata/error_in_second_rule.yaml diff --git a/internal/pkg/util/testdata/inexistent_field.yaml b/internal/pkg/util/yamlproto/testdata/inexistent_field.yaml similarity index 100% rename from internal/pkg/util/testdata/inexistent_field.yaml rename to internal/pkg/util/yamlproto/testdata/inexistent_field.yaml diff --git a/internal/pkg/util/testdata/invalid.yaml b/internal/pkg/util/yamlproto/testdata/invalid.yaml similarity index 100% rename from internal/pkg/util/testdata/invalid.yaml rename to internal/pkg/util/yamlproto/testdata/invalid.yaml diff --git a/internal/pkg/util/testdata/keyer_configuration_empty_request_type_match.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_empty_request_type_match.yaml 
similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_empty_request_type_match.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_empty_request_type_match.yaml diff --git a/internal/pkg/util/testdata/keyer_configuration_empty_request_type_math_in_second_rule.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_empty_request_type_math_in_second_rule.yaml similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_empty_request_type_math_in_second_rule.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_empty_request_type_math_in_second_rule.yaml diff --git a/internal/pkg/util/testdata/keyer_configuration_match_predicate_invalid_bool.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_match_predicate_invalid_bool.yaml similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_match_predicate_invalid_bool.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_match_predicate_invalid_bool.yaml diff --git a/internal/pkg/util/testdata/keyer_configuration_missing_match_predicate.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_missing_match_predicate.yaml similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_missing_match_predicate.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_missing_match_predicate.yaml diff --git a/internal/pkg/util/testdata/keyer_configuration_missing_result_predicate.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_missing_result_predicate.yaml similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_missing_result_predicate.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_missing_result_predicate.yaml diff --git a/internal/pkg/util/testdata/keyer_configuration_not_match_empty_request_type_match.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_not_match_empty_request_type_match.yaml similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_not_match_empty_request_type_match.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_not_match_empty_request_type_match.yaml diff --git a/internal/pkg/util/testdata/keyer_configuration_not_match_request_type_match.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_not_match_request_type_match.yaml similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_not_match_request_type_match.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_not_match_request_type_match.yaml diff --git a/internal/pkg/util/testdata/keyer_configuration_or_match_invalid_request_type_match.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_or_match_invalid_request_type_match.yaml similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_or_match_invalid_request_type_match.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_or_match_invalid_request_type_match.yaml diff --git a/internal/pkg/util/testdata/keyer_configuration_or_match_request_type_match.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_or_match_request_type_match.yaml similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_or_match_request_type_match.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_or_match_request_type_match.yaml diff --git 
a/internal/pkg/util/testdata/keyer_configuration_request_node_match_invalid_enum.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_request_node_match_invalid_enum.yaml similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_request_node_match_invalid_enum.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_request_node_match_invalid_enum.yaml diff --git a/internal/pkg/util/testdata/keyer_configuration_request_node_match_string_fragment.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_request_node_match_string_fragment.yaml similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_request_node_match_string_fragment.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_request_node_match_string_fragment.yaml diff --git a/internal/pkg/util/testdata/keyer_configuration_request_type_match_string_fragment.yaml b/internal/pkg/util/yamlproto/testdata/keyer_configuration_request_type_match_string_fragment.yaml similarity index 100% rename from internal/pkg/util/testdata/keyer_configuration_request_type_match_string_fragment.yaml rename to internal/pkg/util/yamlproto/testdata/keyer_configuration_request_type_match_string_fragment.yaml diff --git a/internal/pkg/util/testdata/request_node_fragment.yaml b/internal/pkg/util/yamlproto/testdata/request_node_fragment.yaml similarity index 100% rename from internal/pkg/util/testdata/request_node_fragment.yaml rename to internal/pkg/util/yamlproto/testdata/request_node_fragment.yaml diff --git a/internal/pkg/util/testdata/resource_names_fragment.yaml b/internal/pkg/util/yamlproto/testdata/resource_names_fragment.yaml similarity index 100% rename from internal/pkg/util/testdata/resource_names_fragment.yaml rename to internal/pkg/util/yamlproto/testdata/resource_names_fragment.yaml diff --git a/internal/pkg/util/testdata/string_fragment.yaml b/internal/pkg/util/yamlproto/testdata/string_fragment.yaml similarity index 100% rename from internal/pkg/util/testdata/string_fragment.yaml rename to internal/pkg/util/yamlproto/testdata/string_fragment.yaml diff --git a/internal/pkg/util/testdata/typo_in_fragments.yaml b/internal/pkg/util/yamlproto/testdata/typo_in_fragments.yaml similarity index 100% rename from internal/pkg/util/testdata/typo_in_fragments.yaml rename to internal/pkg/util/yamlproto/testdata/typo_in_fragments.yaml diff --git a/internal/pkg/util/yamlproto.go b/internal/pkg/util/yamlproto/yamlproto.go similarity index 100% rename from internal/pkg/util/yamlproto.go rename to internal/pkg/util/yamlproto/yamlproto.go diff --git a/internal/pkg/util/yamlproto_suite_test.go b/internal/pkg/util/yamlproto/yamlproto_suite_test.go similarity index 100% rename from internal/pkg/util/yamlproto_suite_test.go rename to internal/pkg/util/yamlproto/yamlproto_suite_test.go diff --git a/internal/pkg/util/yamlproto_test.go b/internal/pkg/util/yamlproto/yamlproto_test.go similarity index 100% rename from internal/pkg/util/yamlproto_test.go rename to internal/pkg/util/yamlproto/yamlproto_test.go diff --git a/main.go b/main.go index 4b8facf3..e317929a 100644 --- a/main.go +++ b/main.go @@ -5,7 +5,7 @@ import ( "log" "github.com/envoyproxy/xds-relay/internal/app/server" - yamlproto "github.com/envoyproxy/xds-relay/internal/pkg/util" + yamlproto "github.com/envoyproxy/xds-relay/internal/pkg/util/yamlproto" aggregationv1 "github.com/envoyproxy/xds-relay/pkg/api/aggregation/v1" bootstrapv1 "github.com/envoyproxy/xds-relay/pkg/api/bootstrap/v1" 
"github.com/spf13/cobra" diff --git a/tools/configuration-validator/main.go b/tools/configuration-validator/main.go index 8acc09e4..4dd4025a 100644 --- a/tools/configuration-validator/main.go +++ b/tools/configuration-validator/main.go @@ -5,7 +5,7 @@ import ( "log" "os" - yamlproto "github.com/envoyproxy/xds-relay/internal/pkg/util" + yamlproto "github.com/envoyproxy/xds-relay/internal/pkg/util/yamlproto" aggregationv1 "github.com/envoyproxy/xds-relay/pkg/api/aggregation/v1" "github.com/spf13/cobra" From 3672040cf51fe0da20a5e7c3de79aa1679c63c53 Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Fri, 24 Apr 2020 01:37:54 -0700 Subject: [PATCH 15/29] Fix all the things Signed-off-by: Jess Yuen --- internal/app/cache/cache.go | 1 + internal/app/orchestrator/downstream.go | 32 ++++---- internal/app/orchestrator/orchestrator.go | 75 ++++++++++--------- .../app/orchestrator/orchestrator_test.go | 62 +++++++-------- internal/app/orchestrator/upstream.go | 38 +++++++--- 5 files changed, 120 insertions(+), 88 deletions(-) diff --git a/internal/app/cache/cache.go b/internal/app/cache/cache.go index 8e869f61..e70d6883 100644 --- a/internal/app/cache/cache.go +++ b/internal/app/cache/cache.go @@ -126,6 +126,7 @@ func (c *cache) SetResponse(key string, resp v2.DiscoveryResponse) (map[*v2.Disc resource := Resource{ Resp: response, ExpirationTime: c.getExpirationTime(time.Now()), + Requests: make(map[*v2.DiscoveryRequest]bool), } c.cache.Add(key, resource) return nil, nil diff --git a/internal/app/orchestrator/downstream.go b/internal/app/orchestrator/downstream.go index d3a10a11..c983026a 100644 --- a/internal/app/orchestrator/downstream.go +++ b/internal/app/orchestrator/downstream.go @@ -9,8 +9,14 @@ import ( // downstreamResponseMap is a map of downstream xDS client requests to response // channels. type downstreamResponseMap struct { - mu sync.RWMutex - responseChannel map[*gcp.Request]chan gcp.Response + mu sync.RWMutex + responseChannels map[*gcp.Request]chan gcp.Response +} + +func newDownstreamResponseMap() downstreamResponseMap { + return downstreamResponseMap{ + responseChannels: make(map[*gcp.Request]chan gcp.Response), + } } // createChannel initializes a new channel for a request if it doesn't already @@ -18,17 +24,17 @@ type downstreamResponseMap struct { func (d *downstreamResponseMap) createChannel(req *gcp.Request) chan gcp.Response { d.mu.Lock() defer d.mu.Unlock() - if _, ok := d.responseChannel[req]; !ok { - d.responseChannel[req] = make(chan gcp.Response) + if _, ok := d.responseChannels[req]; !ok { + d.responseChannels[req] = make(chan gcp.Response) } - return d.responseChannel[req] + return d.responseChannels[req] } // get retrieves the channel where responses are set for the specified request. 
func (d *downstreamResponseMap) get(req *gcp.Request) (chan gcp.Response, bool) { d.mu.RLock() defer d.mu.RUnlock() - channel, ok := d.responseChannel[req] + channel, ok := d.responseChannels[req] return channel, ok } @@ -36,9 +42,9 @@ func (d *downstreamResponseMap) get(req *gcp.Request) (chan gcp.Response, bool) func (d *downstreamResponseMap) delete(req *gcp.Request) chan gcp.Response { d.mu.Lock() defer d.mu.Unlock() - if channel, ok := d.responseChannel[req]; ok { + if channel, ok := d.responseChannels[req]; ok { close(channel) - delete(d.responseChannel, req) + delete(d.responseChannels, req) return channel } return nil @@ -46,13 +52,13 @@ func (d *downstreamResponseMap) delete(req *gcp.Request) chan gcp.Response { // deleteAll closes all the response channels for the provided requests and // removes them from the map. -func (d *downstreamResponseMap) deleteAll(req []*gcp.Request) { +func (d *downstreamResponseMap) deleteAll(watches map[*gcp.Request]bool) { d.mu.Lock() defer d.mu.Unlock() - for _, watch := range req { - if d.responseChannel[watch] != nil { - close(d.responseChannel[watch]) - delete(d.responseChannel, watch) + for watch := range watches { + if d.responseChannels[watch] != nil { + close(d.responseChannels[watch]) + delete(d.responseChannels, watch) } } } diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index a2612b6b..dddbc667 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -53,12 +53,9 @@ const ( type Orchestrator interface { gcp.Cache - // shutdown takes an aggregated key and shuts down the go routine watching - // for upstream responses. - // - // This is currently used by tests to clean up channels, but can also be - // used by the main shutdown handler. - shutdown(string) + // This is called by the main shutdown handler and tests to clean up + // open channels. + shutdown(ctx context.Context) } type orchestrator struct { @@ -75,32 +72,34 @@ type orchestrator struct { // New instantiates the mapper, cache, upstream client components necessary for // the orchestrator to operate and returns an instance of the instantiated // orchestrator. -func New(ctx context.Context, +func New( + ctx context.Context, l log.Logger, mapper mapper.Mapper, upstreamClient upstream.Client, - cacheConfig *bootstrapv1.Cache) Orchestrator { + cacheConfig *bootstrapv1.Cache, +) Orchestrator { orchestrator := &orchestrator{ - logger: l.Named(component), - mapper: mapper, - upstreamClient: upstreamClient, - downstreamResponseMap: downstreamResponseMap{ - responseChannel: make(map[*gcp.Request]chan gcp.Response), - }, - upstreamResponseMap: upstreamResponseMap{ - responseChannel: make(map[string]upstreamResponseChannel), - }, + logger: l.Named(component), + mapper: mapper, + upstreamClient: upstreamClient, + downstreamResponseMap: newDownstreamResponseMap(), + upstreamResponseMap: newUpstreamResponseMap(), } // Initialize cache. 
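+	// Editorial aside (sketch, not part of the original patch): cacheConfig.Ttl
+	// is a protobuf Duration carrying both Seconds and Nanos; the general way
+	// to turn it into a time.Duration is the ptypes helper, assuming
+	// "github.com/golang/protobuf/ptypes" is imported:
+	//
+	//   ttl, err := ptypes.Duration(cacheConfig.Ttl)
+	//   if err != nil { /* invalid duration in the bootstrap config */ }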
- cache, err := cache.NewCache(int(cacheConfig.MaxEntries), + cache, err := cache.NewCache( + int(cacheConfig.MaxEntries), orchestrator.onCacheEvicted, - time.Duration(cacheConfig.Ttl.Nanos)*time.Nanosecond) + time.Duration(cacheConfig.Ttl.Nanos)*time.Nanosecond, + ) if err != nil { orchestrator.logger.With("error", err).Panic(ctx, "failed to initialize cache") } orchestrator.cache = cache + go orchestrator.shutdown(ctx) + return orchestrator } @@ -164,17 +163,23 @@ func (o *orchestrator) CreateWatch(req gcp.Request) (chan gcp.Response, func()) // that maps to the same aggregated key doesn't result in two upstream // streams. o.upstreamResponseMap.mu.Lock() - if _, ok := o.upstreamResponseMap.responseChannel[aggregatedKey]; !ok { - upstreamResponseChan, _, _ := o.upstreamClient.OpenStream(req) - respChannel := o.upstreamResponseMap.add(aggregatedKey, upstreamResponseChan) - // Spin up a go routine to watch for upstream responses. - // One routine is opened per aggregate key. - go o.watchUpstream(ctx, aggregatedKey, respChannel.response, respChannel.done) + if _, ok := o.upstreamResponseMap.responseChannels[aggregatedKey]; !ok { + upstreamResponseChan, shutdown, err := o.upstreamClient.OpenStream(req) + if err != nil { + // TODO implement retry/back-off logic on error scenario. + // https://github.com/envoyproxy/xds-relay/issues/68 + o.logger.With("err", err).With("key", aggregatedKey).Error(ctx, "Failed to open stream to origin server") + } else { + respChannel := o.upstreamResponseMap.add(aggregatedKey, upstreamResponseChan) + // Spin up a go routine to watch for upstream responses. + // One routine is opened per aggregate key. + go o.watchUpstream(ctx, aggregatedKey, respChannel.response, respChannel.done, shutdown) + } } o.upstreamResponseMap.mu.Unlock() - return responseChannel, o.onCancelWatch(&req) + return responseChannel, o.onCancelWatch(aggregatedKey, &req) } // Fetch implements the polling method of the config cache using a non-empty request. @@ -189,6 +194,7 @@ func (o *orchestrator) watchUpstream( aggregatedKey string, responseChannel <-chan *discovery.DiscoveryResponse, done <-chan bool, + shutdown func(), ) { for { select { @@ -236,6 +242,7 @@ func (o *orchestrator) watchUpstream( } case <-done: // Exit when signaled that the stream has closed. + shutdown() return } } @@ -243,8 +250,8 @@ func (o *orchestrator) watchUpstream( // fanout pushes the response to the response channels of all open downstream // watches in parallel. -func (o *orchestrator) fanout(resp *cache.Response, watchers []*gcp.Request) { - for _, watch := range watchers { +func (o *orchestrator) fanout(resp *cache.Response, watchers map[*gcp.Request]bool) { + for watch := range watchers { go func(watch *gcp.Request) { if channel, ok := o.downstreamResponseMap.get(watch); ok { channel <- convertToGcpResponse(resp, *watch) @@ -262,17 +269,17 @@ func (o *orchestrator) onCacheEvicted(key string, resource cache.Resource) { } // onCancelWatch cleans up the cached watch when called. -func (o *orchestrator) onCancelWatch(req *gcp.Request) func() { +func (o *orchestrator) onCancelWatch(aggregatedKey string, req *gcp.Request) func() { return func() { o.downstreamResponseMap.delete(req) - // TODO (https://github.com/envoyproxy/xds-relay/issues/57). Clean up - // watch from cache. Cache needs to expose a function to do so. + o.cache.DeleteRequest(aggregatedKey, req) } } -// shutdown closes the upstream connection for the specified aggregated key. 
-func (o *orchestrator) shutdown(aggregatedKey string) { - o.upstreamResponseMap.delete(aggregatedKey) +// shutdown closes all upstream connections when ctx.Done is called. +func (o *orchestrator) shutdown(ctx context.Context) { + <-ctx.Done() + o.upstreamResponseMap.deleteAll() } // convertToGcpResponse constructs the go-control-plane response from the diff --git a/internal/app/orchestrator/orchestrator_test.go b/internal/app/orchestrator/orchestrator_test.go index 59f99860..8509e8f4 100644 --- a/internal/app/orchestrator/orchestrator_test.go +++ b/internal/app/orchestrator/orchestrator_test.go @@ -18,7 +18,7 @@ import ( "github.com/envoyproxy/xds-relay/internal/app/upstream" upstream_mock "github.com/envoyproxy/xds-relay/internal/app/upstream/mock" "github.com/envoyproxy/xds-relay/internal/pkg/log" - yamlproto "github.com/envoyproxy/xds-relay/internal/pkg/util" + "github.com/envoyproxy/xds-relay/internal/pkg/util/yamlproto" aggregationv1 "github.com/envoyproxy/xds-relay/pkg/api/aggregation/v1" bootstrapv1 "github.com/envoyproxy/xds-relay/pkg/api/bootstrap/v1" ) @@ -59,10 +59,10 @@ func newMockOrchestrator(t *testing.T, mapper mapper.Mapper, upstreamClient upst mapper: mapper, upstreamClient: upstreamClient, downstreamResponseMap: downstreamResponseMap{ - responseChannel: make(map[*gcp.Request]chan gcp.Response), + responseChannels: make(map[*gcp.Request]chan gcp.Response), }, upstreamResponseMap: upstreamResponseMap{ - responseChannel: make(map[string]upstreamResponseChannel), + responseChannels: make(map[string]upstreamResponseChannel), }, } @@ -101,7 +101,8 @@ func TestNew(t *testing.T) { upstream.CallOptions{}, nil, nil, - func(m interface{}) error { return nil }) + func(m interface{}) error { return nil }, + ) config := aggregationv1.KeyerConfiguration{ Fragments: []*aggregationv1.KeyerConfiguration_Fragment{ @@ -141,8 +142,8 @@ func TestGoldenPath(t *testing.T) { respChannel, cancelWatch := orchestrator.CreateWatch(req) assert.NotNil(t, respChannel) - assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannel)) - assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannel)) + assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannels)) + assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannels)) resp := v2.DiscoveryResponse{ VersionInfo: "1", @@ -158,13 +159,13 @@ func TestGoldenPath(t *testing.T) { gotResponse := <-respChannel assertEqualResources(t, gotResponse, resp, req) - aggregatedKey, err := mapper.GetKey(req) - assert.NoError(t, err) - orchestrator.shutdown(aggregatedKey) - assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannel)) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + orchestrator.shutdown(ctx) + assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannels)) cancelWatch() - assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannel)) + assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannels)) } func TestCachedResponse(t *testing.T) { @@ -203,8 +204,8 @@ func TestCachedResponse(t *testing.T) { respChannel, cancelWatch := orchestrator.CreateWatch(req) assert.NotNil(t, respChannel) - assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannel)) - assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannel)) + assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannels)) + assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannels)) gotResponse := <-respChannel 
assertEqualResources(t, gotResponse, mockResponse, req) @@ -223,7 +224,7 @@ func TestCachedResponse(t *testing.T) { upstreamResponseChannel <- &resp gotResponse = <-respChannel assertEqualResources(t, gotResponse, resp, req) - assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannel)) + assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannels)) // Test scenario with same request and response version. // We expect a watch to be open but no response. @@ -234,17 +235,20 @@ func TestCachedResponse(t *testing.T) { respChannel2, cancelWatch2 := orchestrator.CreateWatch(req2) assert.NotNil(t, respChannel2) - assert.Equal(t, 2, len(orchestrator.downstreamResponseMap.responseChannel)) - assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannel)) + assert.Equal(t, 2, len(orchestrator.downstreamResponseMap.responseChannels)) + assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannels)) // If we pass this point, it's safe to assume the respChannel2 is empty, // otherwise the test would block and not complete. - orchestrator.shutdown(aggregatedKey) - assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannel)) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + orchestrator.shutdown(ctx) + assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannels)) + cancelWatch() - assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannel)) + assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannels)) cancelWatch2() - assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannel)) + assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannels)) } func TestMultipleWatchesAndUpstreams(t *testing.T) { @@ -306,24 +310,20 @@ func TestMultipleWatchesAndUpstreams(t *testing.T) { gotResponseFromChannel2 := <-respChannel2 gotResponseFromChannel3 := <-respChannel3 - aggregatedKeyLDS, err := mapper.GetKey(req1) - assert.NoError(t, err) - aggregatedKeyCDS, err := mapper.GetKey(req3) - assert.NoError(t, err) - - assert.Equal(t, 3, len(orchestrator.downstreamResponseMap.responseChannel)) - assert.Equal(t, 2, len(orchestrator.upstreamResponseMap.responseChannel)) + assert.Equal(t, 3, len(orchestrator.downstreamResponseMap.responseChannels)) + assert.Equal(t, 2, len(orchestrator.upstreamResponseMap.responseChannels)) assertEqualResources(t, gotResponseFromChannel1, upstreamResponseLDS, req1) assertEqualResources(t, gotResponseFromChannel2, upstreamResponseLDS, req2) assertEqualResources(t, gotResponseFromChannel3, upstreamResponseCDS, req3) - orchestrator.shutdown(aggregatedKeyLDS) - orchestrator.shutdown(aggregatedKeyCDS) - assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannel)) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + orchestrator.shutdown(ctx) + assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannels)) cancelWatch1() cancelWatch2() cancelWatch3() - assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannel)) + assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannels)) } diff --git a/internal/app/orchestrator/upstream.go b/internal/app/orchestrator/upstream.go index eb4590ef..6def6ed3 100644 --- a/internal/app/orchestrator/upstream.go +++ b/internal/app/orchestrator/upstream.go @@ -9,8 +9,8 @@ import ( // upstream is the map of aggregate key to the receive-only upstream // origin server response channels. 
type upstreamResponseMap struct { - mu sync.RWMutex - responseChannel map[string]upstreamResponseChannel + mu sync.RWMutex + responseChannels map[string]upstreamResponseChannel } type upstreamResponseChannel struct { @@ -18,29 +18,47 @@ type upstreamResponseChannel struct { done chan bool } +func newUpstreamResponseMap() upstreamResponseMap { + return upstreamResponseMap{ + responseChannels: make(map[string]upstreamResponseChannel), + } +} + // add sets the response channel for the provided aggregated key. It also // initializes a done channel to be used during cleanup. // // Expects orchestrator to manage the lock since this is called simultaneously // with stream open. -func (u *upstreamResponseMap) add(aggregatedKey string, - responseChannel <-chan *discovery.DiscoveryResponse) upstreamResponseChannel { +func (u *upstreamResponseMap) add( + aggregatedKey string, + responseChannel <-chan *discovery.DiscoveryResponse, +) upstreamResponseChannel { channel := upstreamResponseChannel{ response: responseChannel, done: make(chan bool, 1), } - u.responseChannel[aggregatedKey] = channel + u.responseChannels[aggregatedKey] = channel return channel } -// delete closes the done channel and removes both channels from the map. -// TODO We can't close the upstream receiver only channel, need to signify to -// the upstream client to do so. +// delete signifies closure of the upstream stream and removes the map entry +// for the specified aggregated key. func (u *upstreamResponseMap) delete(aggregatedKey string) { u.mu.Lock() defer u.mu.Unlock() - if channel, ok := u.responseChannel[aggregatedKey]; ok { + if channel, ok := u.responseChannels[aggregatedKey]; ok { close(channel.done) - delete(u.responseChannel, aggregatedKey) + delete(u.responseChannels, aggregatedKey) + } +} + +// deleteAll signifies closure of all upstream streams and removes the map +// entries. 
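+//
+// A sketch of the intended caller, taken from orchestrator.go in this same
+// patch: shutdown blocks until the server context is done and then tears down
+// every upstream watch.
+//
+//   func (o *orchestrator) shutdown(ctx context.Context) {
+//       <-ctx.Done()
+//       o.upstreamResponseMap.deleteAll()
+//   }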
+func (u *upstreamResponseMap) deleteAll() { + u.mu.Lock() + defer u.mu.Unlock() + for aggregatedKey, responseChannels := range u.responseChannels { + close(responseChannels.done) + delete(u.responseChannels, aggregatedKey) } } From f302e8ca21edea178dce8492309cb24d57ca4738 Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Fri, 24 Apr 2020 01:56:55 -0700 Subject: [PATCH 16/29] lint Signed-off-by: Jess Yuen --- internal/app/orchestrator/orchestrator.go | 4 +++- internal/app/orchestrator/orchestrator_test.go | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index dddbc667..377cdfc3 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -272,7 +272,9 @@ func (o *orchestrator) onCacheEvicted(key string, resource cache.Resource) { func (o *orchestrator) onCancelWatch(aggregatedKey string, req *gcp.Request) func() { return func() { o.downstreamResponseMap.delete(req) - o.cache.DeleteRequest(aggregatedKey, req) + if err := o.cache.DeleteRequest(aggregatedKey, req); err != nil { + o.logger.With("key", aggregatedKey).With("err", err).Warn(context.Background(), "Failed to delete from cache") + } } } diff --git a/internal/app/orchestrator/orchestrator_test.go b/internal/app/orchestrator/orchestrator_test.go index 8509e8f4..3a0ac407 100644 --- a/internal/app/orchestrator/orchestrator_test.go +++ b/internal/app/orchestrator/orchestrator_test.go @@ -39,7 +39,9 @@ type mockMultiStreamUpstreamClient struct { mapper mapper.Mapper } -func (m mockMultiStreamUpstreamClient) OpenStream(req v2.DiscoveryRequest) (<-chan *v2.DiscoveryResponse, func(), error) { +func (m mockMultiStreamUpstreamClient) OpenStream( + req v2.DiscoveryRequest, +) (<-chan *v2.DiscoveryResponse, func(), error) { aggregatedKey, err := m.mapper.GetKey(req) assert.NoError(m.t, err) From 60f7a221b785da5b8414bb332f4ec8840d0f6c25 Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Fri, 24 Apr 2020 11:32:39 -0700 Subject: [PATCH 17/29] Fix cache test --- internal/app/cache/cache_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/app/cache/cache_test.go b/internal/app/cache/cache_test.go index 65395a09..3659937b 100644 --- a/internal/app/cache/cache_test.go +++ b/internal/app/cache/cache_test.go @@ -74,7 +74,8 @@ var testResponse = Response{ } var testResource = Resource{ - Resp: &testResponse, + Resp: &testResponse, + Requests: make(map[*v2.DiscoveryRequest]bool), } func TestAddRequestAndFetch(t *testing.T) { From 97c91e7541c9b8c4a17dd54139b83eacc730e807 Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Fri, 24 Apr 2020 17:33:09 -0700 Subject: [PATCH 18/29] Add TODOs, feedback Signed-off-by: Jess Yuen --- internal/app/orchestrator/orchestrator.go | 42 ++++++++++++----------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index 377cdfc3..c1d8a6ed 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -200,18 +200,23 @@ func (o *orchestrator) watchUpstream( select { case x, more := <-responseChannel: if !more { - // A problem occurred fetching the response upstream, log and - // return the most recent cached response, so that the - // downstream will reissue the discovery request. + // A problem occurred fetching the response upstream, log retry. + // TODO implement retry/back-off logic on error scenario. 
+ // https://github.com/envoyproxy/xds-relay/issues/68 o.logger.With("key", aggregatedKey).Error(ctx, "upstream error") - } else { - // Cache the response. - _, err := o.cache.SetResponse(aggregatedKey, *x) - if err != nil { - // If we fail to cache the new response, log and return the old one. - o.logger.With("err", err).With("key", aggregatedKey). - Error(ctx, "Failed to cache the response") - } + return + } + // Cache the response. + _, err := o.cache.SetResponse(aggregatedKey, *x) + if err != nil { + // TODO if set fails, we may need to retry upstream as well. + // Currently the fallback is to rely on a future response, but + // that probably isn't ideal. + // https://github.com/envoyproxy/xds-relay/issues/70 + // + // If we fail to cache the new response, log and return the old one. + o.logger.With("err", err).With("key", aggregatedKey). + Error(ctx, "Failed to cache the response") } // Get downstream watches and fan out. @@ -226,15 +231,10 @@ func (o *orchestrator) watchUpstream( } else { if cached == nil || cached.Resp == nil { // If cache is empty, there is nothing to fan out. - if !more { - // Warn. Benefit of the doubt that this is the first request. - o.logger.With("key", aggregatedKey). - Warn(ctx, "attempted to fan out with no cached response") - } else { - // Error. Sanity check. Shouldn't ever reach this. - o.logger.With("key", aggregatedKey). - Error(ctx, "attempted to fan out with no cached response") - } + // Error. Sanity check. Shouldn't ever reach this since we + // just set the response, but it's a rare scenario that can + // happen if the cache TTL is set very short. + o.logger.With("key", aggregatedKey).Error(ctx, "attempted to fan out with no cached response") } else { // Goldenpath. o.fanout(cached.Resp, cached.Requests) @@ -264,6 +264,8 @@ func (o *orchestrator) fanout(resp *cache.Response, watchers map[*gcp.Request]bo // other reasons. When this happens, we need to clean up open streams. // We shut down both the downstream watches and the upstream stream. func (o *orchestrator) onCacheEvicted(key string, resource cache.Resource) { + // TODO Potential for improvements here to handle the thundering herd + // problem: https://github.com/envoyproxy/xds-relay/issues/71 o.downstreamResponseMap.deleteAll(resource.Requests) o.upstreamResponseMap.delete(key) } From aeef7ca998b42f1d2fe9237d17186003a397f91b Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Tue, 28 Apr 2020 17:38:24 -0700 Subject: [PATCH 19/29] Use sync.Map To support per-key locking. Signed-off-by: Jess Yuen --- internal/app/orchestrator/orchestrator.go | 25 ++++----- .../app/orchestrator/orchestrator_test.go | 31 +++++------ internal/app/orchestrator/upstream.go | 53 +++++++++++-------- internal/pkg/util/testutils/syncmap.go | 23 ++++++++ internal/pkg/util/testutils/syncmap_test.go | 18 +++++++ 5 files changed, 99 insertions(+), 51 deletions(-) create mode 100644 internal/pkg/util/testutils/syncmap.go create mode 100644 internal/pkg/util/testutils/syncmap_test.go diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index c1d8a6ed..f8b7adfe 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -158,26 +158,27 @@ func (o *orchestrator) CreateWatch(req gcp.Request) (chan gcp.Response, func()) // Check if we have a upstream stream open for this aggregated key. If not, // open a stream with the representative request. 
- // - // Locking is necessary here so that a simultaneous downstream request - // that maps to the same aggregated key doesn't result in two upstream - // streams. - o.upstreamResponseMap.mu.Lock() - if _, ok := o.upstreamResponseMap.responseChannels[aggregatedKey]; !ok { + if !o.upstreamResponseMap.exists(aggregatedKey) { upstreamResponseChan, shutdown, err := o.upstreamClient.OpenStream(req) if err != nil { // TODO implement retry/back-off logic on error scenario. // https://github.com/envoyproxy/xds-relay/issues/68 o.logger.With("err", err).With("key", aggregatedKey).Error(ctx, "Failed to open stream to origin server") } else { - respChannel := o.upstreamResponseMap.add(aggregatedKey, upstreamResponseChan) - // Spin up a go routine to watch for upstream responses. - // One routine is opened per aggregate key. - go o.watchUpstream(ctx, aggregatedKey, respChannel.response, respChannel.done, shutdown) + respChannel, upstreamOpenedPreviously := o.upstreamResponseMap.add(aggregatedKey, upstreamResponseChan) + if upstreamOpenedPreviously { + // A stream was opened previously due to a race between + // concurrent downstreams for the same aggregated key, between + // exists and add operations. In this event, simply close the + // slower stream and return the existing one. + shutdown() + } else { + // Spin up a go routine to watch for upstream responses. + // One routine is opened per aggregate key. + go o.watchUpstream(ctx, aggregatedKey, respChannel.response, respChannel.done, shutdown) + } } - } - o.upstreamResponseMap.mu.Unlock() return responseChannel, o.onCancelWatch(aggregatedKey, &req) } diff --git a/internal/app/orchestrator/orchestrator_test.go b/internal/app/orchestrator/orchestrator_test.go index 3a0ac407..63f14215 100644 --- a/internal/app/orchestrator/orchestrator_test.go +++ b/internal/app/orchestrator/orchestrator_test.go @@ -18,6 +18,7 @@ import ( "github.com/envoyproxy/xds-relay/internal/app/upstream" upstream_mock "github.com/envoyproxy/xds-relay/internal/app/upstream/mock" "github.com/envoyproxy/xds-relay/internal/pkg/log" + "github.com/envoyproxy/xds-relay/internal/pkg/util/testutils" "github.com/envoyproxy/xds-relay/internal/pkg/util/yamlproto" aggregationv1 "github.com/envoyproxy/xds-relay/pkg/api/aggregation/v1" bootstrapv1 "github.com/envoyproxy/xds-relay/pkg/api/bootstrap/v1" @@ -57,15 +58,11 @@ func (m mockMultiStreamUpstreamClient) OpenStream( func newMockOrchestrator(t *testing.T, mapper mapper.Mapper, upstreamClient upstream.Client) *orchestrator { orchestrator := &orchestrator{ - logger: log.New("info"), - mapper: mapper, - upstreamClient: upstreamClient, - downstreamResponseMap: downstreamResponseMap{ - responseChannels: make(map[*gcp.Request]chan gcp.Response), - }, - upstreamResponseMap: upstreamResponseMap{ - responseChannels: make(map[string]upstreamResponseChannel), - }, + logger: log.New("info"), + mapper: mapper, + upstreamClient: upstreamClient, + downstreamResponseMap: newDownstreamResponseMap(), + upstreamResponseMap: newUpstreamResponseMap(), } cache, err := cache.NewCache(1000, orchestrator.onCacheEvicted, 10*time.Second) @@ -145,7 +142,7 @@ func TestGoldenPath(t *testing.T) { respChannel, cancelWatch := orchestrator.CreateWatch(req) assert.NotNil(t, respChannel) assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannels)) - assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannels)) + testutils.AssertSyncMapLen(t, 1, orchestrator.upstreamResponseMap.internal) resp := v2.DiscoveryResponse{ VersionInfo: "1", @@ -164,7 
+161,7 @@ func TestGoldenPath(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() orchestrator.shutdown(ctx) - assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannels)) + testutils.AssertSyncMapLen(t, 0, orchestrator.upstreamResponseMap.internal) cancelWatch() assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannels)) @@ -207,7 +204,7 @@ func TestCachedResponse(t *testing.T) { respChannel, cancelWatch := orchestrator.CreateWatch(req) assert.NotNil(t, respChannel) assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannels)) - assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannels)) + testutils.AssertSyncMapLen(t, 1, orchestrator.upstreamResponseMap.internal) gotResponse := <-respChannel assertEqualResources(t, gotResponse, mockResponse, req) @@ -226,7 +223,7 @@ func TestCachedResponse(t *testing.T) { upstreamResponseChannel <- &resp gotResponse = <-respChannel assertEqualResources(t, gotResponse, resp, req) - assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannels)) + testutils.AssertSyncMapLen(t, 1, orchestrator.upstreamResponseMap.internal) // Test scenario with same request and response version. // We expect a watch to be open but no response. @@ -238,14 +235,14 @@ func TestCachedResponse(t *testing.T) { respChannel2, cancelWatch2 := orchestrator.CreateWatch(req2) assert.NotNil(t, respChannel2) assert.Equal(t, 2, len(orchestrator.downstreamResponseMap.responseChannels)) - assert.Equal(t, 1, len(orchestrator.upstreamResponseMap.responseChannels)) + testutils.AssertSyncMapLen(t, 1, orchestrator.upstreamResponseMap.internal) // If we pass this point, it's safe to assume the respChannel2 is empty, // otherwise the test would block and not complete. ctx, cancel := context.WithCancel(context.Background()) cancel() orchestrator.shutdown(ctx) - assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannels)) + testutils.AssertSyncMapLen(t, 0, orchestrator.upstreamResponseMap.internal) cancelWatch() assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannels)) @@ -313,7 +310,7 @@ func TestMultipleWatchesAndUpstreams(t *testing.T) { gotResponseFromChannel3 := <-respChannel3 assert.Equal(t, 3, len(orchestrator.downstreamResponseMap.responseChannels)) - assert.Equal(t, 2, len(orchestrator.upstreamResponseMap.responseChannels)) + testutils.AssertSyncMapLen(t, 2, orchestrator.upstreamResponseMap.internal) assertEqualResources(t, gotResponseFromChannel1, upstreamResponseLDS, req1) assertEqualResources(t, gotResponseFromChannel2, upstreamResponseLDS, req2) @@ -322,7 +319,7 @@ func TestMultipleWatchesAndUpstreams(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() orchestrator.shutdown(ctx) - assert.Equal(t, 0, len(orchestrator.upstreamResponseMap.responseChannels)) + testutils.AssertSyncMapLen(t, 0, orchestrator.upstreamResponseMap.internal) cancelWatch1() cancelWatch2() diff --git a/internal/app/orchestrator/upstream.go b/internal/app/orchestrator/upstream.go index 6def6ed3..390d460e 100644 --- a/internal/app/orchestrator/upstream.go +++ b/internal/app/orchestrator/upstream.go @@ -6,11 +6,17 @@ import ( discovery "github.com/envoyproxy/go-control-plane/envoy/api/v2" ) -// upstream is the map of aggregate key to the receive-only upstream +// upstreamResponseMap is the map of aggregate key to the receive-only upstream // origin server response channels. 
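+//
+// Usage sketch (editorial; mirrors the add method below): concurrent callers
+// race through LoadOrStore and exactly one of them ends up storing the
+// channel, while the others get back the value that won the race.
+//
+//   result, alreadyThere := u.internal.LoadOrStore(aggregatedKey, channel)
+//   if alreadyThere {
+//       // another goroutine stored a channel first; discard ours, reuse result
+//   }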
+// +// sync.Map was chosen due to: +// - support for concurrent locks on a per-key basis +// - stable keys (when a given key is written once but read many times) +// The main drawback is the lack of type support. type upstreamResponseMap struct { - mu sync.RWMutex - responseChannels map[string]upstreamResponseChannel + // This is of type *sync.Map[string]upstreamResponseChannel, where the key + // is the xds-relay aggregated key. + internal *sync.Map } type upstreamResponseChannel struct { @@ -20,45 +26,48 @@ type upstreamResponseChannel struct { func newUpstreamResponseMap() upstreamResponseMap { return upstreamResponseMap{ - responseChannels: make(map[string]upstreamResponseChannel), + internal: &sync.Map{}, } } +// exists returns true if the aggregatedKey exists. +func (u *upstreamResponseMap) exists(aggregatedKey string) bool { + _, ok := u.internal.Load(aggregatedKey) + return ok +} + // add sets the response channel for the provided aggregated key. It also // initializes a done channel to be used during cleanup. -// -// Expects orchestrator to manage the lock since this is called simultaneously -// with stream open. func (u *upstreamResponseMap) add( aggregatedKey string, responseChannel <-chan *discovery.DiscoveryResponse, -) upstreamResponseChannel { +) (upstreamResponseChannel, bool) { channel := upstreamResponseChannel{ response: responseChannel, done: make(chan bool, 1), } - u.responseChannels[aggregatedKey] = channel - return channel + result, exists := u.internal.LoadOrStore(aggregatedKey, channel) + return result.(upstreamResponseChannel), exists } // delete signifies closure of the upstream stream and removes the map entry // for the specified aggregated key. func (u *upstreamResponseMap) delete(aggregatedKey string) { - u.mu.Lock() - defer u.mu.Unlock() - if channel, ok := u.responseChannels[aggregatedKey]; ok { - close(channel.done) - delete(u.responseChannels, aggregatedKey) + if channel, ok := u.internal.Load(aggregatedKey); ok { + close(channel.(upstreamResponseChannel).done) + // The implementation of sync.Map will already check for key existence + // prior to issuing the delete, so we don't need worry about deleting + // a non-existence key due to concurrent race conditions. + u.internal.Delete(aggregatedKey) } } // deleteAll signifies closure of all upstream streams and removes the map -// entries. +// entries. This is called during server shutdown. func (u *upstreamResponseMap) deleteAll() { - u.mu.Lock() - defer u.mu.Unlock() - for aggregatedKey, responseChannels := range u.responseChannels { - close(responseChannels.done) - delete(u.responseChannels, aggregatedKey) - } + u.internal.Range(func(aggregatedKey, channel interface{}) bool { + close(channel.(upstreamResponseChannel).done) + u.internal.Delete(aggregatedKey) + return true + }) } diff --git a/internal/pkg/util/testutils/syncmap.go b/internal/pkg/util/testutils/syncmap.go new file mode 100644 index 00000000..58cf2266 --- /dev/null +++ b/internal/pkg/util/testutils/syncmap.go @@ -0,0 +1,23 @@ +package testutils + +import ( + "sync" + "testing" + + "github.com/stretchr/testify/assert" +) + +// AssertSyncMapLen is a custom solution for checking the length of sync.Map. 
+// sync.Map does not currently offer support for length checks: +// https://github.com/golang/go/issues/20680 +func AssertSyncMapLen(t *testing.T, len int, sm *sync.Map) { + count := 0 + var mu sync.Mutex + sm.Range(func(key, val interface{}) bool { + mu.Lock() + count++ + mu.Unlock() + return true + }) + assert.Equal(t, count, len) +} diff --git a/internal/pkg/util/testutils/syncmap_test.go b/internal/pkg/util/testutils/syncmap_test.go new file mode 100644 index 00000000..391d8d1a --- /dev/null +++ b/internal/pkg/util/testutils/syncmap_test.go @@ -0,0 +1,18 @@ +package testutils + +import ( + "sync" + "testing" +) + +func TestAssertSyncMapLen(t *testing.T) { + var sm sync.Map + + sm.Store("foo", 1) + sm.Store("foo", 2) + sm.Store("bar", 1) + AssertSyncMapLen(t, 2, &sm) + + sm.Delete("foo") + AssertSyncMapLen(t, 1, &sm) +} From 531596787eb86a9a35a200214516bf535e1f2b09 Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Wed, 29 Apr 2020 17:09:14 -0700 Subject: [PATCH 20/29] s/shutdown/shutdownUpstream Signed-off-by: Jess Yuen --- internal/app/orchestrator/orchestrator.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index f8b7adfe..1d4db959 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -195,7 +195,7 @@ func (o *orchestrator) watchUpstream( aggregatedKey string, responseChannel <-chan *discovery.DiscoveryResponse, done <-chan bool, - shutdown func(), + shutdownUpstream func(), ) { for { select { @@ -243,7 +243,7 @@ func (o *orchestrator) watchUpstream( } case <-done: // Exit when signaled that the stream has closed. - shutdown() + shutdownUpstream() return } } From d3d1e3bf7831c7415bb1bdaeba5236cbbcaddaeb Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Wed, 29 Apr 2020 21:15:35 -0700 Subject: [PATCH 21/29] add timeout Signed-off-by: Jess Yuen --- internal/app/orchestrator/orchestrator.go | 16 +++++++++++++--- internal/app/orchestrator/upstream.go | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index 1d4db959..b841ccb9 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -28,6 +28,10 @@ const ( // unaggregatedPrefix is the prefix used to label discovery requests that // could not be successfully mapped to an aggregation rule. unaggregatedPrefix = "unaggregated_" + + // fanoutTimeout is the seconds that a go routine remains blocked waiting + // for the downstream response channel to be populated during fanout. + fanoutTimeout = 10 ) // Orchestrator has the following responsibilities: @@ -238,7 +242,7 @@ func (o *orchestrator) watchUpstream( o.logger.With("key", aggregatedKey).Error(ctx, "attempted to fan out with no cached response") } else { // Goldenpath. - o.fanout(cached.Resp, cached.Requests) + o.fanout(cached.Resp, cached.Requests, aggregatedKey) } } case <-done: @@ -251,11 +255,17 @@ func (o *orchestrator) watchUpstream( // fanout pushes the response to the response channels of all open downstream // watches in parallel. 
-func (o *orchestrator) fanout(resp *cache.Response, watchers map[*gcp.Request]bool) { +func (o *orchestrator) fanout(resp *cache.Response, watchers map[*gcp.Request]bool, aggregatedKey string) { for watch := range watchers { go func(watch *gcp.Request) { if channel, ok := o.downstreamResponseMap.get(watch); ok { - channel <- convertToGcpResponse(resp, *watch) + select { + case channel <- convertToGcpResponse(resp, *watch): + return + case <-time.After(fanoutTimeout * time.Second): + o.logger.With("key", aggregatedKey).Error( + context.Background(), "timeout exceeded: channel blocked during fanout") + } } }(watch) } diff --git a/internal/app/orchestrator/upstream.go b/internal/app/orchestrator/upstream.go index 390d460e..94420c1c 100644 --- a/internal/app/orchestrator/upstream.go +++ b/internal/app/orchestrator/upstream.go @@ -57,7 +57,7 @@ func (u *upstreamResponseMap) delete(aggregatedKey string) { close(channel.(upstreamResponseChannel).done) // The implementation of sync.Map will already check for key existence // prior to issuing the delete, so we don't need worry about deleting - // a non-existence key due to concurrent race conditions. + // a non-existent key due to concurrent race conditions. u.internal.Delete(aggregatedKey) } } From 6bcaf468b63718277158e4a49bd8a9d779ec2991 Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Thu, 30 Apr 2020 12:28:29 -0700 Subject: [PATCH 22/29] add wg Signed-off-by: Jess Yuen --- internal/app/orchestrator/orchestrator.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index b841ccb9..f0f4e31d 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -9,6 +9,7 @@ package orchestrator import ( "context" "fmt" + "sync" "time" bootstrapv1 "github.com/envoyproxy/xds-relay/pkg/api/bootstrap/v1" @@ -256,12 +257,15 @@ func (o *orchestrator) watchUpstream( // fanout pushes the response to the response channels of all open downstream // watches in parallel. func (o *orchestrator) fanout(resp *cache.Response, watchers map[*gcp.Request]bool, aggregatedKey string) { + var wg sync.WaitGroup for watch := range watchers { + wg.Add(1) go func(watch *gcp.Request) { + defer wg.Done() if channel, ok := o.downstreamResponseMap.get(watch); ok { select { case channel <- convertToGcpResponse(resp, *watch): - return + break case <-time.After(fanoutTimeout * time.Second): o.logger.With("key", aggregatedKey).Error( context.Background(), "timeout exceeded: channel blocked during fanout") @@ -269,6 +273,8 @@ func (o *orchestrator) fanout(resp *cache.Response, watchers map[*gcp.Request]bo } }(watch) } + // Wait for all fanouts to complete. 
+ wg.Wait() } // onCacheEvicted is called when the cache evicts a response due to TTL or From 30246e7152220ec69acc72957f47ee23e0c77a5d Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Fri, 1 May 2020 14:08:43 -0700 Subject: [PATCH 23/29] Reduce timeout, don't close downstream channels Signed-off-by: Jess Yuen --- internal/app/orchestrator/downstream.go | 15 ++++++++++----- internal/app/orchestrator/orchestrator.go | 6 +++--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/internal/app/orchestrator/downstream.go b/internal/app/orchestrator/downstream.go index c983026a..05cb5253 100644 --- a/internal/app/orchestrator/downstream.go +++ b/internal/app/orchestrator/downstream.go @@ -38,26 +38,31 @@ func (d *downstreamResponseMap) get(req *gcp.Request) (chan gcp.Response, bool) return channel, ok } -// delete closes the response channel and removes it from the map. +// delete removes the response channel and request entry from the map. +// Note: We don't close the response channel prior to deletion because there +// can be separate go routines that are still attempting to write to the +// channel. We rely on garbage collection to clean up and close outstanding +// response channels once the go routines finish writing to them. func (d *downstreamResponseMap) delete(req *gcp.Request) chan gcp.Response { d.mu.Lock() defer d.mu.Unlock() if channel, ok := d.responseChannels[req]; ok { - close(channel) delete(d.responseChannels, req) return channel } return nil } -// deleteAll closes all the response channels for the provided requests and -// removes them from the map. +// deleteAll removes all response channels and request entries from the map. +// Note: We don't close the response channel prior to deletion because there +// can be separate go routines that are still attempting to write to the +// channel. We rely on garbage collection to clean up and close outstanding +// response channels once the go routines finish writing to them. func (d *downstreamResponseMap) deleteAll(watches map[*gcp.Request]bool) { d.mu.Lock() defer d.mu.Unlock() for watch := range watches { if d.responseChannels[watch] != nil { - close(d.responseChannels[watch]) delete(d.responseChannels, watch) } } diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index f0f4e31d..11bd7cbe 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -32,7 +32,7 @@ const ( // fanoutTimeout is the seconds that a go routine remains blocked waiting // for the downstream response channel to be populated during fanout. - fanoutTimeout = 10 + fanoutTimeout = 1 ) // Orchestrator has the following responsibilities: @@ -267,8 +267,8 @@ func (o *orchestrator) fanout(resp *cache.Response, watchers map[*gcp.Request]bo case channel <- convertToGcpResponse(resp, *watch): break case <-time.After(fanoutTimeout * time.Second): - o.logger.With("key", aggregatedKey).Error( - context.Background(), "timeout exceeded: channel blocked during fanout") + o.logger.With("key", aggregatedKey).With("node ID", watch.GetNode().GetId()). 
+ Error(context.Background(), "timeout exceeded: channel blocked during fanout") } } }(watch) From ecb0a28cf87acf85462792a949d814ec4467d4b5 Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Mon, 4 May 2020 13:38:18 -0700 Subject: [PATCH 24/29] fanout with default, buffered downstream chans Signed-off-by: Jess Yuen --- internal/app/orchestrator/downstream.go | 2 +- internal/app/orchestrator/orchestrator.go | 4 ++-- internal/app/orchestrator/orchestrator_test.go | 10 ++++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/internal/app/orchestrator/downstream.go b/internal/app/orchestrator/downstream.go index 05cb5253..bd5d60d9 100644 --- a/internal/app/orchestrator/downstream.go +++ b/internal/app/orchestrator/downstream.go @@ -25,7 +25,7 @@ func (d *downstreamResponseMap) createChannel(req *gcp.Request) chan gcp.Respons d.mu.Lock() defer d.mu.Unlock() if _, ok := d.responseChannels[req]; !ok { - d.responseChannels[req] = make(chan gcp.Response) + d.responseChannels[req] = make(chan gcp.Response, 1) } return d.responseChannels[req] } diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index 11bd7cbe..ca9956c2 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -266,9 +266,9 @@ func (o *orchestrator) fanout(resp *cache.Response, watchers map[*gcp.Request]bo select { case channel <- convertToGcpResponse(resp, *watch): break - case <-time.After(fanoutTimeout * time.Second): + default: o.logger.With("key", aggregatedKey).With("node ID", watch.GetNode().GetId()). - Error(context.Background(), "timeout exceeded: channel blocked during fanout") + Error(context.Background(), "channel blocked during fanout") } } }(watch) diff --git a/internal/app/orchestrator/orchestrator_test.go b/internal/app/orchestrator/orchestrator_test.go index 63f14215..04f85bdd 100644 --- a/internal/app/orchestrator/orchestrator_test.go +++ b/internal/app/orchestrator/orchestrator_test.go @@ -7,6 +7,7 @@ import ( "time" v2 "github.com/envoyproxy/go-control-plane/envoy/api/v2" + v2_core "github.com/envoyproxy/go-control-plane/envoy/api/v2/core" gcp "github.com/envoyproxy/go-control-plane/pkg/cache/v2" "github.com/golang/protobuf/ptypes/any" "github.com/golang/protobuf/ptypes/duration" @@ -268,12 +269,21 @@ func TestMultipleWatchesAndUpstreams(t *testing.T) { req1 := gcp.Request{ TypeUrl: "type.googleapis.com/envoy.api.v2.Listener", + Node: &v2_core.Node{ + Id: "req1", + }, } req2 := gcp.Request{ TypeUrl: "type.googleapis.com/envoy.api.v2.Listener", + Node: &v2_core.Node{ + Id: "req2", + }, } req3 := gcp.Request{ TypeUrl: "type.googleapis.com/envoy.api.v2.Cluster", + Node: &v2_core.Node{ + Id: "req3", + }, } respChannel1, cancelWatch1 := orchestrator.CreateWatch(req1) From 144d1560dccd110617001f30afea2b6b6f2ed04e Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Mon, 4 May 2020 13:40:25 -0700 Subject: [PATCH 25/29] Remove timeout Signed-off-by: Jess Yuen --- internal/app/orchestrator/orchestrator.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index ca9956c2..43765076 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -29,10 +29,6 @@ const ( // unaggregatedPrefix is the prefix used to label discovery requests that // could not be successfully mapped to an aggregation rule. 
unaggregatedPrefix = "unaggregated_" - - // fanoutTimeout is the seconds that a go routine remains blocked waiting - // for the downstream response channel to be populated during fanout. - fanoutTimeout = 1 ) // Orchestrator has the following responsibilities: From a36fc60718cee340bae068f977367efcad2660d4 Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Tue, 5 May 2020 11:58:03 -0700 Subject: [PATCH 26/29] Document upstream/downstream files Signed-off-by: Jess Yuen --- internal/app/orchestrator/downstream.go | 11 +++++++++++ internal/app/orchestrator/upstream.go | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/internal/app/orchestrator/downstream.go b/internal/app/orchestrator/downstream.go index bd5d60d9..87afa83c 100644 --- a/internal/app/orchestrator/downstream.go +++ b/internal/app/orchestrator/downstream.go @@ -1,3 +1,14 @@ +// Package orchestrator is responsible for instrumenting inbound xDS client +// requests to the correct aggregated key, forwarding a representative request +// to the upstream origin server, and managing the lifecycle of downstream and +// upstream connections and associates streams. It implements +// go-control-plane's Cache interface in order to receive xDS-based requests, +// send responses, and handle gRPC streams. +// +// This file manages the bookkeeping of downstream clients by tracking inbound +// requests to their corresponding response channels. The contents of this file +// are intended to only be used within the orchestrator module and should not +// be exported. package orchestrator import ( diff --git a/internal/app/orchestrator/upstream.go b/internal/app/orchestrator/upstream.go index 94420c1c..9ab04689 100644 --- a/internal/app/orchestrator/upstream.go +++ b/internal/app/orchestrator/upstream.go @@ -1,3 +1,14 @@ +// Package orchestrator is responsible for instrumenting inbound xDS client +// requests to the correct aggregated key, forwarding a representative request +// to the upstream origin server, and managing the lifecycle of downstream and +// upstream connections and associates streams. It implements +// go-control-plane's Cache interface in order to receive xDS-based requests, +// send responses, and handle gRPC streams. +// +// This file manages the bookkeeping of upstream responses by tracking the +// aggregated key and its corresponding receiver channel for upstream +// responses. The contents of this file are intended to only be used within the +// orchestrator module and should not be exported. package orchestrator import ( From d08ddccfa842e482ff8e585085ad7ce4e94365c1 Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Tue, 5 May 2020 12:45:16 -0700 Subject: [PATCH 27/29] Heavily document watchUpstreams, s/watches/watchers Signed-off-by: Jess Yuen --- internal/app/orchestrator/downstream.go | 4 +-- internal/app/orchestrator/orchestrator.go | 31 +++++++++++++++---- .../app/orchestrator/orchestrator_test.go | 6 ++-- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/internal/app/orchestrator/downstream.go b/internal/app/orchestrator/downstream.go index 87afa83c..bf11d268 100644 --- a/internal/app/orchestrator/downstream.go +++ b/internal/app/orchestrator/downstream.go @@ -69,10 +69,10 @@ func (d *downstreamResponseMap) delete(req *gcp.Request) chan gcp.Response { // can be separate go routines that are still attempting to write to the // channel. We rely on garbage collection to clean up and close outstanding // response channels once the go routines finish writing to them. 
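The note above is the crux of patch 23's change: closing a channel that other goroutines may still send on risks a "send on closed channel" panic, while an unreferenced, unclosed channel is simply garbage-collected (close is only needed to signal receivers). A small sketch of the hazard being avoided, with placeholder names rather than the real response map:

package main

import (
	"fmt"
	"time"
)

func main() {
	ch := make(chan int, 1)
	watches := map[string]chan int{"aggregated-key": ch}

	// A fanout goroutine that may still be writing after the watch is removed.
	go func() {
		time.Sleep(10 * time.Millisecond)
		ch <- 1 // safe: the channel was only dropped from the map, never closed
	}()

	// Remove the map's reference without calling close(ch), mirroring delete/deleteAll.
	delete(watches, "aggregated-key")

	time.Sleep(20 * time.Millisecond)
	fmt.Println("late write completed; the channel is collected once unreferenced")
	// Calling close(ch) at deletion time instead would make the late send panic.
}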
-func (d *downstreamResponseMap) deleteAll(watches map[*gcp.Request]bool) { +func (d *downstreamResponseMap) deleteAll(watchers map[*gcp.Request]bool) { d.mu.Lock() defer d.mu.Unlock() - for watch := range watches { + for watch := range watchers { if d.responseChannels[watch] != nil { delete(d.responseChannels, watch) } diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index 43765076..2ae54efb 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -189,8 +189,27 @@ func (o *orchestrator) Fetch(context.Context, discovery.DiscoveryRequest) (*gcp. return nil, fmt.Errorf("Not implemented") } -// watchResponse is intended to be called in a goroutine, to receive incoming -// responses and fan out to downstream clients. +// watchUpstream is intended to be called in a go routine, to receive incoming +// responses, cache the response, and fan out to downstream clients or +// "watchers". There is a corresponding go routine for each aggregated key. +// +// This goroutine continually listens for upstream responses from the passed +// `responseChannel`. For each response, we will: +// - cache this latest response, replacing the previous stale response. +// - retrieve the downstream watchers from the cache for this `aggregated key`. +// - trigger the fanout process to downstream watchers by pushing to the +// individual downstream response channels in separate go routines. +// +// Additionally this function tracks a `done` channel and a `shutdownUpstream` +// function. `done` is a channel that gets closed in two places: +// 1. when server shutdown is triggered. See the `shutdown` function in this +// file for more information. +// 2. when cache TTL expires for this aggregated key. See the `onCacheEvicted` +// function in this file for more information. +// When the `done` channel is closed, we call the `shutdownUpstream` callback +// function. This will signify to the upstream client that we no longer require +// responses from this stream because the downstream connections have been +// terminated. The upstream client will clean up the stream accordingly. func (o *orchestrator) watchUpstream( ctx context.Context, aggregatedKey string, @@ -221,14 +240,14 @@ func (o *orchestrator) watchUpstream( Error(ctx, "Failed to cache the response") } - // Get downstream watches and fan out. + // Get downstream watchers and fan out. // We retrieve from cache rather than directly fanning out the // newly received response because the cache does additional // resource serialization. cached, err := o.cache.Fetch(aggregatedKey) if err != nil { o.logger.With("err", err).With("key", aggregatedKey).Error(ctx, "cache fetch failed") - // Can't do anything because we don't know who the watches + // Can't do anything because we don't know who the watchers // are. Drop the response. } else { if cached == nil || cached.Resp == nil { @@ -251,7 +270,7 @@ func (o *orchestrator) watchUpstream( } // fanout pushes the response to the response channels of all open downstream -// watches in parallel. +// watchers in parallel. func (o *orchestrator) fanout(resp *cache.Response, watchers map[*gcp.Request]bool, aggregatedKey string) { var wg sync.WaitGroup for watch := range watchers { @@ -275,7 +294,7 @@ func (o *orchestrator) fanout(resp *cache.Response, watchers map[*gcp.Request]bo // onCacheEvicted is called when the cache evicts a response due to TTL or // other reasons. When this happens, we need to clean up open streams. 
-// We shut down both the downstream watches and the upstream stream. +// We shut down both the downstream watchers and the upstream stream. func (o *orchestrator) onCacheEvicted(key string, resource cache.Resource) { // TODO Potential for improvements here to handle the thundering herd // problem: https://github.com/envoyproxy/xds-relay/issues/71 diff --git a/internal/app/orchestrator/orchestrator_test.go b/internal/app/orchestrator/orchestrator_test.go index 04f85bdd..ae3d3bfa 100644 --- a/internal/app/orchestrator/orchestrator_test.go +++ b/internal/app/orchestrator/orchestrator_test.go @@ -198,9 +198,9 @@ func TestCachedResponse(t *testing.T) { }, }, } - watches, err := orchestrator.cache.SetResponse(aggregatedKey, mockResponse) + watchers, err := orchestrator.cache.SetResponse(aggregatedKey, mockResponse) assert.NoError(t, err) - assert.Equal(t, 0, len(watches)) + assert.Equal(t, 0, len(watchers)) respChannel, cancelWatch := orchestrator.CreateWatch(req) assert.NotNil(t, respChannel) @@ -251,7 +251,7 @@ func TestCachedResponse(t *testing.T) { assert.Equal(t, 0, len(orchestrator.downstreamResponseMap.responseChannels)) } -func TestMultipleWatchesAndUpstreams(t *testing.T) { +func TestMultipleWatchersAndUpstreams(t *testing.T) { upstreamResponseChannelLDS := make(chan *v2.DiscoveryResponse) upstreamResponseChannelCDS := make(chan *v2.DiscoveryResponse) mapper := newMockMapper(t) From 4e4e0ccc249875827f654dcaf455c602e8c15349 Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Tue, 5 May 2020 13:05:51 -0700 Subject: [PATCH 28/29] assert sync map keys Signed-off-by: Jess Yuen --- .../app/orchestrator/orchestrator_test.go | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/internal/app/orchestrator/orchestrator_test.go b/internal/app/orchestrator/orchestrator_test.go index ae3d3bfa..87a55864 100644 --- a/internal/app/orchestrator/orchestrator_test.go +++ b/internal/app/orchestrator/orchestrator_test.go @@ -144,6 +144,10 @@ func TestGoldenPath(t *testing.T) { assert.NotNil(t, respChannel) assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannels)) testutils.AssertSyncMapLen(t, 1, orchestrator.upstreamResponseMap.internal) + orchestrator.upstreamResponseMap.internal.Range(func(key, val interface{}) bool { + assert.Equal(t, "lds", key.(string)) + return true + }) resp := v2.DiscoveryResponse{ VersionInfo: "1", @@ -206,6 +210,10 @@ func TestCachedResponse(t *testing.T) { assert.NotNil(t, respChannel) assert.Equal(t, 1, len(orchestrator.downstreamResponseMap.responseChannels)) testutils.AssertSyncMapLen(t, 1, orchestrator.upstreamResponseMap.internal) + orchestrator.upstreamResponseMap.internal.Range(func(key, val interface{}) bool { + assert.Equal(t, "lds", key.(string)) + return true + }) gotResponse := <-respChannel assertEqualResources(t, gotResponse, mockResponse, req) @@ -225,6 +233,10 @@ func TestCachedResponse(t *testing.T) { gotResponse = <-respChannel assertEqualResources(t, gotResponse, resp, req) testutils.AssertSyncMapLen(t, 1, orchestrator.upstreamResponseMap.internal) + orchestrator.upstreamResponseMap.internal.Range(func(key, val interface{}) bool { + assert.Contains(t, "lds", key.(string)) + return true + }) // Test scenario with same request and response version. // We expect a watch to be open but no response. 
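The Range-and-assert block that patch 28 adds is repeated verbatim for every key check; a natural follow-up (not part of this series) would be a companion to AssertSyncMapLen. A sketch, under the hypothetical name AssertSyncMapKeys:

package testutils

import (
	"sync"
	"testing"

	"github.com/stretchr/testify/assert"
)

// AssertSyncMapKeys asserts that every key stored in the sync.Map is one of
// the expected keys. Hypothetical helper; not part of this patch series.
func AssertSyncMapKeys(t *testing.T, expected []string, sm *sync.Map) {
	sm.Range(func(key, val interface{}) bool {
		assert.Contains(t, expected, key.(string))
		return true
	})
}

In the tests below it would collapse each Range block into a single call, for example AssertSyncMapKeys(t, []string{"lds", "cds"}, orchestrator.upstreamResponseMap.internal).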
@@ -237,6 +249,10 @@ func TestCachedResponse(t *testing.T) { assert.NotNil(t, respChannel2) assert.Equal(t, 2, len(orchestrator.downstreamResponseMap.responseChannels)) testutils.AssertSyncMapLen(t, 1, orchestrator.upstreamResponseMap.internal) + orchestrator.upstreamResponseMap.internal.Range(func(key, val interface{}) bool { + assert.Contains(t, "lds", key.(string)) + return true + }) // If we pass this point, it's safe to assume the respChannel2 is empty, // otherwise the test would block and not complete. @@ -321,6 +337,10 @@ func TestMultipleWatchersAndUpstreams(t *testing.T) { assert.Equal(t, 3, len(orchestrator.downstreamResponseMap.responseChannels)) testutils.AssertSyncMapLen(t, 2, orchestrator.upstreamResponseMap.internal) + orchestrator.upstreamResponseMap.internal.Range(func(key, val interface{}) bool { + assert.Contains(t, []string{"lds", "cds"}, key.(string)) + return true + }) assertEqualResources(t, gotResponseFromChannel1, upstreamResponseLDS, req1) assertEqualResources(t, gotResponseFromChannel2, upstreamResponseLDS, req2) From 57ff78eec0f35ef9007c5d58d4da0ebf9c57af10 Mon Sep 17 00:00:00 2001 From: Jess Yuen Date: Tue, 5 May 2020 13:08:52 -0700 Subject: [PATCH 29/29] comment on default Signed-off-by: Jess Yuen --- internal/app/orchestrator/orchestrator.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/app/orchestrator/orchestrator.go b/internal/app/orchestrator/orchestrator.go index 2ae54efb..7b7be72f 100644 --- a/internal/app/orchestrator/orchestrator.go +++ b/internal/app/orchestrator/orchestrator.go @@ -282,6 +282,9 @@ func (o *orchestrator) fanout(resp *cache.Response, watchers map[*gcp.Request]bo case channel <- convertToGcpResponse(resp, *watch): break default: + // If the channel is blocked, we simply drop subsequent requests and error. + // Alternative possibilities are discussed here: + // https://github.com/envoyproxy/xds-relay/pull/53#discussion_r420325553 o.logger.With("key", aggregatedKey).With("node ID", watch.GetNode().GetId()). Error(context.Background(), "channel blocked during fanout") }
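Taken together, patches 24 and 29 settle on a buffered response channel of size one plus a non-blocking send: each downstream watcher can have at most one response pending, and a newer response is dropped (with an error log) whenever the previous one has not yet been consumed. A self-contained sketch of that behavior, with placeholder types:

package main

import "fmt"

func main() {
	// Each downstream watcher gets a buffered channel of size 1 (patch 24).
	ch := make(chan string, 1)

	fanout := func(resp string) {
		select {
		case ch <- resp:
			// Delivered, or parked in the buffer until the watcher reads it.
		default:
			// The buffer still holds an unread response; drop this one and log.
			fmt.Println("channel blocked during fanout, dropping:", resp)
		}
	}

	fanout("v1") // fills the buffer
	fanout("v2") // dropped: v1 has not been read yet

	fmt.Println("watcher reads:", <-ch) // v1
}

The trade-off, as the new comment notes, is favoring never blocking the fanout goroutines over guaranteed delivery of every intermediate response; the linked PR discussion covers the alternatives that were considered.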