palantir · bluekeyes · Apr 22, 2020 · Apr 22, 2020 · Apr 22, 2020 · Apr 22, 2020
diff --git a/README.md b/README.md
@@ -10,6 +10,7 @@ logic of your application.
 * [Usage](#usage)
   + [Examples](#examples)
   + [Dependencies](#dependencies)
+* [Asynchronous Dispatch](#asynchronous-dispatch)
 * [Structured Logging](#structured-logging)
 * [GitHub Clients](#github-clients)
   + [Metrics](#metrics)
@@ -86,6 +87,38 @@ Logging and metrics are only active when they are configured (see below). This
 means you can add your own logging or metrics libraries without conflict, but
 will miss out on the free built-in support.
 
+## Asynchronous Dispatch
+
+GitHub imposes timeouts on webhook delivery responses. If an application does
+not respond in time, GitHub closes the connection and marks the delivery as
+failed. `go-githubapp` optionally supports asynchronous dispatch to help solve
+this problem. When enabled, the event dispatch sends a response to GitHub after
+validating the payload and then runs the event handler in a separate goroutine.
+
+To enable, select an appropriate _scheduler_ and configure the event dispatcher
+to use it:
+
+```go
+dispatcher := githubapp.NewEventDispatcher(handlers, secret, githubapp.WithScheduler(
+    githubapp.AsyncScheduler(),
+))
+```
+
+The following schedulers are included in the library:
+
+- `DefaultScheduler` - a synchronous scheduler that runs event handlers in
+  the current goroutine. This is the default mode.
+
+- `AsyncScheduler` - an asynchronous scheduler that handles each event in a
+  new goroutine. This is the simplest asynchronous option.
+
+- `QueueAsyncScheduler` - an asynchronous scheduler that queues events and
+  handles them with a fixed pool of worker goroutines. This is useful to limit
+  the amount of concurrent work.
+
+`AsyncScheduler` and `QueueAsyncScheduler` support several additional options
+and customizations; see the documentation for details.
+
 ## Structured Logging
 
 `go-githubapp` uses [rs/zerolog](https://github.com/rs/zerolog) for structured
@@ -186,6 +219,14 @@ middleware.
 | `github.rate.limit[installation:<id>]` | `gauge` | the maximum number of requests permitted to make per hour, tagged with the installation id |
 | `github.rate.remaining[installation:<id>]` | `gauge` | the number of requests remaining in the current rate limit window, tagged with the installation id |
 
+If using [asynchronous dispatch](#asynchronous-dispatch) and the `githubapp.WithSchedulingMetrics` option
+is set, these metrics are emitted:
+
+| metric name | type | definition |
+| ----------- | ---- | ---------- |
+| `github.event.queue` | `gauge` | the number of queued unprocessed event |
+| `github.event.workers` | `gauge` | the number of workers actively processing events |
+
 Note that metrics need to be published in order to be useful. Several
 [publishing options][] are available or you can implement your own.
 

diff --git a/githubapp/dispatcher.go b/githubapp/dispatcher.go
@@ -54,7 +54,7 @@ type ResponseCallback func(w http.ResponseWriter, r *http.Request, event string,
 // DispatcherOption configures properties of an event dispatcher.
 type DispatcherOption func(*eventDispatcher)
 
-// WithErrorCallback sets the error callback for an event dispatcher.
+// WithErrorCallback sets the error callback for a dispatcher.
 func WithErrorCallback(onError ErrorCallback) DispatcherOption {
 	return func(d *eventDispatcher) {
 		if onError != nil {
@@ -72,6 +72,20 @@ func WithResponseCallback(onResponse ResponseCallback) DispatcherOption {
 	}
 }
 
+// WithScheduler sets the scheduler used to process events. Setting a
+// non-default scheduler can enable asynchronous processing. When a scheduler
+// is asynchronous, the dispatcher validatates event payloads, queues valid
+// events for handling, and then responds to GitHub without waiting for the
+// handler to complete.  This is useful when handlers may take longer than
+// GitHub's timeout for webhook deliveries.
+func WithScheduler(s Scheduler) DispatcherOption {
+	return func(d *eventDispatcher) {
+		if s != nil {
+			d.scheduler = s
+		}
+	}
+}
+
 // ValidationError is passed to error callbacks when the webhook payload fails
 // validation.
 type ValidationError struct {
@@ -88,6 +102,7 @@ type eventDispatcher struct {
 	handlerMap map[string]EventHandler
 	secret     string
 
+	scheduler  Scheduler
 	onError    ErrorCallback
 	onResponse ResponseCallback
 }
@@ -118,6 +133,7 @@ func NewEventDispatcher(handlers []EventHandler, secret string, opts ...Dispatch
 	d := &eventDispatcher{
 		handlerMap: handlerMap,
 		secret:     secret,
+		scheduler:  DefaultScheduler(),
 		onError:    DefaultErrorCallback,
 		onResponse: DefaultResponseCallback,
 	}
@@ -172,7 +188,13 @@ func (d *eventDispatcher) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 
 	handler, ok := d.handlerMap[eventType]
 	if ok {
-		if err := handler.Handle(ctx, eventType, deliveryID, payloadBytes); err != nil {
+		if err := d.scheduler.Schedule(Dispatch{
+			Ctx:        ctx,
+			Handler:    handler,
+			EventType:  eventType,
+			DeliveryID: deliveryID,
+			Payload:    payloadBytes,
+		}); err != nil {
 			d.onError(w, r, err)
 			return
 		}
@@ -184,13 +206,19 @@ func (d *eventDispatcher) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 func DefaultErrorCallback(w http.ResponseWriter, r *http.Request, err error) {
 	logger := zerolog.Ctx(r.Context())
 
-	if ve, ok := err.(ValidationError); ok {
+	var ve ValidationError
+	if errors.As(err, &ve) {
 		logger.Warn().Err(ve.Cause).Msgf("Received invalid webhook headers or payload")
 		http.Error(w, "Invalid webhook headers or payload", http.StatusBadRequest)
 		return
 	}
+	if errors.Is(err, ErrCapacityExceeded) {
+		logger.Warn().Msg("Dropping webhook event due to over-capacity scheduler")
+		http.Error(w, "No capacity available to processes this event", http.StatusServiceUnavailable)
+		return
+	}
 
-	logger.Error().Err(err).Msg("Unexpected error handling webhook request")
+	logger.Error().Err(err).Msg("Unexpected error handling webhook")
 	http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
 }
 

diff --git a/githubapp/scheduler.go b/githubapp/scheduler.go
@@ -0,0 +1,238 @@
+// Copyright 2020 Palantir Technologies, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package githubapp
+
+import (
+	"context"
+	"fmt"
+	"sync/atomic"
+
+	"github.com/pkg/errors"
+	"github.com/rcrowley/go-metrics"
+	"github.com/rs/zerolog"
+)
+
+const (
+	MetricsKeyQueueLength   = "github.event.queued"
+	MetricsKeyActiveWorkers = "github.event.workers"
+)
+
+var (
+	ErrCapacityExceeded = errors.New("scheduler: capacity exceeded")
+)
+
+// Dispatch is a webhook payload and the handler that handles it.
+type Dispatch struct {
+	Handler EventHandler
+	Ctx     context.Context
+
+	EventType  string
+	DeliveryID string
+	Payload    []byte
+}
+
+// Execute calls the Dispatch's handler with the stored arguments.
+func (d Dispatch) Execute() error {
+	return d.Handler.Handle(d.Ctx, d.EventType, d.DeliveryID, d.Payload)
+}
+
+// AsyncErrorCallback is called by an asynchronous scheduler when an event
+// handler returns an error. The error from the handler is passed directly as
+// the final argument.
+type AsyncErrorCallback func(ctx context.Context, err error)
+
+// DefaultAsyncErrorCallback logs errors.
+func DefaultAsyncErrorCallback(ctx context.Context, err error) {
+	zerolog.Ctx(ctx).Error().Err(err).Msg("Unexpected error handling webhook")
+}
+
+// ContextDeriver creates a new independent context from a request's context.
+// The new context must be based on context.Background(), not the input.
+type ContextDeriver func(context.Context) context.Context
+
+// DefaultContextDeriver copies the logger from the request's context to a new
+// context.
+func DefaultContextDeriver(ctx context.Context) context.Context {
+	newCtx := context.Background()
+
+	// this value is always unused by async schedulers, but is set for
+	// compatibility with existing handlers that call SetResponder
+	newCtx = InitializeResponder(newCtx)
+
+	return zerolog.Ctx(ctx).WithContext(newCtx)
+}
+
+// Scheduler is a strategy for executing event handlers.
+//
+// The Schedule method takes a Dispatch and executes it by calling the handler
+// for the payload. The execution may be asynchronous, but the scheduler must
+// create a new context in this case. The dispatcher waits for Schedule to
+// return before responding to GitHub, so asynchronous schedulers should only
+// return errors that happen during scheduling, not during execution.
+//
+// Schedule may return ErrCapacityExceeded if it cannot schedule or queue new
+// events at the time of the call.
+type Scheduler interface {
+	Schedule(d Dispatch) error
+}
+
+// SchedulerOption configures properties of a scheduler.
+type SchedulerOption func(*scheduler)
+
+// WithAsyncErrorCallback sets the error callback for an asynchronous
+// scheduler. If not set, the scheduler uses DefaultAsyncErrorCallback.
+func WithAsyncErrorCallback(onError AsyncErrorCallback) SchedulerOption {
+	return func(s *scheduler) {
+		if onError != nil {
+			s.onError = onError
+		}
+	}
+}
+
+// WithContextDeriver sets the context deriver for an asynchronous scheduler.
+// If not set, the scheduler uses DefaultContextDeriver.
+func WithContextDeriver(deriver ContextDeriver) SchedulerOption {
+	return func(s *scheduler) {
+		if deriver != nil {
+			s.deriver = deriver
+		}
+	}
+}
+
+// WithSchedulingMetrics enables metrics reporting for schedulers.
+func WithSchedulingMetrics(r metrics.Registry) SchedulerOption {
+	return func(s *scheduler) {
+		metrics.NewRegisteredFunctionalGauge(MetricsKeyQueueLength, r, func() int64 {
+			return int64(len(s.queue))
+		})
+		metrics.NewRegisteredFunctionalGauge(MetricsKeyActiveWorkers, r, func() int64 {
+			return atomic.LoadInt64(&s.activeWorkers)
+		})
+	}
+}
+
+// core functionality and options for (async) schedulers
+type scheduler struct {
+	onError AsyncErrorCallback
+	deriver ContextDeriver
+
+	activeWorkers int64
+	queue         chan Dispatch
+}
+
+func (s *scheduler) safeExecute(d Dispatch) {
+	var err error
+	defer func() {
+		if r := recover(); r != nil {
+			if rerr, ok := r.(error); ok {
+				err = rerr
+			} else {
+				err = fmt.Errorf("%v", r)
+			}
+		}
+		if err != nil && s.onError != nil {
+			s.onError(d.Ctx, err)
+		}
+		atomic.AddInt64(&s.activeWorkers, -1)
+	}()
+
+	atomic.AddInt64(&s.activeWorkers, 1)
+	if s.deriver != nil {
+		d.Ctx = s.deriver(d.Ctx)
+	}
+	err = d.Execute()
+}
+
+// DefaultScheduler returns a scheduler that executes handlers in the go
+// routine of the caller and returns any error.
+func DefaultScheduler() Scheduler {
+	return &defaultScheduler{}
+}
+
+type defaultScheduler struct{}
+
+func (s *defaultScheduler) Schedule(d Dispatch) error {
+	return d.Execute()
+}
+
+// AsyncScheduler returns a scheduler that executes handlers in new goroutines.
+// Goroutines are not reused and there is no limit on the number created.
+func AsyncScheduler(opts ...SchedulerOption) Scheduler {
+	s := &asyncScheduler{
+		scheduler: scheduler{
+			deriver: DefaultContextDeriver,
+			onError: DefaultAsyncErrorCallback,
+		},
+	}
+	for _, opt := range opts {
+		opt(&s.scheduler)
+	}
+	return s
+}
+
+type asyncScheduler struct {
+	scheduler
+}
+
+func (s *asyncScheduler) Schedule(d Dispatch) error {
+	go s.safeExecute(d)
+	return nil
+}
+
+// QueueAsyncScheduler returns a scheduler that executes handlers in a fixed
+// number of worker goroutines. If no workers are available, events queue until
+// the queue is full.
+func QueueAsyncScheduler(queueSize int, workers int, opts ...SchedulerOption) Scheduler {
+	if queueSize < 0 {
+		panic("NewQueueAsyncScheduler: queue size must be non-negative")
+	}
+	if workers < 1 {
+		panic("NewQueueAsyncScheduler: worker count must be positive")
+	}
+
+	s := &queueScheduler{
+		scheduler: scheduler{
+			deriver: DefaultContextDeriver,
+			onError: DefaultAsyncErrorCallback,
+			queue:   make(chan Dispatch, queueSize),
+		},
+	}
+	for _, opt := range opts {
+		opt(&s.scheduler)
+	}
+
+	for i := 0; i < workers; i++ {
+		go func() {
+			for d := range s.queue {
+				s.safeExecute(d)
+			}
+		}()
+	}
+
+	return s
+}
+
+type queueScheduler struct {
+	scheduler
+}
+
+func (s *queueScheduler) Schedule(d Dispatch) error {
+	select {
+	case s.queue <- d:
+	default:
+		return ErrCapacityExceeded
+	}
+	return nil
+}