Skip to content

Commit

Permalink
Add quit without envoy timeout (#28)
Browse files Browse the repository at this point in the history
* Allow scuttle to timeout if Envoy becomes unavailable after startup
* Add new env var QUIT_WITHOUT_ENVOY_TIMEOUT
  • Loading branch information
ptzianos authored May 5, 2020
1 parent 2d68ee0 commit aaacb49
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 15 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ When the application exits, unless `NEVER_KILL_ISTIO_ON_FAILURE` has been set an
| `WAIT_FOR_ENVOY_TIMEOUT` | If provided and set to a valid `time.Duration` string greater than 0 seconds, `scuttle` will wait for that amount of time before starting the main application. By default, it will wait indefinitely.|
| `ISTIO_QUIT_API` | If provided `scuttle` will send a POST to `/quitquitquit` at the given API. Should be in format `http://127.0.0.1:15020`. This is intended for Istio v1.3 and higher. When not given, Istio will be stopped using a `pkill` command. |
| `GENERIC_QUIT_ENDPOINTS` | If provided `scuttle` will send a POST to the URL given. Multiple URLs are supported and must be provided as a CSV string. Should be in format `http://myendpoint.com` or `http://myendpoint.com,https://myotherendpoint.com`. The status code response is logged (if logging is enabled) but is not used. A 200 is treated the same as a 404 or 500. `GENERIC_QUIT_ENDPOINTS` is handled before Istio is stopped. |
| `QUIT_WITHOUT_ENVOY_TIMEOUT` | If provided and set to a valid `time.Duration` string, `scuttle` will exit if Envoy does not become available before the end of the timeout. If `START_WITHOUT_ENVOY` is also set, this variable will not be taken into account. |

## How Scuttle stops Istio

Expand Down
35 changes: 35 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
version: '3.8'

services:
scuttle:
build:
context: .
dockerfile: "./docker/alpine/Dockerfile"
command:
- /bin/sh
- -c
- |
for i in `seq 10`
do
echo executing
sleep 1
done
environment:
ENVOY_ADMIN_API: "http://envoy:9901"
ISTIO_QUIT_API: "http://envoy:15020"
networks:
- scuttle
envoy:
image: envoyproxy/envoy:v1.13.1
networks:
- scuttle
ports:
- 80
- 443
- 15000
volumes:
- ./docker/envoy.yaml:/etc/envoy/envoy.yaml

networks:
scuttle:
name: "scuttle"
55 changes: 55 additions & 0 deletions docker/envoy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
admin:
access_log_path: /tmp/admin_access.log
address:
socket_address:
protocol: TCP
address: 0.0.0.0
port_value: 9901
static_resources:
listeners:
- name: listener_0
address:
socket_address:
protocol: TCP
address: 0.0.0.0
port_value: 10000
filter_chains:
- filters:
- name: envoy.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.config.filter.network.http_connection_manager.v2.HttpConnectionManager
stat_prefix: ingress_http
route_config:
name: local_route
virtual_hosts:
- name: local_service
domains: ["*"]
routes:
- match:
prefix: "/"
route:
host_rewrite: www.google.com
cluster: service_google
http_filters:
- name: envoy.router
clusters:
- name: service_google
connect_timeout: 0.25s
type: LOGICAL_DNS
# Comment out the following line to test on v6 networks
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: service_google
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: www.google.com
port_value: 443
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.api.v2.auth.UpstreamTlsContext
sni: www.google.com
48 changes: 36 additions & 12 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"os/exec"
"os/signal"
"strings"
"time"

"github.com/cenk/backoff"
"github.com/monzo/typhon"
Expand All @@ -24,21 +25,29 @@ var (
func main() {
config = getConfig()

if len(os.Args) < 2 {
log("No arguments received, exiting")
return
}

// Check if logging is enabled
if config.LoggingEnabled {
log("Logging is now enabled")
}

// If an envoy API was set and config is set to wait on envoy
if config.EnvoyAdminAPI != "" && config.StartWithoutEnvoy == false {
log("Blocking until envoy starts")
block()
log("Blocking finished, envoy has started")
}

if len(os.Args) < 2 {
log("No arguments received, exiting")
return
if config.EnvoyAdminAPI != "" {
if blockingCtx := waitForEnvoy(); blockingCtx != nil {
<-blockingCtx.Done()
err := blockingCtx.Err()
if err == nil || errors.Is(err, context.Canceled) {
log("Blocking finished, Envoy has started")
} else if errors.Is(err, context.DeadlineExceeded) {
panic(errors.New("timeout reached while waiting for Envoy to start"))
} else {
panic(err.Error())
}
}
}

// Find the executable the user wants to run
Expand Down Expand Up @@ -158,19 +167,32 @@ func killIstioWithPkill() {
}
}

func block() {
func waitForEnvoy() context.Context {
if config.StartWithoutEnvoy {
return
return nil
}
var blockingCtx context.Context
var cancel context.CancelFunc
if config.WaitForEnvoyTimeout > time.Duration(0) {
blockingCtx, cancel = context.WithTimeout(context.Background(), config.WaitForEnvoyTimeout)
} else {
blockingCtx, cancel = context.WithCancel(context.Background())
}

log("Blocking until Envoy starts")
go pollEnvoy(blockingCtx, cancel)
return blockingCtx
}

func pollEnvoy(ctx context.Context, cancel context.CancelFunc) {
url := fmt.Sprintf("%s/server_info", config.EnvoyAdminAPI)

b := backoff.NewExponentialBackOff()
// Retry until config.WaitForEnvoyTimeout has elapsed. A MaxElapsedTime of 0 makes the backoff retry forever; in practice k8s will kill the pod if we take too long.
b.MaxElapsedTime = config.WaitForEnvoyTimeout

_ = backoff.Retry(func() error {
rsp := typhon.NewRequest(context.Background(), "GET", url, nil).Send().Response()
rsp := typhon.NewRequest(ctx, "GET", url, nil).Send().Response()

info := &ServerInfo{}

Expand All @@ -185,4 +207,6 @@ func block() {

return nil
}, b)
// Notify the context that it's done, if it has not already been cancelled
cancel()
}
39 changes: 36 additions & 3 deletions main_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package main

import (
"context"
"errors"
"fmt"
"net/http"
"net/http/httptest"
Expand Down Expand Up @@ -31,7 +33,7 @@ func initTestingEnv() {
return
}

fmt.Println("Initing test HTTP servers")
fmt.Println("Initiating test HTTP servers")

// Always 200 and live envoy state
goodServer = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -68,7 +70,17 @@ func initTestingEnv() {
// Pass in a negative integer to block but skip kill
func initAndRun(exitCode int) {
initTestingEnv()
block()
if blockingCtx := waitForEnvoy(); blockingCtx != nil {
<-blockingCtx.Done()
err := blockingCtx.Err()
if err == nil || errors.Is(err, context.Canceled) {
log("Blocking finished, Envoy has started")
} else if errors.Is(err, context.DeadlineExceeded) {
panic(errors.New("timeout reached while waiting for Envoy to start"))
} else {
panic(err.Error())
}
}
if exitCode >= 0 {
kill(exitCode)
}
Expand Down Expand Up @@ -121,10 +133,31 @@ func TestNoQuitQuitQuitResponse(t *testing.T) {
}

// Tests scuttle does not fail when the /quitquitquit endpoint is not a valid URL
func TestNoQuitQuitQuitMalformattedUrl(t *testing.T) {
func TestNoQuitQuitQuitMalformedUrl(t *testing.T) {
	// Announce the test under its current name so the log output can be
	// matched to this function.
	fmt.Println("Starting TestNoQuitQuitQuitMalformedUrl")
	os.Setenv("START_WITHOUT_ENVOY", "false")
	// "notaurl^^" is deliberately not a valid URL.
	os.Setenv("ISTIO_QUIT_API", "notaurl^^")
	initTestingEnv()
	// There are no assertions: the test passes as long as this call returns
	// without panicking.
	killIstioWithAPI()
}

// TestWaitTillTimeoutForEnvoy checks that the context returned by
// waitForEnvoy finishes within one second when ENVOY_ADMIN_API points at
// badServer, and that it finishes with context.Canceled.
//
// NOTE(review): the test name and the "did not timeout" failure message
// suggest a deadline, yet the assertion requires context.Canceled rather than
// context.DeadlineExceeded — confirm which outcome is intended.
func TestWaitTillTimeoutForEnvoy(t *testing.T) {
fmt.Println("Starting TestWaitTillTimeoutForEnvoy")
// NOTE(review): only config.QuitWithoutEnvoyTimeout is assigned directly
// below; whether these environment variables are re-read into config after
// this point is not visible here — confirm.
os.Setenv("QUIT_WITHOUT_ENVOY_TIMEOUT", "500ms")
os.Setenv("ENVOY_ADMIN_API", badServer.URL)
// The literal is a valid duration, so the parse error is ignored.
dur, _ := time.ParseDuration("500ms")
// NOTE(review): the visible waitForEnvoy body builds its context from
// config.WaitForEnvoyTimeout, not from this field — confirm this assignment
// influences the wait.
config.QuitWithoutEnvoyTimeout = dur
blockingCtx := waitForEnvoy()
if blockingCtx == nil {
t.Fatal("Blocking context was nil")
}
// Fail if the context has not finished after one second; otherwise inspect
// the error it finished with.
select {
case <-time.After(1 * time.Second):
t.Fatal("Context did not timeout")
case <-blockingCtx.Done():
if !errors.Is(blockingCtx.Err(), context.Canceled) {
t.Fatalf("Context contains wrong error: %s", blockingCtx.Err())
}
}
}
2 changes: 2 additions & 0 deletions scuttle_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ type ScuttleConfig struct {
IstioFallbackPkill bool
NeverKillIstioOnFailure bool
GenericQuitEndpoints []string
QuitWithoutEnvoyTimeout time.Duration
}

func log(message string) {
Expand All @@ -38,6 +39,7 @@ func getConfig() ScuttleConfig {
IstioFallbackPkill: getBoolFromEnv("ISTIO_FALLBACK_PKILL", false, loggingEnabled),
NeverKillIstioOnFailure: getBoolFromEnv("NEVER_KILL_ISTIO_ON_FAILURE", false, loggingEnabled),
GenericQuitEndpoints: getStringArrayFromEnv("GENERIC_QUIT_ENDPOINTS", make([]string, 0), loggingEnabled),
QuitWithoutEnvoyTimeout: getDurationFromEnv("QUIT_WITHOUT_ENVOY_TIMEOUT", time.Duration(0), loggingEnabled),
}

return config
Expand Down

0 comments on commit aaacb49

Please sign in to comment.