From ed8b6d34dedb3ad0067af78ebc7c766f8362674b Mon Sep 17 00:00:00 2001 From: Berndt Jung Date: Mon, 16 Jul 2018 14:53:02 -0700 Subject: [PATCH] Reduce the readiness checks for functions * Significantly improves scale up time for functions (when going from 0 -> 1) * Health check is hit more frequently, but should not noticeably impact performance * Use the httpget probe type and leverage the watchdog /healthz endpoint * Make all probe attributes configurable in charts This could be optimized a little further if a new image for doing the http probes were created which would block on connection errors and return immediately when the response comes back, but the best case is < 1s improvement. Some performance numbers. Before (there was a timeout error): cold start: 10.240251064300537 error calling function: Command 'echo -n "Test" | faas-cli -g http://192.168.64.78:31112 invoke hello-python' returned non-zero exit status 1. cold start: 4.621361255645752 cold start: 5.6364970207214355 cold start: 11.648431777954102 cold start: 8.450724840164185 cold start: 9.854270935058594 cold start: 12.048357009887695 cold start: 12.24026870727539 After: cold start: 1.8590199947357178 cold start: 1.8544681072235107 cold start: 2.065181016921997 cold start: 1.8414137363433838 cold start: 1.6598482131958008 cold start: 2.4577977657318115 cold start: 2.4510068893432617 cold start: 2.244048833847046 cold start: 2.6444039344787598 Signed-off-by: Berndt Jung --- README.md | 2 +- chart/openfaas/Chart.yaml | 2 +- chart/openfaas/templates/gateway-dep.yaml | 14 ++++++ chart/openfaas/values.yaml | 9 ++++ handlers/deploy.go | 53 ++++++++++++++++++----- server.go | 15 +++++-- types/read_config.go | 38 +++++++++++++--- 7 files changed, 109 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index c088f8a47..44559f6a9 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ FaaS-netes can be configured via environment variables. 
| Option | Usage | |------------------------|------------------------------------------------------------------------------------------------| -| `enable_function_readiness_probe` | Boolean - enable a readiness probe to test functions. Default: `true` | +| `http_probe` | Boolean - use http probe type for function readiness and liveness. Default: `false` | | `write_timeout` | HTTP timeout for writing a response body from your function (in seconds). Default: `8` | | `read_timeout` | HTTP timeout for reading the payload from the client caller (in seconds). Default: `8` | | `image_pull_policy` | Image pull policy for deployed functions (`Always`, `IfNotPresent`, `Never`. Default: `Always` | diff --git a/chart/openfaas/Chart.yaml b/chart/openfaas/Chart.yaml index 540fa84ce..0860ffdc3 100644 --- a/chart/openfaas/Chart.yaml +++ b/chart/openfaas/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 description: Enable Kubernetes as a backend for OpenFaaS (Functions as a Service) name: openfaas -version: 1.1.3 +version: 1.1.4 sources: - https://github.com/openfaas/faas - https://github.com/openfaas/faas-netes diff --git a/chart/openfaas/templates/gateway-dep.yaml b/chart/openfaas/templates/gateway-dep.yaml index 607b2f369..351aa4c91 100644 --- a/chart/openfaas/templates/gateway-dep.yaml +++ b/chart/openfaas/templates/gateway-dep.yaml @@ -113,6 +113,20 @@ spec: value: "{{ .Values.faasnetesd.writeTimeout }}" - name: image_pull_policy value: {{ .Values.faasnetesd.imagePullPolicy | quote }} + - name: http_probe + value: "{{ .Values.faasnetesd.httpProbe }}" + - name: readiness_probe_initial_delay_seconds + value: "{{ .Values.faasnetesd.readinessProbe.initialDelaySeconds }}" + - name: readiness_probe_timeout_seconds + value: "{{ .Values.faasnetesd.readinessProbe.timeoutSeconds }}" + - name: readiness_probe_period_seconds + value: "{{ .Values.faasnetesd.readinessProbe.periodSeconds }}" + - name: liveness_probe_initial_delay_seconds + value: "{{ 
.Values.faasnetesd.livenessProbe.initialDelaySeconds }}" + - name: liveness_probe_timeout_seconds + value: "{{ .Values.faasnetesd.livenessProbe.timeoutSeconds }}" + - name: liveness_probe_period_seconds + value: "{{ .Values.faasnetesd.livenessProbe.periodSeconds }}" ports: - containerPort: 8081 protocol: TCP diff --git a/chart/openfaas/values.yaml b/chart/openfaas/values.yaml index c0a98acdc..b4d46bf1c 100644 --- a/chart/openfaas/values.yaml +++ b/chart/openfaas/values.yaml @@ -13,6 +13,15 @@ faasnetesd: readTimeout : "20s" writeTimeout : "20s" imagePullPolicy : "Always" # Image pull policy for deployed functions + httpProbe: false # Setting to true will use an HTTP probe instead of a lock file for readiness and liveness + readinessProbe: + initialDelaySeconds: 0 + timeoutSeconds: 1 + periodSeconds: 1 + livenessProbe: + initialDelaySeconds: 3 + timeoutSeconds: 1 + periodSeconds: 10 gateway: image: openfaas/gateway:0.8.4 diff --git a/handlers/deploy.go b/handlers/deploy.go index 395475e56..ebdefcd72 100644 --- a/handlers/deploy.go +++ b/handlers/deploy.go @@ -44,9 +44,18 @@ func ValidateDeployRequest(request *requests.CreateFunctionRequest) error { return fmt.Errorf("(%s) must be a valid DNS entry for service name", request.Service) } +// FunctionProbeConfig specifies options for Liveness and Readiness checks +type FunctionProbeConfig struct { + InitialDelaySeconds int32 + TimeoutSeconds int32 + PeriodSeconds int32 +} + // DeployHandlerConfig specify options for Deployments type DeployHandlerConfig struct { - EnableFunctionReadinessProbe bool + HTTPProbe bool + FunctionReadinessProbeConfig *FunctionProbeConfig + FunctionLivenessProbeConfig *FunctionProbeConfig ImagePullPolicy string } @@ -118,21 +127,41 @@ func MakeDeployHandler(functionNamespace string, clientset *kubernetes.Clientset func makeDeploymentSpec(request requests.CreateFunctionRequest, existingSecrets map[string]*apiv1.Secret, config *DeployHandlerConfig) (*v1beta1.Deployment, error) { envVars := 
buildEnvVars(&request) - path 
:= filepath.Join(os.TempDir(), ".lock") - probe := &apiv1.Probe{ - Handler: apiv1.Handler{ + var handler apiv1.Handler + + if config.HTTPProbe { + handler = apiv1.Handler{ + HTTPGet: &apiv1.HTTPGetAction{ + Path: "/_/health", + Port: intstr.IntOrString{ + Type: intstr.Int, + IntVal: int32(watchdogPort), + }, + }, + } + } else { + path := filepath.Join(os.TempDir(), ".lock") + handler = apiv1.Handler{ Exec: &apiv1.ExecAction{ Command: []string{"cat", path}, }, - }, - InitialDelaySeconds: 3, - TimeoutSeconds: 1, - PeriodSeconds: 10, + } + } + readinessProbe := &apiv1.Probe{ + Handler: handler, + InitialDelaySeconds: config.FunctionReadinessProbeConfig.InitialDelaySeconds, + TimeoutSeconds: config.FunctionReadinessProbeConfig.TimeoutSeconds, + PeriodSeconds: config.FunctionReadinessProbeConfig.PeriodSeconds, SuccessThreshold: 1, FailureThreshold: 3, } - if !config.EnableFunctionReadinessProbe { - probe = nil + livenessProbe := &apiv1.Probe{ + Handler: handler, + InitialDelaySeconds: config.FunctionLivenessProbeConfig.InitialDelaySeconds, + TimeoutSeconds: config.FunctionLivenessProbeConfig.TimeoutSeconds, + PeriodSeconds: config.FunctionLivenessProbeConfig.PeriodSeconds, + SuccessThreshold: 1, + FailureThreshold: 3, } initialReplicas := int32p(initialReplicasCount) @@ -214,8 +243,8 @@ func makeDeploymentSpec(request requests.CreateFunctionRequest, existingSecrets Env: envVars, Resources: *resources, ImagePullPolicy: imagePullPolicy, - LivenessProbe: probe, - ReadinessProbe: probe, + LivenessProbe: livenessProbe, + ReadinessProbe: readinessProbe, }, }, RestartPolicy: v1.RestartPolicyAlways, diff --git a/server.go b/server.go index 2bbe6cf74..68dcb1efa 100644 --- a/server.go +++ b/server.go @@ -40,11 +40,20 @@ func main() { log.Printf("HTTP Read Timeout: %s\n", cfg.ReadTimeout) log.Printf("HTTP Write Timeout: %s\n", cfg.WriteTimeout) - log.Printf("Function Readiness Probe Enabled: %v\n", cfg.EnableFunctionReadinessProbe) deployConfig := &handlers.DeployHandlerConfig{ - 
EnableFunctionReadinessProbe: cfg.EnableFunctionReadinessProbe, - ImagePullPolicy: cfg.ImagePullPolicy, + HTTPProbe: cfg.HTTPProbe, + FunctionReadinessProbeConfig: &handlers.FunctionProbeConfig{ + InitialDelaySeconds: int32(cfg.ReadinessProbeInitialDelaySeconds), + TimeoutSeconds: int32(cfg.ReadinessProbeTimeoutSeconds), + PeriodSeconds: int32(cfg.ReadinessProbePeriodSeconds), + }, + FunctionLivenessProbeConfig: &handlers.FunctionProbeConfig{ + InitialDelaySeconds: int32(cfg.LivenessProbeInitialDelaySeconds), + TimeoutSeconds: int32(cfg.LivenessProbeTimeoutSeconds), + PeriodSeconds: int32(cfg.LivenessProbePeriodSeconds), + }, + ImagePullPolicy: cfg.ImagePullPolicy, } bootstrapHandlers := bootTypes.FaaSHandlers{ diff --git a/types/read_config.go b/types/read_config.go index bcf6adb2d..214efadd4 100644 --- a/types/read_config.go +++ b/types/read_config.go @@ -70,7 +70,15 @@ func parseString(val string, fallback string) string { func (ReadConfig) Read(hasEnv HasEnv) BootstrapConfig { cfg := BootstrapConfig{} - enableProbe := parseBoolValue(hasEnv.Getenv("enable_function_readiness_probe"), true) + httpProbe := parseBoolValue(hasEnv.Getenv("http_probe"), false) + + readinessProbeInitialDelaySeconds := parseIntValue(hasEnv.Getenv("readiness_probe_initial_delay_seconds"), 3) + readinessProbeTimeoutSeconds := parseIntValue(hasEnv.Getenv("readiness_probe_timeout_seconds"), 1) + readinessProbePeriodSeconds := parseIntValue(hasEnv.Getenv("readiness_probe_period_seconds"), 10) + + livenessProbeInitialDelaySeconds := parseIntValue(hasEnv.Getenv("liveness_probe_initial_delay_seconds"), 3) + livenessProbeTimeoutSeconds := parseIntValue(hasEnv.Getenv("liveness_probe_timeout_seconds"), 1) + livenessProbePeriodSeconds := parseIntValue(hasEnv.Getenv("liveness_probe_period_seconds"), 10) readTimeout := parseIntOrDurationValue(hasEnv.Getenv("read_timeout"), time.Second*10) writeTimeout := parseIntOrDurationValue(hasEnv.Getenv("write_timeout"), time.Second*10) @@ -80,7 +88,15 @@ func 
(ReadConfig) Read(hasEnv HasEnv) BootstrapConfig { cfg.ReadTimeout = readTimeout cfg.WriteTimeout = writeTimeout - cfg.EnableFunctionReadinessProbe = enableProbe + cfg.HTTPProbe = httpProbe + + cfg.ReadinessProbeInitialDelaySeconds = readinessProbeInitialDelaySeconds + cfg.ReadinessProbeTimeoutSeconds = readinessProbeTimeoutSeconds + cfg.ReadinessProbePeriodSeconds = readinessProbePeriodSeconds + + cfg.LivenessProbeInitialDelaySeconds = livenessProbeInitialDelaySeconds + cfg.LivenessProbeTimeoutSeconds = livenessProbeTimeoutSeconds + cfg.LivenessProbePeriodSeconds = livenessProbePeriodSeconds cfg.ImagePullPolicy = imagePullPolicy @@ -92,9 +108,17 @@ func (ReadConfig) Read(hasEnv HasEnv) BootstrapConfig { // BootstrapConfig for the process. type BootstrapConfig struct { - EnableFunctionReadinessProbe bool - ReadTimeout time.Duration - WriteTimeout time.Duration - ImagePullPolicy string - Port int + // HTTPProbe when set to true switches readiness and liveness probe to + // access /_/health over HTTP instead of accessing /tmp/.lock. + HTTPProbe bool + ReadinessProbeInitialDelaySeconds int + ReadinessProbeTimeoutSeconds int + ReadinessProbePeriodSeconds int + LivenessProbeInitialDelaySeconds int + LivenessProbeTimeoutSeconds int + LivenessProbePeriodSeconds int + ReadTimeout time.Duration + WriteTimeout time.Duration + ImagePullPolicy string + Port int }