Skip to content

Commit

Permalink
Reduce the readiness checks for functions
Browse files Browse the repository at this point in the history
* Significantly improves scale up time for functions (when going
  from 0 -> 1)
* Health check is hit more frequently, but should not noticeably
  impact performance
* Use the httpGet probe type and leverage the watchdog /_/health
  endpoint
* Make all probe attributes configurable in charts

This could be optimized a little further if a new image for doing
the HTTP probes were created, one which would block on connection errors
and return immediately when the response comes back, but the best
case is a < 1s improvement.

Some performance numbers.  Before (there was a timeout error):

cold start: 10.240251064300537
error calling function: Command 'echo -n "Test" | faas-cli -g http://192.168.64.78:31112 invoke hello-python' returned non-zero exit status 1.
cold start: 4.621361255645752
cold start: 5.6364970207214355
cold start: 11.648431777954102
cold start: 8.450724840164185
cold start: 9.854270935058594
cold start: 12.048357009887695
cold start: 12.24026870727539

After:

cold start: 1.8590199947357178
cold start: 1.8544681072235107
cold start: 2.065181016921997
cold start: 1.8414137363433838
cold start: 1.6598482131958008
cold start: 2.4577977657318115
cold start: 2.4510068893432617
cold start: 2.244048833847046
cold start: 2.6444039344787598

Signed-off-by: Berndt Jung <bjung@vmware.com>
  • Loading branch information
berndtj authored and alexellis committed Jul 21, 2018
1 parent 8816ccc commit aa04e3e
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 24 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ FaaS-netes can be configured via environment variables.

| Option | Usage |
|------------------------|------------------------------------------------------------------------------------------------|
| `enable_function_readiness_probe` | Boolean - enable a readiness probe to test functions. Default: `true` |
| `http_probe` | Boolean - use http probe type for function readiness and liveness. Default: `false` |
| `write_timeout` | HTTP timeout for writing a response body from your function (in seconds). Default: `8` |
| `read_timeout` | HTTP timeout for reading the payload from the client caller (in seconds). Default: `8` |
| `image_pull_policy` | Image pull policy for deployed functions (`Always`, `IfNotPresent`, `Never`. Default: `Always` |
Expand Down
2 changes: 1 addition & 1 deletion chart/openfaas/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apiVersion: v1
description: Enable Kubernetes as a backend for OpenFaaS (Functions as a Service)
name: openfaas
version: 1.1.6
version: 1.1.7
sources:
- https://github.com/openfaas/faas
- https://github.com/openfaas/faas-netes
Expand Down
14 changes: 14 additions & 0 deletions chart/openfaas/templates/gateway-dep.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,20 @@ spec:
value: "{{ .Values.faasnetesd.writeTimeout }}"
- name: image_pull_policy
value: {{ .Values.faasnetesd.imagePullPolicy | quote }}
- name: http_probe
value: "{{ .Values.faasnetesd.httpProbe }}"
- name: readiness_probe_initial_delay_seconds
value: "{{ .Values.faasnetesd.readinessProbe.initialDelaySeconds }}"
- name: readiness_probe_timeout_seconds
value: "{{ .Values.faasnetesd.readinessProbe.timeoutSeconds }}"
- name: readiness_probe_period_seconds
value: "{{ .Values.faasnetesd.readinessProbe.periodSeconds }}"
- name: liveness_probe_initial_delay_seconds
value: "{{ .Values.faasnetesd.livenessProbe.initialDelaySeconds }}"
- name: liveness_probe_timeout_seconds
value: "{{ .Values.faasnetesd.livenessProbe.timeoutSeconds }}"
- name: liveness_probe_period_seconds
value: "{{ .Values.faasnetesd.livenessProbe.periodSeconds }}"
ports:
- containerPort: 8081
protocol: TCP
Expand Down
9 changes: 9 additions & 0 deletions chart/openfaas/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@ faasnetesd:
readTimeout : "20s"
writeTimeout : "20s"
imagePullPolicy : "Always" # Image pull policy for deployed functions
httpProbe: false # Setting to true will use an HTTP probe instead of a lock file for readiness and liveness
readinessProbe:
initialDelaySeconds: 0
timeoutSeconds: 1
periodSeconds: 1
livenessProbe:
initialDelaySeconds: 3
timeoutSeconds: 1
periodSeconds: 10

gateway:
image: openfaas/gateway:0.8.7
Expand Down
53 changes: 41 additions & 12 deletions handlers/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,18 @@ func ValidateDeployRequest(request *requests.CreateFunctionRequest) error {
return fmt.Errorf("(%s) must be a valid DNS entry for service name", request.Service)
}

// FunctionProbeConfig specifies the timing options for a function's liveness
// or readiness check. Each field is copied unchanged into the same-named
// field of the Kubernetes Probe built for the function's container.
type FunctionProbeConfig struct {
// InitialDelaySeconds is the probe's InitialDelaySeconds value (per the
// Kubernetes Probe API: seconds to wait after container start before probing).
InitialDelaySeconds int32
// TimeoutSeconds is the probe's TimeoutSeconds value (per the Kubernetes
// Probe API: seconds after which a single probe attempt times out).
TimeoutSeconds int32
// PeriodSeconds is the probe's PeriodSeconds value (per the Kubernetes
// Probe API: how often, in seconds, the probe is performed).
PeriodSeconds int32
}

// DeployHandlerConfig specify options for Deployments
type DeployHandlerConfig struct {
EnableFunctionReadinessProbe bool
HTTPProbe bool
FunctionReadinessProbeConfig *FunctionProbeConfig
FunctionLivenessProbeConfig *FunctionProbeConfig
ImagePullPolicy string
}

Expand Down Expand Up @@ -118,21 +127,41 @@ func MakeDeployHandler(functionNamespace string, clientset *kubernetes.Clientset

func makeDeploymentSpec(request requests.CreateFunctionRequest, existingSecrets map[string]*apiv1.Secret, config *DeployHandlerConfig) (*v1beta1.Deployment, error) {
envVars := buildEnvVars(&request)
path := filepath.Join(os.TempDir(), ".lock")
probe := &apiv1.Probe{
Handler: apiv1.Handler{
var handler apiv1.Handler

if config.HTTPProbe {
handler = apiv1.Handler{
HTTPGet: &apiv1.HTTPGetAction{
Path: "/_/health",
Port: intstr.IntOrString{
Type: intstr.Int,
IntVal: int32(watchdogPort),
},
},
}
} else {
path := filepath.Join(os.TempDir(), ".lock")
handler = apiv1.Handler{
Exec: &apiv1.ExecAction{
Command: []string{"cat", path},
},
},
InitialDelaySeconds: 3,
TimeoutSeconds: 1,
PeriodSeconds: 10,
}
}
readinessProbe := &apiv1.Probe{
Handler: handler,
InitialDelaySeconds: config.FunctionReadinessProbeConfig.InitialDelaySeconds,
TimeoutSeconds: config.FunctionReadinessProbeConfig.TimeoutSeconds,
PeriodSeconds: config.FunctionReadinessProbeConfig.PeriodSeconds,
SuccessThreshold: 1,
FailureThreshold: 3,
}
if !config.EnableFunctionReadinessProbe {
probe = nil
livenessProbe := &apiv1.Probe{
Handler: handler,
InitialDelaySeconds: config.FunctionLivenessProbeConfig.InitialDelaySeconds,
TimeoutSeconds: config.FunctionLivenessProbeConfig.TimeoutSeconds,
PeriodSeconds: config.FunctionLivenessProbeConfig.PeriodSeconds,
SuccessThreshold: 1,
FailureThreshold: 3,
}

initialReplicas := int32p(initialReplicasCount)
Expand Down Expand Up @@ -214,8 +243,8 @@ func makeDeploymentSpec(request requests.CreateFunctionRequest, existingSecrets
Env: envVars,
Resources: *resources,
ImagePullPolicy: imagePullPolicy,
LivenessProbe: probe,
ReadinessProbe: probe,
LivenessProbe: livenessProbe,
ReadinessProbe: readinessProbe,
},
},
RestartPolicy: v1.RestartPolicyAlways,
Expand Down
15 changes: 12 additions & 3 deletions server.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,20 @@ func main() {

log.Printf("HTTP Read Timeout: %s\n", cfg.ReadTimeout)
log.Printf("HTTP Write Timeout: %s\n", cfg.WriteTimeout)
log.Printf("Function Readiness Probe Enabled: %v\n", cfg.EnableFunctionReadinessProbe)

deployConfig := &handlers.DeployHandlerConfig{
EnableFunctionReadinessProbe: cfg.EnableFunctionReadinessProbe,
ImagePullPolicy: cfg.ImagePullPolicy,
HTTPProbe: cfg.HTTPProbe,
FunctionReadinessProbeConfig: &handlers.FunctionProbeConfig{
InitialDelaySeconds: int32(cfg.ReadinessProbeInitialDelaySeconds),
TimeoutSeconds: int32(cfg.ReadinessProbeTimeoutSeconds),
PeriodSeconds: int32(cfg.ReadinessProbePeriodSeconds),
},
FunctionLivenessProbeConfig: &handlers.FunctionProbeConfig{
InitialDelaySeconds: int32(cfg.LivenessProbeInitialDelaySeconds),
TimeoutSeconds: int32(cfg.LivenessProbeTimeoutSeconds),
PeriodSeconds: int32(cfg.LivenessProbePeriodSeconds),
},
ImagePullPolicy: cfg.ImagePullPolicy,
}

bootstrapHandlers := bootTypes.FaaSHandlers{
Expand Down
38 changes: 31 additions & 7 deletions types/read_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,15 @@ func parseString(val string, fallback string) string {
func (ReadConfig) Read(hasEnv HasEnv) BootstrapConfig {
cfg := BootstrapConfig{}

enableProbe := parseBoolValue(hasEnv.Getenv("enable_function_readiness_probe"), true)
httpProbe := parseBoolValue(hasEnv.Getenv("http_probe"), false)

readinessProbeInitialDelaySeconds := parseIntValue(hasEnv.Getenv("readiness_probe_initial_delay_seconds"), 3)
readinessProbeTimeoutSeconds := parseIntValue(hasEnv.Getenv("readiness_probe_timeout_seconds"), 1)
readinessProbePeriodSeconds := parseIntValue(hasEnv.Getenv("readiness_probe_period_seconds"), 10)

livenessProbeInitialDelaySeconds := parseIntValue(hasEnv.Getenv("liveness_probe_initial_delay_seconds"), 3)
livenessProbeTimeoutSeconds := parseIntValue(hasEnv.Getenv("liveness_probe_timeout_seconds"), 1)
livenessProbePeriodSeconds := parseIntValue(hasEnv.Getenv("liveness_probe_period_seconds"), 10)

readTimeout := parseIntOrDurationValue(hasEnv.Getenv("read_timeout"), time.Second*10)
writeTimeout := parseIntOrDurationValue(hasEnv.Getenv("write_timeout"), time.Second*10)
Expand All @@ -80,7 +88,15 @@ func (ReadConfig) Read(hasEnv HasEnv) BootstrapConfig {
cfg.ReadTimeout = readTimeout
cfg.WriteTimeout = writeTimeout

cfg.EnableFunctionReadinessProbe = enableProbe
cfg.HTTPProbe = httpProbe

cfg.ReadinessProbeInitialDelaySeconds = readinessProbeInitialDelaySeconds
cfg.ReadinessProbeTimeoutSeconds = readinessProbeTimeoutSeconds
cfg.ReadinessProbePeriodSeconds = readinessProbePeriodSeconds

cfg.LivenessProbeInitialDelaySeconds = livenessProbeInitialDelaySeconds
cfg.LivenessProbeTimeoutSeconds = livenessProbeTimeoutSeconds
cfg.LivenessProbePeriodSeconds = livenessProbePeriodSeconds

cfg.ImagePullPolicy = imagePullPolicy

Expand All @@ -92,9 +108,17 @@ func (ReadConfig) Read(hasEnv HasEnv) BootstrapConfig {

// BootstrapConfig for the process.
type BootstrapConfig struct {
EnableFunctionReadinessProbe bool
ReadTimeout time.Duration
WriteTimeout time.Duration
ImagePullPolicy string
Port int
// HTTPProbe when set to true switches readiness and liveness probe to
// access /_/health over HTTP instead of accessing /tmp/.lock.
HTTPProbe bool
ReadinessProbeInitialDelaySeconds int
ReadinessProbeTimeoutSeconds int
ReadinessProbePeriodSeconds int
LivenessProbeInitialDelaySeconds int
LivenessProbeTimeoutSeconds int
LivenessProbePeriodSeconds int
ReadTimeout time.Duration
WriteTimeout time.Duration
ImagePullPolicy string
Port int
}

0 comments on commit aa04e3e

Please sign in to comment.