-
Notifications
You must be signed in to change notification settings - Fork 162
/
e2e-tests.sh
executable file
·477 lines (424 loc) · 18.3 KB
/
e2e-tests.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
#!/bin/bash
# Copyright 2019 The Knative Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is a helper script for Knative E2E test scripts.
# See README.md for instructions on how to use it.
source $(dirname ${BASH_SOURCE})/library.sh
# Build a resource name based on $E2E_BASE_NAME, a suffix and $BUILD_NUMBER.
# Restricts the name length to 40 chars (the limit for resource names in GCP).
# Name will have the form $E2E_BASE_NAME-<PREFIX>$BUILD_NUMBER.
# Parameters: $1 - name suffix
function build_resource_name() {
local prefix=${E2E_BASE_NAME}-$1
local suffix=${BUILD_NUMBER}
# Restrict suffix length to 20 chars
if [[ -n "${suffix}" ]]; then
suffix=${suffix:${#suffix}<20?0:-20}
fi
local name="${prefix:0:20}${suffix}"
# Ensure name doesn't end with "-"
echo "${name%-}"
}
# Test cluster parameters
# Configurable parameters
# export E2E_CLUSTER_REGION and E2E_CLUSTER_ZONE as they're used in the cluster setup subprocess
export E2E_CLUSTER_REGION=${E2E_CLUSTER_REGION:-us-central1}
# By default we use regional clusters.
export E2E_CLUSTER_ZONE=${E2E_CLUSTER_ZONE:-}
# Default backup regions in case of stockouts; by default we don't fall back to a different zone in the same region
readonly E2E_CLUSTER_BACKUP_REGIONS=${E2E_CLUSTER_BACKUP_REGIONS:-us-west1 us-east1}
readonly E2E_CLUSTER_BACKUP_ZONES=${E2E_CLUSTER_BACKUP_ZONES:-}
readonly E2E_CLUSTER_MACHINE=${E2E_CLUSTER_MACHINE:-n1-standard-4}
readonly E2E_GKE_ENVIRONMENT=${E2E_GKE_ENVIRONMENT:-prod}
readonly E2E_GKE_COMMAND_GROUP=${E2E_GKE_COMMAND_GROUP:-beta}
# Each knative repository may have a different cluster size requirement here,
# so we allow calling code to set these parameters. If they are not set we
# use some sane defaults.
readonly E2E_MIN_CLUSTER_NODES=${E2E_MIN_CLUSTER_NODES:-1}
readonly E2E_MAX_CLUSTER_NODES=${E2E_MAX_CLUSTER_NODES:-3}
readonly E2E_BASE_NAME="k${REPO_NAME}"
readonly E2E_CLUSTER_NAME=$(build_resource_name e2e-cls)
readonly E2E_NETWORK_NAME=$(build_resource_name e2e-net)
readonly TEST_RESULT_FILE=/tmp/${E2E_BASE_NAME}-e2e-result
# Flag whether test is using a boskos GCP project
IS_BOSKOS=0
# Tear down the test resources.
function teardown_test_resources() {
# On boskos, save time and don't teardown as the cluster will be destroyed anyway.
(( IS_BOSKOS )) && return
header "Tearing down test environment"
function_exists test_teardown && test_teardown
(( ! SKIP_KNATIVE_SETUP )) && function_exists knative_teardown && knative_teardown
# Delete the kubernetes source downloaded by kubetest
rm -fr kubernetes kubernetes.tar.gz
}
# Run the given E2E tests. Assume tests are tagged e2e, unless `-tags=XXX` is passed.
# Parameters: $1..$n - any go test flags, then directories containing the tests to run.
function go_test_e2e() {
local test_options=""
local go_options=""
(( EMIT_METRICS )) && test_options="-emitmetrics"
[[ ! " $@" == *" -tags="* ]] && go_options="-tags=e2e"
report_go_test -v -race -count=1 ${go_options} $@ ${test_options}
}
# Dump info about the test cluster. If dump_extra_cluster_info() is defined, calls it too.
# This is intended to be called when a test fails to provide debugging information.
function dump_cluster_state() {
echo "***************************************"
echo "*** E2E TEST FAILED ***"
echo "*** Start of information dump ***"
echo "***************************************"
echo ">>> All resources:"
kubectl get all --all-namespaces
echo ">>> Services:"
kubectl get services --all-namespaces
echo ">>> Events:"
kubectl get events --all-namespaces
function_exists dump_extra_cluster_state && dump_extra_cluster_state
echo "***************************************"
echo "*** E2E TEST FAILED ***"
echo "*** End of information dump ***"
echo "***************************************"
}
# On a Prow job, save some metadata about the test for Testgrid.
function save_metadata() {
(( ! IS_PROW )) && return
local geo_key="Region"
local geo_value="${E2E_CLUSTER_REGION}"
if [[ -n "${E2E_CLUSTER_ZONE}" ]]; then
geo_key="Zone"
geo_value="${E2E_CLUSTER_REGION}-${E2E_CLUSTER_ZONE}"
fi
local cluster_version="$(gcloud container clusters list --project=${E2E_PROJECT_ID} --format='value(currentMasterVersion)')"
cat << EOF > ${ARTIFACTS}/metadata.json
{
"E2E:${geo_key}": "${geo_value}",
"E2E:Machine": "${E2E_CLUSTER_MACHINE}",
"E2E:Version": "${cluster_version}",
"E2E:MinNodes": "${E2E_MIN_CLUSTER_NODES}",
"E2E:MaxNodes": "${E2E_MAX_CLUSTER_NODES}"
}
EOF
}
# Set E2E_CLUSTER_VERSION to a specific GKE version.
# Parameters: $1 - target GKE version (X.Y, X.Y.Z, X.Y.Z-gke.W, default or gke-latest).
# $2 - region[-zone] where the clusteer will be created.
function resolve_k8s_version() {
local target_version="$1"
if [[ "${target_version}" == "default" ]]; then
local version="$(gcloud container get-server-config \
--format='value(defaultClusterVersion)' \
--zone=$2)"
[[ -z "${version}" ]] && return 1
E2E_CLUSTER_VERSION="${version}"
echo "Using default version, ${E2E_CLUSTER_VERSION}"
return 0
fi
# Fetch valid versions
local versions="$(gcloud container get-server-config \
--format='value(validMasterVersions)' \
--zone=$2)"
[[ -z "${versions}" ]] && return 1
local gke_versions=($(echo -n "${versions//;/ }"))
echo "Available GKE versions in $2 are [${versions//;/, }]"
if [[ "${target_version}" == "gke-latest" ]]; then
# Get first (latest) version, excluding the "-gke.#" suffix
E2E_CLUSTER_VERSION="${gke_versions[0]}"
echo "Using latest version, ${E2E_CLUSTER_VERSION}"
else
local latest="$(echo "${gke_versions[@]}" | tr ' ' '\n' | grep -E ^${target_version} | cut -f1 -d- | sort | tail -1)"
if [[ -z "${latest}" ]]; then
echo "ERROR: version ${target_version} is not available"
return 1
fi
E2E_CLUSTER_VERSION="${latest}"
echo "Using ${E2E_CLUSTER_VERSION} for supplied version ${target_version}"
fi
return 0
}
# Create a test cluster with kubetest and call the current script again.
function create_test_cluster() {
# Fail fast during setup.
set -o errexit
set -o pipefail
if function_exists cluster_setup; then
cluster_setup || fail_test "cluster setup failed"
fi
echo "Cluster will have a minimum of ${E2E_MIN_CLUSTER_NODES} and a maximum of ${E2E_MAX_CLUSTER_NODES} nodes."
# Smallest cluster required to run the end-to-end-tests
local CLUSTER_CREATION_ARGS=(
--gke-create-command="container clusters create --quiet --enable-autoscaling --min-nodes=${E2E_MIN_CLUSTER_NODES} --max-nodes=${E2E_MAX_CLUSTER_NODES} --scopes=cloud-platform --enable-basic-auth --no-issue-client-certificate ${GKE_ADDONS} ${EXTRA_CLUSTER_CREATION_FLAGS[@]}"
--gke-shape={\"default\":{\"Nodes\":${E2E_MIN_CLUSTER_NODES}\,\"MachineType\":\"${E2E_CLUSTER_MACHINE}\"}}
--provider=gke
--deployment=gke
--cluster="${E2E_CLUSTER_NAME}"
--gcp-network="${E2E_NETWORK_NAME}"
--gcp-node-image="${SERVING_GKE_IMAGE}"
--gke-environment="${E2E_GKE_ENVIRONMENT}"
--gke-command-group="${E2E_GKE_COMMAND_GROUP}"
--test=false
--up
)
if (( ! IS_BOSKOS )); then
CLUSTER_CREATION_ARGS+=(--gcp-project=${GCP_PROJECT})
fi
# SSH keys are not used, but kubetest checks for their existence.
# Touch them so if they don't exist, empty files are create to satisfy the check.
mkdir -p $HOME/.ssh
touch $HOME/.ssh/google_compute_engine.pub
touch $HOME/.ssh/google_compute_engine
# Assume test failed (see details in set_test_return_code()).
set_test_return_code 1
local gcloud_project="${GCP_PROJECT}"
[[ -z "${gcloud_project}" ]] && gcloud_project="$(gcloud config get-value project)"
echo "gcloud project is ${gcloud_project}"
echo "gcloud user is $(gcloud config get-value core/account)"
(( IS_BOSKOS )) && echo "Using boskos for the test cluster"
[[ -n "${GCP_PROJECT}" ]] && echo "GCP project for test cluster is ${GCP_PROJECT}"
echo "Test script is ${E2E_SCRIPT}"
# Set arguments for this script again
local test_cmd_args="--run-tests"
(( EMIT_METRICS )) && test_cmd_args+=" --emit-metrics"
(( SKIP_KNATIVE_SETUP )) && test_cmd_args+=" --skip-knative-setup"
[[ -n "${GCP_PROJECT}" ]] && test_cmd_args+=" --gcp-project ${GCP_PROJECT}"
[[ -n "${E2E_SCRIPT_CUSTOM_FLAGS[@]}" ]] && test_cmd_args+=" ${E2E_SCRIPT_CUSTOM_FLAGS[@]}"
local extra_flags=()
if (( IS_BOSKOS )); then # Add arbitrary duration, wait for Boskos projects acquisition before error out
extra_flags+=(--boskos-wait-duration=20m)
else # Only let kubetest tear down the cluster if not using Boskos, it's done by Janitor if using Boskos
extra_flags+=(--down)
fi
# Set a minimal kubernetes environment that satisfies kubetest
# TODO(adrcunha): Remove once https://github.com/kubernetes/test-infra/issues/13029 is fixed.
local kubedir="$(mktemp -d -t kubernetes.XXXXXXXXXX)"
local test_wrapper="${kubedir}/e2e-test.sh"
mkdir ${kubedir}/cluster
ln -s "$(which kubectl)" ${kubedir}/cluster/kubectl.sh
echo "#!/bin/bash" > ${test_wrapper}
echo "cd $(pwd) && set -x" >> ${test_wrapper}
echo "${E2E_SCRIPT} ${test_cmd_args}" >> ${test_wrapper}
chmod +x ${test_wrapper}
cd ${kubedir}
# Create cluster and run the tests
create_test_cluster_with_retries "${CLUSTER_CREATION_ARGS[@]}" \
--test-cmd "${test_wrapper}" \
${extra_flags[@]} \
${EXTRA_KUBETEST_FLAGS[@]}
echo "Test subprocess exited with code $?"
# Ignore any errors below, this is a best-effort cleanup and shouldn't affect the test result.
set +o errexit
function_exists cluster_teardown && cluster_teardown
local result=$(get_test_return_code)
echo "Artifacts were written to ${ARTIFACTS}"
echo "Test result code is ${result}"
exit ${result}
}
# Retry backup regions/zones if cluster creations failed due to stockout.
# Parameters: $1..$n - any kubetest flags other than geo flag.
function create_test_cluster_with_retries() {
local cluster_creation_log=/tmp/${E2E_BASE_NAME}-cluster_creation-log
# zone_not_provided is a placeholder for e2e_cluster_zone to make for loop below work
local zone_not_provided="zone_not_provided"
local e2e_cluster_regions=(${E2E_CLUSTER_REGION})
local e2e_cluster_zones=(${E2E_CLUSTER_ZONE})
if [[ -n "${E2E_CLUSTER_BACKUP_ZONES}" ]]; then
e2e_cluster_zones+=(${E2E_CLUSTER_BACKUP_ZONES})
elif [[ -n "${E2E_CLUSTER_BACKUP_REGIONS}" ]]; then
e2e_cluster_regions+=(${E2E_CLUSTER_BACKUP_REGIONS})
e2e_cluster_zones=(${zone_not_provided})
else
echo "No backup region/zone set, cluster creation will fail in case of stockout"
fi
local e2e_cluster_target_version="${E2E_CLUSTER_VERSION}"
for e2e_cluster_region in "${e2e_cluster_regions[@]}"; do
for e2e_cluster_zone in "${e2e_cluster_zones[@]}"; do
E2E_CLUSTER_REGION=${e2e_cluster_region}
E2E_CLUSTER_ZONE=${e2e_cluster_zone}
[[ "${E2E_CLUSTER_ZONE}" == "${zone_not_provided}" ]] && E2E_CLUSTER_ZONE=""
local cluster_creation_zone="${E2E_CLUSTER_REGION}"
[[ -n "${E2E_CLUSTER_ZONE}" ]] && cluster_creation_zone="${E2E_CLUSTER_REGION}-${E2E_CLUSTER_ZONE}"
resolve_k8s_version ${e2e_cluster_target_version} ${cluster_creation_zone} || return 1
header "Creating test cluster ${E2E_CLUSTER_VERSION} in ${cluster_creation_zone}"
# Don't fail test for kubetest, as it might incorrectly report test failure
# if teardown fails (for details, see success() below)
set +o errexit
export CLUSTER_API_VERSION=${E2E_CLUSTER_VERSION}
run_go_tool k8s.io/test-infra/kubetest \
kubetest "$@" --gcp-region=${cluster_creation_zone} 2>&1 | tee ${cluster_creation_log}
# Exit if test succeeded
[[ "$(get_test_return_code)" == "0" ]] && return 0
# Retry if cluster creation failed because of:
# - stockout (https://github.com/knative/test-infra/issues/592)
# - latest GKE not available in this region/zone yet (https://github.com/knative/test-infra/issues/694)
[[ -z "$(grep -Fo 'does not have enough resources available to fulfill' ${cluster_creation_log})" \
&& -z "$(grep -Fo 'ResponseError: code=400, message=No valid versions with the prefix' ${cluster_creation_log})" \
&& -z "$(grep -Po 'ResponseError: code=400, message=Master version "[0-9a-z\-\.]+" is unsupported' ${cluster_creation_log})" ]] \
&& return 1
done
done
echo "No more region/zones to try, quitting"
return 1
}
# Setup the test cluster for running the tests.
function setup_test_cluster() {
# Fail fast during setup.
set -o errexit
set -o pipefail
header "Setting up test cluster"
# Set the actual project the test cluster resides in
# It will be a project assigned by Boskos if test is running on Prow,
# otherwise will be ${GCP_PROJECT} set up by user.
export E2E_PROJECT_ID="$(gcloud config get-value project)"
readonly E2E_PROJECT_ID
# Save some metadata about cluster creation for using in prow and testgrid
save_metadata
local k8s_user=$(gcloud config get-value core/account)
local k8s_cluster=$(kubectl config current-context)
is_protected_cluster ${k8s_cluster} && \
abort "kubeconfig context set to ${k8s_cluster}, which is forbidden"
# If cluster admin role isn't set, this is a brand new cluster
# Setup the admin role and also KO_DOCKER_REPO
if [[ -z "$(kubectl get clusterrolebinding cluster-admin-binding 2> /dev/null)" ]]; then
acquire_cluster_admin_role ${k8s_user} ${E2E_CLUSTER_NAME} ${E2E_CLUSTER_REGION} ${E2E_CLUSTER_ZONE}
kubectl config set-context ${k8s_cluster} --namespace=default
# Incorporate an element of randomness to ensure that each run properly publishes images.
export KO_DOCKER_REPO=gcr.io/${E2E_PROJECT_ID}/${E2E_BASE_NAME}-e2e-img/${RANDOM}
fi
# Safety checks
is_protected_gcr ${KO_DOCKER_REPO} && \
abort "\$KO_DOCKER_REPO set to ${KO_DOCKER_REPO}, which is forbidden"
echo "- Project is ${E2E_PROJECT_ID}"
echo "- Cluster is ${k8s_cluster}"
echo "- User is ${k8s_user}"
echo "- Docker is ${KO_DOCKER_REPO}"
export KO_DATA_PATH="${REPO_ROOT_DIR}/.git"
trap teardown_test_resources EXIT
# Handle failures ourselves, so we can dump useful info.
set +o errexit
set +o pipefail
if (( ! SKIP_KNATIVE_SETUP )) && function_exists knative_setup; then
# Wait for Istio installation to complete, if necessary, before calling knative_setup.
(( ! SKIP_ISTIO_ADDON )) && (wait_until_batch_job_complete istio-system || return 1)
knative_setup || fail_test "Knative setup failed"
fi
if function_exists test_setup; then
test_setup || fail_test "test setup failed"
fi
}
# Gets the exit of the test script.
# For more details, see set_test_return_code().
function get_test_return_code() {
echo $(cat ${TEST_RESULT_FILE})
}
# Set the return code that the test script will return.
# Parameters: $1 - return code (0-255)
function set_test_return_code() {
# kubetest teardown might fail and thus incorrectly report failure of the
# script, even if the tests pass.
# We store the real test result to return it later, ignoring any teardown
# failure in kubetest.
# TODO(adrcunha): Get rid of this workaround.
echo -n "$1"> ${TEST_RESULT_FILE}
}
# Signal (as return code and in the logs) that all E2E tests passed.
function success() {
set_test_return_code 0
echo "**************************************"
echo "*** E2E TESTS PASSED ***"
echo "**************************************"
exit 0
}
# Exit test, dumping current state info.
# Parameters: $1 - error message (optional).
function fail_test() {
set_test_return_code 1
[[ -n $1 ]] && echo "ERROR: $1"
dump_cluster_state
exit 1
}
RUN_TESTS=0
EMIT_METRICS=0
SKIP_KNATIVE_SETUP=0
SKIP_ISTIO_ADDON=0
GCP_PROJECT=""
E2E_SCRIPT=""
E2E_CLUSTER_VERSION=""
GKE_ADDONS=""
EXTRA_CLUSTER_CREATION_FLAGS=()
EXTRA_KUBETEST_FLAGS=()
E2E_SCRIPT_CUSTOM_FLAGS=()
# Parse flags and initialize the test cluster.
function initialize() {
E2E_SCRIPT="$(get_canonical_path $0)"
E2E_CLUSTER_VERSION="${SERVING_GKE_VERSION}"
cd ${REPO_ROOT_DIR}
while [[ $# -ne 0 ]]; do
local parameter=$1
# Try parsing flag as a custom one.
if function_exists parse_flags; then
parse_flags $@
local skip=$?
if [[ ${skip} -ne 0 ]]; then
# Skip parsed flag (and possibly argument) and continue
# Also save it to it's passed through to the test script
for ((i=1;i<=skip;i++)); do
E2E_SCRIPT_CUSTOM_FLAGS+=("$1")
shift
done
continue
fi
fi
# Try parsing flag as a standard one.
case ${parameter} in
--run-tests) RUN_TESTS=1 ;;
--emit-metrics) EMIT_METRICS=1 ;;
--skip-knative-setup) SKIP_KNATIVE_SETUP=1 ;;
--skip-istio-addon) SKIP_ISTIO_ADDON=1 ;;
*)
[[ $# -ge 2 ]] || abort "missing parameter after $1"
shift
case ${parameter} in
--gcp-project) GCP_PROJECT=$1 ;;
--cluster-version) E2E_CLUSTER_VERSION=$1 ;;
--cluster-creation-flag) EXTRA_CLUSTER_CREATION_FLAGS+=($1) ;;
--kubetest-flag) EXTRA_KUBETEST_FLAGS+=($1) ;;
*) abort "unknown option ${parameter}" ;;
esac
esac
shift
done
# Use PROJECT_ID if set, unless --gcp-project was used.
if [[ -n "${PROJECT_ID:-}" && -z "${GCP_PROJECT}" ]]; then
echo "\$PROJECT_ID is set to '${PROJECT_ID}', using it to run the tests"
GCP_PROJECT="${PROJECT_ID}"
fi
if (( ! IS_PROW )) && (( ! RUN_TESTS )) && [[ -z "${GCP_PROJECT}" ]]; then
abort "set \$PROJECT_ID or use --gcp-project to select the GCP project where the tests are run"
fi
(( IS_PROW )) && [[ -z "${GCP_PROJECT}" ]] && IS_BOSKOS=1
(( SKIP_ISTIO_ADDON )) || GKE_ADDONS="--addons=Istio"
readonly RUN_TESTS
readonly EMIT_METRICS
readonly GCP_PROJECT
readonly IS_BOSKOS
readonly EXTRA_CLUSTER_CREATION_FLAGS
readonly EXTRA_KUBETEST_FLAGS
readonly SKIP_KNATIVE_SETUP
readonly GKE_ADDONS
if (( ! RUN_TESTS )); then
create_test_cluster
else
setup_test_cluster
fi
}