feat: e2e-aws using tf code
Run the e2e-aws tests using Terraform code instead of the CAPI-based flow.

Signed-off-by: Noel Georgi <git@frezbo.dev>
frezbo committed Aug 11, 2023
1 parent bf3a5e0 commit 79ca1a3
Showing 8 changed files with 169 additions and 99 deletions.
69 changes: 64 additions & 5 deletions .drone.jsonnet
@@ -131,7 +131,7 @@ local Step(name, image='', target='', privileged=false, depends_on=[], environme

// TriggerDownstream is a helper function for creating a step that triggers a
// downstream pipeline. It is used to standardize the creation of these steps.
local TriggerDownstream(name, target, repositories, image='', params=[], depends_on=[]) = {
local TriggerDownstream(name, target, repositories, image='', params=[], depends_on=[], when={}) = {
name: name,
image: if image == '' then downstream_image else image,
settings: {
@@ -146,6 +146,7 @@ local TriggerDownstream(name, target, repositories, image='', params=[], depends
deploy: target,
},
depends_on: [x.name for x in depends_on],
when: when,
};

// This provides the docker service.
@@ -326,7 +327,7 @@ local load_artifacts = Step(
extra_commands=[
'az login --service-principal -u "$${AZURE_CLIENT_ID}" -p "$${AZURE_CLIENT_SECRET}" --tenant "$${AZURE_TENANT_ID}"',
'mkdir -p _out/',
'az storage blob download-batch --overwrite true -d _out -s ${CI_COMMIT_SHA}${DRONE_TAG//./-}',
'az storage blob download-batch --overwrite -d _out -s ${CI_COMMIT_SHA}${DRONE_TAG//./-}',
'chmod +x _out/clusterctl _out/integration-test-linux-amd64 _out/module-sig-verify-linux-amd64 _out/kubectl _out/kubestr _out/helm _out/cilium _out/talosctl*',
]
);
@@ -340,7 +341,7 @@ local extensions_build = TriggerDownstream(
std.format('REGISTRY=%s', local_registry),
'PLATFORM=linux/amd64',
'BUCKET_PATH=${CI_COMMIT_SHA}${DRONE_TAG//./-}',
'_out/talos-metadata',
'_out/talos-metadata', // params passed from file with KEY=VALUE format
],
depends_on=[load_artifacts],
);
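
The file entry above is expanded by the downstream plugin into KEY=VALUE parameters at trigger time. A hypothetical sketch of what such a metadata file could contain (keys are illustrative, not confirmed by this change):

# _out/talos-metadata, one KEY=VALUE pair per line (keys illustrative)
TAG=v1.5.0-beta.1
ABBREV_TAG=v1.5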
@@ -642,7 +643,65 @@ local capi_docker = Step('e2e-docker', depends_on=[load_artifacts], target='e2e-
INTEGRATION_TEST_RUN: 'XXX',
});
local e2e_capi = Step('e2e-capi', depends_on=[capi_docker], environment=creds_env_vars);
local e2e_aws = Step('e2e-aws', depends_on=[e2e_capi], environment=creds_env_vars);

local e2e_aws_prepare = Step(
'cloud-images',
depends_on=[
load_artifacts,
],
environment=creds_env_vars {
CLOUD_IMAGES_EXTRA_ARGS: '--name-prefix talos-e2e --target-clouds aws --architectures amd64 --aws-regions us-east-1',
},
extra_commands=[
'make e2e-aws-prepare',
'az login --service-principal -u "$${AZURE_CLIENT_ID}" -p "$${AZURE_CLIENT_SECRET}" --tenant "$${AZURE_TENANT_ID}"',
'az storage blob upload-batch --overwrite -s _out --pattern "e2e-aws-generated/*" -d "${CI_COMMIT_SHA}${DRONE_TAG//./-}"',
]
);

local tf_apply = TriggerDownstream(
'tf-apply',
'e2e-talos-tf-apply',
['siderolabs/contrib@main'],
params=[
'BUCKET_PATH=${CI_COMMIT_SHA}${DRONE_TAG//./-}',
'TYPE=aws',
'AWS_DEFAULT_REGION=us-east-1',
],
depends_on=[e2e_aws_prepare],
);
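
The downstream e2e-talos-tf-apply pipeline in siderolabs/contrib presumably consumes the vars.json uploaded by the previous step and writes the resulting talosconfig and kubeconfig back to the same bucket path, where the next step picks them up.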

local e2e_aws_tf_apply_post = Step(
'e2e-aws-download-artifacts',
with_make=false,
environment=creds_env_vars,
extra_commands=[
'az login --service-principal -u "$${AZURE_CLIENT_ID}" -p "$${AZURE_CLIENT_SECRET}" --tenant "$${AZURE_TENANT_ID}"',
'az storage blob download -f _out/e2e-aws-talosconfig -n e2e-aws-talosconfig -c ${CI_COMMIT_SHA}${DRONE_TAG//./-}',
'az storage blob download -f _out/e2e-aws-kubeconfig -n e2e-aws-kubeconfig -c ${CI_COMMIT_SHA}${DRONE_TAG//./-}',
],
depends_on=[tf_apply],
);

local e2e_aws = Step('e2e-aws', depends_on=[e2e_aws_tf_apply_post], environment=creds_env_vars);

local tf_destroy = TriggerDownstream(
'tf-destroy',
'e2e-talos-tf-destroy',
['siderolabs/contrib@main'],
params=[
'TYPE=aws',
'AWS_DEFAULT_REGION=us-east-1',
],
depends_on=[e2e_aws],
when={
status: [
'failure',
'success',
],
},
);
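
Note: because when.status lists both failure and success, the tf-destroy trigger fires whether the e2e tests pass or fail, so the Terraform-managed AWS resources are always torn down.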

local e2e_azure = Step('e2e-azure', depends_on=[e2e_capi], environment=creds_env_vars);
local e2e_gcp = Step('e2e-gcp', depends_on=[e2e_capi], environment=creds_env_vars);

@@ -656,7 +715,7 @@ local e2e_trigger(names) = {

local e2e_pipelines = [
// regular pipelines, triggered on promote events
Pipeline('e2e-aws', default_pipeline_steps + [capi_docker, e2e_capi, e2e_aws]) + e2e_trigger(['e2e-aws']),
Pipeline('e2e-aws', default_pipeline_steps + [e2e_aws_prepare, tf_apply, e2e_aws_tf_apply_post, e2e_aws, tf_destroy]) + e2e_trigger(['e2e-aws']),
Pipeline('e2e-gcp', default_pipeline_steps + [capi_docker, e2e_capi, e2e_gcp]) + e2e_trigger(['e2e-gcp']),

// cron pipelines, triggered on schedule events
2 changes: 1 addition & 1 deletion hack/cloud-image-uploader.sh
@@ -4,4 +4,4 @@ set -e

cd hack/cloud-image-uploader

go run . --artifacts-path="../../${ARTIFACTS}" --tag="${TAG}" --abbrev-tag="${ABBREV_TAG}"
go run . --artifacts-path="../../${ARTIFACTS}" --tag="${TAG}" --abbrev-tag="${ABBREV_TAG}" "$@"
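
Appending "$@" lets callers forward arbitrary uploader flags through the wrapper. A sketch of an invocation, assuming the e2e-aws-prepare Make target passes CLOUD_IMAGES_EXTRA_ARGS along (values illustrative):

# Run from the repository root; TAG/ABBREV_TAG values are illustrative.
ARTIFACTS=_out TAG=v1.5.0 ABBREV_TAG=v1.5 ./hack/cloud-image-uploader.sh \
  --name-prefix talos-e2e --target-clouds aws \
  --architectures amd64 --aws-regions us-east-1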
4 changes: 4 additions & 0 deletions hack/cloud-image-uploader/aws.go
@@ -253,6 +253,10 @@ func (au *AWSUploader) registerAMIArch(ctx context.Context, region string, svc *

imageName := fmt.Sprintf("talos-%s-%s-%s", au.Options.Tag, region, arch)

if au.Options.NamePrefix != "" {
imageName = fmt.Sprintf("%s-%s-%s-%s", au.Options.NamePrefix, au.Options.Tag, region, arch)
}

imageResp, err := svc.DescribeImagesWithContext(ctx, &ec2.DescribeImagesInput{
Filters: []*ec2.Filter{
{
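With the name prefix used by the e2e pipeline (talos-e2e) and, say, tag v1.5.0, the registered AMI is named talos-e2e-v1.5.0-us-east-1-amd64 instead of talos-v1.5.0-us-east-1-amd64.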
37 changes: 23 additions & 14 deletions hack/cloud-image-uploader/main.go
@@ -61,10 +61,12 @@ func run() error {
log.Printf("failed to get a list of enabled AWS regions: %s, ignored", err)
}

pflag.StringSliceVar(&DefaultOptions.TargetClouds, "target-clouds", DefaultOptions.TargetClouds, "cloud targets to upload to")
pflag.StringSliceVar(&DefaultOptions.Architectures, "architectures", DefaultOptions.Architectures, "list of architectures to process")
pflag.StringVar(&DefaultOptions.ArtifactsPath, "artifacts-path", DefaultOptions.ArtifactsPath, "artifacts path")
pflag.StringVar(&DefaultOptions.Tag, "tag", DefaultOptions.Tag, "tag (version) of the uploaded image")
pflag.StringVar(&DefaultOptions.AzureAbbrevTag, "abbrev-tag", DefaultOptions.AzureAbbrevTag, "abbreviated tag (version) of the uploaded image")
pflag.StringVar(&DefaultOptions.NamePrefix, "name-prefix", DefaultOptions.NamePrefix, "prefix for the name of the uploaded image")

pflag.StringSliceVar(&DefaultOptions.AWSRegions, "aws-regions", DefaultOptions.AWSRegions, "list of AWS regions to upload to")

@@ -84,21 +86,28 @@ func run() error {

g, ctx = errgroup.WithContext(ctx)

g.Go(func() error {
aws := AWSUploader{
Options: DefaultOptions,
for _, target := range DefaultOptions.TargetClouds {
switch target {
case "aws":
g.Go(func() error {
aws := AWSUploader{
Options: DefaultOptions,
}

return aws.Upload(ctx)
})
case "azure":
g.Go(func() error {
azure := AzureUploader{
Options: DefaultOptions,
}

return azure.AzureGalleryUpload(ctx)
})
default:
return fmt.Errorf("unknown target: %s", target)
}

return aws.Upload(ctx)
})

g.Go(func() error {
azure := AzureUploader{
Options: DefaultOptions,
}

return azure.AzureGalleryUpload(ctx)
})
}

if err = g.Wait(); err != nil {
return fmt.Errorf("failed: %w", err)
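Upload targets are now selected per run instead of always uploading to both AWS and Azure, and an unknown value fails fast via the default case. A hypothetical direct invocation (paths and versions illustrative):

# Run from hack/cloud-image-uploader (values illustrative).
go run . --artifacts-path ../../_out --tag v1.5.0 --abbrev-tag v1.5 \
  --target-clouds aws --architectures amd64 --aws-regions us-east-1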
3 changes: 3 additions & 0 deletions hack/cloud-image-uploader/options.go
@@ -13,7 +13,9 @@ import (
type Options struct {
Tag string
ArtifactsPath string
NamePrefix string
Architectures []string
TargetClouds []string

// AWS options.
AWSRegions []string
@@ -36,6 +38,7 @@ type Location struct {
var DefaultOptions = Options{
ArtifactsPath: "_out/",
Architectures: []string{"amd64", "arm64"},
TargetClouds: []string{"aws", "azure"},
}

// AWSImage returns path to AWS pre-built image.
16 changes: 16 additions & 0 deletions hack/test/e2e-aws-prepare.sh
@@ -0,0 +1,16 @@
#!/usr/bin/env bash

set -eou pipefail

source ./hack/test/e2e.sh

REGION="us-east-1"

AMI_ID=$(jq -r ".[] | select(.region == \"${REGION}\") | select (.arch == \"amd64\") | .id" "${ARTIFACTS}/cloud-images.json")

mkdir -p "${ARTIFACTS}/e2e-aws-generated"

NAME_PREFIX="talos-e2e-${SHA}-aws"

jq --null-input --arg AMI_ID "${AMI_ID}" --arg CLUSTER_NAME "${NAME_PREFIX}" --arg KUBERNETES_VERSION "${KUBERNETES_VERSION}" '{ami_id: $AMI_ID, cluster_name: $CLUSTER_NAME, kubernetes_version: $KUBERNETES_VERSION}' \
| jq -f hack/test/tfvars/aws.jq > "${ARTIFACTS}/e2e-aws-generated/vars.json"
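
A minimal sketch of the AMI lookup above, using an illustrative cloud-images.json entry (the real file is written by the cloud image uploader):

echo '[{"region":"us-east-1","arch":"amd64","id":"ami-0123456789abcdef0"}]' \
  | jq -r '.[] | select(.region == "us-east-1") | select(.arch == "amd64") | .id'
# prints: ami-0123456789abcdef0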
122 changes: 43 additions & 79 deletions hack/test/e2e-aws.sh
@@ -4,85 +4,49 @@ set -eou pipefail

source ./hack/test/e2e.sh

REGION="us-east-1"
BUCKET="talos-ci-e2e"
cp "${ARTIFACTS}/e2e-aws-talosconfig" "${TALOSCONFIG}"
cp "${ARTIFACTS}/e2e-aws-kubeconfig" "${KUBECONFIG}"

# Wait for nodes to check in
timeout=$(($(date +%s) + TIMEOUT))
until ${KUBECTL} get nodes -o go-template='{{ len .items }}' | grep ${NUM_NODES} >/dev/null; do
[[ $(date +%s) -gt $timeout ]] && exit 1
${KUBECTL} get nodes -o wide && :
sleep 10
done

# Wait for nodes to be ready
timeout=$(($(date +%s) + TIMEOUT))
until ${KUBECTL} wait --timeout=1s --for=condition=ready=true --all nodes > /dev/null; do
[[ $(date +%s) -gt $timeout ]] && exit 1
${KUBECTL} get nodes -o wide && :
sleep 10
done

# Verify that we have an HA controlplane
timeout=$(($(date +%s) + TIMEOUT))
until ${KUBECTL} get nodes -l node-role.kubernetes.io/control-plane='' -o go-template='{{ len .items }}' | grep 3 > /dev/null; do
[[ $(date +%s) -gt $timeout ]] && exit 1
${KUBECTL} get nodes -l node-role.kubernetes.io/control-plane='' && :
sleep 10
done

CONTROLPLANE0_NODE_NAME=$(${KUBECTL} get nodes -l node-role.kubernetes.io/control-plane='' -o jsonpath='{.items[0].metadata.name}')

# Wait until we have an IP for first controlplane node
timeout=$(($(date +%s) + TIMEOUT))
until [ -n "$(${KUBECTL} get nodes "${CONTROLPLANE0_NODE_NAME}" -o go-template --template='{{range .status.addresses}}{{if eq .type "ExternalIP"}}{{.address}}{{end}}{{end}}')" ]; do
[[ $(date +%s) -gt $timeout ]] && exit 1
sleep 10
done


# let's get the IP of the first controlplane node
CONTROLPLANE0_NODE=$(${KUBECTL} get nodes "${CONTROLPLANE0_NODE_NAME}" -o go-template --template='{{range .status.addresses}}{{if eq .type "ExternalIP"}}{{.address}}{{end}}{{end}}')

# set the talosconfig to use the first controlplane ip
${TALOSCTL} config endpoint "${CONTROLPLANE0_NODE}"
${TALOSCTL} config node "${CONTROLPLANE0_NODE}"

function setup {
# Setup svc account
mkdir -p ${TMP}

# Uncompress image
xz -d < ${ARTIFACTS}/aws-amd64.raw.xz > ${TMP}/disk.raw

# Upload Image
echo "uploading image to s3"
aws s3 cp --quiet ${TMP}/disk.raw s3://${BUCKET}/aws-${TAG}.raw

# Create snapshot from image
echo "importing snapshot from s3"
import_task_id=$(aws ec2 import-snapshot --region ${REGION} --description "talos e2e ${TAG}" --disk-container "Format=raw,UserBucket={S3Bucket=${BUCKET},S3Key=aws-${TAG}.raw}" | jq -r '.ImportTaskId')
echo ${import_task_id}

# Wait for import to complete
echo "waiting for snapshot import to complete"
snapshot_status=$(aws ec2 describe-import-snapshot-tasks --region ${REGION} --import-task-ids ${import_task_id} | \
jq -r --arg image_name "aws-${TAG}.raw" '.ImportSnapshotTasks[] | select(.SnapshotTaskDetail.UserBucket.S3Key == $image_name) | .SnapshotTaskDetail.Status')
while [ ${snapshot_status} != "completed" ]; do
sleep 5
snapshot_status=$(aws ec2 describe-import-snapshot-tasks --region ${REGION} --import-task-ids ${import_task_id} | \
jq -r --arg image_name "aws-${TAG}.raw" '.ImportSnapshotTasks[] | select(.SnapshotTaskDetail.UserBucket.S3Key == $image_name) | .SnapshotTaskDetail.Status')
done
snapshot_id=$(aws ec2 describe-import-snapshot-tasks --region ${REGION} --import-task-ids ${import_task_id} | \
jq -r --arg image_name "aws-${TAG}.raw" '.ImportSnapshotTasks[] | select(.SnapshotTaskDetail.UserBucket.S3Key == $image_name) | .SnapshotTaskDetail.SnapshotId')
echo ${snapshot_id}

# Create AMI
image_id=$(aws ec2 describe-images --region ${REGION} --filters="Name=name,Values=talos-e2e-${TAG}" | jq -r '.Images[0].ImageId') || true

if [[ ${image_id} != "null" ]]; then
aws ec2 deregister-image --region ${REGION} --image-id ${image_id}
fi

ami=$(aws ec2 register-image --region ${REGION} \
--block-device-mappings "DeviceName=/dev/xvda,VirtualName=talostest,Ebs={DeleteOnTermination=true,SnapshotId=${snapshot_id},VolumeSize=20,VolumeType=gp2}" \
--root-device-name /dev/xvda \
--virtualization-type hvm \
--architecture x86_64 \
--ena-support \
--name talos-e2e-${TAG} | jq -r '.ImageId')

## Cluster-wide vars
export CLUSTER_NAME=${NAME_PREFIX}
export AWS_REGION=us-east-1
export AWS_SSH_KEY_NAME=talos-e2e
export AWS_VPC_ID=vpc-ff5c5687
export AWS_SUBNET=subnet-c4e9b3a0
export AWS_SUBNET_AZ=us-east-1a
export CALICO_VERSION=v3.24.1
export AWS_CLOUD_PROVIDER_VERSION=v1.20.0-alpha.0

## Control plane vars
export CONTROL_PLANE_MACHINE_COUNT=3
export AWS_CONTROL_PLANE_MACHINE_TYPE=t3.large
export AWS_CONTROL_PLANE_VOL_SIZE=50
export AWS_CONTROL_PLANE_AMI_ID=${ami}
export AWS_CONTROL_PLANE_ADDL_SEC_GROUPS='[{id: sg-ebe8e59f}]'
export AWS_CONTROL_PLANE_IAM_PROFILE=CAPI_AWS_ControlPlane

## Worker vars
export WORKER_MACHINE_COUNT=3
export AWS_NODE_MACHINE_TYPE=t3.large
export AWS_NODE_VOL_SIZE=50
export AWS_NODE_AMI_ID=${ami}
export AWS_NODE_ADDL_SEC_GROUPS='[{id: sg-ebe8e59f}]'
export AWS_NODE_IAM_PROFILE=CAPI_AWS_Worker

${CLUSTERCTL} generate cluster ${NAME_PREFIX} \
--kubeconfig /tmp/e2e/docker/kubeconfig \
--from https://github.com/siderolabs/cluster-api-templates/blob/main/aws/standard/standard.yaml > ${TMP}/cluster.yaml
}

setup
create_cluster_capi aws
run_talos_integration_test
run_kubernetes_integration_test
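
The CAPI-based setup (uploading the raw image to S3, importing a snapshot, registering an AMI, and generating a cluster manifest with clusterctl) is removed. The script now consumes the talosconfig and kubeconfig produced by the Terraform apply, waits for the expected node count and an HA control plane, points talosctl at the first control-plane node, and runs the integration tests directly.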
15 changes: 15 additions & 0 deletions hack/test/tfvars/aws.jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"cluster_name": .cluster_name,
"num_control_planes": 3,
"num_workers": 3,
"ami_id": .ami_id,
"ccm": true,
"kubernetes_version": .kubernetes_version,
"instance_type_control_plane": "t3.large",
"instance_type_worker": "t3.large",
"extra_tags": {
"Name": .cluster_name,
"Project": "talos-e2e-ci",
"Environment": "ci"
}
}
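
Piping the vars generated by e2e-aws-prepare.sh through this filter produces the full Terraform input. An illustrative run (values hypothetical):

jq --null-input --arg AMI_ID ami-0123456789abcdef0 --arg CLUSTER_NAME talos-e2e-abc1234-aws \
  --arg KUBERNETES_VERSION 1.27.4 \
  '{ami_id: $AMI_ID, cluster_name: $CLUSTER_NAME, kubernetes_version: $KUBERNETES_VERSION}' \
  | jq -f hack/test/tfvars/aws.jq
# emits the object above with ami_id, cluster_name, and kubernetes_version filled in,
# alongside the fixed node counts, instance types, and tags.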
