From 9d74a1e0c39200bf24524ad3e45b14560a098841 Mon Sep 17 00:00:00 2001
From: Alex Wu <wuale@google.com>
Date: Tue, 25 Oct 2022 15:46:11 -0700
Subject: [PATCH] launcher: Integration test startup script on image (#255)

Add a Cloud Build-based integration test for the launcher image against
startup script disablement.
---
 launcher/image/test/README.md                 | 68 +++++++++++++++++++
 launcher/image/test/check_failure.sh          | 16 +++++
 launcher/image/test/cleanup.sh                | 12 ++++
 launcher/image/test/create_vm.sh              | 60 ++++++++++++++++
 .../image/test/data/echo_startupscript.sh     |  4 ++
 .../image/test/test_hardened_cloudbuild.yaml  | 30 ++++++++
 .../image/test/test_startupscript_disabled.sh | 17 +++++
 launcher/image/test/util/read_serial.sh       | 25 +++++++
 8 files changed, 232 insertions(+)
 create mode 100644 launcher/image/test/README.md
 create mode 100644 launcher/image/test/check_failure.sh
 create mode 100644 launcher/image/test/cleanup.sh
 create mode 100644 launcher/image/test/create_vm.sh
 create mode 100644 launcher/image/test/data/echo_startupscript.sh
 create mode 100644 launcher/image/test/test_hardened_cloudbuild.yaml
 create mode 100644 launcher/image/test/test_startupscript_disabled.sh
 create mode 100644 launcher/image/test/util/read_serial.sh

diff --git a/launcher/image/test/README.md b/launcher/image/test/README.md
new file mode 100644
index 000000000..96caa4f07
--- /dev/null
+++ b/launcher/image/test/README.md
@@ -0,0 +1,68 @@
+This directory contains the image integration tests.
+
+# Tests
+Integration tests run on [Cloud Build](https://cloud.google.com/build).
+Run the test with `gcloud builds submit --config=test_{image_type}_cloudbuild.yaml`
+
+# Development
+When writing a test, determine whether it should target the hardened image,
+debug image, or both. Add it to the corresponding test `test_{image_type}_cloudbuild.yaml`
+file.
+
+If a test needs multiple scripts, please include the test name in the filename of each script.
+
+For example, testing `new_feature` might use three scripts:
+`test_newfeature_initresource.sh`, `test_newfeature_validate.sh`, and `test_newfeature_cleanupresource.sh`.
+
+## Common Steps
+Hardened and debug tests will include common steps that do test setup and cleanup activities.
+They look like:
+
+```yaml
+- name: 'gcr.io/cloud-builders/gcloud'
+  entrypoint: 'bash'
+  env:
+  - 'CLEANUP=$_CLEANUP'
+  args: ['cleanup.sh']
+```
+
+* `create_vm.sh` creates a VM with the given image project, image name, and metadata. It then caches the VM name in the Cloud Build workspace.
+* `cleanup.sh` deletes the VM created in create_vm.sh.
+* `check_failure.sh` checks for a failure message in the status.txt file from a previous test step. This runs last due to Cloud Build exiting on previous step failures.
+
+## Data
+`data/` contains data that will be loaded as Metadata or onto the VM directly.
+
+## Utils
+Scripts in `util/` contain functions that can be sourced from other test scripts.
+
+* `read_serial.sh` contains a helper to pull the entire serial log for a VM.
+
+## Sharing Data Between Steps
+`/workspace` is used in Cloud Build as a scratch space for specific builds. Some conventions for Confidential Space tests:
+
+* `/workspace/vm_name.txt` contains the VM name created in `create_vm.sh`.
+Other test steps and `cleanup.sh` utilize this information to reference the VM
+name.
+* `/workspace/status.txt` contains the success/failure message from test steps.
+`check_failure.sh` looks for a failure message in this file to determine whether
+the Cloud Build run is successful.
+
+## Test Failures
+Due to the sequential/only-proceed-with-success nature of Cloud Build, a test
+step that exits with a non-zero code stops all subsequent steps from running.
+This is problematic because cleanup of the VM or other resources never occurs.
+
+To avoid this issue, test assertions with non-zero exit codes should shell OR (`||`) the result
+and place a "Test failed" message in `/workspace/status.txt`.
+
+For example, `echo $SERIAL_OUTPUT | grep 'Expected output'` will fail and cancel
+the rest of the Cloud Build on not finding the string "Expected output" in the
+serial log.
+The test writer should modify this line to do:
+
+```bash
+echo $SERIAL_OUTPUT | grep 'Expected output' || echo 'TEST FAILED' > /workspace/status.txt
+# Optionally, for debugging (append with >> so the failure message is kept):
+echo $SERIAL_OUTPUT >> /workspace/status.txt
+```
diff --git a/launcher/image/test/check_failure.sh b/launcher/image/test/check_failure.sh
new file mode 100644
index 000000000..5f4397827
--- /dev/null
+++ b/launcher/image/test/check_failure.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Final build step: exits 1 when /workspace/status.txt records a test failure.
+set -euo pipefail
+readonly STATUS_FILE=/workspace/status.txt
+echo "Checking the status.txt file for test results:"
+# A missing status file is not treated as a failure.
+if [[ ! -f "${STATUS_FILE}" ]]; then
+  echo "No status.txt file found."
+  exit 0
+fi
+cat "${STATUS_FILE}"
+if grep -qi 'failed' "${STATUS_FILE}"; then
+  echo "The test failed for build $BUILD_ID."
+  exit 1
+fi
+echo "No test failure found."
diff --git a/launcher/image/test/cleanup.sh b/launcher/image/test/cleanup.sh
new file mode 100644
index 000000000..20c1d13ea
--- /dev/null
+++ b/launcher/image/test/cleanup.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Deletes the VM recorded in /workspace/vm_name.txt, but only when CLEANUP is "true".
+set -euo pipefail
+if [[ "${CLEANUP}" != "true" ]]; then  # quoted: an empty value skips cleanup instead of breaking the test
+  echo "NOT cleaning up."
+  exit 0
+fi
+echo "Cleaning up."
+VM_NAME=$(cat /workspace/vm_name.txt)
+echo "Deleting VM ${VM_NAME}"
+# --quiet suppresses the confirmation prompt so a non-interactive build cannot block on it.
+gcloud compute instances delete "${VM_NAME}" --zone us-central1-a --quiet
\ No newline at end of file
diff --git a/launcher/image/test/create_vm.sh b/launcher/image/test/create_vm.sh
new file mode 100644
index 000000000..a0a09db17
--- /dev/null
+++ b/launcher/image/test/create_vm.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+set -euxo pipefail
+
+print_usage() {  # Prints the accepted flags, then exits 1 (called for unrecognized flags).
+    echo "usage: create_vm.sh [-i imageName] [-p imageProject] [-m metadata] [-f metadataFromFile]"
+    echo "  -i <imageName>: which image name to use for the VM"
+    echo "  -p <imageProject>: which image project to use for the VM"
+    echo "  -m <metadata>: metadata variables on VM creation; passed directly into gcloud"
+    echo "  -f <metadataFromFile>: read a metadata value from a file; specified in format key=filePath"
+    exit 1
+}
+
+create_vm() {
+  # Creates the VM "confidential-space-test-$BUILD_ID" from IMAGE_NAME and
+  # PROJECT_NAME, and sets the global VM_NAME for the caller to persist.
+  if [[ -z "${IMAGE_NAME}" ]]; then
+    echo "Empty image name supplied."
+    exit 1
+  fi
+  VM_NAME="confidential-space-test-${BUILD_ID}"
+  echo "Creating VM ${VM_NAME} with image ${IMAGE_NAME}"
+  # check the active account
+  gcloud auth list
+
+  # Build argv as an array so a metadata value containing spaces or glob
+  # characters reaches gcloud as one argument instead of being re-split.
+  local -a create_cmd=(gcloud compute instances create "${VM_NAME}"
+    --zone us-central1-a --image="${IMAGE_NAME}"
+    --image-project="${PROJECT_NAME}" --shielded-secure-boot)
+  if [[ -n "${METADATA}" ]]; then
+    create_cmd+=(--metadata "${METADATA}")
+  fi
+  if [[ -n "${METADATA_FILE}" ]]; then
+    create_cmd+=(--metadata-from-file "${METADATA_FILE}")
+  fi
+  "${create_cmd[@]}"
+}
+
+IMAGE_NAME=''
+PROJECT_NAME=''
+VM_NAME=''
+METADATA=''
+METADATA_FILE=''
+
+# In getopts, a ':' following a letter means that that flag takes an argument.
+# For example, i: means -i takes an additional argument.
+while getopts 'i:f:m:p:' flag; do
+  case "${flag}" in
+    i) IMAGE_NAME=${OPTARG} ;;
+    f) METADATA_FILE=${OPTARG} ;;
+    m) METADATA=${OPTARG} ;;
+    p) PROJECT_NAME=${OPTARG} ;;
+    *) print_usage ;;
+  esac
+done
+
+create_vm
+
+# Persist VM name
+echo $VM_NAME > /workspace/vm_name.txt
diff --git a/launcher/image/test/data/echo_startupscript.sh b/launcher/image/test/data/echo_startupscript.sh
new file mode 100644
index 000000000..5da1a7036
--- /dev/null
+++ b/launcher/image/test/data/echo_startupscript.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+echo "Executing startup script"  # test_startupscript_disabled.sh greps the serial log for this text
+sudo chmod 666 /dev/ttyS0  # the '>' below is opened by this shell, not by sudo; presumably why the port is made writable first
+sudo echo "Executing startup script: logging to serial" > /dev/ttyS0
diff --git a/launcher/image/test/test_hardened_cloudbuild.yaml b/launcher/image/test/test_hardened_cloudbuild.yaml
new file mode 100644
index 000000000..673acb65a
--- /dev/null
+++ b/launcher/image/test/test_hardened_cloudbuild.yaml
@@ -0,0 +1,30 @@
+substitutions:
+  # Expects hardened image (not debug) and should have startup-script service
+  # disabled. google-startup-scripts.service is only enabled with multi-user.target.
+  '_IMAGE_NAME': 'confidential-space-51031c1-dev-hardened'
+  '_BASE_IMAGE_PROJECT': 'confidential-space-images-dev'
+  '_METADATA_FILE': 'startup-script=data/echo_startupscript.sh'  # handed to create_vm.sh via -f
+  '_CLEANUP': 'true'  # cleanup.sh deletes the VM only when this is 'true'
+steps:
+- name: 'gcr.io/cloud-builders/gcloud'  # Step 1: create the VM under test.
+  entrypoint: 'bash'
+  env:
+  - 'BUILD_ID=$BUILD_ID'
+  args: ['create_vm.sh','-i', '${_IMAGE_NAME}',
+          -p, '${_BASE_IMAGE_PROJECT}',
+          -f, '${_METADATA_FILE}'
+        ]
+- name: 'gcr.io/cloud-builders/gcloud'  # Step 2: run the startup-script assertion.
+  entrypoint: 'bash'
+  args: ['test_startupscript_disabled.sh']
+- name: 'gcr.io/cloud-builders/gcloud'  # Step 3: delete the VM (when _CLEANUP is 'true').
+  entrypoint: 'bash'
+  env:
+  - 'CLEANUP=$_CLEANUP'
+  args: ['cleanup.sh']
+# Must come after cleanup: check_failure.sh exits non-zero when a failure was recorded.
+- name: 'gcr.io/cloud-builders/gcloud'
+  entrypoint: 'bash'
+  env:
+  - 'BUILD_ID=$BUILD_ID'
+  args: ['check_failure.sh']
diff --git a/launcher/image/test/test_startupscript_disabled.sh b/launcher/image/test/test_startupscript_disabled.sh
new file mode 100644
index 000000000..f8e6c3861
--- /dev/null
+++ b/launcher/image/test/test_startupscript_disabled.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+set -euo pipefail
+source util/read_serial.sh
+
+echo 'Running startup script test'
+VM_NAME=$(cat /workspace/vm_name.txt)
+
+echo 'Sleeping to allow startup script to run'
+sleep 5
+
+echo 'Reading from serial port:'
+SERIAL_OUTPUT=$(read_serial)
+echo $SERIAL_OUTPUT
+
+# Without the or logic, this step will fail and cleanup does not run.
+# Instead, we put the test assertion output in /workspace/status.txt.
+echo $SERIAL_OUTPUT | grep -v 'Executing startup script' || echo 'TEST FAILED' > /workspace/status.txt
\ No newline at end of file
diff --git a/launcher/image/test/util/read_serial.sh b/launcher/image/test/util/read_serial.sh
new file mode 100644
index 000000000..b3a696453
--- /dev/null
+++ b/launcher/image/test/util/read_serial.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# read_serial pulls from the global VM_NAME variable and attempts to read the
+# entirety of its serial port output.
+# Use var=$(read_serial) to capture the output of this command into a variable.
+read_serial() {
+  local base_cmd='gcloud compute instances get-serial-port-output $VM_NAME --zone us-central1-a 2>/workspace/next_start.txt'
+  local serial_out=$(eval ${base_cmd})
+  local last=''
+  while [ -s /workspace/next_start.txt ]; do
+    next=$(cat /workspace/next_start.txt | sed -n 2p | cut -d ' ' -f2)
+    # Need to compare the last value to avoid infinite looping with no more data.
+    if [[ "$last" == "$next" ]]; then
+      break
+    fi
+
+    local next_cmd="${base_cmd} ${next}"
+    local tmp=$(eval ${next_cmd})
+    serial_out="$serial_out $tmp"
+
+    last=$next
+  done
+
+  echo $serial_out
+}