Skip to content

Commit

Permalink
ref(actions): allow more flexibility on cached states usage (#8908)
Browse files Browse the repository at this point in the history
* ref(actions): allow more flexibility on cached states usage

* chore: improve message

* fix(actions): deploy single instances even if no cached state is needed

* rev: apply suggestions from code review

Co-authored-by: Marek <mail@marek.onl>

* fix: wrong use of `failure()`

* chore: remove extra file

* chore: `echo` the pattern for easier debugging

* chore: add extra details to image naming convention

Addresses #8908 (comment)

* ref(actions): use a better logic for disk image selection

This supersedes #8936 using a different approach with `${VAR:-value}`

---------

Co-authored-by: Marek <mail@marek.onl>
  • Loading branch information
gustavovalverde and upbqdn authored Oct 17, 2024
1 parent 8193840 commit cc78b6a
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 49 deletions.
58 changes: 42 additions & 16 deletions .github/workflows/cd-deploy-nodes-gcp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,39 @@ on:
workflow_dispatch:
inputs:
network:
default: 'Mainnet'
default: Mainnet
description: 'Network to deploy: Mainnet or Testnet'
required: true
log_file:
default: ''
description: 'Log to a file path rather than standard output'
type: choice
options:
- Mainnet
- Testnet
cached_disk_type:
default: tip
description: 'Type of cached disk to use'
required: true
type: choice
options:
- tip
- checkpoint
prefer_main_cached_state:
default: false
description: 'Prefer cached state from the main branch'
required: false
type: boolean
no_cached_disk:
default: false
description: 'Do not use a cached state disk'
required: false
type: boolean
no_cache:
description: 'Disable the Docker cache for this build'
required: false
type: boolean
default: false
log_file:
default: ''
description: 'Log to a file path rather than standard output'

push:
# Skip main branch updates where Rust code and dependencies aren't modified.
Expand Down Expand Up @@ -175,18 +197,19 @@ jobs:
test_variables: '-e NETWORK -e ZEBRA_CONF_PATH="zebrad/tests/common/configs/v1.0.0-rc.2.toml"'
network: ${{ inputs.network || vars.ZCASH_NETWORK }}

# Finds a `tip` cached state disk for zebra from the main branch
# Finds a cached state disk for zebra
#
# Passes the disk name to subsequent jobs using `cached_disk_name` output
#
get-disk-name:
name: Get disk name
uses: ./.github/workflows/sub-find-cached-disks.yml
if: ${{ !inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }}
with:
network: ${{ inputs.network || vars.ZCASH_NETWORK }}
disk_prefix: zebrad-cache
disk_suffix: tip
prefer_main_cached_state: true
disk_suffix: ${{ inputs.cached_disk_type || 'tip' }}
prefer_main_cached_state: ${{ inputs.prefer_main_cached_state || (github.event_name == 'push' && github.ref_name == 'main' && true) || false }}

# Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet,
# with one node in the configured GCP region.
Expand Down Expand Up @@ -250,14 +273,14 @@ jobs:
- name: Set up Cloud SDK
uses: google-github-actions/setup-gcloud@v2.1.1

# TODO we should implement the fixes from https://github.com/ZcashFoundation/zebra/pull/5670 here
# but the implementation is failing as it's requiring the disk names, contrary to what is stated in the official documentation
- name: Create instance template for ${{ matrix.network }}
run: |
NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd"
DISK_NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd"
if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then
DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}"
elif [ ${{ inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }} ]; then
echo "No cached disk required"
else
echo "No cached disk found for ${{ matrix.network }} in main branch"
exit 1
Expand All @@ -270,7 +293,7 @@ jobs:
--image-family=cos-stable \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--create-disk="${DISK_PARAMS}" \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \
--container-stdin \
--container-tty \
--container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \
Expand Down Expand Up @@ -326,7 +349,8 @@ jobs:
permissions:
contents: 'read'
id-token: 'write'
if: github.event_name == 'workflow_dispatch'
# Run even if we don't need a cached disk, but only when triggered by a workflow_dispatch
if: ${{ !failure() && github.event_name == 'workflow_dispatch' }}

steps:
- uses: actions/checkout@v4.2.1
Expand Down Expand Up @@ -363,10 +387,12 @@ jobs:
# Create instance template from container image
- name: Manual deploy of a single ${{ inputs.network }} instance running zebrad
run: |
NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd"
DISK_NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd"
if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then
DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}"
elif [ ${{ inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }} ]; then
echo "No cached disk required"
else
echo "No cached disk found for ${{ matrix.network }} in main branch"
exit 1
Expand All @@ -379,7 +405,7 @@ jobs:
--image-family=cos-stable \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--create-disk="${DISK_PARAMS}" \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \
--container-stdin \
--container-tty \
--container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \
Expand Down
49 changes: 22 additions & 27 deletions .github/workflows/scripts/gcp-get-cached-disks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
# This script finds a cached Google Cloud Compute image based on specific criteria.
#
# If there are multiple disks:
# - prefer images generated from the same commit, then
# - if prefer_main_cached_state is true, prefer images from the `main` branch, then
# - use any images from any other branch or commit.
# - if `PREFER_MAIN_CACHED_STATE` is "true", then select an image from the `main` branch, else
# - try to find a cached disk image from the current branch (or PR), else
# - try to find an image from any branch.
#
# Within each of these categories:
# - prefer newer images to older images
Expand All @@ -20,7 +20,7 @@ echo "Extracting local state version..."
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: ${LOCAL_STATE_VERSION}"

# Function to find a cached disk image based on the git pattern (commit, main, or any branch)
# Function to find a cached disk image based on the git pattern (branch, main, or any branch)
find_cached_disk_image() {
local git_pattern="${1}"
local git_source="${2}"
Expand All @@ -34,40 +34,36 @@ find_cached_disk_image() {
echo "Found ${git_source} Disk: ${disk_name}" >&2
disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)")
echo "Description: ${disk_description}" >&2
echo "${disk_name}" # This is the actual return value when a disk is found
echo "${disk_name}" # This is the actual return value when a disk is found
else
echo "No ${git_source} disk found." >&2
echo "No ${git_source} disk found with '${disk_search_pattern}' pattern." >&2
fi
}

# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to find a cached disk image
# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to
# find a cached disk image.
if [[ -n "${DISK_PREFIX}" && -n "${DISK_SUFFIX}" ]]; then
# Find the most suitable cached disk image
echo "Finding the most suitable cached disk image..."
echo "Finding a ${DISK_PREFIX}-${DISK_SUFFIX} disk image for ${NETWORK}..."
CACHED_DISK_NAME=""

# First, try to find a cached disk image from the current commit
CACHED_DISK_NAME=$(find_cached_disk_image ".+-${GITHUB_SHA_SHORT}" "commit")

# If no cached disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
# Check if main branch images are preferred
if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch")
# Else, try to find one from any branch
else
CACHED_DISK_NAME=$(find_cached_disk_image ".+-[0-9a-f]+" "any branch")
fi
# Try to find an image based on the `main` branch if that branch is preferred.
if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch")
fi
# If no image was found, try to find one from the current branch (or PR).
CACHED_DISK_NAME=${CACHED_DISK_NAME:-$(find_cached_disk_image ".+-${GITHUB_REF}" "branch")}
# If we still have no image, try to find one from any branch.
CACHED_DISK_NAME=${CACHED_DISK_NAME:-$(find_cached_disk_image ".+-[0-9a-f]+" "any branch")}

# Handle case where no suitable disk image is found
# Handle the case where no suitable disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
echo "No suitable cached state disk available."
echo "Cached state test jobs must depend on the cached state rebuild job."
echo "No suitable cached state disk available. Try running the cached state rebuild job."
exit 1
else
echo "Selected Disk: ${CACHED_DISK_NAME}"
fi

echo "Selected Disk: ${CACHED_DISK_NAME}"
else
echo "DISK_PREFIX or DISK_SUFFIX is not set. Skipping disk image search."
fi
Expand All @@ -77,7 +73,6 @@ find_available_disk_type() {
local base_name="${1}"
local disk_type="${2}"
local disk_pattern="${base_name}-cache"
local output_var="${base_name}_${disk_type}_disk"
local disk_name

disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_pattern}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${disk_type}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
Expand All @@ -87,10 +82,10 @@ find_available_disk_type() {
echo "Found ${disk_type^^} disk: ${disk_name} for ${base_name^^} on network: ${NETWORK}" >&2
disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)")
echo "Description: ${disk_description}" >&2
echo "true" # This is the actual return value when a disk is found
echo "true" # This is the actual return value when a disk is found
else
echo "No ${disk_type^^} disk found for ${base_name^^} on network: ${NETWORK}" >&2
echo "false" # This is the actual return value when no disk is found
echo "false" # This is the actual return value when no disk is found
fi
}
if [[ -n "${NETWORK}" ]]; then
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/sub-deploy-integration-tests-gcp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@ jobs:
# (This is unlikely, because each image created by a workflow has a different name.)
#
# The image name must also be 63 characters or less.
# More info: https://cloud.google.com/compute/docs/naming-resources#resource-name-format
#
# Force the image creation (--force) as the disk is still attached even though is not being
# used by the container.
Expand Down
22 changes: 16 additions & 6 deletions .github/workflows/sub-find-cached-disks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,20 +74,30 @@ jobs:
- name: Set up Cloud SDK
uses: google-github-actions/setup-gcloud@v2.1.1

# Performs formatting on disk name components.
#
# Disk images in GCP are required to be in lowercase, but the blockchain network
# uses sentence case, so we need to downcase ${{ inputs.network }}
# uses sentence case, so we need to downcase ${{ inputs.network }}.
#
# Disk image names in GCP are limited to 63 characters, so we need to limit
# branch names to 12 characters.
# Check the `create-state-image` in `sub-deploy-integration-tests-gcp.yml` for more details in image names.
# More info: https://cloud.google.com/compute/docs/naming-resources#resource-name-format
#
# Passes a lowercase Network name to subsequent steps using $NETWORK env variable
- name: Downcase network name for disks
# Passes ${{ inputs.network }} to subsequent steps using $NETWORK env variable.
# Passes ${{ env.GITHUB_REF_SLUG_URL }} to subsequent steps using $SHORT_GITHUB_REF env variable.
- name: Format network name and branch name for disks
run: |
NETWORK_CAPS=${{ inputs.network }}
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
NETWORK_CAPS="${{ inputs.network }}"
echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV"
LONG_GITHUB_REF="${{ env.GITHUB_REF_SLUG_URL }}"
echo "SHORT_GITHUB_REF=${LONG_GITHUB_REF:0:12}" >> "$GITHUB_ENV"
# Check if there are cached state disks available for subsequent jobs to use.
- name: Check if cached state disks exists
id: get-available-disks
env:
GITHUB_SHA_SHORT: ${{ env.GITHUB_SHA_SHORT }}
GITHUB_REF: ${{ env.SHORT_GITHUB_REF }}
NETWORK: ${{ env.NETWORK }} # use lowercase version from env, not input
DISK_PREFIX: ${{ inputs.disk_prefix }}
DISK_SUFFIX: ${{ inputs.disk_suffix }}
Expand Down

0 comments on commit cc78b6a

Please sign in to comment.