Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref(ci): consolidate cached states workflows and scripts #8865

Merged
merged 2 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 0 additions & 42 deletions .github/workflows/scripts/gcp-get-available-disks.sh

This file was deleted.

114 changes: 76 additions & 38 deletions .github/workflows/scripts/gcp-get-cached-disks.sh
Original file line number Diff line number Diff line change
@@ -1,20 +1,33 @@
#!/usr/bin/env bash

# Description:
# This script finds a cached Google Cloud Compute image based on specific criteria.
# It prioritizes images from the current commit, falls back to the main branch,
# and finally checks other branches if needed. The selected image is used for
# setting up the environment in a CI/CD pipeline.
#
# If there are multiple disks:
# - prefer images generated from the same commit, then
# - if prefer_main_cached_state is true, prefer images from the `main` branch, then
# - use any images from any other branch or commit.
#
# Within each of these categories:
# - prefer newer images to older images
#
# The selected image is used for setting up the environment in a CI/CD pipeline.
# It also checks if specific disk types are available for subsequent jobs.

set -eo pipefail

# Function to find and report a cached disk image
# Extract local state version
echo "Extracting local state version..."
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: ${LOCAL_STATE_VERSION}"

# Function to find a cached disk image based on the git pattern (commit, main, or any branch)
find_cached_disk_image() {
local search_pattern="${1}"
local git_pattern="${1}"
local git_source="${2}"
local disk_name
local disk_search_pattern="${DISK_PREFIX}-${git_pattern}-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"

disk_name=$(gcloud compute images list --filter="status=READY AND name~${search_pattern}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_search_pattern}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)

# Use >&2 to redirect to stderr and avoid sending wrong assignments to stdout
if [[ -n "${disk_name}" ]]; then
Expand All @@ -27,46 +40,71 @@ find_cached_disk_image() {
fi
}

# Extract local state version
echo "Extracting local state version..."
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: ${LOCAL_STATE_VERSION}"
# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to find a cached disk image
if [[ -n "${DISK_PREFIX}" && -n "${DISK_SUFFIX}" ]]; then
# Find the most suitable cached disk image
echo "Finding the most suitable cached disk image..."
CACHED_DISK_NAME=""

# First, try to find a cached disk image from the current commit
CACHED_DISK_NAME=$(find_cached_disk_image ".+-${GITHUB_SHA_SHORT}" "commit")

# Define DISK_PREFIX based on the requiring state directory
if [[ "${NEEDS_LWD_STATE}" == "true" ]]; then
DISK_PREFIX="${LWD_STATE_DIR}"
# If no cached disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
# Check if main branch images are preferred
if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch")
# Else, try to find one from any branch
else
CACHED_DISK_NAME=$(find_cached_disk_image ".+-[0-9a-f]+" "any branch")
fi
fi

# Handle case where no suitable disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
echo "No suitable cached state disk available."
echo "Cached state test jobs must depend on the cached state rebuild job."
exit 1
fi

echo "Selected Disk: ${CACHED_DISK_NAME}"
else
DISK_PREFIX="${ZEBRA_STATE_DIR:-${DISK_PREFIX}}"
echo "DISK_PREFIX or DISK_SUFFIX is not set. Skipping disk image search."
fi

# Find the most suitable cached disk image
echo "Finding the most suitable cached disk image..."
if [[ -z "${CACHED_DISK_NAME}" ]]; then
# Try to find a cached disk image from the current commit
COMMIT_DISK_PREFIX="${DISK_PREFIX}-.+-${GITHUB_SHA_SHORT}-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
CACHED_DISK_NAME=$(find_cached_disk_image "${COMMIT_DISK_PREFIX}" "commit")
# If no cached disk image is found, try to find one from the main branch
if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
MAIN_DISK_PREFIX="${DISK_PREFIX}-main-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
CACHED_DISK_NAME=$(find_cached_disk_image "${MAIN_DISK_PREFIX}" "main branch")
# Else, try to find one from any branch
# Function to find and output available disk image types (e.g., lwd_tip_disk, zebra_tip_disk, zebra_checkpoint_disk)
find_available_disk_type() {
local base_name="${1}"
local disk_type="${2}"
local disk_pattern="${base_name}-cache"
local output_var="${base_name}_${disk_type}_disk"
local disk_name

disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_pattern}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${disk_type}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)

# Use >&2 to redirect to stderr and avoid sending wrong assignments to stdout
if [[ -n "${disk_name}" ]]; then
echo "Found ${disk_type^^} disk: ${disk_name} for ${base_name^^} on network: ${NETWORK}" >&2
disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)")
echo "Description: ${disk_description}" >&2
echo "true" # This is the actual return value when a disk is found
else
ANY_DISK_PREFIX="${DISK_PREFIX}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
CACHED_DISK_NAME=$(find_cached_disk_image "${ANY_DISK_PREFIX}" "any branch")
echo "No ${disk_type^^} disk found for ${base_name^^} on network: ${NETWORK}" >&2
echo "false" # This is the actual return value when no disk is found
fi
}
if [[ -n "${NETWORK}" ]]; then
# Check for specific disk images (lwd_tip_disk, zebra_tip_disk, zebra_checkpoint_disk)
echo "Checking for specific disk images..."
LWD_TIP_DISK=$(find_available_disk_type "lwd" "tip")
ZEBRA_TIP_DISK=$(find_available_disk_type "zebrad" "tip")
ZEBRA_CHECKPOINT_DISK=$(find_available_disk_type "zebrad" "checkpoint")
fi

# Handle case where no suitable disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
echo "No suitable cached state disk available."
echo "Expected pattern: ${COMMIT_DISK_PREFIX}"
echo "Cached state test jobs must depend on the cached state rebuild job."
exit 1
fi

echo "Selected Disk: ${CACHED_DISK_NAME}"

# Exporting variables for subsequent steps
echo "Exporting variables for subsequent steps..."
export CACHED_DISK_NAME="${CACHED_DISK_NAME}"
export LOCAL_STATE_VERSION="${LOCAL_STATE_VERSION}"
export LWD_TIP_DISK="${LWD_TIP_DISK}"
export ZEBRA_TIP_DISK="${ZEBRA_TIP_DISK}"
export ZEBRA_CHECKPOINT_DISK="${ZEBRA_CHECKPOINT_DISK}"
54 changes: 8 additions & 46 deletions .github/workflows/sub-ci-integration-tests-gcp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ on:
#!
#! The job names in `ci-integration-tests-gcp.yml`, `ci-integration-tests-gcp.patch.yml` and
#! `ci-integration-tests-gcp.patch-external.yml` must be kept in sync.
#!
#! The test variables ZEBRA_CACHED_STATE_DIR and LIGHTWALLETD_DATA_DIR used in some steps are set in the
#! `sub-deploy-integration-tests-gcp.yml` workflow file as inputs. If modified in this file, they must
#! also be updated in the `sub-deploy-integration-tests-gcp.yml` file.
jobs:
# to also run a job on Mergify head branches,
# add `|| (github.event_name == 'push' && startsWith(github.head_ref, 'mergify/merge-queue/'))`:
Expand Down Expand Up @@ -79,7 +83,7 @@ jobs:
app_name: zebrad
test_id: sync-to-checkpoint
test_description: Test sync up to mandatory checkpoint
test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_DISK_REBUILD=1 -e ZEBRA_FORCE_USE_COLOR=1"
test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_DISK_REBUILD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache"
needs_zebra_state: false
saves_to_disk: true
force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
Expand Down Expand Up @@ -108,7 +112,7 @@ jobs:
app_name: zebrad
test_id: sync-past-checkpoint
test_description: Test full validation sync from a cached state
test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_CHECKPOINT_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1"
test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_CHECKPOINT_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache"
needs_zebra_state: true
saves_to_disk: false
disk_suffix: checkpoint
Expand Down Expand Up @@ -138,13 +142,12 @@ jobs:
test_description: Test a full sync up to the tip
# The value of FULL_SYNC_MAINNET_TIMEOUT_MINUTES is currently ignored.
# TODO: update the test to use {{ input.network }} instead?
test_variables: "-e NETWORK=Mainnet -e FULL_SYNC_MAINNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1"
test_variables: "-e NETWORK=Mainnet -e FULL_SYNC_MAINNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache"
# This test runs for longer than 6 hours, so it needs multiple jobs
is_long_test: true
needs_zebra_state: false
saves_to_disk: true
force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
disk_suffix: tip
height_grep_text: 'current_height.*=.*Height.*\('
secrets: inherit
# We want to prevent multiple full zebrad syncs running at the same time,
Expand Down Expand Up @@ -184,9 +187,6 @@ jobs:
# update the disk on every PR, to increase CI speed
saves_to_disk: true
force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
height_grep_text: 'current_height.*=.*Height.*\('
secrets: inherit

Expand Down Expand Up @@ -217,9 +217,6 @@ jobs:
needs_zebra_state: true
# test-update-sync updates the disk on every PR, so we don't need to do it here
saves_to_disk: false
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
height_grep_text: 'current_height.*=.*Height.*\('
secrets: inherit

Expand Down Expand Up @@ -248,15 +245,14 @@ jobs:
test_id: full-sync-testnet
test_description: Test a full sync up to the tip on testnet
# The value of FULL_SYNC_TESTNET_TIMEOUT_MINUTES is currently ignored.
test_variables: "-e NETWORK=Testnet -e FULL_SYNC_TESTNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1"
test_variables: "-e NETWORK=Testnet -e FULL_SYNC_TESTNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache"
network: "Testnet"
# A full testnet sync could take 2-10 hours in April 2023.
# The time varies a lot due to the small number of nodes.
is_long_test: true
needs_zebra_state: false
saves_to_disk: true
force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
disk_suffix: tip
height_grep_text: 'current_height.*=.*Height.*\('
secrets: inherit
# We want to prevent multiple full zebrad syncs running at the same time,
Expand Down Expand Up @@ -300,9 +296,6 @@ jobs:
# we don't have a test-update-sync-testnet job, so we need to update the disk here
saves_to_disk: true
force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
height_grep_text: 'zebra_tip_height.*=.*Height.*\('
secrets: inherit

Expand Down Expand Up @@ -335,10 +328,6 @@ jobs:
saves_to_disk: true
force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
disk_prefix: lwd-cache
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
lwd_state_dir: "lwd-cache"
height_grep_text: "Waiting for block: "
secrets: inherit
# We want to prevent multiple lightwalletd full syncs running at the same time,
Expand Down Expand Up @@ -372,10 +361,6 @@ jobs:
saves_to_disk: true
force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
disk_prefix: lwd-cache
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
lwd_state_dir: "lwd-cache"
height_grep_text: "Waiting for block: "
secrets: inherit

Expand All @@ -401,9 +386,6 @@ jobs:
test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_LWD_RPC_CALL=1 -e ZEBRA_TEST_LIGHTWALLETD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache"
needs_zebra_state: true
saves_to_disk: false
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
secrets: inherit

# Test that Zebra can handle a lightwalletd send transaction RPC call, using a cached Zebra tip state
Expand All @@ -427,10 +409,6 @@ jobs:
needs_zebra_state: true
needs_lwd_state: true
saves_to_disk: false
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
lwd_state_dir: "lwd-cache"
secrets: inherit

# Test that Zebra can handle gRPC wallet calls, using a cached Zebra tip state
Expand All @@ -454,10 +432,6 @@ jobs:
needs_zebra_state: true
needs_lwd_state: true
saves_to_disk: false
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
lwd_state_dir: "lwd-cache"
secrets: inherit

## getblocktemplate-rpcs using cached Zebra state on mainnet
Expand Down Expand Up @@ -485,9 +459,6 @@ jobs:
needs_zebra_state: true
needs_lwd_state: false
saves_to_disk: false
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
secrets: inherit

# Test that Zebra can handle a submit block RPC call, using a cached Zebra tip state
Expand All @@ -511,9 +482,6 @@ jobs:
needs_zebra_state: true
needs_lwd_state: false
saves_to_disk: false
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
secrets: inherit

# Test that the scanner can continue scanning where it was left when zebrad restarts.
Expand All @@ -537,9 +505,6 @@ jobs:
needs_zebra_state: true
needs_lwd_state: false
saves_to_disk: true
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
secrets: inherit

# Test that the scan task registers keys, deletes keys, and subscribes to results for keys while running.
Expand All @@ -563,9 +528,6 @@ jobs:
needs_zebra_state: true
needs_lwd_state: false
saves_to_disk: false
disk_suffix: tip
root_state_path: "/var/cache"
zebra_state_dir: "zebrad-cache"
secrets: inherit

failure-issue:
Expand Down
Loading
Loading