Merge of #8865

ZcashFoundation · Sep 19, 2024 · 9a1dede · 9a1dede
2 parents c5d8eb5 + b09411b
commit 9a1dede
Show file tree

Hide file tree

Showing 8 changed files with 194 additions and 252 deletions.
diff --git a/.github/workflows/scripts/gcp-get-available-disks.sh b/.github/workflows/scripts/gcp-get-available-disks.sh
diff --git a/.github/workflows/scripts/gcp-get-cached-disks.sh b/.github/workflows/scripts/gcp-get-cached-disks.sh
@@ -1,20 +1,33 @@
 #!/usr/bin/env bash
 
-# Description:
 # This script finds a cached Google Cloud Compute image based on specific criteria.
-# It prioritizes images from the current commit, falls back to the main branch,
-# and finally checks other branches if needed. The selected image is used for
-# setting up the environment in a CI/CD pipeline.
+#
+# If there are multiple disks:
+# - prefer images generated from the same commit, then
+# - if prefer_main_cached_state is true, use only images from the `main` branch,
+# - otherwise, use images from any branch or commit.
+#
+# Within each of these categories:
+# - prefer newer images to older images
+#
+# The selected image is used for setting up the environment in a CI/CD pipeline.
+# It also checks if specific disk types are available for subsequent jobs.
 
 set -eo pipefail
 
-# Function to find and report a cached disk image
+# Extract local state version
+echo "Extracting local state version..."
+LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
+echo "STATE_VERSION: ${LOCAL_STATE_VERSION}"
+
+# Function to find a cached disk image based on the git pattern (commit, main, or any branch)
 find_cached_disk_image() {
-    local search_pattern="${1}"
+    local git_pattern="${1}"
     local git_source="${2}"
     local disk_name
+    local disk_search_pattern="${DISK_PREFIX}-${git_pattern}-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
 
-    disk_name=$(gcloud compute images list --filter="status=READY AND name~${search_pattern}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
+    disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_search_pattern}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
 
     # Use >&2 to redirect to stderr and avoid sending wrong assignments to stdout
     if [[ -n "${disk_name}" ]]; then
@@ -27,46 +40,71 @@ find_cached_disk_image() {
     fi
 }
 
-# Extract local state version
-echo "Extracting local state version..."
-LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
-echo "STATE_VERSION: ${LOCAL_STATE_VERSION}"
+# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to find a cached disk image
+if [[ -n "${DISK_PREFIX}" && -n "${DISK_SUFFIX}" ]]; then
+    # Find the most suitable cached disk image
+    echo "Finding the most suitable cached disk image..."
+    CACHED_DISK_NAME=""
+
+    # First, try to find a cached disk image from the current commit
+    CACHED_DISK_NAME=$(find_cached_disk_image ".+-${GITHUB_SHA_SHORT}" "commit")
 
-# Define DISK_PREFIX based on the requiring state directory
-if [[ "${NEEDS_LWD_STATE}" == "true" ]]; then
-    DISK_PREFIX="${LWD_STATE_DIR}"
+    # If no cached disk image is found
+    if [[ -z "${CACHED_DISK_NAME}" ]]; then
+        # Check if main branch images are preferred
+        if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
+            CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch")
+        # Else, try to find one from any branch
+        else
+            CACHED_DISK_NAME=$(find_cached_disk_image ".+-[0-9a-f]+" "any branch")
+        fi
+    fi
+
+    # Handle case where no suitable disk image is found
+    if [[ -z "${CACHED_DISK_NAME}" ]]; then
+        echo "No suitable cached state disk available."
+        echo "Cached state test jobs must depend on the cached state rebuild job."
+        exit 1
+    fi
+
+    echo "Selected Disk: ${CACHED_DISK_NAME}"
 else
-    DISK_PREFIX="${ZEBRA_STATE_DIR:-${DISK_PREFIX}}"
+    echo "DISK_PREFIX or DISK_SUFFIX is not set. Skipping disk image search."
 fi
 
-# Find the most suitable cached disk image
-echo "Finding the most suitable cached disk image..."
-if [[ -z "${CACHED_DISK_NAME}" ]]; then
-    # Try to find a cached disk image from the current commit
-    COMMIT_DISK_PREFIX="${DISK_PREFIX}-.+-${GITHUB_SHA_SHORT}-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
-    CACHED_DISK_NAME=$(find_cached_disk_image "${COMMIT_DISK_PREFIX}" "commit")
-    # If no cached disk image is found, try to find one from the main branch
-    if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
-        MAIN_DISK_PREFIX="${DISK_PREFIX}-main-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
-        CACHED_DISK_NAME=$(find_cached_disk_image "${MAIN_DISK_PREFIX}" "main branch")
-    # Else, try to find one from any branch
+# Function to check whether a disk image of a given type exists, printing "true" or "false" to stdout (used to set e.g. LWD_TIP_DISK, ZEBRA_TIP_DISK, ZEBRA_CHECKPOINT_DISK)
+find_available_disk_type() {
+    local base_name="${1}"
+    local disk_type="${2}"
+    local disk_pattern="${base_name}-cache"
+    local output_var="${base_name}_${disk_type}_disk"
+    local disk_name
+
+    disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_pattern}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${disk_type}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
+
+    # Log to stderr (>&2) so that only the "true"/"false" result on stdout is captured by the caller
+    if [[ -n "${disk_name}" ]]; then
+        echo "Found ${disk_type^^} disk: ${disk_name} for ${base_name^^} on network: ${NETWORK}" >&2
+        disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)")
+        echo "Description: ${disk_description}" >&2
+        echo "true"  # This is the actual return value when a disk is found
     else
-        ANY_DISK_PREFIX="${DISK_PREFIX}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
-        CACHED_DISK_NAME=$(find_cached_disk_image "${ANY_DISK_PREFIX}" "any branch")
+        echo "No ${disk_type^^} disk found for ${base_name^^} on network: ${NETWORK}" >&2
+        echo "false"  # This is the actual return value when no disk is found
     fi
+}
+if [[ -n "${NETWORK}" ]]; then
+    # Check for specific disk images (lwd_tip_disk, zebra_tip_disk, zebra_checkpoint_disk)
+    echo "Checking for specific disk images..."
+    LWD_TIP_DISK=$(find_available_disk_type "lwd" "tip")
+    ZEBRA_TIP_DISK=$(find_available_disk_type "zebrad" "tip")
+    ZEBRA_CHECKPOINT_DISK=$(find_available_disk_type "zebrad" "checkpoint")
 fi
 
-# Handle case where no suitable disk image is found
-if [[ -z "${CACHED_DISK_NAME}" ]]; then
-    echo "No suitable cached state disk available."
-    echo "Expected pattern: ${COMMIT_DISK_PREFIX}"
-    echo "Cached state test jobs must depend on the cached state rebuild job."
-    exit 1
-fi
-
-echo "Selected Disk: ${CACHED_DISK_NAME}"
-
 # Exporting variables for subsequent steps
 echo "Exporting variables for subsequent steps..."
 export CACHED_DISK_NAME="${CACHED_DISK_NAME}"
 export LOCAL_STATE_VERSION="${LOCAL_STATE_VERSION}"
+export LWD_TIP_DISK="${LWD_TIP_DISK}"
+export ZEBRA_TIP_DISK="${ZEBRA_TIP_DISK}"
+export ZEBRA_CHECKPOINT_DISK="${ZEBRA_CHECKPOINT_DISK}"
diff --git a/.github/workflows/sub-ci-integration-tests-gcp.yml b/.github/workflows/sub-ci-integration-tests-gcp.yml
@@ -31,6 +31,10 @@ on:
 #!
 #! The job names in `ci-integration-tests-gcp.yml`, `ci-integration-tests-gcp.patch.yml` and
 #! `ci-integration-tests-gcp.patch-external.yml` must be kept in sync.
+#!
+#! The test variables ZEBRA_CACHED_STATE_DIR and LIGHTWALLETD_DATA_DIR used in some steps are set in the
+#! `sub-deploy-integration-tests-gcp.yml` workflow file as inputs. If modified in this file, they must
+#! also be updated in the `sub-deploy-integration-tests-gcp.yml` file.
 jobs:
   # to also run a job on Mergify head branches,
   # add `|| (github.event_name == 'push' && startsWith(github.head_ref, 'mergify/merge-queue/'))`:
@@ -79,7 +83,7 @@ jobs:
       app_name: zebrad
       test_id: sync-to-checkpoint
       test_description: Test sync up to mandatory checkpoint
-      test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_DISK_REBUILD=1 -e ZEBRA_FORCE_USE_COLOR=1"
+      test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_DISK_REBUILD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache"
       needs_zebra_state: false
       saves_to_disk: true
       force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
@@ -108,7 +112,7 @@ jobs:
       app_name: zebrad
       test_id: sync-past-checkpoint
       test_description: Test full validation sync from a cached state
-      test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_CHECKPOINT_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1"
+      test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_CHECKPOINT_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache"
       needs_zebra_state: true
       saves_to_disk: false
       disk_suffix: checkpoint
@@ -138,13 +142,12 @@ jobs:
       test_description: Test a full sync up to the tip
       # The value of FULL_SYNC_MAINNET_TIMEOUT_MINUTES is currently ignored.
       # TODO: update the test to use {{ input.network }} instead?
-      test_variables: "-e NETWORK=Mainnet -e FULL_SYNC_MAINNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1"
+      test_variables: "-e NETWORK=Mainnet -e FULL_SYNC_MAINNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache"
       # This test runs for longer than 6 hours, so it needs multiple jobs
       is_long_test: true
       needs_zebra_state: false
       saves_to_disk: true
       force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
-      disk_suffix: tip
       height_grep_text: 'current_height.*=.*Height.*\('
     secrets: inherit
     # We want to prevent multiple full zebrad syncs running at the same time,
@@ -184,9 +187,6 @@ jobs:
       # update the disk on every PR, to increase CI speed
       saves_to_disk: true
       force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
       height_grep_text: 'current_height.*=.*Height.*\('
     secrets: inherit
 
@@ -217,9 +217,6 @@ jobs:
       needs_zebra_state: true
       # test-update-sync updates the disk on every PR, so we don't need to do it here
       saves_to_disk: false
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
       height_grep_text: 'current_height.*=.*Height.*\('
     secrets: inherit
 
@@ -248,15 +245,14 @@ jobs:
       test_id: full-sync-testnet
       test_description: Test a full sync up to the tip on testnet
       # The value of FULL_SYNC_TESTNET_TIMEOUT_MINUTES is currently ignored.
-      test_variables: "-e NETWORK=Testnet -e FULL_SYNC_TESTNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1"
+      test_variables: "-e NETWORK=Testnet -e FULL_SYNC_TESTNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache"
       network: "Testnet"
       # A full testnet sync could take 2-10 hours in April 2023.
       # The time varies a lot due to the small number of nodes.
       is_long_test: true
       needs_zebra_state: false
       saves_to_disk: true
       force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
-      disk_suffix: tip
       height_grep_text: 'current_height.*=.*Height.*\('
     secrets: inherit
     # We want to prevent multiple full zebrad syncs running at the same time,
@@ -300,9 +296,6 @@ jobs:
       # we don't have a test-update-sync-testnet job, so we need to update the disk here
       saves_to_disk: true
       force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
       height_grep_text: 'zebra_tip_height.*=.*Height.*\('
     secrets: inherit
 
@@ -335,10 +328,6 @@ jobs:
       saves_to_disk: true
       force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
       disk_prefix: lwd-cache
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
-      lwd_state_dir: "lwd-cache"
       height_grep_text: "Waiting for block: "
     secrets: inherit
     # We want to prevent multiple lightwalletd full syncs running at the same time,
@@ -372,10 +361,6 @@ jobs:
       saves_to_disk: true
       force_save_to_disk: ${{ inputs.force_save_to_disk || false }}
       disk_prefix: lwd-cache
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
-      lwd_state_dir: "lwd-cache"
       height_grep_text: "Waiting for block: "
     secrets: inherit
 
@@ -401,9 +386,6 @@ jobs:
       test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_LWD_RPC_CALL=1 -e ZEBRA_TEST_LIGHTWALLETD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache"
       needs_zebra_state: true
       saves_to_disk: false
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
     secrets: inherit
 
   # Test that Zebra can handle a lightwalletd send transaction RPC call, using a cached Zebra tip state
@@ -427,10 +409,6 @@ jobs:
       needs_zebra_state: true
       needs_lwd_state: true
       saves_to_disk: false
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
-      lwd_state_dir: "lwd-cache"
     secrets: inherit
 
   # Test that Zebra can handle gRPC wallet calls, using a cached Zebra tip state
@@ -454,10 +432,6 @@ jobs:
       needs_zebra_state: true
       needs_lwd_state: true
       saves_to_disk: false
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
-      lwd_state_dir: "lwd-cache"
     secrets: inherit
 
   ## getblocktemplate-rpcs using cached Zebra state on mainnet
@@ -485,9 +459,6 @@ jobs:
       needs_zebra_state: true
       needs_lwd_state: false
       saves_to_disk: false
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
     secrets: inherit
 
   # Test that Zebra can handle a submit block RPC call, using a cached Zebra tip state
@@ -511,9 +482,6 @@ jobs:
       needs_zebra_state: true
       needs_lwd_state: false
       saves_to_disk: false
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
     secrets: inherit
 
   # Test that the scanner can continue scanning where it was left when zebrad restarts.
@@ -537,9 +505,6 @@ jobs:
       needs_zebra_state: true
       needs_lwd_state: false
       saves_to_disk: true
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
     secrets: inherit
 
   # Test that the scan task registers keys, deletes keys, and subscribes to results for keys while running.
@@ -563,9 +528,6 @@ jobs:
       needs_zebra_state: true
       needs_lwd_state: false
       saves_to_disk: false
-      disk_suffix: tip
-      root_state_path: "/var/cache"
-      zebra_state_dir: "zebrad-cache"
     secrets: inherit
 
   failure-issue: