From fc70ab38d1d40a3e2f0506efff942c1c0f2e773e Mon Sep 17 00:00:00 2001 From: Conor Schaefer Date: Mon, 29 Jan 2024 15:16:00 -0800 Subject: [PATCH] ci: workflow for standalone pd We want to exercise the pd https logic, but we can't naively run it from scratch on every deploy, because that'd be far too many API requests to reissue certs from ACME. Instead, let's preserve the ACME directory before wiping state, and reuse it before bouncing the service. This setup requires always-on boxes provisioned out of band. So far, this adds the base logic via a workflow. In order to get it running, I'll need to iterate on the workflow, but workflows must land on main prior to being available for ad-hoc execution. Refs #3336. --- .github/workflows/deploy-standalone.yml | 35 +++++++++ deployments/scripts/install-cometbft | 30 ++++++-- .../scripts/redeploy-ci-fullnode-via-remote | 74 +++++++++++++++++++ .../scripts/redeploy-ci-fullnode-via-runner | 37 ++++++++++ deployments/systemd/penumbra.service | 4 + 5 files changed, 175 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/deploy-standalone.yml create mode 100755 deployments/scripts/redeploy-ci-fullnode-via-remote create mode 100755 deployments/scripts/redeploy-ci-fullnode-via-runner diff --git a/.github/workflows/deploy-standalone.yml b/.github/workflows/deploy-standalone.yml new file mode 100644 index 0000000000..026206d4bf --- /dev/null +++ b/.github/workflows/deploy-standalone.yml @@ -0,0 +1,35 @@ +--- +# Deploys a standalone instance of pd, specifically to exercise the auto-https direct-serve logic. +name: Deploy standalone pd, preview +on: + # Only run when triggered manually. + workflow_dispatch: + workflow_call: + +# Queue deploys serially. 
+concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: false + +jobs: + deploy: + name: deploy jawn + permissions: + contents: 'read' + id-token: 'write' + runs-on: ubuntu-latest + steps: + - name: checkout + uses: actions/checkout@v4 + + - name: configure ssh identity + run: |- + mkdir -p ~/.ssh + chmod 700 ~/.ssh + (umask 077 && printf '%s\n' "${{ secrets.CI_RUNNER_SSH_PRIVKEY }}" > ~/.ssh/id_ed25519) + # TODO host key mgmt + + - name: deploy + run: |- + # TODO: iterate on this workflow in a separate PR. need the base in main first. + ./deployments/scripts/redeploy-ci-fullnode-via-runner diff --git a/deployments/scripts/install-cometbft b/deployments/scripts/install-cometbft index 577314bce8..445f0cf9ce 100755 --- a/deployments/scripts/install-cometbft +++ b/deployments/scripts/install-cometbft @@ -1,12 +1,32 @@ #!/bin/bash -# Utility script to download a version of CometBFT for use with Penumbra. +# Utility script to download a specific version of CometBFT for use with Penumbra. +# Designed to be used in CI contexts, to bootstrap a testing setup quickly. +set -euo pipefail +# Sane defaults COMETBFT_VERSION="${COMETBFT_VERSION:-0.37.2}" -curl -L -O "https://github.com/cometbft/cometbft/releases/download/v${COMETBFT_VERSION}/cometbft_${COMETBFT_VERSION}_linux_amd64.tar.gz" + +# Download and extract +cometbft_download_url="https://github.com/cometbft/cometbft/releases/download/v${COMETBFT_VERSION}/cometbft_${COMETBFT_VERSION}_linux_amd64.tar.gz" +cometbft_temp_dir="$(mktemp -d)" +trap 'rm -r "$cometbft_temp_dir"' EXIT +pushd "$cometbft_temp_dir" > /dev/null +curl -sSfL -O "$cometbft_download_url" tar -xzf "cometbft_${COMETBFT_VERSION}_linux_amd64.tar.gz" cometbft -mkdir -p "$HOME/bin" -cp -v cometbft "$HOME/bin/" -export PATH="$HOME/bin:$PATH" + +# Try to write to system-wide location. 
+if [[ -w /usr/local/bin/ ]] ; then + mv -v cometbft /usr/local/bin/ +else + cometbft_install_dir="${HOME:?}/bin" + >&2 echo "WARNING: /usr/local/bin/ not writable, installing cometbft to $cometbft_install_dir" + mkdir -p "$cometbft_install_dir" + mv -v cometbft "${cometbft_install_dir}/" + export PATH="$PATH:$cometbft_install_dir" +fi + +# Sanity checks +echo "Checking that cometbft is installed:" which cometbft cometbft version diff --git a/deployments/scripts/redeploy-ci-fullnode-via-remote b/deployments/scripts/redeploy-ci-fullnode-via-remote new file mode 100755 index 0000000000..5ec5f5ba68 --- /dev/null +++ b/deployments/scripts/redeploy-ci-fullnode-via-remote @@ -0,0 +1,74 @@ +#!/bin/bash +# CI script to manage a standalone fullnode, created in order to exercise +# direct serving of pd. This script is intended to be executed on the remote host +# that serves `pd`, triggered from a CI runner over SSH. +set -euo pipefail + + +# Unpack args. +if [[ $# -lt 2 ]] ; then + >&2 echo "ERROR: required arguments not specified." + >&2 echo "Usage: $0 <PENUMBRA_VERSION> <PENUMBRA_ENVIRONMENT>" + exit 1 +fi +PENUMBRA_VERSION="${1:-}" +PENUMBRA_ENVIRONMENT="${2:-}" +shift 2 +if [[ "$PENUMBRA_ENVIRONMENT" = "penumbra-preview" ]] ; then + pd_bootstrap_url="https://rpc.testnet-preview.penumbra.zone" +elif [[ "$PENUMBRA_ENVIRONMENT" = "penumbra-testnet" ]] ; then + pd_bootstrap_url="https://rpc.testnet.penumbra.zone" +else + >&2 echo "ERROR: unsupported PENUMBRA_ENVIRONMENT: '$PENUMBRA_ENVIRONMENT'" + exit 2 +fi + +# Additional sanity-check to ensure we're running in the proper CI context. +if ! getent passwd | grep -q "^penumbra:" ; then + >&2 echo "ERROR: 'penumbra' user not found." + >&2 echo "This script should only be run within a dedicated CI box." + exit 3 +fi + +# Take down running service prior to maintenance. +sudo systemctl stop cometbft penumbra + +# Pluck out recently built `pd` from packaged container. 
+# We reuse existing build artifacts to ensure what's deployed is what was built, +# and it has the nice benefit of being faster, because we don't have to rebuild +# the same gitref on a slower remote host. +# TODO: local container storage will grow indefinitely; only a problem for preview, but handle it. +container_img="ghcr.io/penumbra-zone/penumbra:${PENUMBRA_VERSION}" +podman pull "$container_img" +container_id="$(podman run --detach "$container_img" sleep infinity)" +f="$(mktemp)" +podman cp "${container_id}:/usr/bin/pd" "$f" +podman kill "$container_id" +sudo mv -v -f "$f" /usr/local/bin/pd + +# Back up ACME dir, so we don't hit ratelimit requesting new certs. +acme_cache="/home/penumbra/.penumbra/testnet_data/node0/pd/tokio_rustls_acme_cache" +if [[ -d "$acme_cache" ]]; then + sudo rm -rf /opt/penumbra-ci + sudo mkdir -p /opt/penumbra-ci + sudo mv "$acme_cache" /opt/penumbra-ci/ +else + >&2 echo "ERROR: ACME cache directory not found: $acme_cache" + exit 4 +fi + +# The pd operations must be run specifically as "penumbra" user. +# Nuke state, rejoin. +sudo -u penumbra pd testnet unsafe-reset-all +sudo -u penumbra pd testnet join "$pd_bootstrap_url" +# Restore ACME dir prior to service start +sudo mv -v "/opt/penumbra-ci/$(basename "$acme_cache")" "$acme_cache" +sudo chown -R penumbra: /home/penumbra/.penumbra + +# Bring service back up. +sudo systemctl daemon-reload +sudo systemctl restart penumbra cometbft +# Verify that the services are in fact running, else exit non-zero. +sleep 5 +sudo systemctl is-active penumbra +sudo systemctl is-active cometbft diff --git a/deployments/scripts/redeploy-ci-fullnode-via-runner b/deployments/scripts/redeploy-ci-fullnode-via-runner new file mode 100755 index 0000000000..a6f5a07ac4 --- /dev/null +++ b/deployments/scripts/redeploy-ci-fullnode-via-runner @@ -0,0 +1,37 @@ +#!/bin/bash +# CI script to manage a standalone fullnode, created in order to exercise +# direct serving of pd. 
This script is intended to be run from CI, +# communicating with a remote node over SSH and munging its state. +set -euo pipefail +set -x + +# Unpack args. Different CI workflows can override these settings, +# to determine whether we're targeting testnet or preview. +PENUMBRA_VERSION="${PENUMBRA_VERSION:-main}" +PENUMBRA_ENVIRONMENT="${PENUMBRA_ENVIRONMENT:-penumbra-preview}" + +if [[ -z "$PENUMBRA_VERSION" || -z "$PENUMBRA_ENVIRONMENT" ]] ; then + >&2 echo "ERROR: required env vars not set: PENUMBRA_VERSION, PENUMBRA_ENVIRONMENT" + exit 1 +fi + +if [[ "$PENUMBRA_ENVIRONMENT" = "penumbra-preview" ]] ; then + ci_ssh_host="solo-pd.testnet-preview.plinfra.net" +elif [[ "$PENUMBRA_ENVIRONMENT" = "penumbra-testnet" ]] ; then + ci_ssh_host="solo-pd.testnet.plinfra.net" +else + >&2 echo "ERROR: unsupported PENUMBRA_ENVIRONMENT: '$PENUMBRA_ENVIRONMENT'" + exit 2 +fi + +# Communicate with target host over SSH, run the script. +# The remote box has been provisioned with: +# +# 1) an ssh keypair assigned to admin user `ci` +# 2) a normal user account `penumbra` for running services +# 3) systemd service files for pd & cometbft +# +# As for the script that's being executed on the target, we'll copy that up from local context. +scp ./deployments/scripts/redeploy-ci-fullnode-via-remote "ci@${ci_ssh_host}:" +ssh -l ci "$ci_ssh_host" sudo mv redeploy-ci-fullnode-via-remote /usr/local/bin/redeploy-ci-fullnode-via-remote +ssh -l ci "$ci_ssh_host" sudo /usr/local/bin/redeploy-ci-fullnode-via-remote "$PENUMBRA_VERSION" "$PENUMBRA_ENVIRONMENT" diff --git a/deployments/systemd/penumbra.service b/deployments/systemd/penumbra.service index 3e9a7696d2..379e11c689 100644 --- a/deployments/systemd/penumbra.service +++ b/deployments/systemd/penumbra.service @@ -3,6 +3,10 @@ Description=Penumbra pd Wants=cometbft.service [Service] +# If both 1) running pd as non-root; and 2) using auto-https logic, then +# uncomment the capability declarations below to permit binding to 443/TCP for HTTPS. 
+# CapabilityBoundingSet=CAP_NET_BIND_SERVICE +# AmbientCapabilities=CAP_NET_BIND_SERVICE ExecStart=/usr/local/bin/pd start # Consider overriding the home directory, e.g. # ExecStart=/usr/local/bin/pd start --home /var/www/.penumbra/testnet_data/node0/pd