diff --git a/.github/workflows/deploy-standalone.yml b/.github/workflows/deploy-standalone.yml
index 026206d4bf..4851408b93 100644
--- a/.github/workflows/deploy-standalone.yml
+++ b/.github/workflows/deploy-standalone.yml
@@ -1,5 +1,6 @@
 ---
 # Deploys a standalone instance of pd, specifically to exercise the auto-https direct-serve logic.
+# TODO: once stable, fold this logic into the "deploy-preview" workflow.
 name: Deploy standalone pd, preview
 on:
   # Only run when triggered manually.
@@ -13,7 +14,7 @@ concurrency:
 
 jobs:
   deploy:
-    name: deploy jawn
+    name: deploy standalone pd node, preview
     permissions:
       contents: 'read'
       id-token: 'write'
@@ -26,10 +27,24 @@ jobs:
         run: |-
           mkdir -p ~/.ssh
           chmod 700 ~/.ssh
-          echo ${{ secrets.CI_RUNNER_SSH_PRIVKEY }} > ~/.ssh/id_ed25519
-          # TODO host key mgmt
+          echo "$SSH_PRIVKEY" > ~/.ssh/id_ed25519
+          chmod 600 ~/.ssh/id_ed25519
+          echo "$SSH_HOSTKEYS" > ~/.ssh/known_hosts
+        env:
+          # In order to generate the hostkey material:
+          #
+          #   echo > hostkeys.txt
+          #   ssh-keyscan -H solo-pd.testnet-preview.plinfra.net | tee -a hostkeys.txt
+          #   ssh-keyscan -H solo-pd.testnet.plinfra.net | tee -a hostkeys.txt
+          #
+          # Then paste the contents of that file into `secrets.CI_RUNNER_SSH_HOSTKEYS`,
+          # so it's consumable via this step.
+          SSH_PRIVKEY: ${{ secrets.CI_RUNNER_SSH_PRIVKEY }}
+          SSH_HOSTKEYS: ${{ secrets.CI_RUNNER_SSH_HOSTKEYS }}
 
       - name: deploy
+        shell: bash
         run: |-
-          # TODO: iterate on this workflow in a separate PR. need the base in main first.
+          export PENUMBRA_VERSION="main"
+          export PENUMBRA_ENVIRONMENT="penumbra-preview"
           ./deployments/scripts/redeploy-ci-fullnode-via-runner
diff --git a/deployments/scripts/redeploy-ci-fullnode-via-remote b/deployments/scripts/redeploy-ci-fullnode-via-remote
index 5ec5f5ba68..e01647703d 100755
--- a/deployments/scripts/redeploy-ci-fullnode-via-remote
+++ b/deployments/scripts/redeploy-ci-fullnode-via-remote
@@ -31,44 +31,61 @@ if ! getent passwd | grep -q "^penumbra:" ; then
 fi
 
 # Take down running service prior to maintenance.
+echo "Stopping running services..."
 sudo systemctl stop cometbft penumbra
 
 # Pluck out recently built `pd` from packaged container.
 # We reuse existing build artifacts to ensure what's deployed it what was built,
 # and it has the nice benefit of being faster, because we don't have to rebuild
 # the same gitref on a slower remote host.
-# TODO: local container storage will grow indefinitely; only a problem for preview, but handle it.
+echo "Fetching latest version of pd..."
 container_img="ghcr.io/penumbra-zone/penumbra:${PENUMBRA_VERSION}"
 podman pull "$container_img"
-container_id="$(podman run "$container_img" sleep infinity)"
+container_id="$(podman run --detach "$container_img" sleep infinity)"
 f="$(mktemp)"
 podman cp "${container_id}:/usr/bin/pd" "$f"
 podman kill "$container_id"
 sudo mv -v -f "$f" /usr/local/bin/pd
 
+# Clean up container storage, which will grow indefinitely; mostly only a problem for preview,
+# but we still don't want to fill up disks.
+podman system prune --force
+
 # Back up ACME dir, so we don't hit ratelimit requesting new certs.
+sudo rm -rf /opt/penumbra-ci
 acme_cache="/home/penumbra/.penumbra/testnet_data/node0/pd/tokio_rustls_acme_cache"
 if [[ -d "$acme_cache" ]]; then
-    sudo rm -rf /opt/penumbra-ci
+    echo "Backing up ACME certificate directory..."
     sudo mkdir -p /opt/penumbra-ci
     sudo mv "$acme_cache" /opt/penumbra-ci/
 else
-    >&2 echo "ERROR: ACME cache directory not found: $acme_cache"
-    exit 4
+    >&2 echo "WARNING: ACME cache directory not found: $acme_cache"
+    # don't exit
+    # exit 4
 fi
 
 # The pd operations must be run specifically as "penumbra" user.
 # Nuke state, rejoin.
+echo "Resetting node state..."
 sudo -u penumbra pd testnet unsafe-reset-all
-sudo -u penumbra pd testnet join "$pd_bootstrap_url"
-# Restore ACME dir prior to service start
-sudo mv -v "/opt/penumbra-ci/$(basename "$acme_cache")" "$acme_cache"
+# Using "oumuamua" as moniker to connote that this node is "out there", i.e. separate
+# from the standard fullnode deployments, and also, cutely, it's technically a celestial body.
+sudo -u penumbra pd testnet join --moniker oumuamua "$pd_bootstrap_url"
+
+# ACME cache dir may not be present, e.g. on a first deploy.
+if [[ -d "/opt/penumbra-ci/$(basename "$acme_cache")" ]] ; then
+    echo "Restoring ACME dir prior to service start..."
+    sudo mv -v "/opt/penumbra-ci/$(basename "$acme_cache")" "$acme_cache"
+fi
 sudo chown -R penumbra: /home/penumbra/.penumbra
 
 # Bring service back up.
+echo "Bringing services back up..."
 sudo systemctl daemon-reload
 sudo systemctl restart penumbra cometbft
-# Verify that the services are in fact running, else exit non-zero.
+echo "Verifying that the services are running:"
 sleep 5
+printf 'penumbra: '
 sudo systemctl is-active penumbra
+printf 'cometbft: '
 sudo systemctl is-active cometbft
diff --git a/deployments/scripts/redeploy-ci-fullnode-via-runner b/deployments/scripts/redeploy-ci-fullnode-via-runner
index a6f5a07ac4..ae1f0b26cc 100755
--- a/deployments/scripts/redeploy-ci-fullnode-via-runner
+++ b/deployments/scripts/redeploy-ci-fullnode-via-runner
@@ -32,6 +32,6 @@ fi
 # 3) systemd service files for pd & cometbft
 #
 # As for the script that's being execute on the target, we'll copy that up from local context.
-scp ./deployments/scripts/redeploy-ci-fullnode-via-remote "${ci_ssh_host}:"
+scp ./deployments/scripts/redeploy-ci-fullnode-via-remote "ci@${ci_ssh_host}:"
 ssh -l ci "$ci_ssh_host" sudo mv redeploy-ci-fullnode-via-remote /usr/local/bin/redeploy-ci-fullnode-via-remote
-ssh -l ci "$ci_ssh_host" sudo /usr/local/bin/redeploy-ci-fullnode-via-remote
+ssh -l ci "$ci_ssh_host" sudo /usr/local/bin/redeploy-ci-fullnode-via-remote "$PENUMBRA_VERSION" "$PENUMBRA_ENVIRONMENT"
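
Note on the new positional arguments: the runner script now invokes the remote script with "$PENUMBRA_VERSION" and "$PENUMBRA_ENVIRONMENT", but the hunks above do not show how the remote script consumes them. Below is a minimal sketch of the kind of argument-handling preamble the remote script would need; the variable names match the diff, while the usage message, exit code, and bootstrap-URL mapping are illustrative assumptions, not taken from the actual file.

    #!/bin/bash
    set -euo pipefail

    # Unpack the positional args passed by redeploy-ci-fullnode-via-runner.
    PENUMBRA_VERSION="${1:-}"
    PENUMBRA_ENVIRONMENT="${2:-}"
    if [[ -z "$PENUMBRA_VERSION" || -z "$PENUMBRA_ENVIRONMENT" ]] ; then
        >&2 echo "Usage: redeploy-ci-fullnode-via-remote <PENUMBRA_VERSION> <PENUMBRA_ENVIRONMENT>"
        exit 1
    fi

    # Derive the bootstrap URL referenced later in the script as "$pd_bootstrap_url".
    # (Assumed mapping; adjust to the environments actually deployed.)
    if [[ "$PENUMBRA_ENVIRONMENT" = "penumbra-preview" ]] ; then
        pd_bootstrap_url="https://rpc.testnet-preview.penumbra.zone"
    else
        pd_bootstrap_url="https://rpc.testnet.penumbra.zone"
    fi

With a preamble along these lines, the workflow's exports of PENUMBRA_VERSION and PENUMBRA_ENVIRONMENT flow from the GitHub Actions job, through the runner's ssh invocation, into the remote script as $1 and $2.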