Update nightly branch after successful build (#3195)
* Enable updating of nightly branch after successful build.
* Skip nightly build if no changes since last successful build.
* Checkout the full repo so we have all tags etc.
* Respond to PR comments / clean up git commands

Co-authored-by: Dazhong Xia <[email protected]>
1 parent e9a91be · commit 5a81260
Showing 2 changed files with 110 additions and 59 deletions.
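At a high level, the commit teaches the nightly pipeline to bail out early when `HEAD` already matches `origin/nightly`, and, after a successful scheduled build, to fast-forward the `nightly` branch to the freshly created nightly tag. The following is a minimal standalone sketch of that flow, simplified from the diffs below; it is not the exact sequence of commands the workflow and deploy script run.

#!/usr/bin/env bash
# Sketch only: assumes a full clone (fetch-depth: 0 at checkout) and that
# NIGHTLY_TAG (e.g. nightly-YYYY-MM-DD) was created after a successful build.
set -euo pipefail

# 1. Skip the scheduled build if nothing has landed since the last nightly build.
if [[ "$(git rev-parse HEAD)" == "$(git rev-parse origin/nightly)" ]]; then
    echo "No changes since last successful nightly build. Skipping."
    exit 0
fi

# 2. After the build succeeds, fast-forward the nightly branch to the new tag
#    and push it, so nightly always points at the last good build.
git fetch origin nightly:nightly
git checkout nightly
git merge --ff-only "$NIGHTLY_TAG"
git push origin nightly

The `--ff-only` merge refuses to move `nightly` anywhere other than forward along its own history, so the branch can only ever advance to a tag that descends from it.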
First changed file: the nightly build/deploy GitHub Actions workflow.
@@ -33,21 +33,35 @@ jobs:
        uses: actions/checkout@v4
        with:
          ref: ${{ env.BUILD_REF }}
          fetch-depth: 0

      - name: Skip the build if no changes since the last successful nightly build.
        if: ${{ (github.event_name == 'schedule') }}
        run: |
          CURRENT_COMMIT=$(git rev-parse HEAD)
          NIGHTLY_COMMIT=$(git rev-parse origin/nightly)
          if [[ "$CURRENT_COMMIT" == "$NIGHTLY_COMMIT" ]]; then
            echo "::notice::No changes since last successful nightly build. Skipping."
            echo "SKIP_BUILD=true" >> $GITHUB_ENV
            exit 0
          fi
      - name: Set action environment variables
        if: ${{ env.SKIP_BUILD != 'true' }}
        run: |
          echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV
          echo "BUILD_ID=$(date +%Y-%m-%d-%H%M)-$(git rev-parse --short HEAD)-${BUILD_REF}" >> $GITHUB_ENV
      - name: Show freshly set envvars
        if: ${{ env.SKIP_BUILD != 'true' }}
        run: |
          echo "GCE_INSTANCE: $GCE_INSTANCE"
          echo "BUILD_REF: $BUILD_REF"
          echo "NIGHTLY_TAG: $NIGHTLY_TAG"
          echo "BUILD_ID: $BUILD_ID"
      - name: Tag nightly build
        if: ${{ (github.event_name == 'schedule') }}
        if: ${{ (github.event_name == 'schedule') && (env.SKIP_BUILD != 'true') }}
        run: |
          git config user.email "[email protected]"
          git config user.name "pudlbot"

@@ -56,6 +70,7 @@ jobs:
      - name: Docker Metadata
        id: docker_metadata
        if: ${{ env.SKIP_BUILD != 'true' }}
        uses: docker/metadata-action@v5
        with:
          images: catalystcoop/pudl-etl

@@ -66,16 +81,18 @@ jobs:
            type=ref,event=tag
      - name: Set up Docker Buildx
        if: ${{ env.SKIP_BUILD != 'true' }}
        uses: docker/setup-buildx-action@v3

      - name: Login to DockerHub
        if: github.event_name != 'pull_request'
        if: ${{ (github.event_name != 'pull_request') && (env.SKIP_BUILD != 'true') }}
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build image and push to Docker Hub
        if: ${{ env.SKIP_BUILD != 'true' }}
        uses: docker/build-push-action@v5
        with:
          context: .

@@ -87,17 +104,20 @@ jobs:
          cache-to: type=gha,mode=max

      - id: "auth"
        if: ${{ env.SKIP_BUILD != 'true' }}
        uses: "google-github-actions/auth@v2"
        with:
          workload_identity_provider: "projects/345950277072/locations/global/workloadIdentityPools/gh-actions-pool/providers/gh-actions-provider"
          service_account: "deploy-pudl-github-action@catalyst-cooperative-pudl.iam.gserviceaccount.com"

      # Setup gcloud CLI
      - name: Set up Cloud SDK
        if: ${{ env.SKIP_BUILD != 'true' }}
        uses: google-github-actions/setup-gcloud@v2

      # Deploy PUDL image to GCE
      - name: Deploy
        if: ${{ env.SKIP_BUILD != 'true' }}
        env:
          DAGSTER_PG_PASSWORD: ${{ secrets.DAGSTER_PG_PASSWORD }}
          PUDL_OUTPUT_PATH: ${{ env.GCS_OUTPUT_BUCKET }}/${{ env.BUILD_ID }}

@@ -107,7 +127,7 @@ jobs:
            --metadata-from-file startup-script=./docker/vm_startup_script.sh
          gcloud compute instances update-container "$GCE_INSTANCE" \
            --zone "$GCE_INSTANCE_ZONE" \
            --container-image "docker.io/catalystcoop/pudl-etl:${{ env.BUILD_REF}}" \
            --container-image "docker.io/catalystcoop/pudl-etl:${{ env.BUILD_REF }}" \
            --container-command "micromamba" \
            --container-arg="run" \
            --container-arg="--prefix" \

@@ -117,7 +137,7 @@ jobs:
            --container-arg="bash" \
            --container-arg="./docker/gcp_pudl_etl.sh" \
            --container-env-file="./docker/.env" \
            --container-env BUILD_REF=${{ env.BUILD_REF}} \
            --container-env BUILD_REF=${{ env.BUILD_REF }} \
            --container-env BUILD_ID=${{ env.BUILD_ID }} \
            --container-env NIGHTLY_TAG=${{ env.NIGHTLY_TAG }} \
            --container-env GITHUB_ACTION_TRIGGER=${{ github.event_name }} \

@@ -140,6 +160,7 @@ jobs:
      # Start the VM
      - name: Start the deploy-pudl-vm
        if: ${{ env.SKIP_BUILD != 'true' }}
        run: gcloud compute instances start "$GCE_INSTANCE" --zone="$GCE_INSTANCE_ZONE"

      - name: Post to a pudl-deployments channel

Second changed file: docker/gcp_pudl_etl.sh
@@ -2,11 +2,6 @@
# This script runs the entire ETL and validation tests in a docker container on a Google Compute Engine instance.
# This script won't work locally because it needs adequate GCP permissions.

# Set PUDL_GCS_OUTPUT *only* if it is currently unset
: "${PUDL_GCS_OUTPUT:=gs://nightly-build-outputs.catalyst.coop/$BUILD_ID}"

set -x

function send_slack_msg() {
    curl -X POST -H "Content-type: application/json" -H "Authorization: Bearer ${SLACK_TOKEN}" https://slack.com/api/chat.postMessage --data "{\"channel\": \"C03FHB9N0PQ\", \"text\": \"$1\"}"
}

@@ -57,38 +52,38 @@ function shutdown_vm() {
    curl -X POST -H "Content-Length: 0" -H "Authorization: Bearer ${ACCESS_TOKEN}" "https://compute.googleapis.com/compute/v1/projects/catalyst-cooperative-pudl/zones/$GCE_INSTANCE_ZONE/instances/$GCE_INSTANCE/stop"
}

function copy_outputs_to_gcs() {
    echo "Copying outputs to GCP bucket $PUDL_GCS_OUTPUT"
    gsutil -m cp -r "$PUDL_OUTPUT" "$PUDL_GCS_OUTPUT"
function save_outputs_to_gcs() {
    echo "Copying outputs to GCP bucket $PUDL_GCS_OUTPUT" && \
    gsutil -m cp -r "$PUDL_OUTPUT" "$PUDL_GCS_OUTPUT" && \
    rm "$PUDL_OUTPUT/success"
}

function copy_outputs_to_distribution_bucket() {
    # Only attempt to update outputs if we have a real value of BUILD_REF
    if [ -n "$BUILD_REF" ]; then
        echo "Removing old $BUILD_REF outputs from GCP distributon bucket."
        gsutil -m -u "$GCP_BILLING_PROJECT" rm -r "gs://pudl.catalyst.coop/$BUILD_REF"
        echo "Copying outputs to GCP distribution bucket"
        gsutil -m -u "$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/*" "gs://pudl.catalyst.coop/$BUILD_REF"

        echo "Removing old $BUILD_REF outputs from AWS distributon bucket."
        aws s3 rm "s3://pudl.catalyst.coop/$BUILD_REF" --recursive
        echo "Copying outputs to AWS distribution bucket"
    # This avoids accidentally blowing away the whole bucket if it's not set.
    if [[ -n "$BUILD_REF" ]]; then
        echo "Removing old $BUILD_REF outputs from GCP distributon bucket." && \
        gsutil -m -u "$GCP_BILLING_PROJECT" rm -r "gs://pudl.catalyst.coop/$BUILD_REF" && \
        echo "Copying outputs to GCP distribution bucket" && \
        gsutil -m -u "$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/*" "gs://pudl.catalyst.coop/$BUILD_REF" && \
        echo "Removing old $BUILD_REF outputs from AWS distributon bucket." && \
        aws s3 rm "s3://pudl.catalyst.coop/$BUILD_REF" --recursive && \
        echo "Copying outputs to AWS distribution bucket" && \
        aws s3 cp "$PUDL_OUTPUT/" "s3://pudl.catalyst.coop/$BUILD_REF" --recursive
    fi
}

function zenodo_data_release() {
    echo "Creating a new PUDL data release on Zenodo."
    ~/pudl/devtools/zenodo/zenodo_data_release.py --publish --env sandbox --source-dir "$PUDL_OUTPUT"
    echo "Creating a new PUDL data release on Zenodo." && \
    ~/pudl/devtools/zenodo/zenodo_data_release.py --publish --env "$1" --source-dir "$PUDL_OUTPUT"
}

function notify_slack() {
    # Notify pudl-builds slack channel of deployment status
    if [ "$1" = "success" ]; then
    if [[ "$1" == "success" ]]; then
        message=":large_green_circle: :sunglasses: :unicorn_face: :rainbow: The deployment succeeded!! :partygritty: :database_parrot: :blob-dance: :large_green_circle:\n\n "
        message+="<https://github.com/catalyst-cooperative/pudl/compare/main...${BUILD_REF}|Make a PR for \`${BUILD_REF}\` into \`main\`!>\n\n"
    elif [ "$1" = "failure" ]; then
    elif [[ "$1" == "failure" ]]; then
        message=":large_red_square: Oh bummer the deployment failed ::fiiiiine: :sob: :cry_spin:\n\n "
    else
        echo "Invalid deployment status"

@@ -100,60 +95,95 @@ function notify_slack() {
}

function update_nightly_branch() {
    git config --unset http.https://github.com/.extraheader
    git config user.email "[email protected]"
    git config user.name "pudlbot"
    git remote set-url origin "https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git"
    echo "BOGUS: Updating nightly branch to point at $NIGHTLY_TAG."
    git fetch origin nightly:nightly
    git checkout nightly
    git merge --ff-only "$NIGHTLY_TAG"
    ETL_SUCCESS=${PIPESTATUS[0]}
    git push -u origin
    # When building the image, GHA adds an HTTP basic auth header in git
    # config, which overrides the auth we set below. So we unset it.
    git config --unset http.https://github.com/.extraheader && \
    git config user.email "[email protected]" && \
    git config user.name "pudlbot" && \
    git remote set-url origin "https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git" && \
    echo "Updating nightly branch to point at $NIGHTLY_TAG." && \
    git fetch --force --tags origin "$NIGHTLY_TAG" && \
    git fetch origin nightly:nightly && \
    git checkout nightly && \
    git show-ref -d nightly "$NIGHTLY_TAG" && \
    git merge --ff-only "$NIGHTLY_TAG" && \
    git push -u origin nightly
}

function clean_up_outputs_for_distribution() {
    # Compress the SQLite DBs for easier distribution
    gzip --verbose "$PUDL_OUTPUT"/*.sqlite && \
    # Remove redundant multi-file EPA CEMS outputs prior to distribution
    rm -rf "$PUDL_OUTPUT/core_epacems__hourly_emissions/" && \
    rm -f "$PUDL_OUTPUT/metadata.yml"
}

# # Run ETL. Copy outputs to GCS and shutdown VM if ETL succeeds or fails
########################################################################################
# MAIN SCRIPT
########################################################################################
# Initialize our success variables so they all definitely have a value to check
ETL_SUCCESS=0
SAVE_OUTPUTS_SUCCESS=0
UPDATE_NIGHTLY_SUCCESS=0
DATASETTE_SUCCESS=0
CLEAN_UP_OUTPUTS_SUCCESS=0
DISTRIBUTION_BUCKET_SUCCESS=0
ZENODO_SUCCESS=0

# Set PUDL_GCS_OUTPUT *only* if it is currently unset
: "${PUDL_GCS_OUTPUT:=gs://nightly-build-outputs.catalyst.coop/$BUILD_ID}"

# Run ETL. Copy outputs to GCS and shutdown VM if ETL succeeds or fails
# 2>&1 redirects stderr to stdout.
run_pudl_etl 2>&1 | tee "$LOGFILE"
ETL_SUCCESS=${PIPESTATUS[0]}

copy_outputs_to_gcs
save_outputs_to_gcs 2>&1 | tee -a "$LOGFILE"
SAVE_OUTPUTS_SUCCESS=${PIPESTATUS[0]}

# if pipeline is successful, distribute + publish datasette
if [[ $ETL_SUCCESS == 0 ]]; then
    # Deploy the updated data to datasette
    if [ "$BUILD_REF" = "dev" ]; then
        python ~/pudl/devtools/datasette/publish.py 2>&1 | tee -a "$LOGFILE"
        ETL_SUCCESS=${PIPESTATUS[0]}
    if [[ "$GITHUB_ACTION_TRIGGER" == "schedule" ]]; then
        update_nightly_branch 2>&1 | tee -a "$LOGFILE"
        UPDATE_NIGHTLY_SUCCESS=${PIPESTATUS[0]}
    fi

    # Compress the SQLite DBs for easier distribution
    # Remove redundant multi-file EPA CEMS outputs prior to distribution
    gzip --verbose "$PUDL_OUTPUT"/*.sqlite && \
    rm -rf "$PUDL_OUTPUT/core_epacems__hourly_emissions/" && \
    rm -f "$PUDL_OUTPUT/metadata.yml"
    ETL_SUCCESS=${PIPESTATUS[0]}
    # Deploy the updated data to datasette if we're on dev
    if [[ "$BUILD_REF" == "dev" ]]; then
        python ~/pudl/devtools/datasette/publish.py 2>&1 | tee -a "$LOGFILE"
        DATASETTE_SUCCESS=${PIPESTATUS[0]}
    fi

    # Dump outputs to s3 bucket if branch is dev or build was triggered by a tag
    # TODO: this behavior should be controlled by on/off switch here and this logic
    # should be moved to the triggering github action. Having it here feels
    # fragmented.
    if [ "$GITHUB_ACTION_TRIGGER" = "push" ] || [ "$BUILD_REF" = "dev" ]; then
        copy_outputs_to_distribution_bucket
        ETL_SUCCESS=${PIPESTATUS[0]}
        # TEMPORARY: this currently just makes a sandbox release, for testing:
        zenodo_data_release 2>&1 | tee -a "$LOGFILE"
        ETL_SUCCESS=${PIPESTATUS[0]}
    # should be moved to the triggering github action. Having it here feels fragmented.
    # Distribute outputs if branch is dev or the build was triggered by tag push
    if [[ "$GITHUB_ACTION_TRIGGER" == "push" || "$BUILD_REF" == "dev" ]]; then
        # Remove some cruft from the builds that we don't want to distribute
        clean_up_outputs_for_distribution 2>&1 | tee -a "$LOGFILE"
        CLEAN_UP_OUTPUTS_SUCCESS=${PIPESTATUS[0]}
        # Copy cleaned up outputs to the S3 and GCS distribution buckets
        copy_outputs_to_distribution_bucket | tee -a "$LOGFILE"
        DISTRIBUTION_BUCKET_SUCCESS=${PIPESTATUS[0]}
        # TODO: this currently just makes a sandbox release, for testing. Should be
        # switched to production and only run on push of a version tag eventually.
        # Push a data release to Zenodo for long term accessiblity
        zenodo_data_release sandbox 2>&1 | tee -a "$LOGFILE"
        ZENODO_SUCCESS=${PIPESTATUS[0]}
    fi
fi

# This way we also save the logs from latter steps in the script
gsutil cp "$LOGFILE" "$PUDL_GCS_OUTPUT"

# Notify slack about entire pipeline's success or failure;
# PIPESTATUS[0] either refers to the failed ETL run or the last distribution
# task that was run above
if [[ $ETL_SUCCESS == 0 ]]; then
if [[ $ETL_SUCCESS == 0 && \
      $SAVE_OUTPUTS_SUCCESS == 0 && \
      $UPDATE_NIGHTLY_SUCCESS == 0 && \
      $DATASETTE_SUCCESS == 0 && \
      $CLEAN_UP_OUTPUTS_SUCCESS == 0 && \
      $DISTRIBUTION_BUCKET_SUCCESS == 0 && \
      $ZENODO_SUCCESS == 0
   ]]; then
    notify_slack "success"
else
    notify_slack "failure"