
Commit 4fb1a9e
TEST: only run some echo thing and put it in a logfile.
jdangerx committed Jan 3, 2024
1 parent 46594a7 commit 4fb1a9e
Showing 1 changed file with 2 additions and 49 deletions.

docker/gcp_pudl_etl.sh
@@ -97,17 +97,7 @@ function notify_slack() {
 function update_nightly_branch() {
     # When building the image, GHA adds an HTTP basic auth header in git
     # config, which overrides the auth we set below. So we unset it.
-    git config --unset http.https://github.com/.extraheader && \
-    git config user.email "[email protected]" && \
-    git config user.name "pudlbot" && \
-    git remote set-url origin "https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git" && \
-    echo "Updating nightly branch to point at $NIGHTLY_TAG." && \
-    git fetch --force --tags origin "$NIGHTLY_TAG" && \
-    git fetch origin nightly:nightly && \
-    git checkout nightly && \
-    git show-ref -d nightly "$NIGHTLY_TAG" && \
-    git merge --ff-only "$NIGHTLY_TAG" && \
-    git push -u origin nightly
+    echo "TEST: First iteration"
 }
 
 function clean_up_outputs_for_distribution() {
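Two idioms in the deleted function body are easy to miss when skimming the diff: each step is chained with `&& \`, so the first failing command aborts the sequence and leaves the function with a nonzero exit status, and `git merge --ff-only` refuses any update of `nightly` that is not a pure fast-forward. Below is a minimal sketch of that pattern, reusing the script's own branch and tag names; it is a sketch of the deleted logic, not the full function, and should only be run in a disposable clone with `$NIGHTLY_TAG` set.

```bash
# Fast-forward-only update of nightly to $NIGHTLY_TAG; the && chain
# stops at the first failing step and propagates its exit status.
git fetch --force --tags origin "$NIGHTLY_TAG" && \
git fetch origin nightly:nightly && \
git checkout nightly && \
git merge --ff-only "$NIGHTLY_TAG" && \
git push -u origin nightly
# --ff-only fails instead of creating a merge commit when nightly has
# diverged from the tag, so a stale branch is never silently rewritten.
```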
@@ -133,45 +133,8 @@ ZENODO_SUCCESS=0
 # Set PUDL_GCS_OUTPUT *only* if it is currently unset
 : "${PUDL_GCS_OUTPUT:=gs://nightly-build-outputs.catalyst.coop/$BUILD_ID}"
 
-# Run ETL. Copy outputs to GCS and shut down the VM whether the ETL succeeds or fails.
-# 2>&1 redirects stderr to stdout.
-run_pudl_etl 2>&1 | tee "$LOGFILE"
-ETL_SUCCESS=${PIPESTATUS[0]}
-
-save_outputs_to_gcs 2>&1 | tee -a "$LOGFILE"
-SAVE_OUTPUTS_SUCCESS=${PIPESTATUS[0]}
-
-# If the pipeline is successful, distribute + publish datasette
-if [[ $ETL_SUCCESS == 0 ]]; then
-    if [[ "$GITHUB_ACTION_TRIGGER" == "schedule" ]]; then
-        update_nightly_branch 2>&1 | tee -a "$LOGFILE"
-        UPDATE_NIGHTLY_SUCCESS=${PIPESTATUS[0]}
-    fi
-
-    # Deploy the updated data to datasette if we're on dev
-    if [[ "$BUILD_REF" == "dev" ]]; then
-        python ~/pudl/devtools/datasette/publish.py 2>&1 | tee -a "$LOGFILE"
-        DATASETTE_SUCCESS=${PIPESTATUS[0]}
-    fi
-
-    # TODO: this behavior should be controlled by an on/off switch here, and this
-    # logic should be moved to the triggering GitHub action. Having it here feels fragmented.
-    # Distribute outputs if the branch is dev or the build was triggered by a tag push
-    if [[ "$GITHUB_ACTION_TRIGGER" == "push" || "$BUILD_REF" == "dev" ]]; then
-        # Remove some cruft from the builds that we don't want to distribute
-        clean_up_outputs_for_distribution 2>&1 | tee -a "$LOGFILE"
-        CLEAN_UP_OUTPUTS_SUCCESS=${PIPESTATUS[0]}
-        # Copy cleaned-up outputs to the S3 and GCS distribution buckets
-        copy_outputs_to_distribution_bucket | tee -a "$LOGFILE"
-        DISTRIBUTION_BUCKET_SUCCESS=${PIPESTATUS[0]}
-        # TODO: this currently just makes a sandbox release, for testing. It should be
-        # switched to production and run only on push of a version tag eventually.
-        # Push a data release to Zenodo for long-term accessibility
-        zenodo_data_release sandbox 2>&1 | tee -a "$LOGFILE"
-        ZENODO_SUCCESS=${PIPESTATUS[0]}
-    fi
-fi
 
+update_nightly_branch 2>&1 | tee -a "$LOGFILE"
 # This way we also save the logs from later steps in the script
 gsutil cp "$LOGFILE" "$PUDL_GCS_OUTPUT"

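The `cmd 2>&1 | tee -a "$LOGFILE"` / `${PIPESTATUS[0]}` pairing used throughout the deleted block, and kept in the surviving `update_nightly_branch` call, is the mechanism this test commit exercises: after a pipeline, `$?` holds the exit status of the *last* command (`tee`, which almost always succeeds), so the script reads bash's `PIPESTATUS` array to recover the status of the command that did the real work. A self-contained sketch of that pattern, with `false` standing in for a failing build step:

```bash
#!/usr/bin/env bash
LOGFILE=$(mktemp)

# `false` stands in for a failing step such as run_pudl_etl; with 2>&1 its
# stderr (none here) would land in the logfile while still reaching stdout.
false 2>&1 | tee -a "$LOGFILE"
STATUS=${PIPESTATUS[0]}   # exit status of `false` (1), not of `tee` (0)

echo "captured status: $STATUS"   # prints 1
# Checking $? after the pipeline instead would report tee's 0 and mask the failure.
```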

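Finally, the context line `: "${PUDL_GCS_OUTPUT:=gs://nightly-build-outputs.catalyst.coop/$BUILD_ID}"` that survives the change uses the `:` no-op builtin purely for its side effect: the `${VAR:=default}` expansion assigns the default only when the variable is unset or empty, so a bucket supplied by the caller is never overwritten. A quick illustration, with a hypothetical `BUILD_ID` and override value:

```bash
#!/usr/bin/env bash
BUILD_ID=demo-build   # hypothetical build ID, for the example only

unset PUDL_GCS_OUTPUT
: "${PUDL_GCS_OUTPUT:=gs://nightly-build-outputs.catalyst.coop/$BUILD_ID}"
echo "$PUDL_GCS_OUTPUT"   # -> .../demo-build (default applied)

PUDL_GCS_OUTPUT=gs://my-own-bucket/override   # hypothetical caller override
: "${PUDL_GCS_OUTPUT:=gs://nightly-build-outputs.catalyst.coop/$BUILD_ID}"
echo "$PUDL_GCS_OUTPUT"   # -> gs://my-own-bucket/override (existing value wins)
```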