.github/workflows/containers-and-az-pool.yaml

name: Create Docker Image and Azure Pool

# This GitHub Actions workflow builds and pushes the Docker images for the
# cfa-epinow2-pipeline-docker project, then creates the matching Azure Batch
# pool if it does not exist yet. In-container tests can be added here.

on:
  workflow_dispatch:
  push:
    branches:
      - main
    # There is no need to run this workflow every time an unrelated
    # workflow file or a Markdown document is edited.
    paths-ignore:
      - '.github/workflows/block-fixup.yaml'
      - '.github/workflows/check-news-md.yaml'
      - '.github/workflows/manual-docker-prune.yml'
      - '.github/workflows/pkgdown.yaml'
      - '.github/workflows/pr-commands.yaml'
      - '.github/workflows/r-cmd-check.yaml'
      - '.github/workflows/test-coverage.yaml'
      - '**.md'
  pull_request:
    branches:
      - main
    # Same ignore list as for pushes: edits limited to these files do not
    # trigger the workflow.
    paths-ignore:
      - '.github/workflows/block-fixup.yaml'
      - '.github/workflows/check-news-md.yaml'
      - '.github/workflows/manual-docker-prune.yml'
      - '.github/workflows/pkgdown.yaml'
      - '.github/workflows/pr-commands.yaml'
      - '.github/workflows/r-cmd-check.yaml'
      - '.github/workflows/test-coverage.yaml'
      - '**.md'

env:
  # Image coordinates shared by every job. Joined as <REGISTRY>/<IMAGE_NAME>
  # they give: cfaprdbatchcr.azurecr.io/cfa-epinow2-pipeline
  IMAGE_NAME: 'cfa-epinow2-pipeline'
  REGISTRY: 'cfaprdbatchcr.azurecr.io'

jobs:

  build-dependencies-image:
    runs-on: cfa-cdcgov # VM based runner serving CFA's cdcgov repos (as opposed to cdcent)
    name: Build dependencies image

    # The computed tag is exported so that downstream jobs can tag the
    # pipeline image and name the batch pool consistently.
    outputs:
      tag: ${{ steps.image-tag.outputs.tag }}

    steps:

      # The checkout is required for hashFiles() in the cache key below.
      - name: Checkout code
        uses: actions/checkout@v4

      #########################################################################
      # Getting the tag from the branch
      # The tag will be used for both the docker image and the batch pool
      #########################################################################

      # From: https://stackoverflow.com/a/58035262/2097171
      # GITHUB_HEAD_REF is set for pull requests; otherwise fall back to
      # GITHUB_REF with the refs/heads/ prefix removed.
      - name: Extract branch name
        id: branch-name
        shell: bash
        run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_OUTPUT"

      # The branch name is handed to the script through an environment
      # variable instead of being interpolated into the script text: branch
      # names are user-controlled, and a name containing quotes or $(...)
      # would otherwise be executed by the shell.
      - name: Figure out tag (either latest if it is main or the branch name)
        id: image-tag
        env:
          BRANCH: ${{ steps.branch-name.outputs.branch }}
        run: |
          if [ "$BRANCH" = "main" ]; then
            printf 'tag=%s\n' "latest" >> "$GITHUB_OUTPUT"
          else
            printf 'tag=%s\n' "$BRANCH" >> "$GITHUB_OUTPUT"
          fi

      # NOTE: This lookup is only for the cache _key_. We don't need the cache _value_
      # because we explicitly fetch image from the registry in the next step. Keeping the
      # cached image on the runner causes the runner to quickly run out of storage.
      # The key changes whenever DESCRIPTION, Dockerfile-dependencies, or the
      # tag changes; a hit skips the login and build steps below.
      - name: Check cache for base image
        uses: actions/cache@v4
        id: cache
        with:
          key: docker-dependencies-${{ runner.os }}-${{ hashFiles('./DESCRIPTION', './Dockerfile-dependencies') }}-${{ steps.image-tag.outputs.tag }}
          lookup-only: true
          path: ./DESCRIPTION

      - name: Login to the Container Registry
        if: steps.cache.outputs.cache-hit != 'true'
        uses: docker/login-action@v3
        with:
          # Same registry the image is pushed to below (workflow-level env).
          registry: ${{ env.REGISTRY }}
          username: "cfaprdbatchcr"
          password: ${{ secrets.CFAPRDBATCHCR_REGISTRY_PASSWORD }}

      # Only rebuilt when the cache key above was not found.
      - name: Build and push
        if: steps.cache.outputs.cache-hit != 'true'
        uses: docker/build-push-action@v6
        with:
          push: true
          no-cache: true
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-dependencies:${{ steps.image-tag.outputs.tag }}
          file: ./Dockerfile-dependencies

  build-pipeline-image:

    name: Build pipeline image

    needs: build-dependencies-image
    runs-on: cfa-cdcgov

    # Re-export the tag computed by build-dependencies-image so that jobs
    # which only depend on this job can still read it.
    outputs:
      tag: ${{ needs.build-dependencies-image.outputs.tag }}

    steps:

      # NOTE(review): this job has no checkout step; the build below
      # presumably relies on docker/build-push-action's default Git context
      # to find ./Dockerfile - confirm before adding a `context:` input.
      - name: Login to the Container Registry
        uses: docker/login-action@v3
        with:
          # Use the workflow-level REGISTRY so the login target cannot drift
          # from the registry used in the image tag below.
          registry: ${{ env.REGISTRY }}
          username: "cfaprdbatchcr"
          password: ${{ secrets.CFAPRDBATCHCR_REGISTRY_PASSWORD }}

      - name: Build and push model pipeline image for Azure batch
        id: build_and_push_model_image
        uses: docker/build-push-action@v6
        with:
          push: true # This can be toggled manually for tweaking.
          no-cache: true
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ needs.build-dependencies-image.outputs.tag }}
          file: ./Dockerfile
          # TAG is forwarded to the Dockerfile as a build argument; presumably
          # it selects the matching -dependencies image - verify in Dockerfile.
          build-args: |
            TAG=${{ needs.build-dependencies-image.outputs.tag }}

  batch-pool:

    name: Create Batch Pool and Submit Jobs
    runs-on: cfa-cdcgov
    needs: build-pipeline-image
    container: python:3.12

    permissions:
      contents: read
      packages: write

    env:
      TAG: ${{ needs.build-pipeline-image.outputs.tag }}
      # Every Azure Batch Pool parameter can simply go here,
      # no python module or config toml necessary
      POOL_ID: "cfa-epinow2-${{ needs.build-pipeline-image.outputs.tag }}"
      BATCH_ACCOUNT:     "cfaprdba"
      BATCH_ENDPOINT:    "https://cfaprdba.eastus.batch.azure.com/"
      VM_IMAGE_TAG:      "canonical:0001-com-ubuntu-server-focal:20_04-lts"
      NODE_AGENT_SKU_ID: "batch.node.ubuntu 20.04"
      VM_SIZE:           "standard_a4m_v2"
      RESOURCE_GROUP:    ${{ secrets.PRD_RESOURCE_GROUP }}

    steps:
      - name: Checkout Repo
        id: checkout_repo
        uses: actions/checkout@v4

      # This step is only needed during the action to write the
      # config file. Users can have a config file stored in their VAP
      # sessions. In the future, we will have the config.toml file
      # distributed with the repo (encrypted).
      #
      # The secret and all substituted values reach the script as environment
      # variables rather than being interpolated into the script text. This
      # keeps the shell from expanding `$` or backticks inside the TOML and
      # keeps the branch-derived TAG / POOL_ID from being parsed as shell code.
      - name: Writing out config file
        env:
          POOL_CONFIG_TOML: ${{ secrets.POOL_CONFIG_TOML }}
          CONFIG_FILE: pool-config-${{ github.sha }}.toml
        run: |
          printf '%s\n' "$POOL_CONFIG_TOML" > "$CONFIG_FILE"

          # Replacing placeholders in the config file
          # (REGISTRY / IMAGE_NAME come from the workflow-level env,
          # TAG / VM_SIZE / POOL_ID from this job's env)
          sed -i "s|{{ IMAGE_NAME }}|$REGISTRY/$IMAGE_NAME:$TAG|g" "$CONFIG_FILE"
          sed -i "s|{{ VM_SIZE }}|$VM_SIZE|g" "$CONFIG_FILE"
          sed -i "s|{{ POOL_ID }}|$POOL_ID|g" "$CONFIG_FILE"

      - name: Ensuring the Azure CLI is installed
        run: |
          apt-get update && apt-get install -y --no-install-recommends azure-cli

      - name: Login to Azure with NNH Service Principal
        id: azure_login_2
        uses: azure/login@v2
        with:
          # managed by EDAV. Contact Amit Mantri or Jon Kislin if you have issues.
          creds: ${{ secrets.EDAV_CFA_PREDICT_NNHT_SP }}

      #########################################################################
      # Checking if the pool exists
      # This is done via az batch pool list. If there is no pool matching the
      # pool id (which is a function of the tag, i.e., branch name), then
      # the pool-exists output will be ''.
      #########################################################################
      - name: Check if pool exists
        id: check_pool_id
        run: |

          # RESOURCE_GROUP, BATCH_ACCOUNT and POOL_ID are defined in the job env
          az batch account login \
            --resource-group "$RESOURCE_GROUP" \
            --name "$BATCH_ACCOUNT"

          az batch pool list \
            --output tsv \
            --filter "(id eq '$POOL_ID')" \
            --query "[].[id, allocationState, creationTime]" > \
            pool-list-${{ github.sha }}

          echo "pool-exists=$(cat pool-list-${{ github.sha }})" >> \
            "$GITHUB_OUTPUT"

      - name: Create cfa-epinow2-pipeline Pool
        id: create_batch_pool

        # This is a conditional step that will only run if the pool does not
        # exist
        if: ${{ steps.check_pool_id.outputs.pool-exists == '' }}

        # Installs the Python requirements and runs the script that creates
        # the pool
        run: |
          # Running the python script azure/pool.py passing the config file
          # as an argument
          pip install -r azure/requirements.txt
          python3 azure/pool.py \
            pool-config-${{ github.sha }}.toml \
            batch-autoscale-formula.txt