Skip to content

Add checkpoint resuming from mlfoundry #25

Add checkpoint resuming from mlfoundry

Add checkpoint resuming from mlfoundry #25

# Builds the LLM finetuning Docker image and pushes it to the public ECR
# registry whenever a tag starting with "v" is pushed.
name: Build the LLM Finetuning Docker image

on:
  push:
    tags:
      - "v*"

# id-token: write lets the job request an OIDC token, which the
# "Configure AWS credentials" step needs in order to assume the IAM role.
permissions:
  id-token: write
  contents: read

# One build per ref: a newer run for the same ref cancels the running one.
concurrency:
  group: build-llm-finetune-${{ github.ref }}
  cancel-in-progress: true

env:
  GITHUB_SHA: ${{ github.sha }}
  # Prefix stripped from the git tag to obtain the image version tag.
  TAG_PREFIX: "v"

jobs:
  build_image:
    name: Build Image
    runs-on: ubuntu-latest
    steps:
      # NOTE(review): this action is referenced by the mutable `main` branch
      # in a job that holds `id-token: write`; consider pinning it to a
      # release tag or a commit SHA.
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          tool-cache: false
          android: true
          dotnet: true
          haskell: true
          large-packages: false
          docker-images: false
          swap-storage: false

      - name: Checkout
        uses: actions/checkout@v4

      # Derives two values from the pushed tag and exports them to later
      # steps through $GITHUB_ENV:
      #   VERSION_TAG     - the tag name without the leading TAG_PREFIX
      #   ALT_VERSION_TAG - VERSION_TAG up to its first "-", followed by
      #                     "-" and the first 7 characters of the commit SHA
      - name: Set version tags
        env:
          # Passed through the environment instead of being interpolated
          # into the script, so the tag name is never parsed as shell code.
          REF_NAME: ${{ github.ref_name }}
        run: |
          echo "Ref name is $REF_NAME"
          echo "TAG_PREFIX is $TAG_PREFIX"
          # Strip only the leading prefix. Splitting on the prefix character
          # would also cut at any later "v" in the tag (e.g. "v0.1.0-dev").
          VERSION_TAG="${REF_NAME#"$TAG_PREFIX"}"
          echo "Setting VERSION_TAG equal to $VERSION_TAG"
          echo "VERSION_TAG=$VERSION_TAG" >> "$GITHUB_ENV"
          ALT_VERSION_TAG="${VERSION_TAG%%-*}-${GITHUB_SHA::7}"
          echo "Setting ALT_VERSION_TAG equal to $ALT_VERSION_TAG"
          echo "ALT_VERSION_TAG=$ALT_VERSION_TAG" >> "$GITHUB_ENV"

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.PUBLIC_ECR_IAM_ROLE_ARN }}
          aws-region: us-east-1

      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2
        with:
          registry-type: public

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Assembles <registry>/<alias>/<repository> and exports it as
      # REPOSITORY_URI for the build step.
      - name: Set repo uri
        env:
          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          REGISTRY_ALIAS: truefoundrycloud
          REPOSITORY: llm-finetune
        run: |
          REPOSITORY_URI="${REGISTRY}/${REGISTRY_ALIAS}/${REPOSITORY}"
          echo "Setting REPOSITORY_URI equal to $REPOSITORY_URI"
          echo "REPOSITORY_URI=$REPOSITORY_URI" >> "$GITHUB_ENV"

      # Pushes the image under three tags (full commit SHA, VERSION_TAG and
      # ALT_VERSION_TAG) and reads/writes the layer cache stored in the same
      # repository under the `buildcache` tag.
      - name: Build and push image
        uses: docker/build-push-action@v5
        env:
          IMAGE_TAG_1: ${{ env.REPOSITORY_URI }}:${{ env.GITHUB_SHA }}
          IMAGE_TAG_2: ${{ env.REPOSITORY_URI }}:${{ env.VERSION_TAG }}
          IMAGE_TAG_3: ${{ env.REPOSITORY_URI }}:${{ env.ALT_VERSION_TAG }}
        with:
          context: .
          file: Dockerfile
          push: true
          tags: ${{ env.IMAGE_TAG_1 }},${{ env.IMAGE_TAG_2 }},${{ env.IMAGE_TAG_3 }}
          cache-from: type=registry,ref=${{ env.REPOSITORY_URI }}:buildcache
          cache-to: mode=max,image-manifest=true,type=registry,ref=${{ env.REPOSITORY_URI }}:buildcache
# build_notebook_image:
# name: Build Notebook Image
# runs-on: ubuntu-latest
# steps:
# - name: Free Disk Space (Ubuntu)
# uses: jlumbroso/free-disk-space@main
# with:
# tool-cache: false
# android: true
# dotnet: true
# haskell: true
# large-packages: false
# docker-images: false
# swap-storage: false
# - name: Checkout
# uses: actions/checkout@v3
# - name: Set version tags
# run: |
# echo "Ref name is ${{ github.ref_name }}"
# echo "TAG_PREFIX is ${{ env.TAG_PREFIX }}"
# REF_NAME=${{ github.ref_name }}
# TAG_PREFIX=${{ env.TAG_PREFIX }}
# VERSION_TAG=$(echo $REF_NAME | awk -F$TAG_PREFIX '{print $2}')-jupyter
# echo "Setting VERSION_TAG equal to $VERSION_TAG"
# echo "VERSION_TAG=$VERSION_TAG" >> $GITHUB_ENV
# - name: Configure AWS credentials
# uses: aws-actions/configure-aws-credentials@v4
# with:
# role-to-assume: ${{ secrets.PUBLIC_ECR_IAM_ROLE_ARN }}
# aws-region: us-east-1
# - name: Login to Amazon ECR
# id: login-ecr
# uses: aws-actions/amazon-ecr-login@v2
# with:
# registry-type: public
# - name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v3
# - name: Set repo uri
# env:
# REGISTRY: ${{ steps.login-ecr.outputs.registry }}
# REGISTRY_ALIAS: truefoundrycloud
# REPOSITORY: llm-finetune
# run: |
# REPOSITORY_URI=${{ env.REGISTRY }}/${{ env.REGISTRY_ALIAS }}/${{ env.REPOSITORY }}
# echo "Setting REPOSITORY_URI equal to $REPOSITORY_URI"
# echo "REPOSITORY_URI=$REPOSITORY_URI" >> $GITHUB_ENV
# - name: Build and push image
# uses: docker/build-push-action@v5
# env:
# REGISTRY: ${{ steps.login-ecr.outputs.registry }}
# REGISTRY_ALIAS: truefoundrycloud
# REPOSITORY: llm-finetune
# IMAGE_TAG_1: ${{ env.REPOSITORY_URI }}:${{ env.VERSION_TAG }}
# with:
# context: .
# file: Dockerfile-notebook
# push: true
# tags: ${{ env.IMAGE_TAG_1 }}
# cache-from: type=registry,ref=${{ env.REPOSITORY_URI }}:jupyter-buildcache
# cache-to: mode=max,image-manifest=true,type=registry,ref=${{ env.REPOSITORY_URI }}:jupyter-buildcache