Skip to content

wip: test #9152

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 154 additions & 49 deletions .github/ci.md

Large diffs are not rendered by default.

95 changes: 95 additions & 0 deletions .github/workflows/_torchprime_ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Reusable workflow (invoked through `workflow_call`) that runs the
# torchprime end-to-end tests.
name: torchprime tests
on:
  workflow_call:
    inputs:
      # Applied as the job-level `timeout-minutes` of the E2E test job.
      timeout-minutes:
        description: Timeout in minutes for the job run
        type: number
        required: false
        default: 120
      # Passed as a string; every step compares it against the literal 'true'.
      has_code_changes:
        description: Whether to run full workflow or not
        type: string
        required: false
        default: 'true'
    secrets:
      # Used as the `github_token` of the step that triggers the torchprime
      # workflow.
      PERSONAL_ACCESS_TOKEN_FOR_TRIGGERING_TORCHPRIME:
        required: true
      # Used as `credentials_json` for the Google Cloud auth step.
      GCLOUD_SERVICE_KEY:
        required: true
jobs:
  # Builds a docker image from the `torch-xla-wheels` artifact, pushes it, and
  # then triggers the torchprime `e2e_test.yml` workflow against that image.
  # Every step is guarded by `has_code_changes` so the job itself still runs
  # (and completes with no work) when the caller reports no code changes.
  torchprime-e2e-test:
    name: Run torchprime E2E tests
    timeout-minutes: ${{ inputs.timeout-minutes }}
    runs-on: ubuntu-22.04
    steps:
      # NOTE(review): the action reference was garbled in the scraped page;
      # `rootless-docker@0.2.2` is a reconstruction - confirm the action name
      # and pinned version against the repository before merging.
      - name: Use Docker in rootless mode
        if: inputs.has_code_changes == 'true'
        uses: ScribeMD/rootless-docker@0.2.2
      - name: Add user to docker group
        if: inputs.has_code_changes == 'true'
        shell: bash
        run: |
          sudo usermod -aG docker "$USER"
          newgrp docker
      # Googlers: if this fails, follow http://shortn/_61iSj31q1b to debug.
      - uses: google-github-actions/auth@v2
        if: inputs.has_code_changes == 'true'
        with:
          credentials_json: '${{ secrets.GCLOUD_SERVICE_KEY }}'
      - uses: google-github-actions/setup-gcloud@v2
        if: inputs.has_code_changes == 'true'
        with:
          version: '>= 363.0.0'
          install_components: 'beta,gke-gcloud-auth-plugin'
      - name: Verify GCP setup
        if: inputs.has_code_changes == 'true'
        shell: bash
        run: gcloud info
      - name: Authenticate Docker
        if: inputs.has_code_changes == 'true'
        shell: bash
        run: gcloud auth configure-docker --quiet
      # GOOGLE_APPLICATION_CREDENTIALS is presumably exported by the auth step
      # above - verify. Quoted so a path containing spaces is not word-split.
      - name: Activate SA credentials
        if: inputs.has_code_changes == 'true'
        shell: bash
        run: gcloud auth activate-service-account --key-file="$GOOGLE_APPLICATION_CREDENTIALS"
      # Only the `infra` directory is needed (it holds the build script and
      # Dockerfile), so a shallow sparse checkout is enough.
      - name: Checkout infra
        if: inputs.has_code_changes == 'true'
        uses: actions/checkout@v4
        with:
          sparse-checkout: |
            infra
          fetch-depth: 1
          path: pytorch-xla
      # Build a docker image for torchprime E2E test
      # First download the torch-xla-wheels
      - name: Fetch wheels
        if: inputs.has_code_changes == 'true'
        uses: actions/download-artifact@v4
        with:
          name: torch-xla-wheels
          path: /tmp/wheels/
      # Then run docker to install them and push a docker
      - name: Build and push docker image
        if: inputs.has_code_changes == 'true'
        shell: bash
        working-directory: pytorch-xla
        run: |
          . ./infra/ansible/build_for_torchprime.sh
        # NOTE(review): these are placeholder ("todo") values, and the build
        # script as written hard-codes its own image name and context path
        # instead of reading them - confirm whether they should be wired in.
        env:
          XLA_WHEELS_PATH: /tmp/wheels/todo
          DOCKER_IMAGE_NAME: todo-torch-xla-torchprime-ci
          DOCKER_IMAGE_TAG: todo
          DOCKER_PROJECT: pytorch-todo
      # Trigger torchprime E2E test workflow
      # NOTE(review): the action reference was garbled in the scraped page;
      # `trigger-workflow-and-wait@v1.6.5` is a reconstruction - confirm the
      # action name and pinned version against the repository before merging.
      - uses: convictional/trigger-workflow-and-wait@v1.6.5
        if: inputs.has_code_changes == 'true'
        with:
          owner: AI-Hypercomputer
          repo: torchprime
          github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN_FOR_TRIGGERING_TORCHPRIME }}
          workflow_file_name: e2e_test.yml
          wait_interval: 60
          ref: yifeit/torchprime-ci
          # Must stay in sync with IMAGE_NAME in
          # infra/ansible/build_for_torchprime.sh, which pushes this exact tag.
          client_payload: '{"docker_url": "gcr.io/tpu-pytorch/for-torchprime-ci:test"}'
4 changes: 4 additions & 0 deletions .github/workflows/_tpu_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ name: TPU Integration Test
on:
workflow_call:
inputs:
torch-commit:
  # Optional string input.
  # NOTE(review): the description only repeats the input name, and the visible
  # hunk does not show where the value is consumed - consider describing what
  # commit is expected here.
  description: torch-commit
  type: string
  required: false
timeout-minutes:
required: false
type: number
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,16 @@ jobs:
has_code_changes: ${{ needs.check_code_changes.outputs.has_code_changes }}
if: github.event_name == 'push' || github.event_name == 'pull_request'

test-torchprime:
  # Calls the reusable torchprime workflow on push and pull_request events.
  name: "torchprime tests"
  # Waits for the torch_xla build and for the code-change check, whose
  # `has_code_changes` output is forwarded below.
  needs:
    - build-torch-xla
    - check_code_changes
  if: github.event_name == 'push' || github.event_name == 'pull_request'
  uses: ./.github/workflows/_torchprime_ci.yml
  with:
    has_code_changes: ${{ needs.check_code_changes.outputs.has_code_changes }}
    timeout-minutes: 100
  # The called workflow declares required secrets; pass the caller's through.
  secrets: inherit

push-docs:
name: "Build docs"
uses: ./.github/workflows/_docs.yml
Expand Down
23 changes: 23 additions & 0 deletions infra/ansible/build_for_torchprime.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# syntax=docker/dockerfile:1.4
#
# Image used by the torchprime CI: installs the wheels found in the main build
# context, then runs the `install_deps` tag of the ansible playbook that is
# supplied through the additional build context named `ansible`.
#
# Build args:
#   python_version / debian_version  select the `python:<py>-<debian>` base image.
#   ansible_vars                     extra variables forwarded to ansible-playbook
#                                    via `-e`.
ARG python_version=3.10
ARG debian_version=bullseye

FROM python:${python_version}-${debian_version} AS release

# The main build context is expected to contain the *.whl files to install.
WORKDIR /tmp/wheels
COPY ./*.whl ./

RUN echo "Installing the following wheels" && ls *.whl
# --no-cache-dir keeps pip's download/wheel cache out of the image layers; the
# final `rm -rf` below would not remove it.
RUN pip install --no-cache-dir *.whl

# Install the dependencies including libtpu.
WORKDIR /ansible
RUN pip install --no-cache-dir ansible
# `ansible` is a named build context (passed with `--build-context ansible=...`).
COPY --from=ansible . /ansible

# Declared after FROM so it is visible to the RUN instruction below.
ARG ansible_vars
RUN ansible-playbook -vvv playbook.yaml -e "stage=release" -e "${ansible_vars}" --tags "install_deps"

WORKDIR /

# Removes the build inputs from the final filesystem. Because this runs in its
# own layer, the earlier layers still carry the files, so it does not shrink
# the image.
RUN rm -rf /ansible /tmp/wheels
16 changes: 16 additions & 0 deletions infra/ansible/build_for_torchprime.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Builds the torchprime CI docker image and pushes it to the registry.
#
# The Dockerfile and ansible paths are relative, so this must be run from the
# repository root. The wheels to install are taken from DEFAULT_CONTEXT_PATH,
# which is used as the main docker build context.

# Abort on the first failing command and echo each command as it runs.
set -ex

export IMAGE_NAME="gcr.io/tpu-pytorch/for-torchprime-ci:test"
export DOCKERFILE_PATH="infra/ansible/build_for_torchprime.Dockerfile"
export DEFAULT_CONTEXT_PATH="/tmp/wheels"

# `ansible` is a second, named build context that the Dockerfile copies from.
# NOTE(review): `xla_git_rev` is set to the placeholder "foobar" - confirm the
# intended value.
docker build \
  --tag "${IMAGE_NAME}" \
  --file "${DOCKERFILE_PATH}" \
  --build-context ansible=infra/ansible \
  --build-arg python_version=3.10 \
  --build-arg debian_version=bullseye \
  --build-arg ansible_vars='{"arch":"amd64","accelerator":"tpu","pytorch_git_rev":"main","xla_git_rev":"foobar","bundle_libtpu":"0","package_version":"2.8","nightly_release":true}' \
  "${DEFAULT_CONTEXT_PATH}"

docker push "${IMAGE_NAME}"
Loading