Skip to content

Comparative Benchmarks #519

Comparative Benchmarks

Comparative Benchmarks #519

# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# Comparative Benchmarks Workflow.
name: Comparative Benchmarks
on:
# Will only run when manually triggered.
workflow_dispatch:
concurrency:
# A PR number if a pull request and otherwise the commit hash. This cancels
# queued and in-progress runs for the same PR (presubmit) or commit
# (postsubmit).
group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
cancel-in-progress: true
env:
GCS_DIR: gs://openxla-github-actions-${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}-artifacts/${{ github.run_id }}/${{ github.run_attempt }}
jobs:
setup:
runs-on: ubuntu-22.04
outputs:
runner-group: ${{ steps.configure.outputs.runner-group }}
benchmark-gcs-dir: ${{ steps.configure.outputs.benchmark-gcs-dir }}
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- name: "Configuring CI options"
id: configure
env:
RUNNER_GROUP: ${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
run: |
# Just informative logging. There should only be two commits in the
# history here, but limiting the depth helps when copying from a local
# repo instead of using checkout, e.g. with
# https://github.com/nektos/act where there will be more.
git log --oneline --graph --max-count=3
# Workflow jobs can't access `env` in `runs-on`, so we need to make
# `runner-group` a job output variable.
echo "runner-group=${RUNNER_GROUP}" > "${GITHUB_OUTPUT}"
# For presubmit testing, the result artifacts are uploaded to the
# temporary workflow GCS dir. In postsubmit, the result artifacts are
# uploaded to the comparative benchmark GCS dir.
if [[ "${RUNNER_GROUP}" == "presubmit" ]]; then
BENCHMARK_GCS_DIR="${GCS_DIR}/comparative-benchmark-artifacts"
else
BENCHMARK_GCS_DIR="gs://comparative-benchmark-artifacts/$(date +'%Y-%m-%d').$(date +'%s')"
fi
echo "benchmark-gcs-dir=${BENCHMARK_GCS_DIR}" >> "${GITHUB_OUTPUT}"
build_xla_tools:
needs: setup
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
- environment=prod
- cpu
- os-family=Linux
env:
CUDA_VERSION: 11.8
XLA_TOOLS_DIR: xla-tools-dir
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
outputs:
xla-tools-dir: ${{ env.XLA_TOOLS_DIR }}
xla-tools-dir-archive: ${{ steps.archive.outputs.xla-tools-dir-archive }}
xla-tools-dir-gcs-artifact: ${{ steps.upload.outputs.xla-tools-dir-gcs-artifact }}
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- name: "Building XLA"
run: |
mkdir -p "${XLA_TOOLS_DIR}"
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:5e42a41cc8a606fef86630863a207d294f2070fd7bbc9c86b48c089a97eb0f89" \
./comparative_benchmark/xla_hlo/build_xla_tools.sh \
"${XLA_TOOLS_DIR}" \
"${CUDA_VERSION}"
- name: "Creating tool dir archive"
id: archive
env:
XLA_TOOLS_DIR_ARCHIVE: ${{ env.XLA_TOOLS_DIR }}.tgz
run: |
tar -zcvf ${XLA_TOOLS_DIR_ARCHIVE} ${XLA_TOOLS_DIR}
echo "xla-tools-dir-archive=${XLA_TOOLS_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}"
- name: "Uploading tool dir archive"
id: upload
env:
XLA_TOOLS_DIR_ARCHIVE: ${{ steps.archive.outputs.xla-tools-dir-archive }}
XLA_TOOLS_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.xla-tools-dir-archive }}
run: |
gcloud storage cp "${XLA_TOOLS_DIR_ARCHIVE}" "${XLA_TOOLS_DIR_GCS_ARTIFACT}"
echo "xla-tools-dir-gcs-artifact=${XLA_TOOLS_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}"
benchmark_on_a2-highgpu-1g:
needs: [setup, build_xla_tools]
timeout-minutes: 1440
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
- environment=prod
- machine-type=a2-highgpu-1g
env:
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
RESULTS_DIR: results-dir
TARGET_DEVICE: a2-highgpu-1g
XLA_TOOLS_DIR: ${{ needs.build_xla_tools.outputs.xla-tools-dir }}
XLA_TOOLS_DIR_ARCHIVE: ${{ needs.build_xla_tools.outputs.xla-tools-dir-archive }}
XLA_TOOLS_DIR_GCS_ARTIFACT: ${{ needs.build_xla_tools.outputs.xla-tools-dir-gcs-artifact }}
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- name: "Setup"
id: setup
run: |
echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
mkdir "${RESULTS_DIR}"
- name: "Downloading and unpacking XLA tools"
run: |
gcloud storage cp "${XLA_TOOLS_DIR_GCS_ARTIFACT}" "${XLA_TOOLS_DIR_ARCHIVE}"
tar -xvf "${XLA_TOOLS_DIR_ARCHIVE}"
- name: "Benchmarking XLA-HLO:GPU"
env:
XLA_HLO_RESULTS_JSON: xla-hlo.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
run: |
RESULTS_PATH="${RESULTS_DIR}/${XLA_HLO_RESULTS_JSON}"
docker run --gpus all --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
--env "OOBI_XLA_TOOLS_DIR=${XLA_TOOLS_DIR}" \
"gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:5e42a41cc8a606fef86630863a207d294f2070fd7bbc9c86b48c089a97eb0f89" \
./comparative_benchmark/xla_hlo/benchmark_all.sh \
"${TARGET_DEVICE}"\
"${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
- name: "Benchmarking JAX-XLA:GPU"
env:
JAX_XLA_RESULTS_JSON: jax-xla.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
run: |
RESULTS_PATH="${RESULTS_DIR}/${JAX_XLA_RESULTS_JSON}"
docker run --gpus all --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:5e42a41cc8a606fef86630863a207d294f2070fd7bbc9c86b48c089a97eb0f89" \
./comparative_benchmark/jax/benchmark_xla.sh \
"${TARGET_DEVICE}"\
"${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
# # Disabled due to https://github.com/openxla/openxla-pjrt-plugin/issues/203.
# - name: "Benchmarking JAX-IREE:GPU"
# env:
# JAX_IREE_RESULTS_JSON: jax-iree.json
# RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
# run: |
# RESULTS_PATH="${RESULTS_DIR}/${JAX_IREE_RESULTS_JSON}"
# docker run --gpus all --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
# "gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:5e42a41cc8a606fef86630863a207d294f2070fd7bbc9c86b48c089a97eb0f89" \
# ./comparative_benchmark/jax/benchmark_iree.sh \
# "${TARGET_DEVICE}"\
# "${RESULTS_PATH}"
# gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
# # Disabled in favor of JAX and reducing maintenance burden.
# - name: "Benchmarking TF-XLA:GPU"
# env:
# TF_XLA_RESULTS_JSON: tf-xla.json
# RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
# run: |
# RESULTS_PATH="${RESULTS_DIR}/${TF_XLA_RESULTS_JSON}"
# docker run --gpus all --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
# "gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:5e42a41cc8a606fef86630863a207d294f2070fd7bbc9c86b48c089a97eb0f89" \
# ./comparative_benchmark/tf_xla/benchmark_all.sh \
# "${TARGET_DEVICE}"\
# "${RESULTS_PATH}"
# gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
- name: "Benchmarking PT-Inductor:GPU"
env:
PT_INDUCTOR_RESULTS_JSON: pt-inductor.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
run: |
RESULTS_PATH="${RESULTS_DIR}/${PT_INDUCTOR_RESULTS_JSON}"
docker run --gpus all --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:5e42a41cc8a606fef86630863a207d294f2070fd7bbc9c86b48c089a97eb0f89" \
./comparative_benchmark/pt_inductor/benchmark_all.sh \
"${TARGET_DEVICE}"\
"${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
benchmark_on_c2-standard-60:
needs: [setup, build_xla_tools]
timeout-minutes: 1440
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
- environment=prod
- machine-type=c2-standard-60
env:
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
RESULTS_DIR: results-dir
TARGET_DEVICE: c2-standard-60
XLA_TOOLS_DIR: ${{ needs.build_xla_tools.outputs.xla-tools-dir }}
XLA_TOOLS_DIR_ARCHIVE: ${{ needs.build_xla_tools.outputs.xla-tools-dir-archive }}
XLA_TOOLS_DIR_GCS_ARTIFACT: ${{ needs.build_xla_tools.outputs.xla-tools-dir-gcs-artifact }}
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- name: "Setup"
id: setup
run: |
echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
mkdir "${RESULTS_DIR}"
- name: "Downloading and unpacking XLA tools"
run: |
gcloud storage cp "${XLA_TOOLS_DIR_GCS_ARTIFACT}" "${XLA_TOOLS_DIR_ARCHIVE}"
tar -xvf "${XLA_TOOLS_DIR_ARCHIVE}"
- name: "Benchmarking XLA-HLO:CPU"
env:
XLA_HLO_RESULTS_JSON: xla-hlo.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
run: |
RESULTS_PATH="${RESULTS_DIR}/${XLA_HLO_RESULTS_JSON}"
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
--env "OOBI_XLA_TOOLS_DIR=${XLA_TOOLS_DIR}" \
"gcr.io/iree-oss/openxla-benchmark/base-python3.10@sha256:245a074284cfed5de60cf06a153e3bcd9a9c42702b6bb66a39bb47ef23b61669" \
./comparative_benchmark/xla_hlo/benchmark_all.sh \
"${TARGET_DEVICE}"\
"${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
- name: "Benchmarking JAX-XLA:CPU"
env:
JAX_XLA_RESULTS_JSON: jax-xla.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
run: |
RESULTS_PATH="${RESULTS_DIR}/${JAX_XLA_RESULTS_JSON}"
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/base-python3.10@sha256:245a074284cfed5de60cf06a153e3bcd9a9c42702b6bb66a39bb47ef23b61669" \
./comparative_benchmark/jax/benchmark_xla.sh \
"${TARGET_DEVICE}"\
"${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
# # Disabled due to https://github.com/openxla/openxla-benchmark/issues/169.
# - name: "Benchmarking JAX-IREE:CPU"
# env:
# JAX_IREE_RESULTS_JSON: jax-iree.json
# RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
# run: |
# RESULTS_PATH="${RESULTS_DIR}/${JAX_IREE_RESULTS_JSON}"
# docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
# "gcr.io/iree-oss/openxla-benchmark/base-python3.11@sha256:b9b98da7bcc5e431800ff798a6dcc394b1838a9ed3d695f5cd0dac3510fc8c8d" \
# ./comparative_benchmark/jax/benchmark_iree.sh \
# "${TARGET_DEVICE}"\
# "${RESULTS_PATH}"
# gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
# # Disabled in favor of JAX and reducing maintenance burden.
# - name: "Benchmarking TF-XLA:CPU"
# env:
# TF_XLA_RESULTS_JSON: tf-xla.json
# RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
# run: |
# RESULTS_PATH="${RESULTS_DIR}/${TF_XLA_RESULTS_JSON}"
# docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
# "gcr.io/iree-oss/openxla-benchmark/base-python3.10@sha256:245a074284cfed5de60cf06a153e3bcd9a9c42702b6bb66a39bb47ef23b61669" \
# ./comparative_benchmark/tf_xla/benchmark_all.sh \
# "${TARGET_DEVICE}"\
# "${RESULTS_PATH}"
# gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
- name: "Benchmarking PT-Inductor:CPU"
env:
PT_INDUCTOR_RESULTS_JSON: pt-inductor.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
run: |
RESULTS_PATH="${RESULTS_DIR}/${PT_INDUCTOR_RESULTS_JSON}"
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/base-python3.10@sha256:245a074284cfed5de60cf06a153e3bcd9a9c42702b6bb66a39bb47ef23b61669" \
./comparative_benchmark/pt_inductor/benchmark_all.sh \
"${TARGET_DEVICE}"\
"${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"