From 0dbb6e1df016223a86a080aa175d46399429e7cd Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Mon, 11 Sep 2023 22:06:44 +0000 Subject: [PATCH 01/17] Add workflow --- .github/workflows/update_model_artifacts.yml | 43 ++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 .github/workflows/update_model_artifacts.yml diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml new file mode 100644 index 00000000..87d6d02d --- /dev/null +++ b/.github/workflows/update_model_artifacts.yml @@ -0,0 +1,43 @@ +# Copyright 2023 The OpenXLA Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Comparative Benchmarks Workflow. + +name: Update Model Artifacts + +on: + schedule: + # Scheduled to run at 09:00 UTC. + - cron: '0 09 * * *' + workflow_dispatch: + +concurrency: + # A PR number if a pull request and otherwise the commit hash. This cancels + # queued and in-progress runs for the same PR (presubmit) or commit + # (postsubmit). 
+ group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + build_xla_tools: + needs: setup + runs-on: + - self-hosted # must come first + - runner-group=${{ needs.setup.outputs.runner-group }} + - environment=prod + - cpu + - os-family=Linux + steps: + - name: "Checking out PR repository" + uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 + - name: "Generate JAX model artifacts" + run: | + docker run --gpus all --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ + --env "PYTHON=python3" \ + --env "WITH_CUDA=1" \ + --env "AUTO_UPLOAD=1" \ + "gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4dfb6aac3f53e552dd728c9330c90752e78ae51e4276f" \ + "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" From 13500b82706ed8d3a28f6dcfdb365a03e50f09a4 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Mon, 11 Sep 2023 22:06:58 +0000 Subject: [PATCH 02/17] Test in presubmit --- .github/workflows/update_model_artifacts.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index 87d6d02d..10c985b4 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -13,6 +13,7 @@ on: # Scheduled to run at 09:00 UTC. - cron: '0 09 * * *' workflow_dispatch: + pull_request: concurrency: # A PR number if a pull request and otherwise the commit hash. 
This cancels From 347c9f57e13fc91694a995ea98129d9df79dbcd6 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Mon, 11 Sep 2023 22:10:17 +0000 Subject: [PATCH 03/17] Fix setup --- .github/workflows/update_model_artifacts.yml | 23 +++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index 10c985b4..5fa2a21b 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -23,7 +23,28 @@ concurrency: cancel-in-progress: true jobs: - build_xla_tools: + setup: + runs-on: ubuntu-22.04 + outputs: + runner-group: ${{ steps.configure.outputs.runner-group }} + steps: + - name: "Checking out PR repository" + uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 + - name: "Configuring CI options" + id: configure + env: + RUNNER_GROUP: ${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }} + run: | + # Just informative logging. There should only be two commits in the + # history here, but limiting the depth helps when copying from a local + # repo instead of using checkout, e.g. with + # https://github.com/nektos/act where there will be more. + git log --oneline --graph --max-count=3 + # Workflow jobs can't access `env` in `runs-on`, so we need to make + # `runner-group` a job output variable. 
+ echo "runner-group=${RUNNER_GROUP}" > "${GITHUB_OUTPUT}" + + generate_artifacts: needs: setup runs-on: - self-hosted # must come first From 1a6e9e206041846f7d746608c19e2a32247b5ac9 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Mon, 11 Sep 2023 22:21:33 +0000 Subject: [PATCH 04/17] Change to GPU runner --- .github/workflows/update_model_artifacts.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index 5fa2a21b..28851096 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -50,8 +50,7 @@ jobs: - self-hosted # must come first - runner-group=${{ needs.setup.outputs.runner-group }} - environment=prod - - cpu - - os-family=Linux + - machine-type=a2-highgpu-1g steps: - name: "Checking out PR repository" uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 From ba3c814f86ae0c4e891bf762a2b86f3adc5d3674 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Tue, 12 Sep 2023 17:17:57 +0000 Subject: [PATCH 05/17] Test one model --- .github/workflows/update_model_artifacts.yml | 9 +++++++-- .../jax/scripts/generate_model_artifacts.sh | 11 ++++++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index 28851096..e1499ab5 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -59,6 +59,11 @@ jobs: docker run --gpus all --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ --env "PYTHON=python3" \ --env "WITH_CUDA=1" \ - --env "AUTO_UPLOAD=1" \ + --env "AUTO_UPLOAD=0" \ + --env "OUTPUT_DIR=jax" \ "gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4dfb6aac3f53e552dd728c9330c90752e78ae51e4276f" \ - "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" 
+ "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" \ + "T5_LARGE_FP32_JAX_512XI32_BATCH1" + - name: "Upload JAX model artifacts" + run: | + gcloud storage cp -r "jax/*" "gs://iree-model-artifacts/jax/" diff --git a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh index 7fbb175d..edfb7055 100755 --- a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh +++ b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh @@ -34,6 +34,7 @@ VENV_DIR="${VENV_DIR:-jax-models.venv}" PYTHON="${PYTHON:-"$(which python)"}" WITH_CUDA="${WITH_CUDA:-}" AUTO_UPLOAD="${AUTO_UPLOAD:-0}" +OUTPUT_DIR="${OUTPUT_DIR}:-/tmp" FILTER="${1:-".*"}" @@ -46,13 +47,13 @@ PYTHON_VERSION="$(python --version | sed -e "s/^Python \(.*\)\.\(.*\)\..*$/\1\.\ # Generate unique output directory. 
JAX_VERSION=$(pip show jax | grep Version | sed -e "s/^Version: \(.*\)$/\1/g") DIR_NAME="jax_models_${JAX_VERSION}_$(date +'%s')" -OUTPUT_DIR="/tmp/${DIR_NAME}" -mkdir "${OUTPUT_DIR}" +VERSION_DIR="/tmp/${DIR_NAME}" +mkdir "${VERSION_DIR}" -pip list > "${OUTPUT_DIR}/models_version_info.txt" +pip list > "${VERSION_DIR}/models_version_info.txt" declare -a args=( - -o "${OUTPUT_DIR}" + -o "${VERSION_DIR}" --iree_ir_tool="$(which iree-ir-tool)" --filter="${FILTER}" ) @@ -65,4 +66,4 @@ fi python "${TD}/generate_model_artifacts.py" "${args[@]}" -echo "Output directory: ${OUTPUT_DIR}" +echo "Output directory: ${VERSION_DIR}" From 1401decf470eeb74d394d9d0b96a11295d244f72 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Tue, 12 Sep 2023 18:09:32 +0000 Subject: [PATCH 06/17] Support multi-filters --- .github/workflows/update_model_artifacts.yml | 7 +++++- .../jax/scripts/generate_model_artifacts.py | 22 ++++++++++--------- .../jax/scripts/generate_model_artifacts.sh | 19 ++++++++-------- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index e1499ab5..2d50ec4d 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -56,6 +56,7 @@ jobs: uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 - name: "Generate JAX model artifacts" run: | + mkdir jax docker run --gpus all --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ --env "PYTHON=python3" \ --env "WITH_CUDA=1" \ @@ -63,7 +64,11 @@ jobs: --env "OUTPUT_DIR=jax" \ "gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4dfb6aac3f53e552dd728c9330c90752e78ae51e4276f" \ "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" \ - "T5_LARGE_FP32_JAX_512XI32_BATCH1" + "T5_LARGE_FP32_JAX_512XI32_BATCH1" \ + "T5_LARGE_FP32_JAX_512XI32_BATCH16" \ + 
"T5_LARGE_FP32_JAX_512XI32_BATCH24" \ + "T5_LARGE_FP32_JAX_512XI32_BATCH32" \ + "T5_LARGE_FP32_JAX_512XI32_BATCH48" - name: "Upload JAX model artifacts" run: | gcloud storage cp -r "jax/*" "gs://iree-model-artifacts/jax/" diff --git a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.py b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.py index 610c530a..2e83abc8 100644 --- a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.py +++ b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.py @@ -13,7 +13,7 @@ import shutil import subprocess import sys -from typing import Any, Optional +from typing import Any, List, Optional # Add openxla dir to the search path. sys.path.insert(0, str(pathlib.Path(__file__).parents[5])) @@ -103,11 +103,6 @@ def _parse_arguments() -> argparse.Namespace: type=pathlib.Path, required=True, help="Directory to save model artifacts.") - parser.add_argument("-f", - "--filter", - type=str, - default=".*", - help="The regex pattern to filter model names.") parser.add_argument("--iree-ir-tool", "--iree_ir_tool", type=pathlib.Path, @@ -120,12 +115,19 @@ def _parse_arguments() -> argparse.Namespace: help= f"If set, uploads artifacts automatically to {GCS_UPLOAD_DIR} and removes them locally once uploaded." 
) + parser.add_argument("-f", + "--filter", + dest="filters", + nargs="+", + default=[".*"], + help="The regex patterns to filter model names.") return parser.parse_args() -def main(output_dir: pathlib.Path, filter: str, iree_ir_tool: pathlib.Path, - auto_upload: bool): - name_pattern = re.compile(f"^{filter}$") +def main(output_dir: pathlib.Path, filters: List[str], + iree_ir_tool: pathlib.Path, auto_upload: bool): + combined_filters = "|".join(f"({name_filter})" for name_filter in filters) + name_pattern = re.compile(f"^{combined_filters}$") models = [ model for model in model_definitions.ALL_MODELS if name_pattern.match(model.name) @@ -134,7 +136,7 @@ def main(output_dir: pathlib.Path, filter: str, iree_ir_tool: pathlib.Path, if not models: all_models_list = "\n".join( model.name for model in model_definitions.ALL_MODELS) - raise ValueError(f'No model matches "{filter}".' + raise ValueError(f'No model matches "{filters}".' f' Available models:\n{all_models_list}') output_dir.mkdir(parents=True, exist_ok=True) diff --git a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh index edfb7055..5a4a7948 100755 --- a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh +++ b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh @@ -7,7 +7,8 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # Runs `generate_model_artifacts.py` on all registered JAX models and saves -# artifacts into the directory `/tmp/jax_models__`. +# artifacts into the directory +# `${OUTPUT_DIR}/jax_models__`. # # Once complete. please upload the output directory to # `gs://iree-model-artifacts/jax`, preserving directory name. 
@@ -34,9 +35,8 @@ VENV_DIR="${VENV_DIR:-jax-models.venv}" PYTHON="${PYTHON:-"$(which python)"}" WITH_CUDA="${WITH_CUDA:-}" AUTO_UPLOAD="${AUTO_UPLOAD:-0}" -OUTPUT_DIR="${OUTPUT_DIR}:-/tmp" - -FILTER="${1:-".*"}" +OUTPUT_DIR="${OUTPUT_DIR:-/tmp}" +FILTER=( "$@" ) VENV_DIR=${VENV_DIR} PYTHON=${PYTHON} WITH_CUDA=${WITH_CUDA} "${TD}/setup_venv.sh" source ${VENV_DIR}/bin/activate @@ -47,7 +47,7 @@ PYTHON_VERSION="$(python --version | sed -e "s/^Python \(.*\)\.\(.*\)\..*$/\1\.\ # Generate unique output directory. JAX_VERSION=$(pip show jax | grep Version | sed -e "s/^Version: \(.*\)$/\1/g") DIR_NAME="jax_models_${JAX_VERSION}_$(date +'%s')" -VERSION_DIR="/tmp/${DIR_NAME}" +VERSION_DIR="${OUTPUT_DIR}/${DIR_NAME}" mkdir "${VERSION_DIR}" pip list > "${VERSION_DIR}/models_version_info.txt" @@ -55,13 +55,14 @@ pip list > "${VERSION_DIR}/models_version_info.txt" declare -a args=( -o "${VERSION_DIR}" --iree_ir_tool="$(which iree-ir-tool)" - --filter="${FILTER}" ) +if (( "${#FILTER[@]}" > 0 )); then + args+=( --filter "${FILTER[@]}" ) +fi + if (( AUTO_UPLOAD == 1 )); then - args+=( - --auto_upload - ) + args+=( --auto_upload ) fi python "${TD}/generate_model_artifacts.py" "${args[@]}" From d6f258a2ad8c6726ddedf8d75fbc5835b756b76a Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Tue, 12 Sep 2023 18:44:44 +0000 Subject: [PATCH 07/17] Only generate enabled models --- .github/workflows/update_model_artifacts.yml | 9 ++------- .../jax/scripts/generate_model_artifacts.sh | 10 ++++++++++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index 2d50ec4d..8e5f871a 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -63,12 +63,7 @@ jobs: --env "AUTO_UPLOAD=0" \ --env "OUTPUT_DIR=jax" \ "gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4dfb6aac3f53e552dd728c9330c90752e78ae51e4276f" \ - 
"common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" \ - "T5_LARGE_FP32_JAX_512XI32_BATCH1" \ - "T5_LARGE_FP32_JAX_512XI32_BATCH16" \ - "T5_LARGE_FP32_JAX_512XI32_BATCH24" \ - "T5_LARGE_FP32_JAX_512XI32_BATCH32" \ - "T5_LARGE_FP32_JAX_512XI32_BATCH48" + "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" - name: "Upload JAX model artifacts" run: | - gcloud storage cp -r "jax/*" "gs://iree-model-artifacts/jax/" + gcloud storage cp -r "jax/*" "gs://iree-model-artifacts/jax" diff --git a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh index 5a4a7948..3c5c95ea 100755 --- a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh +++ b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh @@ -59,6 +59,16 @@ declare -a args=( if (( "${#FILTER[@]}" > 0 )); then args+=( --filter "${FILTER[@]}" ) +else + # Generate enabled models in comparative_benchmark/jax/benchmark_xla.sh + args+=( + --filter + "RESNET50_FP32_JAX_.+" + "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64|512)" + "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64)" + "T5_4CG_LARGE_FP32_JAX_.+" + "GPT2LMHEAD_FP32_JAX_.+" + ) fi if (( AUTO_UPLOAD == 1 )); then From ef8ceb42bb08b5328b2cc5964e816169bdf0bc35 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Tue, 12 Sep 2023 21:08:51 +0000 Subject: [PATCH 08/17] Cur more benchmarks --- .../comparative_suite/jax/scripts/generate_model_artifacts.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh index 3c5c95ea..a586e151 100755 --- 
a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh +++ b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh @@ -65,7 +65,7 @@ else --filter "RESNET50_FP32_JAX_.+" "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64|512)" - "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64)" + "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48)" "T5_4CG_LARGE_FP32_JAX_.+" "GPT2LMHEAD_FP32_JAX_.+" ) From 0905f8d2091349cc6fd4bdb9a70fc551f5c4760b Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Wed, 13 Sep 2023 17:29:49 +0000 Subject: [PATCH 09/17] Test on CPU machine --- .github/workflows/update_model_artifacts.yml | 18 ++++++++++++++---- .../jax/scripts/generate_model_artifacts.sh | 6 +++--- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index 8e5f871a..bf65c34b 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -50,19 +50,29 @@ jobs: - self-hosted # must come first - runner-group=${{ needs.setup.outputs.runner-group }} - environment=prod - - machine-type=a2-highgpu-1g + - machine-type=c2-standard-16 steps: - name: "Checking out PR repository" uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 + # - name: "Generate JAX model artifacts" + # run: | + # mkdir jax + # docker run --gpus all --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ + # --env "PYTHON=python3" \ + # --env "WITH_CUDA=1" \ + # --env "AUTO_UPLOAD=0" \ + # --env "OUTPUT_DIR=jax" \ + # "gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4dfb6aac3f53e552dd728c9330c90752e78ae51e4276f" \ + # "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" - name: "Generate JAX model artifacts" run: | mkdir jax - docker run --gpus all --mount="type=bind,src="${PWD}",target=/work" 
--workdir="/work" \ + docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ --env "PYTHON=python3" \ - --env "WITH_CUDA=1" \ + --env "WITH_CUDA=0" \ --env "AUTO_UPLOAD=0" \ --env "OUTPUT_DIR=jax" \ - "gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4dfb6aac3f53e552dd728c9330c90752e78ae51e4276f" \ + "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \ "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" - name: "Upload JAX model artifacts" run: | diff --git a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh index a586e151..601dd61c 100755 --- a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh +++ b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh @@ -64,9 +64,9 @@ else args+=( --filter "RESNET50_FP32_JAX_.+" - "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64|512)" - "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48)" - "T5_4CG_LARGE_FP32_JAX_.+" + # "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64|512)" + # "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48)" + # "T5_4CG_LARGE_FP32_JAX_.+" "GPT2LMHEAD_FP32_JAX_.+" ) fi From ae675fd7951999b3b21b41a6116f5ad3c8b6d905 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Wed, 13 Sep 2023 18:45:01 +0000 Subject: [PATCH 10/17] Enable others --- .../jax/scripts/generate_model_artifacts.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh index 601dd61c..a586e151 100755 --- 
a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh +++ b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh @@ -64,9 +64,9 @@ else args+=( --filter "RESNET50_FP32_JAX_.+" - # "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64|512)" - # "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48)" - # "T5_4CG_LARGE_FP32_JAX_.+" + "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64|512)" + "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48)" + "T5_4CG_LARGE_FP32_JAX_.+" "GPT2LMHEAD_FP32_JAX_.+" ) fi From b235029a909680b536cf4f3a7adb3a5b50569e7b Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Fri, 15 Sep 2023 17:24:37 +0000 Subject: [PATCH 11/17] Refactor --- .github/workflows/update_model_artifacts.yml | 23 +++++++------------ .../jax/scripts/generate_model_artifacts.sh | 10 -------- 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index bf65c34b..9d6b6121 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -4,14 +4,11 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# Comparative Benchmarks Workflow. +# Workflow that updates model artifacts. name: Update Model Artifacts on: - schedule: - # Scheduled to run at 09:00 UTC. 
- - cron: '0 09 * * *' workflow_dispatch: pull_request: @@ -54,26 +51,22 @@ jobs: steps: - name: "Checking out PR repository" uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 - # - name: "Generate JAX model artifacts" - # run: | - # mkdir jax - # docker run --gpus all --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ - # --env "PYTHON=python3" \ - # --env "WITH_CUDA=1" \ - # --env "AUTO_UPLOAD=0" \ - # --env "OUTPUT_DIR=jax" \ - # "gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4dfb6aac3f53e552dd728c9330c90752e78ae51e4276f" \ - # "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" - name: "Generate JAX model artifacts" run: | mkdir jax + # Generate enabled models in comparative_benchmark/jax/benchmark_xla.sh docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ --env "PYTHON=python3" \ --env "WITH_CUDA=0" \ --env "AUTO_UPLOAD=0" \ --env "OUTPUT_DIR=jax" \ "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \ - "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" + "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" --filter \ + "RESNET50_FP32_JAX_.+" \ + "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64|512)" \ + "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64)" \ + "T5_4CG_LARGE_FP32_JAX_.+" \ + "GPT2LMHEAD_FP32_JAX_.+" - name: "Upload JAX model artifacts" run: | gcloud storage cp -r "jax/*" "gs://iree-model-artifacts/jax" diff --git a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh index a586e151..5a4a7948 100755 --- a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh +++ 
b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh @@ -59,16 +59,6 @@ declare -a args=( if (( "${#FILTER[@]}" > 0 )); then args+=( --filter "${FILTER[@]}" ) -else - # Generate enabled models in comparative_benchmark/jax/benchmark_xla.sh - args+=( - --filter - "RESNET50_FP32_JAX_.+" - "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64|512)" - "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48)" - "T5_4CG_LARGE_FP32_JAX_.+" - "GPT2LMHEAD_FP32_JAX_.+" - ) fi if (( AUTO_UPLOAD == 1 )); then From ae50e3dc758f72fef00ebc0ba308b5a12c112e0b Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Fri, 15 Sep 2023 17:41:46 +0000 Subject: [PATCH 12/17] Support concurrent generation --- .github/workflows/update_model_artifacts.yml | 1 + .../jax/scripts/generate_model_artifacts.py | 40 +++++++++++-------- .../jax/scripts/generate_model_artifacts.sh | 3 ++ 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index 9d6b6121..377de26d 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -59,6 +59,7 @@ jobs: --env "PYTHON=python3" \ --env "WITH_CUDA=0" \ --env "AUTO_UPLOAD=0" \ + --env "JOBS=8" \ --env "OUTPUT_DIR=jax" \ "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \ "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" --filter \ diff --git a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.py b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.py index 2e83abc8..6fe0dc26 100644 --- a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.py +++ b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.py @@ 
-5,11 +5,11 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import argparse +import concurrent.futures import jax import os import pathlib import re -import multiprocessing import shutil import subprocess import sys @@ -103,6 +103,12 @@ def _parse_arguments() -> argparse.Namespace: type=pathlib.Path, required=True, help="Directory to save model artifacts.") + parser.add_argument("-f", + "--filter", + dest="filters", + nargs="+", + default=[".*"], + help="The regex patterns to filter model names.") parser.add_argument("--iree-ir-tool", "--iree_ir_tool", type=pathlib.Path, @@ -115,17 +121,18 @@ def _parse_arguments() -> argparse.Namespace: help= f"If set, uploads artifacts automatically to {GCS_UPLOAD_DIR} and removes them locally once uploaded." ) - parser.add_argument("-f", - "--filter", - dest="filters", - nargs="+", - default=[".*"], - help="The regex patterns to filter model names.") + parser.add_argument( + "-j", + "--jobs", + type=int, + default=1, + help="Max number of concurrent jobs to generate artifacts. Be cautious" + " when generating with GPU.") return parser.parse_args() def main(output_dir: pathlib.Path, filters: List[str], - iree_ir_tool: pathlib.Path, auto_upload: bool): + iree_ir_tool: pathlib.Path, auto_upload: bool, jobs: int): combined_filters = "|".join(f"({name_filter})" for name_filter in filters) name_pattern = re.compile(f"^{combined_filters}$") models = [ @@ -141,14 +148,15 @@ def main(output_dir: pathlib.Path, filters: List[str], output_dir.mkdir(parents=True, exist_ok=True) - for model in models: - # We need to generate artifacts in a separate proces each time in order for - # XLA to update the HLO dump directory. 
- p = multiprocessing.Process(target=_generate_artifacts, - args=(model, output_dir, iree_ir_tool, - auto_upload)) - p.start() - p.join() + with concurrent.futures.ProcessPoolExecutor(max_workers=jobs) as executor: + for model in models: + # We need to generate artifacts in a separate process each time in order for + # XLA to update the HLO dump directory. + executor.submit(_generate_artifacts, + model=model, + save_dir=output_dir, + iree_ir_tool=iree_ir_tool, + auto_upload=auto_upload) if auto_upload: utils.gcs_upload(f"{output_dir}/**", f"{GCS_UPLOAD_DIR}/{output_dir.name}/") diff --git a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh index 5a4a7948..f371fa0c 100755 --- a/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh +++ b/common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh @@ -24,6 +24,7 @@ # WITH_CUDA=1 # GCS_UPLOAD_DIR=gs://iree-model-artifacts/jax # AUTO_UPLOAD=1 +# JOBS=1 # # Positional arguments: # FILTER (Optional): Regex to match models, e.g., BERT_LARGE_FP32_.+ @@ -36,6 +37,7 @@ PYTHON="${PYTHON:-"$(which python)"}" WITH_CUDA="${WITH_CUDA:-}" AUTO_UPLOAD="${AUTO_UPLOAD:-0}" OUTPUT_DIR="${OUTPUT_DIR:-/tmp}" +JOBS="${JOBS:-1}" FILTER=( "$@" ) VENV_DIR=${VENV_DIR} PYTHON=${PYTHON} WITH_CUDA=${WITH_CUDA} "${TD}/setup_venv.sh" @@ -55,6 +57,7 @@ pip list > "${VERSION_DIR}/models_version_info.txt" declare -a args=( -o "${VERSION_DIR}" --iree_ir_tool="$(which iree-ir-tool)" + --jobs="${JOBS}" ) if (( "${#FILTER[@]}" > 0 )); then From 6d4b1dfc3e134ed56a385b54e36d4e4b0b8c6113 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Fri, 15 Sep 2023 17:45:28 +0000 Subject: [PATCH 13/17] Fix commands --- .github/workflows/update_model_artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index 377de26d..91a82fd7 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -62,7 +62,7 @@ jobs: --env "JOBS=8" \ --env "OUTPUT_DIR=jax" \ "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \ - "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" --filter \ + "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" \ "RESNET50_FP32_JAX_.+" \ "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64|512)" \ "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64)" \ From 0c8146df918ad2ad730d481f84eec396b44e1d57 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Fri, 15 Sep 2023 18:55:43 +0000 Subject: [PATCH 14/17] Reduce jobs --- .github/workflows/update_model_artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index 91a82fd7..d9d447e0 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -59,7 +59,7 @@ jobs: --env "PYTHON=python3" \ --env "WITH_CUDA=0" \ --env "AUTO_UPLOAD=0" \ - --env "JOBS=8" \ + --env "JOBS=2" \ --env "OUTPUT_DIR=jax" \ "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \ "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" \ From 0aca8bca4036cc929955193ee178549ea797d926 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Fri, 15 Sep 2023 21:19:21 +0000 Subject: [PATCH 15/17] Reduce model sizes --- .github/workflows/update_model_artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update_model_artifacts.yml 
b/.github/workflows/update_model_artifacts.yml index d9d447e0..aa8d8c95 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -64,7 +64,7 @@ jobs: "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \ "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" \ "RESNET50_FP32_JAX_.+" \ - "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64|512)" \ + "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64)" \ "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64)" \ "T5_4CG_LARGE_FP32_JAX_.+" \ "GPT2LMHEAD_FP32_JAX_.+" From aa635bfc3c2a5ee1e1a6c35cbab17351a9ba0183 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Sun, 17 Sep 2023 22:39:59 +0000 Subject: [PATCH 16/17] Reduce jobs to 1 --- .github/workflows/update_model_artifacts.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index aa8d8c95..de79a2e9 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -59,12 +59,12 @@ jobs: --env "PYTHON=python3" \ --env "WITH_CUDA=0" \ --env "AUTO_UPLOAD=0" \ - --env "JOBS=2" \ + --env "JOBS=1" \ --env "OUTPUT_DIR=jax" \ "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \ "common_benchmark_suite/openxla/benchmark/comparative_suite/jax/scripts/generate_model_artifacts.sh" \ "RESNET50_FP32_JAX_.+" \ - "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64)" \ + "BERT_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64|512)" \ "T5_LARGE_FP32_JAX_.+_BATCH(1|16|24|32|48|64)" \ "T5_4CG_LARGE_FP32_JAX_.+" \ "GPT2LMHEAD_FP32_JAX_.+" From 83cbb47ffcf6141ee91e07cd67c171840bb463b1 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Mon, 18 Sep 2023 01:52:28 +0000 Subject: [PATCH 17/17] Revert "Test in presubmit" This reverts commit 
13500b82706ed8d3a28f6dcfdb365a03e50f09a4. --- .github/workflows/update_model_artifacts.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/update_model_artifacts.yml b/.github/workflows/update_model_artifacts.yml index de79a2e9..c480f87e 100644 --- a/.github/workflows/update_model_artifacts.yml +++ b/.github/workflows/update_model_artifacts.yml @@ -10,7 +10,6 @@ name: Update Model Artifacts on: workflow_dispatch: - pull_request: concurrency: # A PR number if a pull request and otherwise the commit hash. This cancels