From 5790e68030dd880c2cd393e11fdbb703189b076c Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Wed, 6 Nov 2024 12:00:44 -0800 Subject: [PATCH 01/18] Clone https://github.com/mlcommons/algorithmic-efficiency.git --- userbenchmark/release-test/run.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/userbenchmark/release-test/run.py b/userbenchmark/release-test/run.py index 6f17f6583..7ce09468d 100644 --- a/userbenchmark/release-test/run.py +++ b/userbenchmark/release-test/run.py @@ -27,6 +27,7 @@ BM_NAME = "release-test" EXAMPLE_URL = "https://github.com/pytorch/examples.git" +ALGORITHMIC_EFFICIENCY_URL = "https://github.com/mlcommons/algorithmic-efficiency.git" CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) DEFAULT_CONFIG_PATH = os.path.join( os.path.dirname(os.path.abspath(__file__)), "configs" @@ -132,6 +133,7 @@ def prepare_release_tests(args: argparse.Namespace, work_dir: Path): dump_test_scripts(run_scripts, work_dir) # clone the examples repo Repo.clone_from(EXAMPLE_URL, work_dir.joinpath("examples")) + Repo.clone_from(ALGORITHMIC_EFFICIENCY_URL, work_dir.joinpath("algorithmic_efficiency") return run_scripts From 645c043958b7abd8d3b36317477395bd50d6a414 Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Wed, 6 Nov 2024 13:26:20 -0800 Subject: [PATCH 02/18] Manually run submission_runner.py from algorithmic-efficiency --- userbenchmark/release-test/run.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/userbenchmark/release-test/run.py b/userbenchmark/release-test/run.py index 7ce09468d..e5026a1ff 100644 --- a/userbenchmark/release-test/run.py +++ b/userbenchmark/release-test/run.py @@ -32,13 +32,22 @@ DEFAULT_CONFIG_PATH = os.path.join( os.path.dirname(os.path.abspath(__file__)), "configs" ) + +# Updated RUN_TEMPLATE RUN_TEMPLATE = """ # GENERATED BY userbenchmark/release-test/__init__.py. DO NOT EDIT! bash {RELEASE_TEST_ROOT}/setup_env.sh '{CUDA_VERSION}' '{MAGMA_VERSION}' '{PYTORCH_VERSION}' '{PYTORCH_CHANNEL}' '{WORK_DIR}' -bash {RELEASE_TEST_ROOT}/run_release_test.sh '{CUDA_VERSION}' '{RESULT_DIR}' +bash {RELEASE_TEST_ROOT}/monitor_proc.sh {ALGORITHMIC_EFFICIENCY_ROOT}/submission_runner.py \ + --workload=mnist \ + --framework=pytorch \ + --submission_path=reference_algorithms/development_algorithms/mnist/mnist_pytorch/submission.py \ + --tuning_ruleset=external \ + --tuning_search_space=reference_algorithms/development_algorithms/mnist/tuning_search_space.json \ + --num_tuning_trials=3 \ + --experiment_dir=$HOME/experiments_mnist_pytorch \ + --experiment_name=baseline_mnist """ - def get_timestamp(): return datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S") @@ -68,8 +77,10 @@ def generate_test_scripts(config, work_dir): PYTORCH_CHANNEL=pytorch["conda_channel"], WORK_DIR=work_dir, RESULT_DIR=work_dir.joinpath(run_key), + ALGORITHMIC_EFFICIENCY_ROOT=work_dir.joinpath("algorithmic_efficiency") ) run_scripts[run_key] = run_script + print(f"run_script: {run_script} ...") return run_scripts @@ -133,7 +144,7 @@ def prepare_release_tests(args: argparse.Namespace, work_dir: Path): dump_test_scripts(run_scripts, work_dir) # clone the examples repo Repo.clone_from(EXAMPLE_URL, work_dir.joinpath("examples")) - Repo.clone_from(ALGORITHMIC_EFFICIENCY_URL, work_dir.joinpath("algorithmic_efficiency") + Repo.clone_from(ALGORITHMIC_EFFICIENCY_URL, work_dir.joinpath("algorithmic_efficiency")) return run_scripts @@ -141,6 +152,9 @@ def cleanup_release_tests(work_dir): examples_path = work_dir.joinpath("examples") if examples_path.exists(): shutil.rmtree(examples_path) + algorithmic_efficiency_path = work_dir.joinpath("algorithmic_efficiency") + if algorithmic_efficiency_path.exists(): + shutil.rmtree(algorithmic_efficiency_path) def run(args: List[str]): From a78431fbf24b2d79df3ea0295f7fcdadbd5ce07d Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Wed, 6 Nov 2024 15:31:29 -0800 Subject: [PATCH 03/18] Update run.py --- userbenchmark/release-test/run.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/userbenchmark/release-test/run.py b/userbenchmark/release-test/run.py index e5026a1ff..83c32835d 100644 --- a/userbenchmark/release-test/run.py +++ b/userbenchmark/release-test/run.py @@ -37,15 +37,6 @@ RUN_TEMPLATE = """ # GENERATED BY userbenchmark/release-test/__init__.py. DO NOT EDIT! bash {RELEASE_TEST_ROOT}/setup_env.sh '{CUDA_VERSION}' '{MAGMA_VERSION}' '{PYTORCH_VERSION}' '{PYTORCH_CHANNEL}' '{WORK_DIR}' -bash {RELEASE_TEST_ROOT}/monitor_proc.sh {ALGORITHMIC_EFFICIENCY_ROOT}/submission_runner.py \ - --workload=mnist \ - --framework=pytorch \ - --submission_path=reference_algorithms/development_algorithms/mnist/mnist_pytorch/submission.py \ - --tuning_ruleset=external \ - --tuning_search_space=reference_algorithms/development_algorithms/mnist/tuning_search_space.json \ - --num_tuning_trials=3 \ - --experiment_dir=$HOME/experiments_mnist_pytorch \ - --experiment_name=baseline_mnist """ def get_timestamp(): @@ -77,7 +68,6 @@ def generate_test_scripts(config, work_dir): PYTORCH_CHANNEL=pytorch["conda_channel"], WORK_DIR=work_dir, RESULT_DIR=work_dir.joinpath(run_key), - ALGORITHMIC_EFFICIENCY_ROOT=work_dir.joinpath("algorithmic_efficiency") ) run_scripts[run_key] = run_script print(f"run_script: {run_script} ...") @@ -145,6 +135,9 @@ def prepare_release_tests(args: argparse.Namespace, work_dir: Path): # clone the examples repo Repo.clone_from(EXAMPLE_URL, work_dir.joinpath("examples")) Repo.clone_from(ALGORITHMIC_EFFICIENCY_URL, work_dir.joinpath("algorithmic_efficiency")) + print("algorithmic_efficiency cloned.") + algorithmic_efficiency_path = work_dir.joinpath("algorithmic_efficiency") + print(f"algorithmic_efficiency_path.exist(): {algorithmic_efficiency_path.exists()}.") return run_scripts From 79bc6afd1fbbb7ee3c8a7afd2f770e998535585b Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Mon, 11 Nov 2024 10:57:29 -0800 Subject: [PATCH 04/18] Update run.py --- userbenchmark/release-test/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/userbenchmark/release-test/run.py b/userbenchmark/release-test/run.py index 83c32835d..d6dbfc7f6 100644 --- a/userbenchmark/release-test/run.py +++ b/userbenchmark/release-test/run.py @@ -134,8 +134,8 @@ def prepare_release_tests(args: argparse.Namespace, work_dir: Path): dump_test_scripts(run_scripts, work_dir) # clone the examples repo Repo.clone_from(EXAMPLE_URL, work_dir.joinpath("examples")) - Repo.clone_from(ALGORITHMIC_EFFICIENCY_URL, work_dir.joinpath("algorithmic_efficiency")) - print("algorithmic_efficiency cloned.") + # Repo.clone_from(ALGORITHMIC_EFFICIENCY_URL, work_dir.joinpath("algorithmic_efficiency")) + # print("algorithmic_efficiency cloned.") algorithmic_efficiency_path = work_dir.joinpath("algorithmic_efficiency") print(f"algorithmic_efficiency_path.exist(): {algorithmic_efficiency_path.exists()}.") return run_scripts From efb4b07016e0e26e79d6f987432c44f8ddac09c3 Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Mon, 11 Nov 2024 12:48:13 -0800 Subject: [PATCH 05/18] Update run.py --- userbenchmark/release-test/run.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/userbenchmark/release-test/run.py b/userbenchmark/release-test/run.py index d6dbfc7f6..86e88f300 100644 --- a/userbenchmark/release-test/run.py +++ b/userbenchmark/release-test/run.py @@ -27,18 +27,17 @@ BM_NAME = "release-test" EXAMPLE_URL = "https://github.com/pytorch/examples.git" -ALGORITHMIC_EFFICIENCY_URL = "https://github.com/mlcommons/algorithmic-efficiency.git" CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) DEFAULT_CONFIG_PATH = os.path.join( os.path.dirname(os.path.abspath(__file__)), "configs" ) - -# Updated RUN_TEMPLATE RUN_TEMPLATE = """ # GENERATED BY userbenchmark/release-test/__init__.py. DO NOT EDIT! bash {RELEASE_TEST_ROOT}/setup_env.sh '{CUDA_VERSION}' '{MAGMA_VERSION}' '{PYTORCH_VERSION}' '{PYTORCH_CHANNEL}' '{WORK_DIR}' +bash {RELEASE_TEST_ROOT}/run_release_test.sh '{CUDA_VERSION}' '{RESULT_DIR}' """ + def get_timestamp(): return datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S") @@ -70,7 +69,6 @@ def generate_test_scripts(config, work_dir): RESULT_DIR=work_dir.joinpath(run_key), ) run_scripts[run_key] = run_script - print(f"run_script: {run_script} ...") return run_scripts @@ -79,6 +77,7 @@ def dump_test_scripts(run_scripts, work_dir): run_script_loc = work_dir.joinpath(run_key) run_script_loc.mkdir(exist_ok=True) with open(run_script_loc.joinpath("run.sh"), "w") as rs: + print("writing run_script:", run_script) rs.write(run_script) @@ -134,10 +133,6 @@ def prepare_release_tests(args: argparse.Namespace, work_dir: Path): dump_test_scripts(run_scripts, work_dir) # clone the examples repo Repo.clone_from(EXAMPLE_URL, work_dir.joinpath("examples")) - # Repo.clone_from(ALGORITHMIC_EFFICIENCY_URL, work_dir.joinpath("algorithmic_efficiency")) - # print("algorithmic_efficiency cloned.") - algorithmic_efficiency_path = work_dir.joinpath("algorithmic_efficiency") - print(f"algorithmic_efficiency_path.exist(): {algorithmic_efficiency_path.exists()}.") return run_scripts @@ -145,9 +140,6 @@ def cleanup_release_tests(work_dir): examples_path = work_dir.joinpath("examples") if examples_path.exists(): shutil.rmtree(examples_path) - algorithmic_efficiency_path = work_dir.joinpath("algorithmic_efficiency") - if algorithmic_efficiency_path.exists(): - shutil.rmtree(algorithmic_efficiency_path) def run(args: List[str]): @@ -159,6 +151,7 @@ def run(args: List[str]): run_scripts = prepare_release_tests(args=args, work_dir=work_dir) if not args.dry_run: run_benchmark(run_scripts, work_dir) + print("analyze work_dir:", work_dir) metrics = analyze(work_dir) dump_result_to_json(metrics) cleanup_release_tests(work_dir) From 4b5c7332b7224c58f660937e58ed8fd7703503c9 Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Mon, 11 Nov 2024 12:49:30 -0800 Subject: [PATCH 06/18] Update result_analyzer.py --- userbenchmark/release-test/result_analyzer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/userbenchmark/release-test/result_analyzer.py b/userbenchmark/release-test/result_analyzer.py index 7f79330ab..31a2da830 100644 --- a/userbenchmark/release-test/result_analyzer.py +++ b/userbenchmark/release-test/result_analyzer.py @@ -30,7 +30,9 @@ def dump_result_csv(work_dir, result): DELIMITER = ";" # generate header run_keys = sorted(result.keys()) + print("run_keys:", run_keys) workloads = sorted(result[run_keys[0]]) + print("workloads:", workloads) metrics = sorted(result[run_keys[0]][workloads[0]]) for run_key in run_keys: csv_object[0].append(f"{run_key}") From 2ccc92a1bf735da406c2a1ea3afb47441a8fdb6d Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Mon, 11 Nov 2024 14:09:51 -0800 Subject: [PATCH 07/18] clone algorithmic-efficiency repo --- userbenchmark/release-test/run.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/userbenchmark/release-test/run.py b/userbenchmark/release-test/run.py index 86e88f300..f9dd0ba82 100644 --- a/userbenchmark/release-test/run.py +++ b/userbenchmark/release-test/run.py @@ -27,6 +27,7 @@ BM_NAME = "release-test" EXAMPLE_URL = "https://github.com/pytorch/examples.git" +ALGORITHMIC_EFFICIENCY_URL = "https://github.com/mlcommons/algorithmic-efficiency.git" CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) DEFAULT_CONFIG_PATH = os.path.join( os.path.dirname(os.path.abspath(__file__)), "configs" @@ -133,6 +134,10 @@ def prepare_release_tests(args: argparse.Namespace, work_dir: Path): dump_test_scripts(run_scripts, work_dir) # clone the examples repo Repo.clone_from(EXAMPLE_URL, work_dir.joinpath("examples")) + Repo.clone_from(ALGORITHMIC_EFFICIENCY_URL, work_dir.joinpath("algorithmic-efficiency")) + print("algorithmic_efficiency cloned.") + algorithmic_efficiency_path = work_dir.joinpath("algorithmic_efficiency") + print(f"algorithmic_efficiency_path.exist(): {algorithmic_efficiency_path.exists()}.") return run_scripts From 63cf7e6599f0c43a3d8844bfb334a86e2303c3b9 Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Mon, 11 Nov 2024 15:17:32 -0800 Subject: [PATCH 08/18] Add a function to manually run a model from mlcommons/algorithmic-efficiency --- userbenchmark/release-test/run.py | 42 +++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/userbenchmark/release-test/run.py b/userbenchmark/release-test/run.py index f9dd0ba82..bc3d810cc 100644 --- a/userbenchmark/release-test/run.py +++ b/userbenchmark/release-test/run.py @@ -27,7 +27,6 @@ BM_NAME = "release-test" EXAMPLE_URL = "https://github.com/pytorch/examples.git" -ALGORITHMIC_EFFICIENCY_URL = "https://github.com/mlcommons/algorithmic-efficiency.git" CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) DEFAULT_CONFIG_PATH = os.path.join( os.path.dirname(os.path.abspath(__file__)), "configs" @@ -38,6 +37,38 @@ bash {RELEASE_TEST_ROOT}/run_release_test.sh '{CUDA_VERSION}' '{RESULT_DIR}' """ +def run_algorithmic_efficiency(work_dir, experiment_dir, experiment_name): + """Runs algorithmic efficiency benchmarks.""" + repo_dir = work_dir.joinpath("algorithmic_efficiency") + print("repo_dir:", repo_dir) + repo_url = "https://github.com/mlcommons/algorithmic-efficiency.git" + + if not os.path.exists(repo_dir): + try: + Repo.clone_from(repo_url, repo_dir) + except Exception as e: + print(f"Error cloning algorithmic-efficiency repo using Repo.clone_from: {e}") + return False + + command = [ + "python3", + f"{repo_dir}/submission_runner.py", + "--workload=mnist", + "--framework=pytorch", + f"--submission_path={repo_dir}/reference_algorithms/development_algorithms/mnist/mnist_pytorch/submission.py", + "--tuning_ruleset=external", + f"--tuning_search_space={repo_dir}/reference_algorithms/development_algorithms/mnist/tuning_search_space.json", + "--num_tuning_trials=3", # Adjust as needed + f"--experiment_dir={experiment_dir}", + f"--experiment_name={experiment_name}", + ] + print("running command:", command) + + ret = subprocess.call(command) + if ret != 0: + print(f"Error running algorithmic efficiency benchmark: {ret}") + return False + return True def get_timestamp(): return datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S") @@ -134,10 +165,6 @@ def prepare_release_tests(args: argparse.Namespace, work_dir: Path): dump_test_scripts(run_scripts, work_dir) # clone the examples repo Repo.clone_from(EXAMPLE_URL, work_dir.joinpath("examples")) - Repo.clone_from(ALGORITHMIC_EFFICIENCY_URL, work_dir.joinpath("algorithmic-efficiency")) - print("algorithmic_efficiency cloned.") - algorithmic_efficiency_path = work_dir.joinpath("algorithmic_efficiency") - print(f"algorithmic_efficiency_path.exist(): {algorithmic_efficiency_path.exists()}.") return run_scripts @@ -155,6 +182,11 @@ def run(args: List[str]): work_dir = get_work_dir(get_output_dir(BM_NAME)) run_scripts = prepare_release_tests(args=args, work_dir=work_dir) if not args.dry_run: + # Run algorithmic efficiency benchmarks + print("Running run_algorithmic_efficiency starts...") + experiment_dir = os.path.join(work_dir, "algorithmic_efficiency_results") # Create a subdirectory + experiment_name = f"algorithmic_efficiency_{get_timestamp()}" + run_algorithmic_efficiency(work_dir, experiment_dir, experiment_name) run_benchmark(run_scripts, work_dir) print("analyze work_dir:", work_dir) metrics = analyze(work_dir) From 37ac07c64a7faf12ceaac78fed4be741587a11e0 Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Mon, 11 Nov 2024 20:20:57 -0800 Subject: [PATCH 09/18] Update userbenchmark-a100-release.yml Install requirements to run algorithmic-efficiency following https://github.com/mlcommons/algorithmic-efficiency/blob/main/GETTING_STARTED.md#set-up-and-installation --- .github/workflows/userbenchmark-a100-release.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/userbenchmark-a100-release.yml b/.github/workflows/userbenchmark-a100-release.yml index 6523af969..ea77e03fb 100644 --- a/.github/workflows/userbenchmark-a100-release.yml +++ b/.github/workflows/userbenchmark-a100-release.yml @@ -42,6 +42,12 @@ jobs: # remove old results if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi pushd benchmark + + # Install necessary packages <-- Added these lines + pip3 install -e '.[jax_cpu]' + pip3 install -e '.[pytorch_gpu]' -f 'https://download.pytorch.org/whl/cu121' + pip3 install -e '.[full]' + release_version=$(cat userbenchmark/release-test/version.txt) if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi python run_benchmark.py release-test -c ${release_version} From 4f7124a3cf1f2054421faeb2c8f365cba49532cf Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Mon, 11 Nov 2024 20:44:54 -0800 Subject: [PATCH 10/18] Update install.py --- install.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/install.py b/install.py index 78472263e..c9ae1fa98 100644 --- a/install.py +++ b/install.py @@ -11,6 +11,36 @@ REPO_ROOT = Path(__file__).parent +import argparse +import os +import subprocess +import sys +from pathlib import Path + +from userbenchmark import list_userbenchmarks +from utils import generate_pkg_constraints, get_pkg_versions, TORCH_DEPS +from utils.python_utils import pip_install_requirements + +REPO_ROOT = Path(__file__).parent + + +def install_algorithmic_efficiency_deps(): + """Installs algorithmic efficiency dependencies.""" + print("Installing algorithmic efficiency dependencies...") + commands = [ + "pip3 install -e '.[jax_cpu]'", + "pip3 install -e '.[pytorch_gpu]' -f 'https://download.pytorch.org/whl/cu121'", + "pip3 install -e '.[full]'", + ] + for command in commands: + try: + subprocess.check_call(command, shell=True) + except subprocess.CalledProcessError as e: + print(f"Error installing algorithmic efficiency dependencies: {e}") + return False + return True + + if __name__ == "__main__": parser = argparse.ArgumentParser(allow_abbrev=False) parser.add_argument( @@ -77,6 +107,10 @@ if args.check_only: exit(0) + print("start installing deps for algorithmic_efficiency") + install_algorithmic_efficiency_deps() + print("done installing deps for algorithmic_efficiency") + if args.userbenchmark: # Install userbenchmark dependencies if exists userbenchmark_dir = REPO_ROOT.joinpath("userbenchmark", args.userbenchmark) From c86a36919167236a9fbbbb4a1af71764cfd63805 Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Tue, 12 Nov 2024 11:16:50 -0800 Subject: [PATCH 11/18] Update install.py --- install.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/install.py b/install.py index c9ae1fa98..5447f435e 100644 --- a/install.py +++ b/install.py @@ -24,23 +24,6 @@ REPO_ROOT = Path(__file__).parent -def install_algorithmic_efficiency_deps(): - """Installs algorithmic efficiency dependencies.""" - print("Installing algorithmic efficiency dependencies...") - commands = [ - "pip3 install -e '.[jax_cpu]'", - "pip3 install -e '.[pytorch_gpu]' -f 'https://download.pytorch.org/whl/cu121'", - "pip3 install -e '.[full]'", - ] - for command in commands: - try: - subprocess.check_call(command, shell=True) - except subprocess.CalledProcessError as e: - print(f"Error installing algorithmic efficiency dependencies: {e}") - return False - return True - - if __name__ == "__main__": parser = argparse.ArgumentParser(allow_abbrev=False) parser.add_argument( @@ -107,10 +90,6 @@ def install_algorithmic_efficiency_deps(): if args.check_only: exit(0) - print("start installing deps for algorithmic_efficiency") - install_algorithmic_efficiency_deps() - print("done installing deps for algorithmic_efficiency") - if args.userbenchmark: # Install userbenchmark dependencies if exists userbenchmark_dir = REPO_ROOT.joinpath("userbenchmark", args.userbenchmark) From 1bfc96720f5bd764be32c5da34be4c284f24fefe Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Tue, 12 Nov 2024 11:19:16 -0800 Subject: [PATCH 12/18] Update setup_env.sh --- userbenchmark/release-test/setup_env.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/userbenchmark/release-test/setup_env.sh b/userbenchmark/release-test/setup_env.sh index 4f47bb497..1eb5a88d0 100644 --- a/userbenchmark/release-test/setup_env.sh +++ b/userbenchmark/release-test/setup_env.sh @@ -39,8 +39,10 @@ conda install -y -c pytorch ${MAGMA_VERSION} # install pip version of pytorch and torchvision if [[ ${PYTORCH_CHANNEL} == "pytorch-test" ]]; then pip3 install torch==${PYTORCH_VERSION} torchvision --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION//./} + pip3 install jax else pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION//./} + pip3 install jax fi python -c 'import torch; print(torch.__version__); print(torch.version.git_version)' From a3dac35170bebbe70c70b4e51e3fb984a90b9335 Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Tue, 12 Nov 2024 12:10:50 -0800 Subject: [PATCH 13/18] Update setup_env.sh --- userbenchmark/release-test/setup_env.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/userbenchmark/release-test/setup_env.sh b/userbenchmark/release-test/setup_env.sh index 1eb5a88d0..4f47bb497 100644 --- a/userbenchmark/release-test/setup_env.sh +++ b/userbenchmark/release-test/setup_env.sh @@ -39,10 +39,8 @@ conda install -y -c pytorch ${MAGMA_VERSION} # install pip version of pytorch and torchvision if [[ ${PYTORCH_CHANNEL} == "pytorch-test" ]]; then pip3 install torch==${PYTORCH_VERSION} torchvision --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION//./} - pip3 install jax else pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION//./} - pip3 install jax fi python -c 'import torch; print(torch.__version__); print(torch.version.git_version)' From 30394305158629653ac73d53b7b4db09c4b3ad77 Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Tue, 12 Nov 2024 14:03:29 -0800 Subject: [PATCH 14/18] Update userbenchmark-a100-release.yml --- .github/workflows/userbenchmark-a100-release.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/userbenchmark-a100-release.yml b/.github/workflows/userbenchmark-a100-release.yml index ea77e03fb..492b75aff 100644 --- a/.github/workflows/userbenchmark-a100-release.yml +++ b/.github/workflows/userbenchmark-a100-release.yml @@ -1,8 +1,9 @@ name: Release TorchBench Userbenchmark on A100 on: pull_request: - paths: - - userbenchmark/release-test/* + push: + branches: + - main jobs: run-userbenchmark: From d63b9a527119bc9a34bd6f07bcea36938f93762b Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Tue, 12 Nov 2024 15:14:58 -0800 Subject: [PATCH 15/18] Update setup_env.sh --- userbenchmark/release-test/setup_env.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/userbenchmark/release-test/setup_env.sh b/userbenchmark/release-test/setup_env.sh index 4f47bb497..1eb5a88d0 100644 --- a/userbenchmark/release-test/setup_env.sh +++ b/userbenchmark/release-test/setup_env.sh @@ -39,8 +39,10 @@ conda install -y -c pytorch ${MAGMA_VERSION} # install pip version of pytorch and torchvision if [[ ${PYTORCH_CHANNEL} == "pytorch-test" ]]; then pip3 install torch==${PYTORCH_VERSION} torchvision --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION//./} + pip3 install jax else pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION//./} + pip3 install jax fi python -c 'import torch; print(torch.__version__); print(torch.version.git_version)' From 873f994cd26cb4d7e49eaa440f669fae3ac1fb82 Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Tue, 12 Nov 2024 15:15:33 -0800 Subject: [PATCH 16/18] revert userbenchmark-a100-release.yml --- .github/workflows/userbenchmark-a100-release.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/userbenchmark-a100-release.yml b/.github/workflows/userbenchmark-a100-release.yml index 492b75aff..ea77e03fb 100644 --- a/.github/workflows/userbenchmark-a100-release.yml +++ b/.github/workflows/userbenchmark-a100-release.yml @@ -1,9 +1,8 @@ name: Release TorchBench Userbenchmark on A100 on: pull_request: - push: - branches: - - main + paths: + - userbenchmark/release-test/* jobs: run-userbenchmark: From 66e0d07953bb94dcb005e2bfb036867d3448ef71 Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Thu, 14 Nov 2024 08:01:27 -0800 Subject: [PATCH 17/18] Update run.py --- userbenchmark/release-test/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/userbenchmark/release-test/run.py b/userbenchmark/release-test/run.py index bc3d810cc..4ffb0339f 100644 --- a/userbenchmark/release-test/run.py +++ b/userbenchmark/release-test/run.py @@ -186,6 +186,7 @@ def run(args: List[str]): print("Running run_algorithmic_efficiency starts...") experiment_dir = os.path.join(work_dir, "algorithmic_efficiency_results") # Create a subdirectory experiment_name = f"algorithmic_efficiency_{get_timestamp()}" + print("start run_algorithmic_efficiency") run_algorithmic_efficiency(work_dir, experiment_dir, experiment_name) run_benchmark(run_scripts, work_dir) print("analyze work_dir:", work_dir) From b0bc729f673159ca610998909bba53f0c0a0587a Mon Sep 17 00:00:00 2001 From: Julia Guo <153684546+juliagmt-google@users.noreply.github.com> Date: Mon, 9 Dec 2024 09:54:41 -0800 Subject: [PATCH 18/18] Update userbenchmark-a100-release.yml --- .github/workflows/userbenchmark-a100-release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/userbenchmark-a100-release.yml b/.github/workflows/userbenchmark-a100-release.yml index ea77e03fb..60704a5d7 100644 --- a/.github/workflows/userbenchmark-a100-release.yml +++ b/.github/workflows/userbenchmark-a100-release.yml @@ -6,7 +6,7 @@ on: jobs: run-userbenchmark: - runs-on: [a100-runner] + runs-on: [linux.aws.a100] timeout-minutes: 1440 # 24 hours environment: docker-s3-upload env: