diff --git a/.github/workflows/run_mobile_comparative_benchmark.yml b/.github/workflows/run_mobile_comparative_benchmark.yml
index 876ef3f5..0d24987a 100644
--- a/.github/workflows/run_mobile_comparative_benchmark.yml
+++ b/.github/workflows/run_mobile_comparative_benchmark.yml
@@ -57,6 +57,7 @@ jobs:
           fi
           echo "benchmark-gcs-dir=${BENCHMARK_GCS_DIR}" >> "${GITHUB_OUTPUT}"
 
+
   build_iree:
     needs: setup
     runs-on:
@@ -68,6 +69,7 @@ jobs:
     env:
       IREE_SOURCE_DIR: iree-src
       IREE_INSTALL_DIR: iree-install
+      IREE_ANDROID_TOOLS_DIR: tools
       ANDROID_PLATFORM_VERSION: 31
       X86_BUILD_DIR: iree-build
       ANDROID_BUILD_DIR: iree-build-android
@@ -76,6 +78,9 @@ jobs:
       iree-install-dir: ${{ env.IREE_INSTALL_DIR }}
       iree-install-dir-archive: ${{ steps.archive.outputs.iree-install-dir-archive }}
       iree-install-dir-gcs-artifact: ${{ steps.upload.outputs.iree-install-dir-gcs-artifact }}
+      iree-android-tools-dir: ${{ env.IREE_ANDROID_TOOLS_DIR }}
+      iree-android-tools-dir-archive: ${{ steps.archive.outputs.iree-android-tools-dir-archive }}
+      iree-android-tools-dir-gcs-artifact: ${{ steps.upload.outputs.iree-android-tools-dir-gcs-artifact }}
     steps:
       - name: "Checking out PR repository"
         uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
@@ -97,18 +102,28 @@ jobs:
         id: archive
         env:
           IREE_INSTALL_DIR_ARCHIVE: ${{ env.IREE_INSTALL_DIR }}.tgz
+          IREE_ANDROID_TOOLS_DIR_ARCHIVE: ${{ env.IREE_ANDROID_TOOLS_DIR }}.tgz
         run: |
           tar -zcvf "${IREE_INSTALL_DIR_ARCHIVE}" -C "${X86_BUILD_DIR}" "${IREE_INSTALL_DIR}"
           echo "iree-install-dir-archive=${IREE_INSTALL_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}"
+
+          tar -zcvf "${IREE_ANDROID_TOOLS_DIR_ARCHIVE}" -C "${ANDROID_BUILD_DIR}" "${IREE_ANDROID_TOOLS_DIR}"
+          echo "iree-android-tools-dir-archive=${IREE_ANDROID_TOOLS_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}"
       - name: "Uploading archives"
         id: upload
         env:
           IREE_INSTALL_DIR_ARCHIVE: ${{ steps.archive.outputs.iree-install-dir-archive }}
           IREE_INSTALL_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.iree-install-dir-archive }}
+          IREE_ANDROID_TOOLS_DIR_ARCHIVE: ${{ steps.archive.outputs.iree-android-tools-dir-archive }}
+          IREE_ANDROID_TOOLS_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.iree-android-tools-dir-archive }}
         run: |
           gcloud storage cp "${IREE_INSTALL_DIR_ARCHIVE}" "${IREE_INSTALL_DIR_GCS_ARTIFACT}"
           echo "iree-install-dir-gcs-artifact=${IREE_INSTALL_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}"
+          gcloud storage cp "${IREE_ANDROID_TOOLS_DIR_ARCHIVE}" "${IREE_ANDROID_TOOLS_DIR_GCS_ARTIFACT}"
+          echo "iree-android-tools-dir-gcs-artifact=${IREE_ANDROID_TOOLS_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}"
+
+
 
   # We compile and benchmark on the same machine because it takes too much time to compress and upload compiled artifacts.
   compile_and_benchmark_on_c2-standard-60:
     needs: [setup, build_iree]
@@ -154,7 +169,7 @@ jobs:
             --env OOBI_IREE_BENCHMARK_MODULE_PATH="${IREE_INSTALL_DIR}/bin/iree-benchmark-module" \
             --env OOBI_IREE_RUN_MODULE_PATH="${IREE_INSTALL_DIR}/bin/iree-run-module" \
             "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
-            ./experimental/iree/benchmark_iree.sh \
+            ./experimental/iree/benchmark_iree_x86.sh \
               "${TARGET_DEVICE}" \
               "${BENCHMARK_ARTIFACTS_DIR}" \
               "${IREE_RESULTS_FILE}"
@@ -163,3 +178,110 @@ jobs:
           RESULTS_GCS_DIR: ${{ env.BENCHMARK_GCS_DIR }}/${{ env.TARGET_DEVICE }}-results
         run: |
           gcloud storage cp "${IREE_RESULTS_FILE}" "${RESULTS_GCS_DIR}/"
+
+
+  compile_models_for_android:
+    needs: [setup, build_iree]
+    runs-on:
+      - self-hosted # must come first
+      - runner-group=${{ needs.setup.outputs.runner-group }}
+      - environment=prod
+      - cpu
+      - os-family=Linux
+    env:
+      BENCHMARK_ARTIFACTS_DIR: android-benchmark-artifacts-dir
+      TARGET_DEVICE: pixel-6-pro
+      IREE_INSTALL_DIR: ${{ needs.build_iree.outputs.iree-install-dir }}
+      IREE_INSTALL_DIR_ARCHIVE: ${{ needs.build_iree.outputs.iree-install-dir-archive }}
+      IREE_INSTALL_DIR_GCS_ARTIFACT: ${{ needs.build_iree.outputs.iree-install-dir-gcs-artifact }}
+      BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
+    outputs:
+      benchmark-artifacts-dir: ${{ env.BENCHMARK_ARTIFACTS_DIR }}
+      benchmark-artifacts-dir-archive: ${{ steps.archive.outputs.benchmark-artifacts-dir-archive }}
+      benchmark-artifacts-dir-gcs-artifact: ${{ steps.upload.outputs.benchmark-artifacts-dir-gcs-artifact }}
+    steps:
+      - name: "Checking out PR repository"
+        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+      - name: "Downloading and unpacking IREE tools"
+        run: |
+          gcloud storage cp "${IREE_INSTALL_DIR_GCS_ARTIFACT}" "${IREE_INSTALL_DIR_ARCHIVE}"
+          tar -xvf "${IREE_INSTALL_DIR_ARCHIVE}"
+      - name: "Compiling workloads"
+        id: compile
+        env:
+          IREE_COMPILE_PATH: ${{ env.IREE_INSTALL_DIR }}/bin/iree-compile
+          OOBI_TEMP_DIR: temp
+        run: |
+          mkdir "${BENCHMARK_ARTIFACTS_DIR}"
+          mkdir "${OOBI_TEMP_DIR}"
+
+          docker run --mount="type=bind,src=${PWD},target=/work" --workdir="/work" \
+            --env IREE_COMPILE_PATH="${IREE_COMPILE_PATH}" \
+            --env OOBI_TEMP_DIR="${OOBI_TEMP_DIR}" \
+            "gcr.io/iree-oss/openxla-benchmark/android@sha256:34f140fcf806f64f5d0492dfc5af774ea440406264cd68c0405e23a69cbe6d93" \
+            ./experimental/iree/compile_workloads.sh \
+              "${TARGET_DEVICE}" \
+              "${BENCHMARK_ARTIFACTS_DIR}"
+      - name: "Creating archives"
+        id: archive
+        env:
+          BENCHMARK_ARTIFACTS_DIR_ARCHIVE: ${{ env.BENCHMARK_ARTIFACTS_DIR }}.tgz
+        run: |
+          tar -cvf "${BENCHMARK_ARTIFACTS_DIR_ARCHIVE}" "${BENCHMARK_ARTIFACTS_DIR}"
+          echo "benchmark-artifacts-dir-archive=${BENCHMARK_ARTIFACTS_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}"
+      - name: "Uploading archives"
+        id: upload
+        env:
+          BENCHMARK_ARTIFACTS_DIR_ARCHIVE: ${{ steps.archive.outputs.benchmark-artifacts-dir-archive }}
+          BENCHMARK_ARTIFACTS_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.benchmark-artifacts-dir-archive }}
+        run: |
+          gcloud storage cp "${BENCHMARK_ARTIFACTS_DIR_ARCHIVE}" "${BENCHMARK_ARTIFACTS_DIR_GCS_ARTIFACT}"
+          echo "benchmark-artifacts-dir-gcs-artifact=${BENCHMARK_ARTIFACTS_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}"
+
+
+  benchmark_on_pixel-6-pro:
+    needs: [setup, build_iree, compile_models_for_android]
+    runs-on:
+      - self-hosted # must come first
+      - runner-group=${{ needs.setup.outputs.runner-group }}
+      - environment=prod
+      - machine-type=pixel-6-pro
+    env:
+      BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
+      RESULTS_DIR: results-dir
+      TARGET_DEVICE: pixel-6-pro
+      IREE_ANDROID_TOOLS_DIR: ${{ needs.build_iree.outputs.iree-android-tools-dir }}
+      IREE_ANDROID_TOOLS_DIR_ARCHIVE: ${{ needs.build_iree.outputs.iree-android-tools-dir-archive }}
+      IREE_ANDROID_TOOLS_DIR_GCS_ARTIFACT: ${{ needs.build_iree.outputs.iree-android-tools-dir-gcs-artifact }}
+      IREE_ARTIFACTS_DIR: ${{ needs.compile_models_for_android.outputs.benchmark-artifacts-dir }}
+      IREE_ARTIFACTS_DIR_ARCHIVE: ${{ needs.compile_models_for_android.outputs.benchmark-artifacts-dir-archive }}
+      IREE_ARTIFACTS_DIR_GCS_ARTIFACT: ${{ needs.compile_models_for_android.outputs.benchmark-artifacts-dir-gcs-artifact }}
+    steps:
+      - name: "Checking out PR repository"
+        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+      - name: "Setup"
+        id: setup
+        run: |
+          echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
+          mkdir "${RESULTS_DIR}"
+      - name: "Downloading and unpacking tools"
+        run: |
+          gcloud storage cp "${IREE_ANDROID_TOOLS_DIR_GCS_ARTIFACT}" "${IREE_ANDROID_TOOLS_DIR_ARCHIVE}"
+          tar -xvf "${IREE_ANDROID_TOOLS_DIR_ARCHIVE}"
+      - name: "Benchmarking IREE on Android"
+        env:
+          IREE_RESULTS_JSON: iree-${{ env.TARGET_DEVICE }}.json
+          RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
+        run: |
+          RESULTS_PATH="${RESULTS_DIR}/${IREE_RESULTS_JSON}"
+          # We use the `https` path instead of `gs` since artifact streaming only works with `https`.
+          IREE_ARTIFACTS_DIR_HTTPS_ARTIFACT=$(echo "${IREE_ARTIFACTS_DIR_GCS_ARTIFACT}" | sed -E 's/gs:\/\/(.*)/https:\/\/storage.googleapis.com\/\1/')
+
+          OOBI_IREE_RUN_MODULE_PATH="${IREE_ANDROID_TOOLS_DIR}/iree-run-module" \
+          OOBI_IREE_BENCHMARK_MODULE_PATH="${IREE_ANDROID_TOOLS_DIR}/iree-benchmark-module" \
+            ./experimental/iree/benchmark_iree_android.sh \
+              "${TARGET_DEVICE}" \
+              "${IREE_ARTIFACTS_DIR_HTTPS_ARTIFACT}" \
+              "${RESULTS_PATH}"
+
+          gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
diff --git a/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py b/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py
index d2db164f..a291b67b 100644
--- a/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py
+++ b/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py
@@ -19,4 +19,17 @@
     },
 )
 
-ALL_DEVICES = [MOBILE_PIXEL_6_PRO]
+MOBILE_PIXEL_8_PRO = def_types.DeviceSpec(
+    name="pixel-8-pro",
+    host_type="mobile",
+    host_model="pixel-8-pro",
+    host_environment="android",
+    accelerator_type="cpu",
+    accelerator_model="armv9-a",
+    accelerator_architecture="armv9-a",
+    accelerator_attributes={
+        "num_of_cores": 9,
+    },
+)
+
+ALL_DEVICES = [MOBILE_PIXEL_6_PRO, MOBILE_PIXEL_8_PRO]
diff --git a/comparative_benchmark/scripts/adb_fetch_and_push.py b/comparative_benchmark/scripts/adb_fetch_and_push.py
new file mode 100755
index 00000000..2ad30afb
--- /dev/null
+++ b/comparative_benchmark/scripts/adb_fetch_and_push.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+#
+# Copyright 2023 The OpenXLA Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import argparse
+import requests
+import socket
+import struct
+import time
+
+ADB_SERVER_ADDR = ("localhost", 5037)
+
+
+def adb_download_and_push_file(source_url: str,
+                               destination: str,
+                               verbose: bool = False):
+  """Fetch file from the URL and stream to the device.
+  In the case of fetching, this method avoids the temporary file on the host
+  and reduces the overhead when the file is large.
+  Args:
+    source_url: URL to fetch the file.
+    destination: the full destination path on the device.
+    verbose: output verbose message.
+  Returns:
+    File path on the device.
+  """
+
+  if verbose:
+    print(f"Streaming file {source_url} to {destination}.")
+
+  req = requests.get(source_url, stream=True, timeout=60)
+  if not req.ok:
+    raise RuntimeError(
+        f"Failed to fetch {source_url}: {req.status_code} - {req.text}")
+
+  # Implement the ADB sync protocol to stream file chunk to the device, since
+  # the adb client tool doesn't support it.
+  #
+  # Alternatively we can use third-party library such as
+  # https://github.com/JeffLIrion/adb_shell. But the protocol we need is
+  # simple and fairly stable. This part can be replaced with other solutions
+  # if needed.
+  #
+  # To understand the details of the protocol, see
+  # https://cs.android.com/android/_/android/platform/packages/modules/adb/+/93c8e3c26e4de3a2b767a2394200bc0721bb1e24:OVERVIEW.TXT
+
+  def wait_ack_ok(sock: socket.socket):
+    buf = bytearray()
+    while len(buf) < 4:
+      data = sock.recv(4 - len(buf))
+      if not data:
+        break
+      buf += data
+
+    if buf != b"OKAY":
+      raise RuntimeError(f"ADB communication error: {buf}")
+
+  with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+    sock.connect(ADB_SERVER_ADDR)
+    # Connect to any device (the first 4 hexadecimals is the following text
+    # command length).
+    sock.sendall(b"0012host:transport-any")
+    wait_ack_ok(sock)
+    # Switch to sync mode.
+    sock.sendall(b"0005sync:")
+    wait_ack_ok(sock)
+    # Send the destination file path and file permissions 0755 (rwxr-xr-x).
+    file_attr = f"{destination},{0o755}".encode("utf-8")
+    sock.sendall(b"SEND" + struct.pack("<I", len(file_attr)) + file_attr)
+    # Stream the file chunks. 64k bytes is the max chunk size for adb.
+    for data in req.iter_content(chunk_size=64 * 1024):
+      sock.sendall(b"DATA" + struct.pack("<I", len(data)) + data)
+    # End the file stream and set the creation time.
+    sock.sendall(b"DONE" + struct.pack("<I", int(time.time())))
+    wait_ack_ok(sock)
+
+  return destination
+
+
+def _parse_arguments() -> argparse.Namespace:
+  parser = argparse.ArgumentParser(description="Stream a URL to an adb device.")
+  parser.add_argument("-s",
+                      "--source_url",
+                      type=str,
+                      required=True,
+                      help="The url of file to download.")
+  parser.add_argument("-d",
+                      "--destination",
+                      type=str,
+                      required=True,
+                      help="The path on the device to stream the file to.")
+  parser.add_argument("--verbose",
+                      action="store_true",
+                      help="Show verbose messages.")
+  return parser.parse_args()
+
+
+if __name__ == "__main__":
+  adb_download_and_push_file(**vars(_parse_arguments()))
diff --git a/experimental/iree/benchmark_iree_android.sh b/experimental/iree/benchmark_iree_android.sh
new file mode 100755
index 00000000..0c9e1e16
--- /dev/null
+++ b/experimental/iree/benchmark_iree_android.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# Copyright 2023 The OpenXLA Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# Environment variables:
+#   PYTHON: Python interpreter, default: /usr/bin/python3
+#   OOBI_TARGET_DEVICE: target benchmark device, can also be specified the first
+#     argument.
+#   OOBI_VENV_DIR: name of the virtual environment.
+#   OOBI_IREE_BENCHMARK_MODULE_PATH: the path to `iree-benchmark-module`.
+#   OOBI_IREE_RUN_MODULE_PATH: the path to `iree-run-module`.
+#   OOBI_IREE_COMPILED_ARTIFACTS_PATH: the path to the IREE vmfb files to benchmark.
+#   OOBI_ANDROID_BENCHMARK_DIR: the on-device directory where benchmark artifacts are copied to.
+#
+# Example usage:
+# ./benchmark_iree_android.sh <target-device> <compiled-artifacts-path> <output-json>
+
+set -xeuo pipefail
+
+VENV_DIR="${OOBI_VENV_DIR:-iree-benchmarks.venv}"
+PYTHON="${PYTHON:-/usr/bin/python3}"
+ROOT_DIR="${OOBI_ANDROID_BENCHMARK_DIR:-/data/local/tmp/oobi_benchmarks}"
+IREE_BENCHMARK_MODULE_PATH="${OOBI_IREE_BENCHMARK_MODULE_PATH:-/tmp/iree-build-android/tools/iree-benchmark-module}"
+IREE_RUN_MODULE_PATH="${OOBI_IREE_RUN_MODULE_PATH:-/tmp/iree-build-android/tools/iree-run-module}"
+TARGET_DEVICE="${1:-"${OOBI_TARGET_DEVICE}"}"
+COMPILED_ARTIFACTS_PATH="${2:-"${OOBI_IREE_COMPILED_ARTIFACTS_PATH}"}"
+OUTPUT_PATH="${3:-"${OOBI_OUTPUT}"}"
+
+TD="$(cd "$(dirname "$0")" && pwd)"
+
+# Setup benchmarking environment.
+adb shell "rm -rf ${ROOT_DIR}"
+adb shell "mkdir ${ROOT_DIR}"
+
+adb push "${TD}/../../comparative_benchmark/scripts/set_android_scaling_governor.sh" "${ROOT_DIR}"
+adb shell "chmod +x ${ROOT_DIR}/set_android_scaling_governor.sh"
+adb shell "su root sh ${ROOT_DIR}/set_android_scaling_governor.sh performance"
+#adb shell "su root sendhint -m DISPLAY_INACTIVE -e 0"
+adb shell "su root setprop persist.vendor.disable.thermal.control 1"
+
+adb push "${TD}/benchmark_lib.py" "${ROOT_DIR}"
+adb shell "chmod +x ${ROOT_DIR}/benchmark_lib.py"
+
+adb push "${IREE_RUN_MODULE_PATH}" "${ROOT_DIR}"
+IREE_RUN_MODULE_PATH="${ROOT_DIR}/iree-run-module"
+
+adb push "${IREE_BENCHMARK_MODULE_PATH}" "${ROOT_DIR}"
+IREE_BENCHMARK_MODULE_PATH="${ROOT_DIR}/iree-benchmark-module"
+
+VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"
+
+DEVICE_ARTIFACT_DIR="${ROOT_DIR}/artifacts"
+adb shell mkdir "${DEVICE_ARTIFACT_DIR}"
+
+if [[ "${COMPILED_ARTIFACTS_PATH}" = https* ]]; then
+  archive_name=$(basename "${COMPILED_ARTIFACTS_PATH}")
+
+  "${TD}/../../comparative_benchmark/scripts/adb_fetch_and_push.py" \
+    --source_url="${COMPILED_ARTIFACTS_PATH}" \
+    --destination="${ROOT_DIR}/${archive_name}" \
+    --verbose
+
+  adb shell "tar -xf ${ROOT_DIR}/${archive_name} --strip-components=1 -C ${DEVICE_ARTIFACT_DIR}"
+else
+  adb push "${COMPILED_ARTIFACTS_PATH}/." "${DEVICE_ARTIFACT_DIR}/"
+fi
+
+"${TD}/../../comparative_benchmark/scripts/create_results_json.sh" "${OUTPUT_PATH}"
+
+# A num_threads to cpu_ids map. NOTE(review): the intent is to pin the biggest cores; confirm that cpu ids 0-3 are the big cores on the target device.
+THREAD_CONFIG="{1: '0', 4: '0,1,2,3'}"
+
+"${TD}/run_benchmarks.py" \
+  --target_device="${TARGET_DEVICE}" \
+  --output="${OUTPUT_PATH}" \
+  --artifact_dir="${DEVICE_ARTIFACT_DIR}" \
+  --iree_run_module_path="${IREE_RUN_MODULE_PATH}" \
+  --iree_benchmark_module_path="${IREE_BENCHMARK_MODULE_PATH}" \
+  --thread_config="${THREAD_CONFIG}" \
+  --verbose
+
+# Cleanup.
+adb shell rm -rf "${ROOT_DIR}"
diff --git a/experimental/iree/benchmark_iree.sh b/experimental/iree/benchmark_iree_x86.sh
similarity index 88%
rename from experimental/iree/benchmark_iree.sh
rename to experimental/iree/benchmark_iree_x86.sh
index 9818368e..5ff2a448 100755
--- a/experimental/iree/benchmark_iree.sh
+++ b/experimental/iree/benchmark_iree_x86.sh
@@ -34,7 +34,7 @@ VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"
 
 "${TD}/../../comparative_benchmark/scripts/create_results_json.sh" "${OUTPUT_PATH}"
 
-THREADS="1,8,15,30"
+THREAD_CONFIG="{1: '0', 8: '0,1,2,3,4,5,6,7', 15: '0,1,2,3,4,5,6,7,8,9,10,11,12,13,14', 30: '0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29'}"
 
 "${TD}/run_benchmarks.py" \
   --target_device="${TARGET_DEVICE}" \
@@ -42,5 +42,5 @@ THREADS="1,8,15,30"
   --artifact_dir="${COMPILED_ARTIFACTS_PATH}" \
   --iree_run_module_path="${IREE_RUN_MODULE_PATH}" \
   --iree_benchmark_module_path="${IREE_BENCHMARK_MODULE_PATH}" \
-  --threads="${THREADS}" \
+  --thread_config="${THREAD_CONFIG}" \
   --verbose
diff --git a/experimental/iree/benchmark_lib.py b/experimental/iree/benchmark_lib.py
index 06b02a95..09dd9d04 100644
--- a/experimental/iree/benchmark_lib.py
+++ b/experimental/iree/benchmark_lib.py
@@ -7,7 +7,6 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import argparse
-import json
 import pathlib
 import re
 import subprocess
@@ -94,25 +93,22 @@ def run_benchmark_command(benchmark_command: str,
 
 def _parse_arguments() -> argparse.Namespace:
   parser = argparse.ArgumentParser(description="Runs benchmarks.")
-  parser.add_argument("--command",
-                      type=str,
-                      required=True,
-                      help="The command to run.")
-  parser.add_argument("-o",
-                      "--output",
+  # We need to store the command in a text file because argparse is unable to
+  # ignore quoted string with dashes in it, instead interpreting them as
+  # arguments.
+  parser.add_argument("--command_path",
                       type=pathlib.Path,
                       required=True,
-                      help="JSON filepath to save results to.")
+                      help="The command to run stored in a text file.")
   parser.add_argument("--verbose",
                       action="store_true",
                       help="Show verbose messages.")
   return parser.parse_args()
 
 
-def main(command: str, output: pathlib.Path):
-  results = run_benchmark_command(command)
-  with open(output, "w") as outfile:
-    json.dump(results, outfile)
+def main(command_path: pathlib.Path, verbose: bool = False):
+  results = run_benchmark_command(command_path.read_text(), verbose)
+  print(f"results_dict: {results}")
 
 
 if __name__ == "__main__":
diff --git a/experimental/iree/compile_workloads.sh b/experimental/iree/compile_workloads.sh
index ab59046d..2d5b1c04 100755
--- a/experimental/iree/compile_workloads.sh
+++ b/experimental/iree/compile_workloads.sh
@@ -44,10 +44,10 @@ declare -a BENCHMARK_NAMES=(
   "models/BERT_BASE_FP16_JAX_I32_SEQLEN.+/.+"
   "models/BERT_BASE_BF16_JAX_I32_SEQLEN.+/.+"
   "models/T5_4CG_SMALL_FP32_JAX_1X128XI32_GEN.+/.+"
-  "models/SD_PIPELINE_FP32_JAX_64XI32_BATCH.+/.+"
-  "models/SD_PIPELINE_FP16_JAX_64XI32_BATCH.+/.+"
-  "models/SD_PIPELINE_BF16_JAX_64XI32_BATCH.+/.+"
   "models/VIT_CLASSIFICATION_JAX_3X224X224XF32/.+"
+  "models/SD_PIPELINE_FP32_JAX_64XI32_BATCH1/.+"
+  "models/SD_PIPELINE_FP16_JAX_64XI32_BATCH1/.+"
+  "models/SD_PIPELINE_BF16_JAX_64XI32_BATCH1/.+"
 )
 
 for benchmark_name in "${BENCHMARK_NAMES[@]}"; do
diff --git a/experimental/iree/run_benchmarks.py b/experimental/iree/run_benchmarks.py
index e4626dd1..27bc2632 100755
--- a/experimental/iree/run_benchmarks.py
+++ b/experimental/iree/run_benchmarks.py
@@ -7,6 +7,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import argparse
+import ast
 import dataclasses
 import json
 import os
@@ -17,6 +18,8 @@
 
 import benchmark_lib
 
+from typing import Any, Dict, List
+
 # Add common_benchmark_suite dir to the search path.
 sys.path.insert(
     0, str(pathlib.Path(__file__).parents[2] / "common_benchmark_suite"))
@@ -32,48 +35,155 @@
 ALL_DEVICE_NAMES = [device.name for device in devices.ALL_DEVICES]
 
 
-def check_accuracy(artifact_dir: pathlib.Path,
-                   iree_run_module_path: pathlib.Path,
-                   atol: float,
-                   num_threads: str,
-                   verbose: bool = False) -> bool:
-  module_path = artifact_dir / f"module.vmfb"
-  output_npy = artifact_dir / "outputs_npy" / "output_0.npy"
-  command = [
-      str(iree_run_module_path),
+def get_directory_names(target_device: def_types.DeviceSpec,
+                        directory: pathlib.Path):
+  if target_device in devices.mobile_devices.ALL_DEVICES:
+    output = subprocess.run(
+        ["adb", "shell", "ls", str(directory)], check=True, capture_output=True)
+    output = output.stdout.decode()
+    contents = output.splitlines()
+    # Remove empty elements.
+    return [item for item in contents if item]
+  else:
+    return os.listdir(directory)
+
+
+def get_common_command_parameters(target_device: def_types.DeviceSpec,
+                                  artifacts_dir: pathlib.Path,
+                                  task_topology_cpu_ids: str) -> List[str]:
+  module_path = artifacts_dir / "module.vmfb"
+  parameters = [
       f"--module={module_path}",
-      f"--task_topology_group_count={num_threads}",
+      f"--task_topology_cpu_ids={task_topology_cpu_ids}",
       "--device=local-task",
       "--function=main",
-      f"--expected_output=@{output_npy}",
-      f"--expected_f32_threshold={atol}",
-      f"--expected_f16_threshold={atol}",
-      f"--expected_f64_threshold={atol}",
   ]
 
-  inputs_dir = artifact_dir / "inputs_npy"
-  num_inputs = len(list(inputs_dir.glob("*.npy")))
-  for i in range(num_inputs):
-    command.append(f"--input=@{inputs_dir}/input_{i}.npy")
+  inputs_dir = artifacts_dir / "inputs_npy"
+  inputs = get_directory_names(target_device, inputs_dir)
+  for input_name in inputs:
+    parameters.append(f"--input=@{inputs_dir}/{input_name}")
+
+  return parameters
+
+
+def generate_accuracy_check_command(target_device: def_types.DeviceSpec,
+                                    artifacts_dir: pathlib.Path,
+                                    iree_run_module_path: pathlib.Path,
+                                    atol: float,
+                                    task_topology_cpu_ids: str) -> str:
+  output_npy = artifacts_dir / "outputs_npy" / "output_0.npy"
+  command = [str(iree_run_module_path)] + get_common_command_parameters(
+      target_device, artifacts_dir, task_topology_cpu_ids) + [
+          f"--expected_output=@{output_npy}",
+          f"--expected_f32_threshold={atol}",
+          f"--expected_f16_threshold={atol}",
+          f"--expected_f64_threshold={atol}",
+      ]
+  return " ".join(command)
+
+
+def benchmark_on_x86(target_device: def_types.DeviceSpec,
+                     benchmark: def_types.BenchmarkCase,
+                     artifacts_dir: pathlib.Path,
+                     iree_run_module_path: pathlib.Path,
+                     iree_benchmark_module_path: pathlib.Path,
+                     task_topology_cpu_ids: str,
+                     verbose: bool) -> Dict[str, Any]:
+  # Check accuracy.
+  atol = benchmark.verify_parameters["absolute_tolerance"]
+  command = generate_accuracy_check_command(target_device, artifacts_dir,
+                                            iree_run_module_path, atol,
+                                            task_topology_cpu_ids)
+
+  try:
+    output = subprocess.run(command,
+                            shell=True,
+                            check=True,
+                            capture_output=True)
+    if verbose:
+      print(output.stdout.decode())
+    is_accurate = True
+  except subprocess.CalledProcessError as e:
+    print(f"Error running command: {e}")
+    is_accurate = False
+
+  # Run benchmark.
+  command = [str(iree_benchmark_module_path)] + get_common_command_parameters(
+      target_device, artifacts_dir,
+      task_topology_cpu_ids) + ["--print_statistics"]
+  metrics = benchmark_lib.run_benchmark_command(" ".join(command), verbose)
+  metrics["accuracy"] = is_accurate
+  return metrics
-  command_str = " ".join(command)
-  print(f"Running command: {command_str}")
+
+
+def benchmark_on_android(target_device: def_types.DeviceSpec,
+                         benchmark: def_types.BenchmarkCase,
+                         artifacts_dir: pathlib.Path,
+                         iree_run_module_device_path: pathlib.Path,
+                         iree_benchmark_module_device_path: pathlib.Path,
+                         task_topology_cpu_ids: str,
+                         verbose: bool) -> Dict[str, Any]:
+  # Check accuracy.
+  atol = benchmark.verify_parameters["absolute_tolerance"]
+  command = generate_accuracy_check_command(target_device, artifacts_dir,
+                                            iree_run_module_device_path, atol,
+                                            task_topology_cpu_ids)
+  command = f"adb shell su root {command}"
 
   try:
-    output = subprocess.run(command, check=True, capture_output=True)
+    output = subprocess.run(command,
+                            shell=True,
+                            check=True,
+                            capture_output=True)
     if verbose:
       print(output.stdout.decode())
-    return True
+    is_accurate = True
   except subprocess.CalledProcessError as e:
     print(f"Error running command: {e}")
+    is_accurate = False
+
+  # Run benchmark.
+  root_dir = iree_benchmark_module_device_path.parent
+  benchmark_command = [str(iree_benchmark_module_device_path)
+                      ] + get_common_command_parameters(
+                          target_device, artifacts_dir,
+                          task_topology_cpu_ids) + ["--print_statistics"]
+  benchmark_command = " ".join(benchmark_command)
 
-  return False
+  command_path = root_dir / "command.txt"
+  subprocess.run(f"adb shell \"echo '{benchmark_command}' > {command_path}\"",
+                 shell=True,
+                 check=True,
+                 capture_output=True)
+
+  benchmark_lib_path = root_dir / "benchmark_lib.py"
+  command = f"adb shell su root /data/data/com.termux/files/usr/bin/python {benchmark_lib_path} --command_path=\"{command_path}\""
+  if verbose:
+    command += " --verbose"
+
+  output = subprocess.run(command, shell=True, check=True, capture_output=True)
+  output = output.stdout.decode()
+  if verbose:
+    print(output)
+
+  match = re.search(r"results_dict: (\{.*\})", output)
+  if match:
+    dictionary_string = match.group(1)
+    metrics = ast.literal_eval(dictionary_string)
+  else:
+    metrics = {"error": "Could not parse results"}
+
+  metrics["accuracy"] = is_accurate
+  return metrics
 
 
 def benchmark_one(benchmark: def_types.BenchmarkCase,
                   target_device: def_types.DeviceSpec,
-                  artifact_dir: pathlib.Path,
+                  artifacts_dir: pathlib.Path,
+                  iree_run_module_path: pathlib.Path,
                   iree_benchmark_module_path: pathlib.Path,
                   num_threads: str,
+                  task_topology_cpu_ids: str,
                   verbose: bool) -> utils.BenchmarkResult:
   model = benchmark.model
   benchmark_definition = {
@@ -85,23 +195,16 @@ def benchmark_one(benchmark: def_types.BenchmarkCase,
       "tags": model.model_impl.tags + model.tags,
   }
 
-  inputs_dir = artifact_dir / "inputs_npy"
-  num_inputs = len(list(inputs_dir.glob("*.npy")))
-
-  module_path = artifact_dir / "module.vmfb"
-  command = [
-      str(iree_benchmark_module_path),
-      f"--module={module_path}",
-      f"--task_topology_group_count={num_threads}",
-      "--device=local-task",
-      "--function=main",
-      "--print_statistics",
-  ]
-
-  for i in range(num_inputs):
-    command.append(f"--input=@{inputs_dir}/input_{i}.npy")
+  if target_device in devices.mobile_devices.ALL_DEVICES:
+    metrics = benchmark_on_android(target_device, benchmark, artifacts_dir,
+                                   iree_run_module_path,
+                                   iree_benchmark_module_path,
+                                   task_topology_cpu_ids, verbose)
+  else:
+    metrics = benchmark_on_x86(target_device, benchmark, artifacts_dir,
+                               iree_run_module_path, iree_benchmark_module_path,
+                               task_topology_cpu_ids, verbose)
 
-  metrics = benchmark_lib.run_benchmark_command(" ".join(command), verbose)
   return utils.BenchmarkResult(
       definition=benchmark_definition,
       metrics={
@@ -121,7 +224,8 @@ def _parse_arguments() -> argparse.Namespace:
       "--artifact_dir",
       type=pathlib.Path,
       required=True,
-      help="The directory containing all required benchmark artifacts.")
+      help=
+      "The directory containing all required benchmark artifacts on the host.")
   parser.add_argument("-device",
                       "--target_device",
                       dest="target_device_name",
@@ -137,9 +241,12 @@ def _parse_arguments() -> argparse.Namespace:
                       type=pathlib.Path,
                       required=True,
                       help="Path to the iree-benchmark-module binary.")
-  parser.add_argument("--threads",
-                      type=str,
-                      help="A comma-separated list of threads.")
+  parser.add_argument(
+      "--thread_config",
+      type=str,
+      help=
+      "A string dictionary of num_threads to cpu_ids. If cpu_ids is empty, does not pin threads to a specific CPU. Example: {1: '0', 4: '1,2,3,4', 5: '0,1,2,3,4'}"
+  )
   parser.add_argument("--verbose",
                       action="store_true",
                       help="Show verbose messages.")
@@ -148,7 +255,8 @@ def _parse_arguments() -> argparse.Namespace:
 
 def main(output: pathlib.Path, artifact_dir: pathlib.Path,
          target_device_name: str, iree_run_module_path: pathlib.Path,
-         iree_benchmark_module_path: pathlib.Path, threads: str, verbose: bool):
+         iree_benchmark_module_path: pathlib.Path, thread_config: str,
+         verbose: bool):
 
   try:
     target_device = next(device for device in devices.ALL_DEVICES
@@ -161,30 +269,22 @@ def main(output: pathlib.Path, artifact_dir: pathlib.Path,
 
   all_benchmarks = jax_benchmark_definitions.ALL_BENCHMARKS + tflite_benchmark_definitions.ALL_BENCHMARKS
   benchmarks = {}
-  contents = os.listdir(artifact_dir)
+  contents = get_directory_names(target_device, artifact_dir)
   for item in contents:
-    if os.path.isdir(artifact_dir / item):
-      name_pattern = re.compile(f".*{item}.*")
-      for benchmark in all_benchmarks:
-        if name_pattern.match(benchmark.name):
-          benchmarks[item] = benchmark
+    name_pattern = re.compile(f".*{re.escape(item)}.*")
+    for benchmark in all_benchmarks:
+      if name_pattern.match(benchmark.name):
+        benchmarks[item] = benchmark
 
-  threads = threads.split(",")
+  thread_config = ast.literal_eval(thread_config)
   for directory, benchmark in benchmarks.items():
-    benchmark_artifacts = artifact_dir / directory
-
-    for num_thread in threads:
-      atol = benchmark.verify_parameters["absolute_tolerance"]
-      is_accurate = check_accuracy(benchmark_artifacts, iree_run_module_path,
-                                   atol, num_thread, verbose)
-
-      result = benchmark_one(benchmark, target_device, benchmark_artifacts,
-                             iree_benchmark_module_path, num_thread, verbose)
-      result.metrics["compiler_level"]["accuracy"] = is_accurate
-
+    model_artifact_dir = artifact_dir / directory
+    for num_thread, cpu_ids in thread_config.items():
+      result = benchmark_one(benchmark, target_device, model_artifact_dir,
+                             iree_run_module_path, iree_benchmark_module_path,
+                             num_thread, cpu_ids, verbose)
       if verbose:
         print(json.dumps(dataclasses.asdict(result), indent=2))
-
       utils.append_benchmark_result(output, result)
 
 