From 974bdbe84a011acf358a09cde273ace985340890 Mon Sep 17 00:00:00 2001 From: mariecwhite Date: Tue, 5 Sep 2023 03:54:47 +0000 Subject: [PATCH] Add GPT2 GGML Android Benchmarks --- .github/workflows/run_ggml_benchmark.yml | 128 +++++++++++++++- .../openxla/benchmark/devices/__init__.py | 5 +- .../benchmark/devices/mobile_devices.py | 22 +++ .../docker/dockerfiles/android.Dockerfile | 20 +++ devtools/docker/image_deps.json | 3 +- devtools/docker/prod_digests.txt | 1 + experimental/ggml/benchmark_ggml.sh | 96 ++++++------ .../ggml/{benchmark.py => benchmark_lib.py} | 142 ++++++------------ experimental/ggml/build_ggml.sh | 72 +++++++++ experimental/ggml/requirements.txt | 3 +- experimental/ggml/run_benchmarks.py | 66 ++++++++ experimental/ggml/run_benchmarks_android.py | 92 ++++++++++++ .../ggml/set_android_scaling_governor.sh | 51 +++++++ 13 files changed, 554 insertions(+), 147 deletions(-) create mode 100644 common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py create mode 100644 devtools/docker/dockerfiles/android.Dockerfile rename experimental/ggml/{benchmark.py => benchmark_lib.py} (61%) mode change 100755 => 100644 create mode 100755 experimental/ggml/build_ggml.sh create mode 100755 experimental/ggml/run_benchmarks.py create mode 100755 experimental/ggml/run_benchmarks_android.py create mode 100755 experimental/ggml/set_android_scaling_governor.sh diff --git a/.github/workflows/run_ggml_benchmark.yml b/.github/workflows/run_ggml_benchmark.yml index ff837cb0..f981e275 100644 --- a/.github/workflows/run_ggml_benchmark.yml +++ b/.github/workflows/run_ggml_benchmark.yml @@ -66,6 +66,7 @@ jobs: BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }} RESULTS_DIR: results-dir TARGET_DEVICE: c2-standard-16 + GGML_BUILD_DIR: build-dir steps: - name: "Checking out PR repository" uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 @@ -74,7 +75,14 @@ jobs: run: | echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" 
>> "${GITHUB_OUTPUT}" mkdir "${RESULTS_DIR}" - - name: "Benchmarking GGML CPU" + - name: "Building GGML CPU" + run: | + docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ + "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \ + ./experimental/ggml/build_ggml.sh \ + "${TARGET_DEVICE}" \ + "${GGML_BUILD_DIR}" + - name: "Benchmarking GGML" env: GGML_RESULTS_JSON: ggml.json RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }} @@ -83,6 +91,122 @@ jobs: docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \ ./experimental/ggml/benchmark_ggml.sh \ - "${TARGET_DEVICE}"\ + "${TARGET_DEVICE}" \ + "${GGML_BUILD_DIR}" \ "${RESULTS_PATH}" gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/" + + build_ggml_for_android: + needs: [setup] + runs-on: + - self-hosted # must come first + - runner-group=${{ needs.setup.outputs.runner-group }} + - environment=prod + - cpu + - os-family=Linux + env: + GGML_BUILD_DIR: ggml-build + TARGET_DEVICE: pixel-6-pro + BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }} + outputs: + ggml-build-dir: ${{ env.GGML_BUILD_DIR }} + ggml-build-dir-archive: ${{ steps.archive.outputs.ggml-build-dir-archive }} + ggml-build-dir-gcs-artifact: ${{ steps.upload.outputs.ggml-build-dir-gcs-artifact }} + steps: + - name: "Checking out PR repository" + uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 + - name: "Building GGML" + run: | + mkdir -p "${GGML_BUILD_DIR}" + docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ + "gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e" \ + ./experimental/ggml/build_ggml.sh \ + "${TARGET_DEVICE}" \ + "${GGML_BUILD_DIR}" + - name: "Creating build dir archive" + id: archive + 
env: + GGML_BUILD_DIR_ARCHIVE: ${{ env.GGML_BUILD_DIR }}.tgz + run: | + tar -zcvf ${GGML_BUILD_DIR_ARCHIVE} ${GGML_BUILD_DIR} + echo "ggml-build-dir-archive=${GGML_BUILD_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}" + - name: "Uploading build dir archive" + id: upload + env: + GGML_BUILD_DIR_ARCHIVE: ${{ steps.archive.outputs.ggml-build-dir-archive }} + GGML_BUILD_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.ggml-build-dir-archive }} + run: | + gcloud storage cp "${GGML_BUILD_DIR_ARCHIVE}" "${GGML_BUILD_DIR_GCS_ARTIFACT}" + echo "ggml-build-dir-gcs-artifact=${GGML_BUILD_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}" + + benchmark_on_pixel-6-pro: + needs: [setup, build_ggml_for_android] + runs-on: + - self-hosted # must come first + - runner-group=${{ needs.setup.outputs.runner-group }} + - environment=prod + - machine-type=pixel-6-pro + env: + BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }} + RESULTS_DIR: results-dir + TARGET_DEVICE: pixel-6-pro + GGML_BUILD_DIR: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir }} + GGML_BUILD_DIR_ARCHIVE: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-archive }} + GGML_BUILD_DIR_GCS_ARTIFACT: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-gcs-artifact }} + steps: + - name: "Checking out PR repository" + uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 + - name: "Setup" + id: setup + run: | + echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}" + mkdir "${RESULTS_DIR}" + - name: "Downloading and unpacking GGML build" + run: | + gcloud storage cp "${GGML_BUILD_DIR_GCS_ARTIFACT}" "${GGML_BUILD_DIR_ARCHIVE}" + tar -xvf "${GGML_BUILD_DIR_ARCHIVE}" + - name: "Benchmarking GGML on Android" + env: + GGML_RESULTS_JSON: ggml-android.json + RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }} + run: | + RESULTS_PATH="${RESULTS_DIR}/${GGML_RESULTS_JSON}" + ./experimental/ggml/benchmark_ggml.sh "${TARGET_DEVICE}" 
"${GGML_BUILD_DIR}" "${RESULTS_PATH}" + cat "${RESULTS_PATH}" + +# adb push "./experimental/ggml/set_android_scaling_governor.sh" "/data/local/tmp" +# adb shell "chmod +x /data/local/tmp/set_android_scaling_governor.sh" +# adb shell "su root sh /data/local/tmp/set_android_scaling_governor.sh performance" +# +# adb push "${GGML_BUILD_DIR}/bin/gpt-2" "/data/local/tmp" +# adb shell "chmod +x /data/local/tmp/gpt-2" +# adb push "${GGML_BUILD_DIR}/models/gpt-2-117M/ggml-model-f32.bin" "/data/local/tmp" +# +# echo "Benchmarking ggml-model-f32.bin with 1 thread" +# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 1' +# +# echo "Benchmarking ggml-model-f32.bin with 4 threads" +# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 4' +# +# echo "Benchmarking ggml-model-f32.bin with 8 threads" +# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 8' +# +# echo "Benchmarking ggml-model-f32.bin with 16 threads" +# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 16' +# +# echo "Removing ggml-model-f32.bin" +# adb shell "rm /data/local/tmp/ggml-model-f32.bin" +# +# adb push "${GGML_BUILD_DIR}/models/gpt-2-117M/ggml-model-f16.bin" "/data/local/tmp" +# +# echo "Benchmarking ggml-model-f16.bin with 1 thread" +# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 1' +# +# echo "Benchmarking ggml-model-f16.bin with 4 threads" +# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 4' +# +# echo "Benchmarking ggml-model-f16.bin with 8 threads" +# adb shell '/data/local/tmp/gpt-2 --model 
/data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 8' +# +# echo "Benchmarking ggml-model-f16.bin with 16 threads" +# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 16' diff --git a/common_benchmark_suite/openxla/benchmark/devices/__init__.py b/common_benchmark_suite/openxla/benchmark/devices/__init__.py index 049b6fd1..9a9b2106 100644 --- a/common_benchmark_suite/openxla/benchmark/devices/__init__.py +++ b/common_benchmark_suite/openxla/benchmark/devices/__init__.py @@ -4,7 +4,8 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -from . import gcp_devices, host_devices +from . import gcp_devices, host_devices, mobile_devices # All defined device specs. -ALL_DEVICES = gcp_devices.ALL_DEVICES + host_devices.ALL_DEVICES +ALL_DEVICES = gcp_devices.ALL_DEVICES + host_devices.ALL_DEVICES + mobile_devices.ALL_DEVICES +ALL_DEVICE_NAMES = [device.name for device in ALL_DEVICES] diff --git a/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py b/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py new file mode 100644 index 00000000..ca716280 --- /dev/null +++ b/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py @@ -0,0 +1,22 @@ +# Copyright 2023 The OpenXLA Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from openxla.benchmark import def_types + +MOBILE_PIXEL_6_PRO = def_types.DeviceSpec( + name="pixel-6-pro", + host_type="mobile", + host_model="pixel-6-pro", + host_environment="android", + accelerator_type="cpu", + accelerator_model="armv8.2-a", + accelerator_architecture="armv8.2-a", + accelerator_attributes={ + "num_of_cores": 8, + }, +) + +ALL_DEVICES = [MOBILE_PIXEL_6_PRO] \ No newline at end of file diff --git a/devtools/docker/dockerfiles/android.Dockerfile b/devtools/docker/dockerfiles/android.Dockerfile new file mode 100644 index 00000000..b6272b40 --- /dev/null +++ b/devtools/docker/dockerfiles/android.Dockerfile @@ -0,0 +1,20 @@ +# Copyright 2023 The OpenXLA Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# An image for cross-compiling towards Android. + +FROM gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251 + +ARG NDK_VERSION=r25c +WORKDIR /install-ndk + +ENV ANDROID_NDK "/usr/src/android-ndk-${NDK_VERSION}" + +RUN wget -q "https://dl.google.com/android/repository/android-ndk-${NDK_VERSION}-linux.zip" \ + && unzip -q "android-ndk-${NDK_VERSION}-linux.zip" -d /usr/src/ \ + && rm -rf /install-ndk + +WORKDIR / diff --git a/devtools/docker/image_deps.json b/devtools/docker/image_deps.json index 7215e309..243f4320 100644 --- a/devtools/docker/image_deps.json +++ b/devtools/docker/image_deps.json @@ -3,5 +3,6 @@ "cuda11.8-cudnn8.9": ["base"], "db_import": [], "mmperf": ["base"], - "convperf": ["base"] + "convperf": ["base"], + "android": ["base"] } diff --git a/devtools/docker/prod_digests.txt b/devtools/docker/prod_digests.txt index 013a9d5d..4a2f94a0 100644 --- a/devtools/docker/prod_digests.txt +++ b/devtools/docker/prod_digests.txt @@ -3,3 +3,4 @@ 
gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4d gcr.io/iree-oss/openxla-benchmark/db_import@sha256:3de8a702b51ca1906fc2ef5bab2415a79e46bc132f2ceba994215539dd0ecdd4 gcr.io/iree-oss/openxla-benchmark/mmperf@sha256:c972ce5b2144de0786f103611fecbd88d93dd45ecd068f8c97d98c08677cee57 gcr.io/iree-oss/openxla-benchmark/convperf@sha256:0807d5e8144900752cfae72f3aa4d12530b408f73fc6f010a6cbad11cc09832c +gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e diff --git a/experimental/ggml/benchmark_ggml.sh b/experimental/ggml/benchmark_ggml.sh index 6cb28dc1..137592d0 100755 --- a/experimental/ggml/benchmark_ggml.sh +++ b/experimental/ggml/benchmark_ggml.sh @@ -11,20 +11,20 @@ # OOBI_VENV_DIR: path to create Python virtualenv, default: ggml-benchmarks.venv # OOBI_TARGET_DEVICE: target benchmark device, can also be specified the first # argument. +# OOBI_BUILD_DIR: path to the GGMl build directory. # OOBI_OUTPUT: path to output benchmark results, can also be specified the # second argument. -# OOBI_SCRATCH_DIR: the directory to place temporary benchmarking artifacts. # # Example usage: -# ./benchmark_ggml.sh c2-standard-16 /tmp/results.json +# ./benchmark_ggml.sh set -xeuo pipefail VENV_DIR="${OOBI_VENV_DIR:-ggml-benchmarks.venv}" -ROOT_DIR="${OOBI_SCRATCH_DIR:-/tmp}" PYTHON="${PYTHON:-/usr/bin/python3}" -TARGET_DEVICE="${1:-${OOBI_TARGET_DEVICE}}" -OUTPUT_PATH="${2:-${OOBI_OUTPUT}}" +TARGET_DEVICE_NAME="${1:-${OOBI_TARGET_DEVICE}}" +BUILD_DIR="${2:-${OOBI_BUILD_DIR}}" +OUTPUT_PATH="${3:-${OOBI_OUTPUT}}" TD="$(cd $(dirname $0) && pwd)" @@ -35,33 +35,12 @@ VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh" OUTPUT_PATH="$(realpath ${OUTPUT_PATH})" "${TD}/../../comparative_benchmark/scripts/create_results_json.sh" "${OUTPUT_PATH}" -pushd "${ROOT_DIR}" - -# We clone a fork of ggml which includes additional benchmark logging. 
-git clone --branch benchmark https://github.com/mariecwhite/ggml.git -pushd ggml - -# Build -mkdir build -pushd build -cmake .. -make -j8 - -# Generate FP32, FP16 and INT4 versions of GPT2 117M (Small). -GPT_VARIANT="117M" -../examples/gpt-2/download-model.sh "${GPT_VARIANT}" -# Generate FP32. -python ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 0 -# Generate FP16. -python ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 1 -# Generate INT4. -./bin/gpt-2-quantize models/gpt-2-${GPT_VARIANT}/ggml-model-f16.bin models/gpt-2-${GPT_VARIANT}/ggml-model-q4_0.bin 2 +pushd "${BUILD_DIR}" PROMPT="Once upon a time" BENCHMARK_BINARY="$(realpath bin/gpt-2)" WARMUP_ITERAIONS=2 NUM_ITERATIONS=10 -declare -a NUM_THREADS=(1 8 16) MODEL="$(realpath models/gpt-2-117M/ggml-model-f32.bin)" @@ -81,26 +60,51 @@ declare -a DATA_TYPES=( "int4" ) +declare -a args=( + --warmup_iterations "${WARMUP_ITERAIONS}" + --iterations "${NUM_ITERATIONS}" + --benchmark_binary "${BENCHMARK_BINARY}" + --prompt "${PROMPT}" + --seed 0 + --output "${OUTPUT_PATH}" + --target_device "${TARGET_DEVICE_NAME}" + --verbose +) + +if [[ "${TARGET_DEVICE_NAME}" =~ ^(pixel-4|pixel-6-pro|moto-edge-x30)$ ]]; then +BENCHMARK_SCRIPT="run_benchmarks_android.py" +# Pixel 6 has a maximum of 8 cores. +THREADS="1,4,8" +TASKSETS="80,f0,ff" + +args+=( + --threads "${THREADS}" + --tasksets "${TASKSETS}" +) + +# Setup mobile device for benchmarking. +adb push "${TD}/set_android_scaling_governor.sh" "/data/local/tmp" +adb shell "chmod +x /data/local/tmp/set_android_scaling_governor.sh" +adb shell "su root sh /data/local/tmp/set_android_scaling_governor.sh performance" + +else +# c2-standard-16 has 16 cores. 
+BENCHMARK_SCRIPT="run_benchmarks.py" +THREADS="1,8,16" + +args+=( + --threads "${THREADS}" +) +fi + for i in ${!BENCHMARK_NAMES[@]}; do MODEL="$(realpath models/gpt-2-117M/${MODELS[$i]})" - - for threads in "${NUM_THREADS[@]}"; do - "${TD}/benchmark.py" \ - --benchmark_name "${BENCHMARK_NAMES[$i]}" \ - --warmup_iterations "${WARMUP_ITERAIONS}" \ - --iterations "${NUM_ITERATIONS}" \ - --benchmark_binary "${BENCHMARK_BINARY}" \ - --model "${MODEL}" \ - --data_type "${DATA_TYPES[$i]}" \ - --prompt "${PROMPT}" \ - --seed 0 \ - --threads "${threads}" \ - --output "${OUTPUT_PATH}" \ - --target_device "${TARGET_DEVICE}" \ - --verbose - done + args+=( + --benchmark_name "${BENCHMARK_NAMES[$i]}" + --model "${MODEL}" + --data_type "${DATA_TYPES[$i]}" + ) + "${TD}/${BENCHMARK_SCRIPT}" "${args[@]}" done -popd # build -popd # ggml -popd # ROOT_DIR +popd # BUILD_DIR diff --git a/experimental/ggml/benchmark.py b/experimental/ggml/benchmark_lib.py old mode 100755 new mode 100644 similarity index 61% rename from experimental/ggml/benchmark.py rename to experimental/ggml/benchmark_lib.py index 59c5d047..55e7416c --- a/experimental/ggml/benchmark.py +++ b/experimental/ggml/benchmark_lib.py @@ -23,65 +23,51 @@ # Add common_benchmark_suite dir to the search path. sys.path.insert( 0, str(pathlib.Path(__file__).parents[2] / "common_benchmark_suite")) -from openxla.benchmark import def_types, devices - -ALL_DEVICE_NAMES = [device.name for device in devices.ALL_DEVICES] +from openxla.benchmark import devices + +# Regular expressions to parse GGML benchmark output. 
+# Example output: +# main: mem per token = 2011380 bytes +# main: load time = 120.92 ms +# main: sample time = 73.86 ms +# main: first predict time = 14.71 ms +# main: loop predict time = 2261.72 ms / 11.20 ms per token +# main: predict time = 2276.43 ms / 11.21 ms per token +# main: total time = 2494.66 ms +LOAD_TIME_REGEXP = re.compile(f".+ load time = (.+) ms") +SAMPLE_TIME_REGEXP = re.compile(f".+ sample time = (.+) ms") +FIRST_PREDICTION_TIME_REGEXP = re.compile(f".+ first predict time = (.+) ms") +LOOP_PREDICTION_TIME_REGEXP = re.compile( + f".+ loop predict time = .+ ms / (.+) ms per token") +TOTAL_PREDICTION_TIME_REGEXP = re.compile( + f".+ predict time = (.+) ms / .+ ms per token") +E2E_TIME_REGEXP = re.compile(f".+ total time = (.+) ms") def _parse_output(output_text): - # Example output. - # main: mem per token = 2011380 bytes - # main: load time = 120.92 ms - # main: sample time = 73.86 ms - # main: first predict time = 14.71 ms - # main: loop predict time = 2261.72 ms / 11.20 ms per token - # main: predict time = 2276.43 ms / 11.21 ms per token - # main: total time = 2494.66 ms - - LOAD_TIME_REGEXP = re.compile(f"main: load time = (.+) ms") match = LOAD_TIME_REGEXP.search(output_text) - if not match: - "Unable to parse first prediction time" - return - load_time_ms = float(match.group(1)) + load_time_ms = float(match.group(1)) if match else print( + "Unable to parse first prediction time") - SAMPLE_TIME_REGEXP = re.compile(f"main: sample time = (.+) ms") match = SAMPLE_TIME_REGEXP.search(output_text) - if not match: - "Unable to parse first prediction time" - return - sample_time_ms = float(match.group(1)) + sample_time_ms = float(match.group(1)) if match else print( + "Unable to parse first prediction time") - FIRST_PREDICTION_TIME_REGEXP = re.compile( - f"main: first predict time = (.+) ms") match = FIRST_PREDICTION_TIME_REGEXP.search(output_text) - if not match: - "Unable to parse first prediction time" - return - first_prediction_ms = 
float(match.group(1)) + first_prediction_ms = float(match.group(1)) if match else print( + "Unable to parse first prediction time") - LOOP_PREDICTION_TIME_REGEXP = re.compile( - f"main: loop predict time = .+ ms / (.+) ms per token") match = LOOP_PREDICTION_TIME_REGEXP.search(output_text) - if not match: - "Unable to parse loop prediction time" - return - loop_prediction_ms = float(match.group(1)) + loop_prediction_ms = float(match.group(1)) if match else print( + "Unable to parse loop prediction time") - TOTAL_PREDICTION_TIME_REGEXP = re.compile( - f"main: predict time = (.+) ms / .+ ms per token") match = TOTAL_PREDICTION_TIME_REGEXP.search(output_text) - if not match: - "Unable to parse total prediction time" - return - total_prediction_ms = float(match.group(1)) + total_prediction_ms = float(match.group(1)) if match else print( + "Unable to parse total prediction time") - E2E_TIME_REGEXP = re.compile(f"main: total time = (.+) ms") match = E2E_TIME_REGEXP.search(output_text) - if not match: - "Unable to parse total prediction time" - return - e2e_prediction_ms = float(match.group(1)) + e2e_prediction_ms = float(match.group(1)) if match else print( + "Unable to parse total prediction time") return { "load_time_ms": load_time_ms, @@ -93,8 +79,7 @@ def _parse_output(output_text): } -def _parse_arguments() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Run GGML benchmarks.") +def configure_parser(parser: argparse.ArgumentParser): parser.add_argument("-name", "--benchmark_name", type=str, @@ -109,7 +94,7 @@ def _parse_arguments() -> argparse.Namespace: parser.add_argument( "-m", "--model", - type=str, + type=pathlib.Path, required=True, help= "The GGML model to benchmark e.g. 
/tmp/ggml/build/models/gpt-2-117M/ggml-model.bin" @@ -127,9 +112,9 @@ def _parse_arguments() -> argparse.Namespace: help="The seed to use for the RNG.") parser.add_argument("-t", "--threads", - type=int, - default=8, - help="The number of threads to use.") + type=str, + default="1,4", + help="A comma-delimited list of threads.") parser.add_argument("-o", "--output", type=pathlib.Path, @@ -140,7 +125,7 @@ def _parse_arguments() -> argparse.Namespace: dest="target_device_name", type=str, required=True, - choices=ALL_DEVICE_NAMES, + choices=devices.ALL_DEVICE_NAMES, help="The target device to benchmark.") parser.add_argument("-w", "--warmup_iterations", @@ -155,49 +140,17 @@ def _parse_arguments() -> argparse.Namespace: parser.add_argument("--verbose", action="store_true", help="Show verbose messages.") - return parser.parse_args() - -def main(benchmark_name: str, benchmark_binary: pathlib.Path, - warmup_iterations: int, iterations: int, model: str, data_type: str, - prompt: str, seed: int, threads: int, output: pathlib.Path, - target_device_name: str, verbose: bool): - try: - target_device = next(device for device in devices.ALL_DEVICES - if device.name == target_device_name) - except StopIteration: - raise ValueError(f'Target device "{target_device_name}" is not defined.' 
- f' Available device options:\n{ALL_DEVICE_NAMES}') - - benchmark_definition = { - "benchmark_name": benchmark_name, - "framework": str(def_types.ModelFrameworkType.GGML), - "data_type": data_type, - "batch_size": 1, - "compiler": str(def_types.ModelFrameworkType.GGML), - "device": target_device.name, - "num_threads": threads, - "warmup_iterations": warmup_iterations, - "num_iterations": iterations, - "tags": ["gpt2", "ggml"], - } - - cmd = [ - benchmark_binary, - "--model", - f"{model}", - "--prompt", - f"{prompt}", - "--seed", - f"{seed}", - "--threads", - f"{threads}", - ] +def benchmark(benchmark_command: str, benchmark_definition: dict, + warmup_iterations: int, iterations: int, output: pathlib.Path, + verbose: bool): # Run warmup iterations. for i in range(warmup_iterations): - subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + subprocess.run(benchmark_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) load_times = [] first_prediction_times = [] @@ -208,10 +161,13 @@ def main(benchmark_name: str, benchmark_binary: pathlib.Path, # Run iterations. 
for i in range(iterations): - raw_result = subprocess.run(cmd, + raw_result = subprocess.run(benchmark_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) raw_result = raw_result.stdout.decode("utf-8") + if verbose: + print(raw_result) + metrics = _parse_output(raw_result) load_times.append(metrics["load_time_ms"]) @@ -250,7 +206,3 @@ def main(benchmark_name: str, benchmark_binary: pathlib.Path, if verbose: print(json.dumps(dataclasses.asdict(benchmark_result), indent=2)) utils.append_benchmark_result(output, benchmark_result) - - -if __name__ == "__main__": - main(**vars(_parse_arguments())) diff --git a/experimental/ggml/build_ggml.sh b/experimental/ggml/build_ggml.sh new file mode 100755 index 00000000..c87cfea8 --- /dev/null +++ b/experimental/ggml/build_ggml.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# +# Copyright 2023 The OpenXLA Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Environment variables: +# PYTHON: Python interpreter, default: /usr/bin/python3 +# ANDROID_NDK: the path to the Android NDK if building for Android. +# OOBI_VENV_DIR: path to create Python virtualenv, default: ggml-build.venv +# OOBI_TARGET_DEVICE: target benchmark device, can also be specified the first +# argument. +# OOBI_OUTPUT: path to output benchmark results, can also be specified the +# second argument. +# OOBI_SCRATCH_DIR: the directory to place temporary benchmarking artifacts. +# +# Example usage: +# ./build_ggml.sh > + +set -xeuo pipefail + +VENV_DIR="${OOBI_VENV_DIR:-ggml-build.venv}" +ROOT_DIR="${OOBI_SCRATCH_DIR:-/tmp}" +PYTHON="${PYTHON:-/usr/bin/python3}" +TARGET_DEVICE_NAME="${1:-${OOBI_TARGET_DEVICE}}" +BUILD_DIR="${2:-/tmp/ggml-build}" + +TD="$(cd $(dirname $0) && pwd)" +BUILD_DIR="$(realpath ${BUILD_DIR})" + +# Setup virtual environment. 
+VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh" + +pushd "${ROOT_DIR}" + +# We clone a fork of ggml which includes additional benchmark logging. +git clone --branch benchmark https://github.com/mariecwhite/ggml.git +pushd ggml + +REPO_DIR="$(pwd)" + +# Build gpt-2-quantize. +cmake -G Ninja -B local-build . +cmake --build local-build -t gpt-2-quantize + +# Build gpt-2. +if [[ "${TARGET_DEVICE_NAME}" =~ ^(pixel-4|pixel-6-pro|moto-edge-x30)$ ]]; then +cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod -B "${BUILD_DIR}" . +cmake --build "${BUILD_DIR}" -t gpt-2 +else +cmake -G Ninja -B "${BUILD_DIR}" . +cmake --build "${BUILD_DIR}" -t gpt-2 +fi + +popd # ggml +popd # ROOT_DIR + +# Generate FP32 and FP16 versions of GPT2 117M (Small). +pushd "${BUILD_DIR}" + +GPT_VARIANT="117M" +${REPO_DIR}/examples/gpt-2/download-model.sh "${GPT_VARIANT}" +# Generate FP32. +python ${REPO_DIR}/examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 0 +# Generate FP16. +python ${REPO_DIR}/examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 1 +# Generate INT4. Keep this disabled until we want to use it. 
+${REPO_DIR}/local-build/bin/gpt-2-quantize models/gpt-2-${GPT_VARIANT}/ggml-model-f16.bin models/gpt-2-${GPT_VARIANT}/ggml-model-q4_0.bin 2 + +popd # BUILD_DIR diff --git a/experimental/ggml/requirements.txt b/experimental/ggml/requirements.txt index c2bbfa1f..7947059c 100644 --- a/experimental/ggml/requirements.txt +++ b/experimental/ggml/requirements.txt @@ -1,2 +1,3 @@ numpy -tensorflow \ No newline at end of file +tensorflow +requests diff --git a/experimental/ggml/run_benchmarks.py b/experimental/ggml/run_benchmarks.py new file mode 100755 index 00000000..88cb4f1e --- /dev/null +++ b/experimental/ggml/run_benchmarks.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +# +# Copyright 2023 The OpenXLA Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import argparse +import pathlib +import sys + +import benchmark_lib + +# Add common_benchmark_suite dir to the search path. +sys.path.insert( + 0, str(pathlib.Path(__file__).parents[2] / "common_benchmark_suite")) +from openxla.benchmark import def_types, devices + + +def _parse_arguments() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run GGML benchmarks.") + benchmark_lib.configure_parser(parser) + return parser.parse_args() + + +def main(benchmark_name: str, benchmark_binary: pathlib.Path, + warmup_iterations: int, iterations: int, model: pathlib.Path, + data_type: str, prompt: str, seed: int, threads: str, + output: pathlib.Path, target_device_name: str, verbose: bool): + + try: + target_device = next(device for device in devices.ALL_DEVICES + if device.name == target_device_name) + except StopIteration: + raise ValueError(f'Target device "{target_device_name}" is not defined.' 
+ f' Available device options:\n{devices.ALL_DEVICE_NAMES}') + + threads = threads.split(",") + for thread in threads: + benchmark_definition = { + "benchmark_name": benchmark_name, + "framework": str(def_types.ModelFrameworkType.GGML), + "data_type": data_type, + "batch_size": 1, + "compiler": str(def_types.ModelFrameworkType.GGML), + "device": target_device.name, + "num_threads": thread, + "warmup_iterations": warmup_iterations, + "num_iterations": iterations, + "tags": ["gpt2", "ggml"], + } + + cmd = [ + benchmark_binary, "--model", model, "--prompt", f"\"{prompt}\"", + "--seed", + str(seed), "--threads", + str(thread) + ] + + benchmark_lib.benchmark(cmd, benchmark_definition, warmup_iterations, + iterations, output, verbose) + + +if __name__ == "__main__": + main(**vars(_parse_arguments())) diff --git a/experimental/ggml/run_benchmarks_android.py b/experimental/ggml/run_benchmarks_android.py new file mode 100755 index 00000000..8cb35641 --- /dev/null +++ b/experimental/ggml/run_benchmarks_android.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +# +# Copyright 2023 The OpenXLA Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import argparse +import pathlib +import subprocess +import sys + +import benchmark_lib + +# Add common_benchmark_suite dir to the search path. +sys.path.insert( + 0, str(pathlib.Path(__file__).parents[2] / "common_benchmark_suite")) +from openxla.benchmark import def_types, devices + + +def _parse_arguments() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run GGML benchmarks.") + parser.add_argument( + "--tasksets", + type=str, + default="f0", + help= + "A comma-separated list of tasksets to run under each thread configuration." 
+ ) + benchmark_lib.configure_parser(parser) + return parser.parse_args() + + +def main(benchmark_name: str, benchmark_binary: pathlib.Path, + warmup_iterations: int, iterations: int, model: pathlib.Path, + data_type: str, prompt: str, seed: int, threads: str, tasksets: str, + output: pathlib.Path, target_device_name: str, verbose: bool): + try: + target_device = next(device for device in devices.ALL_DEVICES + if device.name == target_device_name) + except StopIteration: + raise ValueError(f'Target device "{target_device_name}" is not defined.' + f' Available device options:\n{devices.ALL_DEVICE_NAMES}') + + threads = threads.split(",") + tasksets = tasksets.split(",") + if len(threads) != len(tasksets): + raise ValueError( + "The number of tasksets specified must be equal to the number of threads." + ) + + # Push artifacts to the Android device. + subprocess.run(["adb", "push", benchmark_binary, "/data/local/tmp"]) + subprocess.run([ + "adb", "shell", "chmod", "+x", f"/data/local/tmp/{benchmark_binary.name}" + ]) + subprocess.run(["adb", "push", model, "/data/local/tmp"]) + + for taskset, thread in zip(tasksets, threads): + benchmark_definition = { + "benchmark_name": benchmark_name, + "framework": str(def_types.ModelFrameworkType.GGML), + "data_type": data_type, + "batch_size": 1, + "compiler": str(def_types.ModelFrameworkType.GGML), + "device": target_device.name, + "taskset": taskset, + "num_threads": thread, + "warmup_iterations": warmup_iterations, + "num_iterations": iterations, + "tags": ["gpt2", "ggml"], + } + + cmd = [ + "adb", "shell", "taskset", taskset, + f"/data/local/tmp/{benchmark_binary.name}", "--model", + f"/data/local/tmp/{model.name}", "--prompt", f"\"{prompt}\"", "--seed", + str(seed), "--threads", + str(thread) + ] + + benchmark_lib.benchmark(cmd, benchmark_definition, warmup_iterations, + iterations, output, verbose) + + # Cleanup. 
+ subprocess.run(["adb", "shell", "rm", f"/data/local/tmp/{benchmark_binary.name}"]) + subprocess.run(["adb", "shell", "rm", f"/data/local/tmp/{model.name}"]) + + +if __name__ == "__main__": + main(**vars(_parse_arguments())) diff --git a/experimental/ggml/set_android_scaling_governor.sh b/experimental/ggml/set_android_scaling_governor.sh new file mode 100755 index 00000000..0a297304 --- /dev/null +++ b/experimental/ggml/set_android_scaling_governor.sh @@ -0,0 +1,51 @@ +#!/bin/sh + +# Copyright 2023 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# Runs on an android device itself to set the frequency scaling governor for all +# CPUs (default performance). + +################################### WARNING #################################### +# This will overheat the phone if it's not on a cooling plate, resulting in # +# thermal throttling. To prevent anything catching on fire, the actual CPU # +# frequencies will be throttled to below the maximum, skewing your results. 
# +################################################################################ + +set -euo pipefail + +GOVERNOR="${1:-performance}" + +echo "CPU info (before changing governor):" +echo 'cpu\tgovernor\tcur\tmin\tmax' +echo "------------------------------------------------" +for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \ + echo "cpu${i}" | paste \ + - \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/scaling_governor" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_cur_freq" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_min_freq" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_max_freq"; \ +done + +echo "Setting CPU frequency governor to ${GOVERNOR}" + +for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \ + echo "${GOVERNOR}" > \ + "/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor"; \ +done + +echo "CPU info (after changing governor):" +echo 'cpu\tgovernor\tcur\tmin\tmax' +echo "------------------------------------------------" +for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \ + echo "cpu${i}" | paste \ + - \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/scaling_governor" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_cur_freq" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_min_freq" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_max_freq"; \ +done