From 86fd79da7a0060e19258fc8f76ee0ecd376735d7 Mon Sep 17 00:00:00 2001 From: mariecwhite Date: Tue, 5 Sep 2023 03:54:47 +0000 Subject: [PATCH] Add GPT2 GGML Android Benchmarks --- .github/workflows/run_ggml_benchmark.yml | 128 ++++++++- .../openxla/benchmark/devices/__init__.py | 4 +- .../benchmark/devices/mobile_devices.py | 22 ++ .../docker/dockerfiles/android.Dockerfile | 20 ++ devtools/docker/image_deps.json | 3 +- devtools/docker/prod_digests.txt | 1 + experimental/ggml/benchmark_android.py | 261 ++++++++++++++++++ experimental/ggml/benchmark_ggml.sh | 51 ++-- experimental/ggml/build_ggml.sh | 68 +++++ experimental/ggml/requirements.txt | 3 +- .../ggml/set_android_scaling_governor.sh | 51 ++++ 11 files changed, 572 insertions(+), 40 deletions(-) create mode 100644 common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py create mode 100644 devtools/docker/dockerfiles/android.Dockerfile create mode 100755 experimental/ggml/benchmark_android.py create mode 100755 experimental/ggml/build_ggml.sh create mode 100755 experimental/ggml/set_android_scaling_governor.sh diff --git a/.github/workflows/run_ggml_benchmark.yml b/.github/workflows/run_ggml_benchmark.yml index ff837cb0..f981e275 100644 --- a/.github/workflows/run_ggml_benchmark.yml +++ b/.github/workflows/run_ggml_benchmark.yml @@ -66,6 +66,7 @@ jobs: BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }} RESULTS_DIR: results-dir TARGET_DEVICE: c2-standard-16 + GGML_BUILD_DIR: build-dir steps: - name: "Checking out PR repository" uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 @@ -74,7 +75,14 @@ jobs: run: | echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}" mkdir "${RESULTS_DIR}" - - name: "Benchmarking GGML CPU" + - name: "Building GGML CPU" + run: | + docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ + "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \ + ./experimental/ggml/build_ggml.sh \ + "${TARGET_DEVICE}" \ + "${GGML_BUILD_DIR}" + - name: "Benchmarking GGML" env: GGML_RESULTS_JSON: ggml.json RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }} @@ -83,6 +91,122 @@ jobs: docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \ ./experimental/ggml/benchmark_ggml.sh \ - "${TARGET_DEVICE}"\ + "${TARGET_DEVICE}" \ + "${GGML_BUILD_DIR}" \ "${RESULTS_PATH}" gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/" + + build_ggml_for_android: + needs: [setup] + runs-on: + - self-hosted # must come first + - runner-group=${{ needs.setup.outputs.runner-group }} + - environment=prod + - cpu + - os-family=Linux + env: + GGML_BUILD_DIR: ggml-build + TARGET_DEVICE: pixel-6-pro + BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }} + outputs: + ggml-build-dir: ${{ env.GGML_BUILD_DIR }} + ggml-build-dir-archive: ${{ steps.archive.outputs.ggml-build-dir-archive }} + ggml-build-dir-gcs-artifact: ${{ steps.upload.outputs.ggml-build-dir-gcs-artifact }} + steps: + - name: "Checking out PR repository" + uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 + - name: "Building GGML" + run: | + mkdir -p "${GGML_BUILD_DIR}" + docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \ + 
"gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e" \ + ./experimental/ggml/build_ggml.sh \ + "${TARGET_DEVICE}" \ + "${GGML_BUILD_DIR}" + - name: "Creating build dir archive" + id: archive + env: + GGML_BUILD_DIR_ARCHIVE: ${{ env.GGML_BUILD_DIR }}.tgz + run: | + tar -zcvf ${GGML_BUILD_DIR_ARCHIVE} ${GGML_BUILD_DIR} + echo "ggml-build-dir-archive=${GGML_BUILD_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}" + - name: "Uploading build dir archive" + id: upload + env: + GGML_BUILD_DIR_ARCHIVE: ${{ steps.archive.outputs.ggml-build-dir-archive }} + GGML_BUILD_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.ggml-build-dir-archive }} + run: | + gcloud storage cp "${GGML_BUILD_DIR_ARCHIVE}" "${GGML_BUILD_DIR_GCS_ARTIFACT}" + echo "ggml-build-dir-gcs-artifact=${GGML_BUILD_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}" + + benchmark_on_pixel-6-pro: + needs: [setup, build_ggml_for_android] + runs-on: + - self-hosted # must come first + - runner-group=${{ needs.setup.outputs.runner-group }} + - environment=prod + - machine-type=pixel-6-pro + env: + BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }} + RESULTS_DIR: results-dir + TARGET_DEVICE: pixel-6-pro + GGML_BUILD_DIR: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir }} + GGML_BUILD_DIR_ARCHIVE: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-archive }} + GGML_BUILD_DIR_GCS_ARTIFACT: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-gcs-artifact }} + steps: + - name: "Checking out PR repository" + uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 + - name: "Setup" + id: setup + run: | + echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}" + mkdir "${RESULTS_DIR}" + - name: "Downloading and unpacking GGML build" + run: | + gcloud storage cp "${GGML_BUILD_DIR_GCS_ARTIFACT}" "${GGML_BUILD_DIR_ARCHIVE}" + tar -xvf "${GGML_BUILD_DIR_ARCHIVE}" + - name: "Benchmarking GGML on Android" + env: + GGML_RESULTS_JSON: ggml-android.json + RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }} + run: | + RESULTS_PATH="${RESULTS_DIR}/${GGML_RESULTS_JSON}" + ./experimental/ggml/benchmark_ggml.sh "${TARGET_DEVICE}" "${GGML_BUILD_DIR}" "${RESULTS_PATH}" + cat "${RESULTS_PATH}" + +# adb push "./experimental/ggml/set_android_scaling_governor.sh" "/data/local/tmp" +# adb shell "chmod +x /data/local/tmp/set_android_scaling_governor.sh" +# adb shell "su root sh /data/local/tmp/set_android_scaling_governor.sh performance" +# +# adb push "${GGML_BUILD_DIR}/bin/gpt-2" "/data/local/tmp" +# adb shell "chmod +x /data/local/tmp/gpt-2" +# adb push "${GGML_BUILD_DIR}/models/gpt-2-117M/ggml-model-f32.bin" "/data/local/tmp" +# +# echo "Benchmarking ggml-model-f32.bin with 1 thread" +# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 1' +# +# echo "Benchmarking ggml-model-f32.bin with 4 threads" +# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 4' +# +# echo "Benchmarking ggml-model-f32.bin with 8 threads" +# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 8' +# +# echo "Benchmarking ggml-model-f32.bin with 16 threads" +# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 16' +# +# echo "Removing 
ggml-model-f32.bin" +# adb shell "rm /data/local/tmp/ggml-model-f32.bin" +# +# adb push "${GGML_BUILD_DIR}/models/gpt-2-117M/ggml-model-f16.bin" "/data/local/tmp" +# +# echo "Benchmarking ggml-model-f16.bin with 1 thread" +# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 1' +# +# echo "Benchmarking ggml-model-f16.bin with 4 threads" +# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 4' +# +# echo "Benchmarking ggml-model-f16.bin with 8 threads" +# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 8' +# +# echo "Benchmarking ggml-model-f16.bin with 16 threads" +# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 16' diff --git a/common_benchmark_suite/openxla/benchmark/devices/__init__.py b/common_benchmark_suite/openxla/benchmark/devices/__init__.py index 049b6fd1..c9fd5892 100644 --- a/common_benchmark_suite/openxla/benchmark/devices/__init__.py +++ b/common_benchmark_suite/openxla/benchmark/devices/__init__.py @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -from . import gcp_devices, host_devices +from . import gcp_devices, host_devices, mobile_devices # All defined device specs. -ALL_DEVICES = gcp_devices.ALL_DEVICES + host_devices.ALL_DEVICES +ALL_DEVICES = gcp_devices.ALL_DEVICES + host_devices.ALL_DEVICES + mobile_devices.ALL_DEVICES diff --git a/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py b/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py new file mode 100644 index 00000000..ca716280 --- /dev/null +++ b/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py @@ -0,0 +1,22 @@ +# Copyright 2023 The OpenXLA Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from openxla.benchmark import def_types + +MOBILE_PIXEL_6_PRO = def_types.DeviceSpec( + name="pixel-6-pro", + host_type="mobile", + host_model="pixel-6-pro", + host_environment="android", + accelerator_type="cpu", + accelerator_model="armv8.2-a", + accelerator_architecture="armv8.2-a", + accelerator_attributes={ + "num_of_cores": 8, + }, +) + +ALL_DEVICES = [MOBILE_PIXEL_6_PRO] \ No newline at end of file diff --git a/devtools/docker/dockerfiles/android.Dockerfile b/devtools/docker/dockerfiles/android.Dockerfile new file mode 100644 index 00000000..b6272b40 --- /dev/null +++ b/devtools/docker/dockerfiles/android.Dockerfile @@ -0,0 +1,20 @@ +# Copyright 2023 The OpenXLA Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# An image for cross-compiling towards Android. 
+ +FROM gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251 + +ARG NDK_VERSION=r25c +WORKDIR /install-ndk + +ENV ANDROID_NDK "/usr/src/android-ndk-${NDK_VERSION}" + +RUN wget -q "https://dl.google.com/android/repository/android-ndk-${NDK_VERSION}-linux.zip" \ + && unzip -q "android-ndk-${NDK_VERSION}-linux.zip" -d /usr/src/ \ + && rm -rf /install-ndk + +WORKDIR / diff --git a/devtools/docker/image_deps.json b/devtools/docker/image_deps.json index 7215e309..243f4320 100644 --- a/devtools/docker/image_deps.json +++ b/devtools/docker/image_deps.json @@ -3,5 +3,6 @@ "cuda11.8-cudnn8.9": ["base"], "db_import": [], "mmperf": ["base"], - "convperf": ["base"] + "convperf": ["base"], + "android": ["base"] } diff --git a/devtools/docker/prod_digests.txt b/devtools/docker/prod_digests.txt index 013a9d5d..4a2f94a0 100644 --- a/devtools/docker/prod_digests.txt +++ b/devtools/docker/prod_digests.txt @@ -3,3 +3,4 @@ gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4d gcr.io/iree-oss/openxla-benchmark/db_import@sha256:3de8a702b51ca1906fc2ef5bab2415a79e46bc132f2ceba994215539dd0ecdd4 gcr.io/iree-oss/openxla-benchmark/mmperf@sha256:c972ce5b2144de0786f103611fecbd88d93dd45ecd068f8c97d98c08677cee57 gcr.io/iree-oss/openxla-benchmark/convperf@sha256:0807d5e8144900752cfae72f3aa4d12530b408f73fc6f010a6cbad11cc09832c +gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e diff --git a/experimental/ggml/benchmark_android.py b/experimental/ggml/benchmark_android.py new file mode 100755 index 00000000..0295e0b6 --- /dev/null +++ b/experimental/ggml/benchmark_android.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 +# +# Copyright 2023 The OpenXLA Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import argparse +import dataclasses +import json +import pathlib +import re +import statistics +import subprocess +import sys + +# Add comparative_benchmark dir to the search path. +sys.path.insert( + 0, str(pathlib.Path(__file__).parents[2] / "comparative_benchmark")) +import utils + +# Add common_benchmark_suite dir to the search path. +sys.path.insert( + 0, str(pathlib.Path(__file__).parents[2] / "common_benchmark_suite")) +from openxla.benchmark import def_types, devices + +ALL_DEVICE_NAMES = [device.name for device in devices.ALL_DEVICES] + + +def _parse_output(output_text): + # Example output. 
# main: mem per token = 2011380 bytes
+  # main: load time = 120.92 ms
+  # main: sample time = 73.86 ms
+  # main: first predict time = 14.71 ms
+  # main: loop predict time = 2261.72 ms / 11.20 ms per token
+  # main: predict time = 2276.43 ms / 11.21 ms per token
+  # main: total time = 2494.66 ms
+
+  LOAD_TIME_REGEXP = re.compile(r"main: load time = (.+) ms")
+  match = LOAD_TIME_REGEXP.search(output_text)
+  if not match:
+    print("Unable to parse load time")
+    return None
+  load_time_ms = float(match.group(1))
+
+  SAMPLE_TIME_REGEXP = re.compile(r"main: sample time = (.+) ms")
+  match = SAMPLE_TIME_REGEXP.search(output_text)
+  if not match:
+    print("Unable to parse sample time")
+    return None
+  sample_time_ms = float(match.group(1))
+
+  FIRST_PREDICTION_TIME_REGEXP = re.compile(
+      r"main: first predict time = (.+) ms")
+  match = FIRST_PREDICTION_TIME_REGEXP.search(output_text)
+  if not match:
+    print("Unable to parse first prediction time")
+    return None
+  first_prediction_ms = float(match.group(1))
+
+  LOOP_PREDICTION_TIME_REGEXP = re.compile(
+      r"main: loop predict time = .+ ms / (.+) ms per token")
+  match = LOOP_PREDICTION_TIME_REGEXP.search(output_text)
+  if not match:
+    print("Unable to parse loop prediction time")
+    return None
+  loop_prediction_ms = float(match.group(1))
+
+  TOTAL_PREDICTION_TIME_REGEXP = re.compile(
+      r"main: predict time = (.+) ms / .+ ms per token")
+  match = TOTAL_PREDICTION_TIME_REGEXP.search(output_text)
+  if not match:
+    print("Unable to parse total prediction time")
+    return None
+  total_prediction_ms = float(match.group(1))
+
+  E2E_TIME_REGEXP = re.compile(r"main: total time = (.+) ms")
+  match = E2E_TIME_REGEXP.search(output_text)
+  if not match:
+    print("Unable to parse total time")
+    return None
+  e2e_prediction_ms = float(match.group(1))
+
+  return {
+      "load_time_ms": load_time_ms,
+      "first_prediction_ms": first_prediction_ms,
+      "loop_prediction_ms": loop_prediction_ms,
+      "total_prediction_ms": total_prediction_ms,
+      "sample_time_ms": sample_time_ms,
+      "e2e_prediction_ms": e2e_prediction_ms,
+  }
+
+
+def _parse_arguments() -> argparse.Namespace:
+  parser = argparse.ArgumentParser(description="Run GGML benchmarks.")
+  parser.add_argument("-name",
+                      "--benchmark_name",
+                      type=str,
+                      required=True,
+                      help="The regex pattern to match benchmark names.")
+  parser.add_argument(
+      "-b",
+      "--benchmark_binary",
+      type=pathlib.Path,
+      required=True,
+      help="Path to benchmark binary e.g. /tmp/ggml/build/bin/gpt-2")
+  parser.add_argument(
+      "-m",
+      "--model",
+      type=pathlib.Path,
+      required=True,
+      help=
+      "The GGML model to benchmark e.g. 
/tmp/ggml/build/models/gpt-2-117M/ggml-model.bin" + ) + parser.add_argument("--data_type", type=str, help="The model data type.") + parser.add_argument("-p", + "--prompt", + type=str, + default="Once upon a time", + help="The input prompt to the model.") + parser.add_argument("-s", + "--seed", + type=int, + default=0, + help="The seed to use for the RNG.") + parser.add_argument("-t", + "--threads", + type=int, + default=8, + help="The number of threads to use.") + parser.add_argument("-o", + "--output", + type=pathlib.Path, + required=True, + help="JSON file path to merge the results.") + parser.add_argument("-device", + "--target_device", + dest="target_device_name", + type=str, + required=True, + choices=ALL_DEVICE_NAMES, + help="The target device to benchmark.") + parser.add_argument("-w", + "--warmup_iterations", + type=int, + default=5, + help="The number of warmup steps.") + parser.add_argument("-iter", + "--iterations", + type=int, + default=100, + help="The number of iterations to benchmark.") + parser.add_argument("--verbose", + action="store_true", + help="Show verbose messages.") + return parser.parse_args() + + +def main(benchmark_name: str, benchmark_binary: pathlib.Path, + warmup_iterations: int, iterations: int, model: pathlib.Path, + data_type: str, prompt: str, seed: int, threads: int, + output: pathlib.Path, target_device_name: str, verbose: bool): + + try: + target_device = next(device for device in devices.ALL_DEVICES + if device.name == target_device_name) + except StopIteration: + raise ValueError(f'Target device "{target_device_name}" is not defined.' + f' Available device options:\n{ALL_DEVICE_NAMES}') + + benchmark_definition = { + "benchmark_name": benchmark_name, + "framework": str(def_types.ModelFrameworkType.GGML), + "data_type": data_type, + "batch_size": 1, + "compiler": str(def_types.ModelFrameworkType.GGML), + "device": target_device.name, + "num_threads": threads, + "warmup_iterations": warmup_iterations, + "num_iterations": iterations, + "tags": ["gpt2", "ggml"], + } + + # Push artifacts to the Android device. + subprocess.run(["adb", "push", benchmark_binary, "/data/local/tmp"]) + subprocess.run([ + "adb", "shell", "chmod", "+x", f"/data/local/tmp/{benchmark_binary.name}" + ]) + subprocess.run(["adb", "push", model, "/data/local/tmp"]) + + # Run benchmark. + cmd = [ + "adb", "shell", f"/data/local/tmp/{benchmark_binary.name}", "--model", + f"/data/local/tmp/{model.name}", "--prompt", f"\"{prompt}\"", "--seed", + f"{seed}", "--threads", f"{threads}" + ] + print(f"cmd: {cmd}") + + # Run warmup iterations. + for i in range(warmup_iterations): + subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + + load_times = [] + first_prediction_times = [] + loop_prediction_times = [] + total_prediction_times = [] + sample_times = [] + e2e_prediction_times = [] + + # Run iterations. 
for i in range(iterations):
+    raw_result = subprocess.run(cmd,
+                                stdout=subprocess.PIPE,
+                                stderr=subprocess.STDOUT)
+    raw_output = raw_result.stdout.decode("utf-8")
+    print(f"raw_output: {raw_output}")
+    metrics = _parse_output(raw_output)
+    print(f"metrics: {metrics}")
+    # Skip any iteration whose output could not be parsed.
+    if metrics is None:
+      continue
+
+    load_times.append(metrics["load_time_ms"])
+    first_prediction_times.append(metrics["first_prediction_ms"])
+    loop_prediction_times.append(metrics["loop_prediction_ms"])
+    total_prediction_times.append(metrics["total_prediction_ms"])
+    sample_times.append(metrics["sample_time_ms"])
+    e2e_prediction_times.append(metrics["e2e_prediction_ms"])
+
+  benchmark_metrics = {
+      "median_load_time_ms":
+          statistics.median(load_times) if load_times else None,
+      "median_first_prediction_ms":
+          statistics.median(first_prediction_times)
+          if first_prediction_times else None,
+      "median_loop_prediction_ms":
+          statistics.median(loop_prediction_times)
+          if loop_prediction_times else None,
+      "median_total_prediction_ms":
+          statistics.median(total_prediction_times)
+          if total_prediction_times else None,
+      "median_sample_time_ms":
+          statistics.median(sample_times) if sample_times else None,
+      "median_e2e_prediction_ms":
+          statistics.median(e2e_prediction_times)
+          if e2e_prediction_times else None,
+  }
+
+  benchmark_result = utils.BenchmarkResult(
+      definition=benchmark_definition,
+      metrics={
+          "compiler_level": benchmark_metrics,
+      },
+  )
+
+  if verbose:
+    print(json.dumps(dataclasses.asdict(benchmark_result), indent=2))
+  utils.append_benchmark_result(output, benchmark_result)
+
+
+if __name__ == "__main__":
+  main(**vars(_parse_arguments()))
diff --git a/experimental/ggml/benchmark_ggml.sh b/experimental/ggml/benchmark_ggml.sh
index 6cb28dc1..01757e47 100755
--- a/experimental/ggml/benchmark_ggml.sh
+++ b/experimental/ggml/benchmark_ggml.sh
@@ -11,20 +11,20 @@
 # OOBI_VENV_DIR: path to create Python virtualenv, default: ggml-benchmarks.venv
 # OOBI_TARGET_DEVICE: target benchmark device, can also be specified the first
 # argument.
+# OOBI_BUILD_DIR: path to the GGML build directory, or the second argument.
 # OOBI_OUTPUT: path to output benchmark results, can also be specified the
-# second argument.
+# third argument.
-# OOBI_SCRATCH_DIR: the directory to place temporary benchmarking artifacts.
 #
 # Example usage:
-# ./benchmark_ggml.sh c2-standard-16 /tmp/results.json
+# ./benchmark_ggml.sh c2-standard-16 /tmp/ggml-build /tmp/results.json
 
 set -xeuo pipefail
 
 VENV_DIR="${OOBI_VENV_DIR:-ggml-benchmarks.venv}"
-ROOT_DIR="${OOBI_SCRATCH_DIR:-/tmp}"
 PYTHON="${PYTHON:-/usr/bin/python3}"
-TARGET_DEVICE="${1:-${OOBI_TARGET_DEVICE}}"
-OUTPUT_PATH="${2:-${OOBI_OUTPUT}}"
+TARGET_DEVICE_NAME="${1:-${OOBI_TARGET_DEVICE}}"
+BUILD_DIR="${2:-${OOBI_BUILD_DIR}}"
+OUTPUT_PATH="${3:-${OOBI_OUTPUT}}"
 
 TD="$(cd $(dirname $0) && pwd)"
 
@@ -35,27 +35,7 @@ VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"
 OUTPUT_PATH="$(realpath ${OUTPUT_PATH})"
 
 "${TD}/../../comparative_benchmark/scripts/create_results_json.sh" "${OUTPUT_PATH}"
-pushd "${ROOT_DIR}"
-
-# We clone a fork of ggml which includes additional benchmark logging.
-git clone --branch benchmark https://github.com/mariecwhite/ggml.git
-pushd ggml
-
-# Build
-mkdir build
-pushd build
-cmake ..
-make -j8
-
-# Generate FP32, FP16 and INT4 versions of GPT2 117M (Small).
-GPT_VARIANT="117M"
-../examples/gpt-2/download-model.sh "${GPT_VARIANT}"
-# Generate FP32.
-python ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 0
-# Generate FP16.
-python ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 1
-# Generate INT4.
-./bin/gpt-2-quantize models/gpt-2-${GPT_VARIANT}/ggml-model-f16.bin models/gpt-2-${GPT_VARIANT}/ggml-model-q4_0.bin 2
+pushd "${BUILD_DIR}"
 
 PROMPT="Once upon a time"
 BENCHMARK_BINARY="$(realpath bin/gpt-2)"
@@ -68,24 +48,29 @@ MODEL="$(realpath models/gpt-2-117M/ggml-model-f32.bin)"
 declare -a BENCHMARK_NAMES=(
   "models/GPT2LMHEAD_FP32_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
   "models/GPT2LMHEAD_FP16_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
-  "models/GPT2LMHEAD_INT4_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
 )
 
 declare -a MODELS=(
   ggml-model-f32.bin
   ggml-model-f16.bin
-  ggml-model-q4_0.bin
+  #ggml-model-q4_0.bin
 )
 
 declare -a DATA_TYPES=(
   "fp32"
   "fp16"
-  "int4"
+  #"int4"
 )
 
+if [[ "${TARGET_DEVICE_NAME}" =~ ^(pixel-4|pixel-6-pro|moto-edge-x30)$ ]]; then
+BENCHMARK_SCRIPT="benchmark_android.py"
+else
+BENCHMARK_SCRIPT="benchmark.py"
+fi
+
 for i in ${!BENCHMARK_NAMES[@]}; do
   MODEL="$(realpath models/gpt-2-117M/${MODELS[$i]})"
   for threads in "${NUM_THREADS[@]}"; do
-    "${TD}/benchmark.py" \
+    "${TD}/${BENCHMARK_SCRIPT}" \
       --benchmark_name "${BENCHMARK_NAMES[$i]}" \
       --warmup_iterations "${WARMUP_ITERAIONS}" \
       --iterations "${NUM_ITERATIONS}" \
@@ -96,11 +81,9 @@ for i in ${!BENCHMARK_NAMES[@]}; do
       --seed 0 \
       --threads "${threads}" \
       --output "${OUTPUT_PATH}" \
-      --target_device "${TARGET_DEVICE}" \
+      --target_device "${TARGET_DEVICE_NAME}" \
       --verbose
   done
 done
 
-popd # build
-popd # ggml
-popd # ROOT_DIR
+popd # BUILD_DIR
diff --git a/experimental/ggml/build_ggml.sh b/experimental/ggml/build_ggml.sh
new file mode 100755
index 00000000..1e4202b0
--- /dev/null
+++ b/experimental/ggml/build_ggml.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+#
+# Copyright 2023 The OpenXLA Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# Environment variables:
+# PYTHON: Python interpreter, default: /usr/bin/python3
+# ANDROID_NDK: the path to the Android NDK if building for Android.
+# OOBI_VENV_DIR: path to create Python virtualenv, default: ggml-build.venv
+# OOBI_TARGET_DEVICE: target benchmark device, can also be specified the first
+# argument.
+# OOBI_SCRATCH_DIR: the directory to place temporary build artifacts.
+#
+# The build output directory can be specified as the second argument,
+# default: /tmp/ggml-build.
+#
+# Example usage:
+# ./build_ggml.sh pixel-6-pro /tmp/ggml-build
+
+set -xeuo pipefail
+
+VENV_DIR="${OOBI_VENV_DIR:-ggml-build.venv}"
+ROOT_DIR="${OOBI_SCRATCH_DIR:-/tmp}"
+PYTHON="${PYTHON:-/usr/bin/python3}"
+TARGET_DEVICE_NAME="${1:-${OOBI_TARGET_DEVICE}}"
+BUILD_DIR="${2:-/tmp/ggml-build}"
+
+TD="$(cd $(dirname $0) && pwd)"
+BUILD_DIR="$(realpath "${BUILD_DIR}")"
+
+# Setup virtual environment.
+VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"
+
+pushd "${ROOT_DIR}"
+
+# We clone a fork of ggml which includes additional benchmark logging.
+git clone --branch benchmark https://github.com/mariecwhite/ggml.git
+pushd ggml
+
+REPO_DIR="$(pwd)"
+
+# Build.
+if [[ "${TARGET_DEVICE_NAME}" =~ ^(pixel-4|pixel-6-pro|moto-edge-x30)$ ]]; then
+cmake -G Ninja \
+  -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
+  -DANDROID_ABI=arm64-v8a \
+  -DANDROID_PLATFORM=android-23 \
+  -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod \
+  -B "${BUILD_DIR}" .
+cmake --build "${BUILD_DIR}" -t gpt-2 gpt-2-quantize
+else
+cmake -G Ninja -B "${BUILD_DIR}" .
+cmake --build "${BUILD_DIR}" -t gpt-2 gpt-2-quantize
+fi
+
+popd # ggml
+popd # ROOT_DIR
+
+# Generate FP32 and FP16 versions of GPT2 117M (Small).
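+# Note: the paths below are relative to ${BUILD_DIR}; download-model.sh fetches
+# the GPT-2 checkpoint into models/gpt-2-${GPT_VARIANT}/ and the conversion
+# script (run inside the venv set up above) writes ggml-model-f32.bin and
+# ggml-model-f16.bin alongside it, which is where benchmark_ggml.sh expects
+# to find them.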
+pushd "${BUILD_DIR}" + +GPT_VARIANT="117M" +${REPO_DIR}/examples/gpt-2/download-model.sh "${GPT_VARIANT}" +# Generate FP32. +python ${REPO_DIR}/examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 0 +# Generate FP16. +python ${REPO_DIR}/examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 1 +# Generate INT4. Keep this disabled until we want to use it. +#./bin/gpt-2-quantize models/gpt-2-${GPT_VARIANT}/ggml-model-f16.bin models/gpt-2-${GPT_VARIANT}/ggml-model-q4_0.bin 2 + +popd # BUILD_DIR diff --git a/experimental/ggml/requirements.txt b/experimental/ggml/requirements.txt index c2bbfa1f..7947059c 100644 --- a/experimental/ggml/requirements.txt +++ b/experimental/ggml/requirements.txt @@ -1,2 +1,3 @@ numpy -tensorflow \ No newline at end of file +tensorflow +requests diff --git a/experimental/ggml/set_android_scaling_governor.sh b/experimental/ggml/set_android_scaling_governor.sh new file mode 100755 index 00000000..0a297304 --- /dev/null +++ b/experimental/ggml/set_android_scaling_governor.sh @@ -0,0 +1,51 @@ +#!/bin/sh + +# Copyright 2023 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# Runs on an android device itself to set the frequency scaling governor for all +# CPUs (default performance). + +################################### WARNING #################################### +# This will overheat the phone if it's not on a cooling plate, resulting in # +# thermal throttling. To prevent anything catching on fire, the actual CPU # +# frequencies will be throttled to below the maximum, skewing your results. # +################################################################################ + +set -euo pipefail + +GOVERNOR="${1:-performance}" + +echo "CPU info (before changing governor):" +echo 'cpu\tgovernor\tcur\tmin\tmax' +echo "------------------------------------------------" +for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \ + echo "cpu${i}" | paste \ + - \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/scaling_governor" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_cur_freq" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_min_freq" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_max_freq"; \ +done + +echo "Setting CPU frequency governor to ${GOVERNOR}" + +for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \ + echo "${GOVERNOR}" > \ + "/sys/devices/system/cpu/cpu${i?}/cpufreq/scaling_governor"; \ +done + +echo "CPU info (after changing governor):" +echo 'cpu\tgovernor\tcur\tmin\tmax' +echo "------------------------------------------------" +for i in `cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq`; do \ + echo "cpu${i}" | paste \ + - \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/scaling_governor" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_cur_freq" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_min_freq" \ + "/sys/devices/system/cpu/cpu${i}/cpufreq/cpuinfo_max_freq"; \ +done