Skip to content

Commit

Permalink
Add GPT2 GGML Android Benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
mariecwhite committed Sep 11, 2023
1 parent 89817e9 commit 974bdbe
Show file tree
Hide file tree
Showing 13 changed files with 554 additions and 147 deletions.
128 changes: 126 additions & 2 deletions .github/workflows/run_ggml_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ jobs:
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
RESULTS_DIR: results-dir
TARGET_DEVICE: c2-standard-16
GGML_BUILD_DIR: build-dir
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
Expand All @@ -74,7 +75,14 @@ jobs:
run: |
echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
mkdir "${RESULTS_DIR}"
- name: "Benchmarking GGML CPU"
- name: "Building GGML CPU"
run: |
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
./experimental/ggml/build_ggml.sh \
"${TARGET_DEVICE}" \
"${GGML_BUILD_DIR}"
- name: "Benchmarking GGML"
env:
GGML_RESULTS_JSON: ggml.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
Expand All @@ -83,6 +91,122 @@ jobs:
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
./experimental/ggml/benchmark_ggml.sh \
"${TARGET_DEVICE}"\
"${TARGET_DEVICE}" \
"${GGML_BUILD_DIR}" \
"${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
build_ggml_for_android:
needs: [setup]
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
- environment=prod
- cpu
- os-family=Linux
env:
GGML_BUILD_DIR: ggml-build
TARGET_DEVICE: pixel-6-pro
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
outputs:
ggml-build-dir: ${{ env.GGML_BUILD_DIR }}
ggml-build-dir-archive: ${{ steps.archive.outputs.ggml-build-dir-archive }}
ggml-build-dir-gcs-artifact: ${{ steps.upload.outputs.ggml-build-dir-gcs-artifact }}
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- name: "Building GGML"
run: |
mkdir -p "${GGML_BUILD_DIR}"
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e" \
./experimental/ggml/build_ggml.sh \
"${TARGET_DEVICE}" \
"${GGML_BUILD_DIR}"
- name: "Creating build dir archive"
id: archive
env:
GGML_BUILD_DIR_ARCHIVE: ${{ env.GGML_BUILD_DIR }}.tgz
run: |
tar -zcvf ${GGML_BUILD_DIR_ARCHIVE} ${GGML_BUILD_DIR}
echo "ggml-build-dir-archive=${GGML_BUILD_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}"
- name: "Uploading build dir archive"
id: upload
env:
GGML_BUILD_DIR_ARCHIVE: ${{ steps.archive.outputs.ggml-build-dir-archive }}
GGML_BUILD_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.ggml-build-dir-archive }}
run: |
gcloud storage cp "${GGML_BUILD_DIR_ARCHIVE}" "${GGML_BUILD_DIR_GCS_ARTIFACT}"
echo "ggml-build-dir-gcs-artifact=${GGML_BUILD_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}"
benchmark_on_pixel-6-pro:
needs: [setup, build_ggml_for_android]
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
- environment=prod
- machine-type=pixel-6-pro
env:
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
RESULTS_DIR: results-dir
TARGET_DEVICE: pixel-6-pro
GGML_BUILD_DIR: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir }}
GGML_BUILD_DIR_ARCHIVE: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-archive }}
GGML_BUILD_DIR_GCS_ARTIFACT: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-gcs-artifact }}
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- name: "Setup"
id: setup
run: |
echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
mkdir "${RESULTS_DIR}"
- name: "Downloading and unpacking GGML build"
run: |
gcloud storage cp "${GGML_BUILD_DIR_GCS_ARTIFACT}" "${GGML_BUILD_DIR_ARCHIVE}"
tar -xvf "${GGML_BUILD_DIR_ARCHIVE}"
- name: "Benchmarking GGML on Android"
env:
GGML_RESULTS_JSON: ggml-android.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
run: |
RESULTS_PATH="${RESULTS_DIR}/${GGML_RESULTS_JSON}"
./experimental/ggml/benchmark_ggml.sh "${TARGET_DEVICE}" "${GGML_BUILD_DIR}" "${RESULTS_PATH}"
cat "${RESULTS_PATH}"
# adb push "./experimental/ggml/set_android_scaling_governor.sh" "/data/local/tmp"
# adb shell "chmod +x /data/local/tmp/set_android_scaling_governor.sh"
# adb shell "su root sh /data/local/tmp/set_android_scaling_governor.sh performance"
#
# adb push "${GGML_BUILD_DIR}/bin/gpt-2" "/data/local/tmp"
# adb shell "chmod +x /data/local/tmp/gpt-2"
# adb push "${GGML_BUILD_DIR}/models/gpt-2-117M/ggml-model-f32.bin" "/data/local/tmp"
#
# echo "Benchmarking ggml-model-f32.bin with 1 thread"
# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 1'
#
# echo "Benchmarking ggml-model-f32.bin with 4 threads"
# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 4'
#
# echo "Benchmarking ggml-model-f32.bin with 8 threads"
# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 8'
#
# echo "Benchmarking ggml-model-f32.bin with 16 threads"
# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 16'
#
# echo "Removing ggml-model-f32.bin"
# adb shell "rm /data/local/tmp/ggml-model-f32.bin"
#
# adb push "${GGML_BUILD_DIR}/models/gpt-2-117M/ggml-model-f16.bin" "/data/local/tmp"
#
# echo "Benchmarking ggml-model-f16.bin with 1 thread"
# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 1'
#
# echo "Benchmarking ggml-model-f16.bin with 4 threads"
# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 4'
#
# echo "Benchmarking ggml-model-f16.bin with 8 threads"
# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 8'
#
# echo "Benchmarking ggml-model-f16.bin with 16 threads"
# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 16'
5 changes: 3 additions & 2 deletions common_benchmark_suite/openxla/benchmark/devices/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from . import gcp_devices, host_devices
from . import gcp_devices, host_devices, mobile_devices

# All defined device specs.
ALL_DEVICES = gcp_devices.ALL_DEVICES + host_devices.ALL_DEVICES
ALL_DEVICES = gcp_devices.ALL_DEVICES + host_devices.ALL_DEVICES + mobile_devices.ALL_DEVICES
ALL_DEVICE_NAMES = [device.name for device in ALL_DEVICES]
22 changes: 22 additions & 0 deletions common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from openxla.benchmark import def_types

# Device spec for the Google Pixel 6 Pro, the Android benchmark target.
# The `name` matches the CI runner label (machine-type=pixel-6-pro) and the
# device name passed to the benchmark scripts.
MOBILE_PIXEL_6_PRO = def_types.DeviceSpec(
    name="pixel-6-pro",
    host_type="mobile",
    host_model="pixel-6-pro",
    host_environment="android",
    # CPU-only benchmarking; the CPU cores implement the Armv8.2-A ISA.
    accelerator_type="cpu",
    accelerator_model="armv8.2-a",
    accelerator_architecture="armv8.2-a",
    accelerator_attributes={
        # Total core count across all clusters.
        "num_of_cores": 8,
    },
)

# All mobile device specs defined in this module; aggregated into
# devices.ALL_DEVICES by the package __init__.
ALL_DEVICES = [MOBILE_PIXEL_6_PRO]
20 changes: 20 additions & 0 deletions devtools/docker/dockerfiles/android.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# An image for cross-compiling towards Android.

FROM gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251

# Android NDK release to install; overridable at image build time.
ARG NDK_VERSION=r25c
# Scratch directory for the NDK download; deleted at the end of the RUN step.
WORKDIR /install-ndk

# Path consumers use to locate the NDK toolchain. Use the key=value ENV form;
# the legacy space-separated form is discouraged by Docker.
ENV ANDROID_NDK="/usr/src/android-ndk-${NDK_VERSION}"

# Download and unpack the NDK into /usr/src, then remove the scratch
# directory (and the downloaded zip inside it) to keep the layer small.
RUN wget -q "https://dl.google.com/android/repository/android-ndk-${NDK_VERSION}-linux.zip" \
  && unzip -q "android-ndk-${NDK_VERSION}-linux.zip" -d /usr/src/ \
  && rm -rf /install-ndk

WORKDIR /
3 changes: 2 additions & 1 deletion devtools/docker/image_deps.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
"cuda11.8-cudnn8.9": ["base"],
"db_import": [],
"mmperf": ["base"],
"convperf": ["base"]
"convperf": ["base"],
"android": ["base"]
}
1 change: 1 addition & 0 deletions devtools/docker/prod_digests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4d
gcr.io/iree-oss/openxla-benchmark/db_import@sha256:3de8a702b51ca1906fc2ef5bab2415a79e46bc132f2ceba994215539dd0ecdd4
gcr.io/iree-oss/openxla-benchmark/mmperf@sha256:c972ce5b2144de0786f103611fecbd88d93dd45ecd068f8c97d98c08677cee57
gcr.io/iree-oss/openxla-benchmark/convperf@sha256:0807d5e8144900752cfae72f3aa4d12530b408f73fc6f010a6cbad11cc09832c
gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e
96 changes: 50 additions & 46 deletions experimental/ggml/benchmark_ggml.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,20 @@
# OOBI_VENV_DIR: path to create Python virtualenv, default: ggml-benchmarks.venv
# OOBI_TARGET_DEVICE: target benchmark device, can also be specified the first
# argument.
# OOBI_BUILD_DIR: path to the GGMl build directory.
# OOBI_OUTPUT: path to output benchmark results, can also be specified the
# second argument.
# OOBI_SCRATCH_DIR: the directory to place temporary benchmarking artifacts.
#
# Example usage:
# ./benchmark_ggml.sh c2-standard-16 /tmp/results.json
# ./benchmark_ggml.sh <target-device> <build-dir> <result-path>

set -xeuo pipefail

VENV_DIR="${OOBI_VENV_DIR:-ggml-benchmarks.venv}"
ROOT_DIR="${OOBI_SCRATCH_DIR:-/tmp}"
PYTHON="${PYTHON:-/usr/bin/python3}"
TARGET_DEVICE="${1:-${OOBI_TARGET_DEVICE}}"
OUTPUT_PATH="${2:-${OOBI_OUTPUT}}"
TARGET_DEVICE_NAME="${1:-${OOBI_TARGET_DEVICE}}"
BUILD_DIR="${2:-${OOBI_BUILD_DIR}}"
OUTPUT_PATH="${3:-${OOBI_OUTPUT}}"

TD="$(cd $(dirname $0) && pwd)"

Expand All @@ -35,33 +35,12 @@ VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"
OUTPUT_PATH="$(realpath ${OUTPUT_PATH})"
"${TD}/../../comparative_benchmark/scripts/create_results_json.sh" "${OUTPUT_PATH}"

pushd "${ROOT_DIR}"

# We clone a fork of ggml which includes additional benchmark logging.
git clone --branch benchmark https://github.com/mariecwhite/ggml.git
pushd ggml

# Build
mkdir build
pushd build
cmake ..
make -j8

# Generate FP32, FP16 and INT4 versions of GPT2 117M (Small).
GPT_VARIANT="117M"
../examples/gpt-2/download-model.sh "${GPT_VARIANT}"
# Generate FP32.
python ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 0
# Generate FP16.
python ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 1
# Generate INT4.
./bin/gpt-2-quantize models/gpt-2-${GPT_VARIANT}/ggml-model-f16.bin models/gpt-2-${GPT_VARIANT}/ggml-model-q4_0.bin 2
pushd "${BUILD_DIR}"

PROMPT="Once upon a time"
BENCHMARK_BINARY="$(realpath bin/gpt-2)"
WARMUP_ITERAIONS=2
NUM_ITERATIONS=10
declare -a NUM_THREADS=(1 8 16)

MODEL="$(realpath models/gpt-2-117M/ggml-model-f32.bin)"

Expand All @@ -81,26 +60,51 @@ declare -a DATA_TYPES=(
"int4"
)

declare -a args=(
--warmup_iterations "${WARMUP_ITERAIONS}"
--iterations "${NUM_ITERATIONS}"
--benchmark_binary "${BENCHMARK_BINARY}"
--prompt "${PROMPT}"
--seed 0
--output "${OUTPUT_PATH}"
--target_device "${TARGET_DEVICE_NAME}"
--verbose
)

if [[ "${TARGET_DEVICE_NAME}" =~ ^(pixel-4|pixel-6-pro|moto-edge-x30)$ ]]; then
BENCHMARK_SCRIPT="run_benchmarks_android.py"
# Pixel 6 has a maximum of 8 cores.
THREADS="1,4,8"
TASKSETS="80,f0,ff"

args+=(
--threads "${THREADS}"
--tasksets "${TASKSETS}"
)

# Setup mobile device for benchmarking.
adb push "${TD}/set_android_scaling_governor.sh" "/data/local/tmp"
adb shell "chmod +x /data/local/tmp/set_android_scaling_governor.sh"
adb shell "su root sh /data/local/tmp/set_android_scaling_governor.sh performance"

else
# c2-standard-16 has 16 cores.
BENCHMARK_SCRIPT="run_benchmarks.py"
THREADS="1,8,16"

args+=(
--threads "${THREADS}"
)
fi

for i in ${!BENCHMARK_NAMES[@]}; do
MODEL="$(realpath models/gpt-2-117M/${MODELS[$i]})"

for threads in "${NUM_THREADS[@]}"; do
"${TD}/benchmark.py" \
--benchmark_name "${BENCHMARK_NAMES[$i]}" \
--warmup_iterations "${WARMUP_ITERAIONS}" \
--iterations "${NUM_ITERATIONS}" \
--benchmark_binary "${BENCHMARK_BINARY}" \
--model "${MODEL}" \
--data_type "${DATA_TYPES[$i]}" \
--prompt "${PROMPT}" \
--seed 0 \
--threads "${threads}" \
--output "${OUTPUT_PATH}" \
--target_device "${TARGET_DEVICE}" \
--verbose
done
args+=(
--benchmark_name "${BENCHMARK_NAMES[$i]}"
--model "${MODEL}"
--data_type "${DATA_TYPES[$i]}"
)
"${TD}/${BENCHMARK_SCRIPT}" "${args[@]}"
done

popd # build
popd # ggml
popd # ROOT_DIR
popd # BUILD_DIR
Loading

0 comments on commit 974bdbe

Please sign in to comment.