Skip to content

Commit

Permalink
Add GPT2 GGML Android Benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
mariecwhite committed Sep 6, 2023
1 parent 89817e9 commit 6f3f640
Show file tree
Hide file tree
Showing 7 changed files with 272 additions and 32 deletions.
93 changes: 91 additions & 2 deletions .github/workflows/run_ggml_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ jobs:
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
RESULTS_DIR: results-dir
TARGET_DEVICE: c2-standard-16
GGML_BUILD_DIR: build-dir
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
Expand All @@ -74,7 +75,14 @@ jobs:
run: |
echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
mkdir "${RESULTS_DIR}"
- name: "Benchmarking GGML CPU"
- name: "Building GGML CPU"
run: |
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
./experimental/ggml/build_ggml.sh \
"${TARGET_DEVICE}" \
"${GGML_BUILD_DIR}"
- name: "Benchmarking GGML"
env:
GGML_RESULTS_JSON: ggml.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
Expand All @@ -83,6 +91,87 @@ jobs:
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
./experimental/ggml/benchmark_ggml.sh \
"${TARGET_DEVICE}"\
"${TARGET_DEVICE}" \
"${GGML_BUILD_DIR}" \
"${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
build_ggml_for_android:
needs: [setup]
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
- environment=prod
- cpu
- os-family=Linux
env:
GGML_BUILD_DIR: ggml-build
TARGET_DEVICE: pixel-6-pro
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
outputs:
ggml-build-dir: ${{ env.GGML_BUILD_DIR }}
ggml-build-dir-archive: ${{ steps.archive.outputs.ggml-build-dir-archive }}
ggml-build-dir-gcs-artifact: ${{ steps.upload.outputs.ggml-build-dir-gcs-artifact }}
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- name: "Building GGML"
run: |
mkdir -p "${GGML_BUILD_DIR}"
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e" \
./experimental/ggml/build_ggml.sh \
"${TARGET_DEVICE}" \
"${GGML_BUILD_DIR}"
- name: "Creating build dir archive"
id: archive
env:
GGML_BUILD_DIR_ARCHIVE: ${{ env.GGML_BUILD_DIR }}.tgz
run: |
tar -zcvf ${GGML_BUILD_DIR_ARCHIVE} ${GGML_BUILD_DIR}
echo "ggml-build-dir-archive=${GGML_BUILD_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}"
- name: "Uploading build dir archive"
id: upload
env:
GGML_BUILD_DIR_ARCHIVE: ${{ steps.archive.outputs.ggml-build-dir-archive }}
GGML_BUILD_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.ggml-build-dir-archive }}
run: |
gcloud storage cp "${GGML_BUILD_DIR_ARCHIVE}" "${GGML_BUILD_DIR_GCS_ARTIFACT}"
echo "ggml-build-dir-gcs-artifact=${GGML_BUILD_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}"
benchmark_on_pixel-6-pro:
needs: [setup, build_ggml_for_android]
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
- environment=prod
- machine-type=pixel-6-pro
env:
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
RESULTS_DIR: results-dir
TARGET_DEVICE: pixel-6-pro
GGML_BUILD_DIR: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir }}
GGML_BUILD_DIR_ARCHIVE: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-archive }}
GGML_BUILD_DIR_GCS_ARTIFACT: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-gcs-artifact }}
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- name: "Setup"
id: setup
run: |
echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
mkdir "${RESULTS_DIR}"
- name: "Downloading and unpacking GGML build"
run: |
gcloud storage cp "${GGML_BUILD_DIR_GCS_ARTIFACT}" "${GGML_BUILD_DIR_ARCHIVE}"
tar -xvf "${GGML_BUILD_DIR_ARCHIVE}"
- name: "Benchmarking GGML on Android"
env:
GGML_RESULTS_JSON: ggml-android.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
run: |
RESULTS_PATH="${RESULTS_DIR}/${GGML_RESULTS_JSON}"
adb push "${GGML_BUILD_DIR}/bin/gpt-2" /data/local/tmp
adb shell chmod +x /data/local/tmp/gpt-2
adb push "${GGML_BUILD_DIR}/models/gpt-2-117M/ggml-model-f32.bin" /data/local/tmp
adb shell /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --threads 8
20 changes: 20 additions & 0 deletions devtools/docker/dockerfiles/android.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# An image for cross-compiling towards Android.
# Extends the benchmark base image with the Android NDK, exposed via
# the ANDROID_NDK environment variable (consumed by build_ggml.sh).

FROM gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251

ARG NDK_VERSION=r25c
WORKDIR /install-ndk

# Use the key=value form; the legacy whitespace-separated "ENV key value"
# form is discouraged by Docker and flagged by hadolint.
ENV ANDROID_NDK="/usr/src/android-ndk-${NDK_VERSION}"

# Download and unpack the NDK into /usr/src, then drop the scratch dir
# (which also removes the downloaded zip) to keep the layer small.
RUN wget -q "https://dl.google.com/android/repository/android-ndk-${NDK_VERSION}-linux.zip" \
  && unzip -q "android-ndk-${NDK_VERSION}-linux.zip" -d /usr/src/ \
  && rm -rf /install-ndk

WORKDIR /
3 changes: 2 additions & 1 deletion devtools/docker/image_deps.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
"cuda11.8-cudnn8.9": ["base"],
"db_import": [],
"mmperf": ["base"],
"convperf": ["base"]
"convperf": ["base"],
"android": ["base"]
}
1 change: 1 addition & 0 deletions devtools/docker/prod_digests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4d
gcr.io/iree-oss/openxla-benchmark/db_import@sha256:3de8a702b51ca1906fc2ef5bab2415a79e46bc132f2ceba994215539dd0ecdd4
gcr.io/iree-oss/openxla-benchmark/mmperf@sha256:c972ce5b2144de0786f103611fecbd88d93dd45ecd068f8c97d98c08677cee57
gcr.io/iree-oss/openxla-benchmark/convperf@sha256:0807d5e8144900752cfae72f3aa4d12530b408f73fc6f010a6cbad11cc09832c
gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e
35 changes: 6 additions & 29 deletions experimental/ggml/benchmark_ggml.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,20 @@
# OOBI_VENV_DIR: path to create Python virtualenv, default: ggml-benchmarks.venv
# OOBI_TARGET_DEVICE: target benchmark device, can also be specified the first
# argument.
# OOBI_BUILD_DIR: path to the GGMl build directory.
# OOBI_OUTPUT: path to output benchmark results, can also be specified the
# second argument.
# OOBI_SCRATCH_DIR: the directory to place temporary benchmarking artifacts.
#
# Example usage:
# ./benchmark_ggml.sh c2-standard-16 /tmp/results.json
# ./benchmark_ggml.sh <target-device> <build-dir> <result-path>

set -xeuo pipefail

VENV_DIR="${OOBI_VENV_DIR:-ggml-benchmarks.venv}"
ROOT_DIR="${OOBI_SCRATCH_DIR:-/tmp}"
PYTHON="${PYTHON:-/usr/bin/python3}"
TARGET_DEVICE="${1:-${OOBI_TARGET_DEVICE}}"
OUTPUT_PATH="${2:-${OOBI_OUTPUT}}"
BUILD_DIR="${2:-${OOBI_BUILD_DIR}}"
OUTPUT_PATH="${3:-${OOBI_OUTPUT}}"

TD="$(cd $(dirname $0) && pwd)"

Expand All @@ -35,27 +35,7 @@ VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"
OUTPUT_PATH="$(realpath ${OUTPUT_PATH})"
"${TD}/../../comparative_benchmark/scripts/create_results_json.sh" "${OUTPUT_PATH}"

pushd "${ROOT_DIR}"

# We clone a fork of ggml which includes additional benchmark logging.
git clone --branch benchmark https://github.com/mariecwhite/ggml.git
pushd ggml

# Build
mkdir build
pushd build
cmake ..
make -j8

# Generate FP32, FP16 and INT4 versions of GPT2 117M (Small).
GPT_VARIANT="117M"
../examples/gpt-2/download-model.sh "${GPT_VARIANT}"
# Generate FP32.
python ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 0
# Generate FP16.
python ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 1
# Generate INT4.
./bin/gpt-2-quantize models/gpt-2-${GPT_VARIANT}/ggml-model-f16.bin models/gpt-2-${GPT_VARIANT}/ggml-model-q4_0.bin 2
pushd "${BUILD_DIR}"

PROMPT="Once upon a time"
BENCHMARK_BINARY="$(realpath bin/gpt-2)"
Expand All @@ -68,7 +48,6 @@ MODEL="$(realpath models/gpt-2-117M/ggml-model-f32.bin)"
declare -a BENCHMARK_NAMES=(
"models/GPT2LMHEAD_FP32_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
"models/GPT2LMHEAD_FP16_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
"models/GPT2LMHEAD_INT4_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
)
declare -a MODELS=(
ggml-model-f32.bin
Expand Down Expand Up @@ -101,6 +80,4 @@ for i in ${!BENCHMARK_NAMES[@]}; do
done
done

popd # build
popd # ggml
popd # ROOT_DIR
popd # BUILD_DIR
84 changes: 84 additions & 0 deletions experimental/ggml/benchmark_ggml_android.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/bash
#
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# Benchmarks the GGML GPT-2 binary (FP32/FP16/INT4 variants) through the
# benchmark.py driver and appends results to a results JSON file.
#
# Environment variables:
# PYTHON: Python interpreter, default: /usr/bin/python3
# OOBI_VENV_DIR: path to create Python virtualenv, default: ggml-benchmarks.venv
# OOBI_TARGET_DEVICE: target benchmark device, can also be specified the first
#   argument.
# OOBI_BUILD_DIR: path to the GGML build directory, can also be specified the
#   second argument.
# OOBI_OUTPUT: path to output benchmark results, can also be specified the
#   third argument.
#
# Example usage:
# ./benchmark_ggml_android.sh <target-device> <build-dir> <result-path>

set -xeuo pipefail

VENV_DIR="${OOBI_VENV_DIR:-ggml-benchmarks.venv}"
PYTHON="${PYTHON:-/usr/bin/python3}"
TARGET_DEVICE="${1:-${OOBI_TARGET_DEVICE}}"
BUILD_DIR="${2:-${OOBI_BUILD_DIR}}"
OUTPUT_PATH="${3:-${OOBI_OUTPUT}}"

TD="$(cd $(dirname $0) && pwd)"

# Setup virtual environment.
VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"

# Initialize results json.
OUTPUT_PATH="$(realpath ${OUTPUT_PATH})"
"${TD}/../../comparative_benchmark/scripts/create_results_json.sh" "${OUTPUT_PATH}"

pushd "${BUILD_DIR}"

PROMPT="Once upon a time"
BENCHMARK_BINARY="$(realpath bin/gpt-2)"
# Fixed typo: was WARMUP_ITERAIONS.
WARMUP_ITERATIONS=2
NUM_ITERATIONS=10
declare -a NUM_THREADS=(1 8 16)

# The three arrays below are parallel: index i selects one benchmark
# (name, model file, data type). MODEL is derived per-iteration in the loop.
declare -a BENCHMARK_NAMES=(
  "models/GPT2LMHEAD_FP32_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
  "models/GPT2LMHEAD_FP16_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
  "models/GPT2LMHEAD_INT4_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
)
declare -a MODELS=(
  ggml-model-f32.bin
  ggml-model-f16.bin
  ggml-model-q4_0.bin
)
declare -a DATA_TYPES=(
  "fp32"
  "fp16"
  "int4"
)

for i in "${!BENCHMARK_NAMES[@]}"; do
  MODEL="$(realpath models/gpt-2-117M/${MODELS[$i]})"

  for threads in "${NUM_THREADS[@]}"; do
    "${TD}/benchmark.py" \
      --benchmark_name "${BENCHMARK_NAMES[$i]}" \
      --warmup_iterations "${WARMUP_ITERATIONS}" \
      --iterations "${NUM_ITERATIONS}" \
      --benchmark_binary "${BENCHMARK_BINARY}" \
      --model "${MODEL}" \
      --data_type "${DATA_TYPES[$i]}" \
      --prompt "${PROMPT}" \
      --seed 0 \
      --threads "${threads}" \
      --output "${OUTPUT_PATH}" \
      --target_device "${TARGET_DEVICE}" \
      --verbose
  done
done

popd # BUILD_DIR
68 changes: 68 additions & 0 deletions experimental/ggml/build_ggml.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash
#
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# Clones a benchmark fork of ggml, builds the gpt-2 and gpt-2-quantize
# targets into <build-dir>, and generates FP32/FP16 GPT-2 117M model files.
#
# Environment variables:
# PYTHON: Python interpreter, default: /usr/bin/python3
# ANDROID_NDK: the path to the Android NDK if building for Android.
# OOBI_VENV_DIR: path to create Python virtualenv, default: ggml-build.venv
# OOBI_TARGET_DEVICE: target benchmark device, can also be specified the first
#   argument.
# OOBI_SCRATCH_DIR: the directory to place temporary benchmarking artifacts.
#
# Positional arguments:
# 1: target device (overrides OOBI_TARGET_DEVICE)
# 2: build output directory, default: /tmp/ggml-build
#
# Example usage:
# ./build_ggml.sh <target-device> <build-dir>

set -xeuo pipefail

VENV_DIR="${OOBI_VENV_DIR:-ggml-build.venv}"
ROOT_DIR="${OOBI_SCRATCH_DIR:-/tmp}"
PYTHON="${PYTHON:-/usr/bin/python3}"
TARGET_DEVICE_NAME="${1:-${OOBI_TARGET_DEVICE}}"
BUILD_DIR="${2:-/tmp/ggml-build}"

TD="$(cd $(dirname $0) && pwd)"
BUILD_DIR="$(realpath ${BUILD_DIR})"

# Setup virtual environment.
VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"

pushd "${ROOT_DIR}"

# We clone a fork of ggml which includes additional benchmark logging.
git clone --branch benchmark https://github.com/mariecwhite/ggml.git
pushd ggml

# Absolute path to the clone; used below after we pop back out.
REPO_DIR="$(pwd)"

# Configure: Android devices cross-compile with the NDK toolchain, everything
# else builds for the host.
if [[ "${TARGET_DEVICE_NAME}" =~ ^(pixel-4|pixel-6-pro|moto-edge-x30)$ ]]; then
  # NOTE(review): "-march=armv8.4a" is the clang spelling accepted by the NDK;
  # GCC would require "armv8.4-a". Confirm if the toolchain ever changes.
  cmake -GNinja \
    -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
    -DANDROID_ABI=arm64-v8a \
    -DANDROID_PLATFORM=android-23 \
    -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod \
    -B "${BUILD_DIR}" .
else
  cmake -GNinja -B "${BUILD_DIR}" .
fi

# Build only the targets we need (identical for both configurations).
cmake --build "${BUILD_DIR}" -t gpt-2 gpt-2-quantize

popd # ggml
popd # ROOT_DIR

# Generate FP32 and FP16 versions of GPT2 117M (Small).
pushd "${BUILD_DIR}"

GPT_VARIANT="117M"
"${REPO_DIR}/examples/gpt-2/download-model.sh" "${GPT_VARIANT}"
# Generate FP32.
python "${REPO_DIR}/examples/gpt-2/convert-ckpt-to-ggml.py" models/gpt-2-${GPT_VARIANT}/ 0
# Generate FP16.
python "${REPO_DIR}/examples/gpt-2/convert-ckpt-to-ggml.py" models/gpt-2-${GPT_VARIANT}/ 1
# Generate INT4. Keep this disabled until we want to use it.
#./bin/gpt-2-quantize models/gpt-2-${GPT_VARIANT}/ggml-model-f16.bin models/gpt-2-${GPT_VARIANT}/ggml-model-q4_0.bin 2

popd # BUILD_DIR

0 comments on commit 6f3f640

Please sign in to comment.