Skip to content

Commit

Permalink
Add GPT2 GGML Android Benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
mariecwhite committed Sep 11, 2023
1 parent 89817e9 commit 974bdbe
Show file tree
Hide file tree
Showing 13 changed files with 554 additions and 147 deletions.
128 changes: 126 additions & 2 deletions .github/workflows/run_ggml_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ jobs:
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
RESULTS_DIR: results-dir
TARGET_DEVICE: c2-standard-16
GGML_BUILD_DIR: build-dir
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
Expand All @@ -74,7 +75,14 @@ jobs:
run: |
echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
mkdir "${RESULTS_DIR}"
- name: "Benchmarking GGML CPU"
- name: "Building GGML CPU"
run: |
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
./experimental/ggml/build_ggml.sh \
"${TARGET_DEVICE}" \
"${GGML_BUILD_DIR}"
- name: "Benchmarking GGML"
env:
GGML_RESULTS_JSON: ggml.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
Expand All @@ -83,6 +91,122 @@ jobs:
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
./experimental/ggml/benchmark_ggml.sh \
"${TARGET_DEVICE}"\
"${TARGET_DEVICE}" \
"${GGML_BUILD_DIR}" \
"${RESULTS_PATH}"
gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
build_ggml_for_android:
needs: [setup]
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
- environment=prod
- cpu
- os-family=Linux
env:
GGML_BUILD_DIR: ggml-build
TARGET_DEVICE: pixel-6-pro
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
outputs:
ggml-build-dir: ${{ env.GGML_BUILD_DIR }}
ggml-build-dir-archive: ${{ steps.archive.outputs.ggml-build-dir-archive }}
ggml-build-dir-gcs-artifact: ${{ steps.upload.outputs.ggml-build-dir-gcs-artifact }}
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- name: "Building GGML"
run: |
mkdir -p "${GGML_BUILD_DIR}"
docker run --mount="type=bind,src="${PWD}",target=/work" --workdir="/work" \
"gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e" \
./experimental/ggml/build_ggml.sh \
"${TARGET_DEVICE}" \
"${GGML_BUILD_DIR}"
- name: "Creating build dir archive"
id: archive
env:
GGML_BUILD_DIR_ARCHIVE: ${{ env.GGML_BUILD_DIR }}.tgz
run: |
tar -zcvf ${GGML_BUILD_DIR_ARCHIVE} ${GGML_BUILD_DIR}
echo "ggml-build-dir-archive=${GGML_BUILD_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}"
- name: "Uploading build dir archive"
id: upload
env:
GGML_BUILD_DIR_ARCHIVE: ${{ steps.archive.outputs.ggml-build-dir-archive }}
GGML_BUILD_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.ggml-build-dir-archive }}
run: |
gcloud storage cp "${GGML_BUILD_DIR_ARCHIVE}" "${GGML_BUILD_DIR_GCS_ARTIFACT}"
echo "ggml-build-dir-gcs-artifact=${GGML_BUILD_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}"
benchmark_on_pixel-6-pro:
needs: [setup, build_ggml_for_android]
runs-on:
- self-hosted # must come first
- runner-group=${{ needs.setup.outputs.runner-group }}
- environment=prod
- machine-type=pixel-6-pro
env:
BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
RESULTS_DIR: results-dir
TARGET_DEVICE: pixel-6-pro
GGML_BUILD_DIR: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir }}
GGML_BUILD_DIR_ARCHIVE: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-archive }}
GGML_BUILD_DIR_GCS_ARTIFACT: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-gcs-artifact }}
steps:
- name: "Checking out PR repository"
uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
- name: "Setup"
id: setup
run: |
echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
mkdir "${RESULTS_DIR}"
- name: "Downloading and unpacking GGML build"
run: |
gcloud storage cp "${GGML_BUILD_DIR_GCS_ARTIFACT}" "${GGML_BUILD_DIR_ARCHIVE}"
tar -xvf "${GGML_BUILD_DIR_ARCHIVE}"
- name: "Benchmarking GGML on Android"
env:
GGML_RESULTS_JSON: ggml-android.json
RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
run: |
RESULTS_PATH="${RESULTS_DIR}/${GGML_RESULTS_JSON}"
./experimental/ggml/benchmark_ggml.sh "${TARGET_DEVICE}" "${GGML_BUILD_DIR}" "${RESULTS_PATH}"
cat "${RESULTS_PATH}"
# adb push "./experimental/ggml/set_android_scaling_governor.sh" "/data/local/tmp"
# adb shell "chmod +x /data/local/tmp/set_android_scaling_governor.sh"
# adb shell "su root sh /data/local/tmp/set_android_scaling_governor.sh performance"
#
# adb push "${GGML_BUILD_DIR}/bin/gpt-2" "/data/local/tmp"
# adb shell "chmod +x /data/local/tmp/gpt-2"
# adb push "${GGML_BUILD_DIR}/models/gpt-2-117M/ggml-model-f32.bin" "/data/local/tmp"
#
# echo "Benchmarking ggml-model-f32.bin with 1 thread"
# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 1'
#
# echo "Benchmarking ggml-model-f32.bin with 4 threads"
# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 4'
#
# echo "Benchmarking ggml-model-f32.bin with 8 threads"
# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 8'
#
# echo "Benchmarking ggml-model-f32.bin with 16 threads"
# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f32.bin --prompt "Once upon a time" --seed 0 --threads 16'
#
# echo "Removing ggml-model-f32.bin"
# adb shell "rm /data/local/tmp/ggml-model-f32.bin"
#
# adb push "${GGML_BUILD_DIR}/models/gpt-2-117M/ggml-model-f16.bin" "/data/local/tmp"
#
# echo "Benchmarking ggml-model-f16.bin with 1 thread"
# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 1'
#
# echo "Benchmarking ggml-model-f16.bin with 4 threads"
# adb shell 'taskset f0 /data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 4'
#
# echo "Benchmarking ggml-model-f16.bin with 8 threads"
# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 8'
#
# echo "Benchmarking ggml-model-f16.bin with 16 threads"
# adb shell '/data/local/tmp/gpt-2 --model /data/local/tmp/ggml-model-f16.bin --prompt "Once upon a time" --seed 0 --threads 16'
5 changes: 3 additions & 2 deletions common_benchmark_suite/openxla/benchmark/devices/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from . import gcp_devices, host_devices
from . import gcp_devices, host_devices, mobile_devices

# All defined device specs.
ALL_DEVICES = gcp_devices.ALL_DEVICES + host_devices.ALL_DEVICES
ALL_DEVICES = gcp_devices.ALL_DEVICES + host_devices.ALL_DEVICES + mobile_devices.ALL_DEVICES
ALL_DEVICE_NAMES = [device.name for device in ALL_DEVICES]
22 changes: 22 additions & 0 deletions common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from openxla.benchmark import def_types

# Device spec for the Google Pixel 6 Pro, the Android benchmark target.
# The `name` matches the CI runner label (machine-type=pixel-6-pro) and the
# device name passed to the benchmark scripts.
MOBILE_PIXEL_6_PRO = def_types.DeviceSpec(
    name="pixel-6-pro",
    host_type="mobile",
    host_model="pixel-6-pro",
    host_environment="android",
    # CPU-only benchmarking; the CPU cores implement the Armv8.2-A ISA.
    accelerator_type="cpu",
    accelerator_model="armv8.2-a",
    accelerator_architecture="armv8.2-a",
    accelerator_attributes={
        # Total core count across all clusters.
        "num_of_cores": 8,
    },
)

# All mobile device specs defined in this module; aggregated into
# devices.ALL_DEVICES by the package __init__.
ALL_DEVICES = [MOBILE_PIXEL_6_PRO]
20 changes: 20 additions & 0 deletions devtools/docker/dockerfiles/android.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# An image for cross-compiling towards Android.

FROM gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251

# Android NDK release to install; overridable at image build time.
ARG NDK_VERSION=r25c
# Scratch directory for the NDK download; deleted at the end of the RUN step.
WORKDIR /install-ndk

# Path consumers use to locate the NDK toolchain. Use the key=value ENV form;
# the legacy space-separated form is discouraged by Docker.
ENV ANDROID_NDK="/usr/src/android-ndk-${NDK_VERSION}"

# Download and unpack the NDK into /usr/src, then remove the scratch
# directory (and the downloaded zip inside it) to keep the layer small.
RUN wget -q "https://dl.google.com/android/repository/android-ndk-${NDK_VERSION}-linux.zip" \
  && unzip -q "android-ndk-${NDK_VERSION}-linux.zip" -d /usr/src/ \
  && rm -rf /install-ndk

WORKDIR /
3 changes: 2 additions & 1 deletion devtools/docker/image_deps.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
"cuda11.8-cudnn8.9": ["base"],
"db_import": [],
"mmperf": ["base"],
"convperf": ["base"]
"convperf": ["base"],
"android": ["base"]
}
1 change: 1 addition & 0 deletions devtools/docker/prod_digests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4d
gcr.io/iree-oss/openxla-benchmark/db_import@sha256:3de8a702b51ca1906fc2ef5bab2415a79e46bc132f2ceba994215539dd0ecdd4
gcr.io/iree-oss/openxla-benchmark/mmperf@sha256:c972ce5b2144de0786f103611fecbd88d93dd45ecd068f8c97d98c08677cee57
gcr.io/iree-oss/openxla-benchmark/convperf@sha256:0807d5e8144900752cfae72f3aa4d12530b408f73fc6f010a6cbad11cc09832c
gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e
96 changes: 50 additions & 46 deletions experimental/ggml/benchmark_ggml.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,20 @@
# OOBI_VENV_DIR: path to create Python virtualenv, default: ggml-benchmarks.venv
# OOBI_TARGET_DEVICE: target benchmark device, can also be specified the first
# argument.
# OOBI_BUILD_DIR: path to the GGMl build directory.
# OOBI_OUTPUT: path to output benchmark results, can also be specified the
# second argument.
# OOBI_SCRATCH_DIR: the directory to place temporary benchmarking artifacts.
#
# Example usage:
# ./benchmark_ggml.sh c2-standard-16 /tmp/results.json
# ./benchmark_ggml.sh <target-device> <build-dir> <result-path>

set -xeuo pipefail

VENV_DIR="${OOBI_VENV_DIR:-ggml-benchmarks.venv}"
ROOT_DIR="${OOBI_SCRATCH_DIR:-/tmp}"
PYTHON="${PYTHON:-/usr/bin/python3}"
TARGET_DEVICE="${1:-${OOBI_TARGET_DEVICE}}"
OUTPUT_PATH="${2:-${OOBI_OUTPUT}}"
TARGET_DEVICE_NAME="${1:-${OOBI_TARGET_DEVICE}}"
BUILD_DIR="${2:-${OOBI_BUILD_DIR}}"
OUTPUT_PATH="${3:-${OOBI_OUTPUT}}"

TD="$(cd $(dirname $0) && pwd)"

Expand All @@ -35,33 +35,12 @@ VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"
OUTPUT_PATH="$(realpath ${OUTPUT_PATH})"
"${TD}/../../comparative_benchmark/scripts/create_results_json.sh" "${OUTPUT_PATH}"

pushd "${ROOT_DIR}"

# We clone a fork of ggml which includes additional benchmark logging.
git clone --branch benchmark https://github.com/mariecwhite/ggml.git
pushd ggml

# Build
mkdir build
pushd build
cmake ..
make -j8

# Generate FP32, FP16 and INT4 versions of GPT2 117M (Small).
GPT_VARIANT="117M"
../examples/gpt-2/download-model.sh "${GPT_VARIANT}"
# Generate FP32.
python ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 0
# Generate FP16.
python ../examples/gpt-2/convert-ckpt-to-ggml.py models/gpt-2-${GPT_VARIANT}/ 1
# Generate INT4.
./bin/gpt-2-quantize models/gpt-2-${GPT_VARIANT}/ggml-model-f16.bin models/gpt-2-${GPT_VARIANT}/ggml-model-q4_0.bin 2
pushd "${BUILD_DIR}"

PROMPT="Once upon a time"
BENCHMARK_BINARY="$(realpath bin/gpt-2)"
WARMUP_ITERAIONS=2
NUM_ITERATIONS=10
declare -a NUM_THREADS=(1 8 16)

MODEL="$(realpath models/gpt-2-117M/ggml-model-f32.bin)"

Expand All @@ -81,26 +60,51 @@ declare -a DATA_TYPES=(
"int4"
)

declare -a args=(
--warmup_iterations "${WARMUP_ITERAIONS}"
--iterations "${NUM_ITERATIONS}"
--benchmark_binary "${BENCHMARK_BINARY}"
--prompt "${PROMPT}"
--seed 0
--output "${OUTPUT_PATH}"
--target_device "${TARGET_DEVICE_NAME}"
--verbose
)

if [[ "${TARGET_DEVICE_NAME}" =~ ^(pixel-4|pixel-6-pro|moto-edge-x30)$ ]]; then
BENCHMARK_SCRIPT="run_benchmarks_android.py"
# Pixel 6 has a maximum of 8 cores.
THREADS="1,4,8"
TASKSETS="80,f0,ff"

args+=(
--threads "${THREADS}"
--tasksets "${TASKSETS}"
)

# Setup mobile device for benchmarking.
adb push "${TD}/set_android_scaling_governor.sh" "/data/local/tmp"
adb shell "chmod +x /data/local/tmp/set_android_scaling_governor.sh"
adb shell "su root sh /data/local/tmp/set_android_scaling_governor.sh performance"

else
# c2-standard-16 has 16 cores.
BENCHMARK_SCRIPT="run_benchmarks.py"
THREADS="1,8,16"

args+=(
--threads "${THREADS}"
)
fi

for i in ${!BENCHMARK_NAMES[@]}; do
MODEL="$(realpath models/gpt-2-117M/${MODELS[$i]})"

for threads in "${NUM_THREADS[@]}"; do
"${TD}/benchmark.py" \
--benchmark_name "${BENCHMARK_NAMES[$i]}" \
--warmup_iterations "${WARMUP_ITERAIONS}" \
--iterations "${NUM_ITERATIONS}" \
--benchmark_binary "${BENCHMARK_BINARY}" \
--model "${MODEL}" \
--data_type "${DATA_TYPES[$i]}" \
--prompt "${PROMPT}" \
--seed 0 \
--threads "${threads}" \
--output "${OUTPUT_PATH}" \
--target_device "${TARGET_DEVICE}" \
--verbose
done
args+=(
--benchmark_name "${BENCHMARK_NAMES[$i]}"
--model "${MODEL}"
--data_type "${DATA_TYPES[$i]}"
)
"${TD}/${BENCHMARK_SCRIPT}" "${args[@]}"
done

popd # build
popd # ggml
popd # ROOT_DIR
popd # BUILD_DIR
Loading

0 comments on commit 974bdbe

Please sign in to comment.