Add GPT2 GGML x86 and Android Benchmarks
mariecwhite committed Sep 11, 2023
1 parent 2f011c2 commit 55d152a
Showing 17 changed files with 867 additions and 3 deletions.
174 changes: 174 additions & 0 deletions .github/workflows/run_ggml_benchmark.yml
@@ -0,0 +1,174 @@
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# GGML Benchmarks Workflow.

name: GGML Benchmarks

on:
  workflow_dispatch:

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit).
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

env:
  GCS_DIR: gs://openxla-github-actions-${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}-artifacts/${{ github.run_id }}/${{ github.run_attempt }}

jobs:
  setup:
    runs-on: ubuntu-22.04
    outputs:
      runner-group: ${{ steps.configure.outputs.runner-group }}
      benchmark-gcs-dir: ${{ steps.configure.outputs.benchmark-gcs-dir }}
    steps:
      - name: "Checking out PR repository"
        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
      - name: "Configuring CI options"
        id: configure
        env:
          RUNNER_GROUP: ${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
        run: |
          # Just informative logging. There should only be two commits in the
          # history here, but limiting the depth helps when copying from a local
          # repo instead of using checkout, e.g. with
          # https://github.com/nektos/act where there will be more.
          git log --oneline --graph --max-count=3
          # Workflow jobs can't access `env` in `runs-on`, so we need to make
          # `runner-group` a job output variable.
          echo "runner-group=${RUNNER_GROUP}" >> "${GITHUB_OUTPUT}"
          # For presubmit testing, the result artifacts are uploaded to the
          # temporary workflow GCS dir. In postsubmit, the result artifacts are
          # uploaded to the comparative benchmark GCS dir.
          if [[ "${RUNNER_GROUP}" == "presubmit" ]]; then
            BENCHMARK_GCS_DIR="${GCS_DIR}/comparative-benchmark-artifacts"
          else
            BENCHMARK_GCS_DIR="gs://comparative-benchmark-artifacts/$(date +'%Y-%m-%d').$(date +'%s')"
          fi
          echo "benchmark-gcs-dir=${BENCHMARK_GCS_DIR}" >> "${GITHUB_OUTPUT}"
  benchmark_on_c2-standard-16:
    needs: [setup]
    runs-on:
      - self-hosted # must come first
      - runner-group=${{ needs.setup.outputs.runner-group }}
      - environment=prod
      - machine-type=c2-standard-16
    env:
      BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
      RESULTS_DIR: results-dir
      TARGET_DEVICE: c2-standard-16
      GGML_BUILD_DIR: build-dir
    steps:
      - name: "Checking out PR repository"
        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
      - name: "Setup"
        id: setup
        run: |
          echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
          mkdir "${RESULTS_DIR}"
      - name: "Building GGML CPU"
        run: |
          docker run --mount="type=bind,src=${PWD},target=/work" --workdir="/work" \
            "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
            ./experimental/ggml/build_ggml.sh \
            "${TARGET_DEVICE}" \
            "${GGML_BUILD_DIR}"
      - name: "Benchmarking GGML"
        env:
          GGML_RESULTS_JSON: ggml.json
          RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
        run: |
          RESULTS_PATH="${RESULTS_DIR}/${GGML_RESULTS_JSON}"
          docker run --mount="type=bind,src=${PWD},target=/work" --workdir="/work" \
            "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
            ./experimental/ggml/benchmark_ggml.sh \
            "${TARGET_DEVICE}" \
            "${GGML_BUILD_DIR}" \
            "${RESULTS_PATH}"
          gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
  build_ggml_for_android:
    needs: [setup]
    runs-on:
      - self-hosted # must come first
      - runner-group=${{ needs.setup.outputs.runner-group }}
      - environment=prod
      - cpu
      - os-family=Linux
    env:
      GGML_BUILD_DIR: ggml-build
      TARGET_DEVICE: pixel-6-pro
      BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
    outputs:
      ggml-build-dir: ${{ env.GGML_BUILD_DIR }}
      ggml-build-dir-archive: ${{ steps.archive.outputs.ggml-build-dir-archive }}
      ggml-build-dir-gcs-artifact: ${{ steps.upload.outputs.ggml-build-dir-gcs-artifact }}
    steps:
      - name: "Checking out PR repository"
        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
      - name: "Building GGML"
        run: |
          mkdir -p "${GGML_BUILD_DIR}"
          docker run --mount="type=bind,src=${PWD},target=/work" --workdir="/work" \
            "gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e" \
            ./experimental/ggml/build_ggml.sh \
            "${TARGET_DEVICE}" \
            "${GGML_BUILD_DIR}"
      - name: "Creating build dir archive"
        id: archive
        env:
          GGML_BUILD_DIR_ARCHIVE: ${{ env.GGML_BUILD_DIR }}.tgz
        run: |
          tar -zcvf "${GGML_BUILD_DIR_ARCHIVE}" "${GGML_BUILD_DIR}"
          echo "ggml-build-dir-archive=${GGML_BUILD_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}"
      - name: "Uploading build dir archive"
        id: upload
        env:
          GGML_BUILD_DIR_ARCHIVE: ${{ steps.archive.outputs.ggml-build-dir-archive }}
          GGML_BUILD_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.ggml-build-dir-archive }}
        run: |
          gcloud storage cp "${GGML_BUILD_DIR_ARCHIVE}" "${GGML_BUILD_DIR_GCS_ARTIFACT}"
          echo "ggml-build-dir-gcs-artifact=${GGML_BUILD_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}"
  benchmark_on_pixel-6-pro:
    needs: [setup, build_ggml_for_android]
    runs-on:
      - self-hosted # must come first
      - runner-group=${{ needs.setup.outputs.runner-group }}
      - environment=prod
      - machine-type=pixel-6-pro
    env:
      BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
      RESULTS_DIR: results-dir
      TARGET_DEVICE: pixel-6-pro
      GGML_BUILD_DIR: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir }}
      GGML_BUILD_DIR_ARCHIVE: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-archive }}
      GGML_BUILD_DIR_GCS_ARTIFACT: ${{ needs.build_ggml_for_android.outputs.ggml-build-dir-gcs-artifact }}
    steps:
      - name: "Checking out PR repository"
        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
      - name: "Setup"
        id: setup
        run: |
          echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
          mkdir "${RESULTS_DIR}"
      - name: "Downloading and unpacking GGML build"
        run: |
          gcloud storage cp "${GGML_BUILD_DIR_GCS_ARTIFACT}" "${GGML_BUILD_DIR_ARCHIVE}"
          tar -xvf "${GGML_BUILD_DIR_ARCHIVE}"
      - name: "Benchmarking GGML on Android"
        env:
          GGML_RESULTS_JSON: ggml-android.json
          RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
        run: |
          RESULTS_PATH="${RESULTS_DIR}/${GGML_RESULTS_JSON}"
          ./experimental/ggml/benchmark_ggml.sh "${TARGET_DEVICE}" "${GGML_BUILD_DIR}" "${RESULTS_PATH}"
          gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
@@ -137,6 +137,17 @@
template=EFFICIENTNETB7_FP32_TF_600X600X3XF32_BATCH_TEMPLATE,
batch_sizes=[1, 64, 128])

# GPT2LMHead models.
# Model implementation from https://huggingface.co/docs/transformers/model_doc/gpt2#transformers.TFGPT2Model.
GPT2LMHEAD_TF_IMPL = def_types.ModelImplementation(
    name="GPT2_TF",
    tags=["transformer-decoder", "gpt2", "ggml"],
    framework_type=def_types.ModelFrameworkType.TF_V2,
    module_path=f"{utils.MODELS_MODULE_PATH}.tf.gpt2.gpt2lmhead_model",
    source_info=
    "https://huggingface.co/docs/transformers/model_doc/gpt2#transformers.TFGPT2Model",
)

ALL_MODELS = list(
    itertools.chain(
        T5_LARGE_FP32_TF_512XI32_BATCHES.values(),
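For orientation, the sketch below exercises the upstream Hugging Face model that the new GPT2_TF implementation's source_info points at. It is an editor illustration, not code from this commit: it assumes the transformers and tensorflow packages are installed and uses the public "gpt2" checkpoint, whereas the benchmark itself runs a GGML conversion of GPT-2 117M.

# Illustrative only: loads transformers.TFGPT2LMHeadModel, the model class
# referenced by GPT2LMHEAD_TF_IMPL.source_info above.
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = TFGPT2LMHeadModel.from_pretrained("gpt2")

# Same prompt the GGML benchmark script uses.
inputs = tokenizer("Once upon a time", return_tensors="tf")
outputs = model(**inputs)
print(outputs.logits.shape)  # (batch, sequence_length, vocab_size)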
1 change: 1 addition & 0 deletions common_benchmark_suite/openxla/benchmark/def_types.py
@@ -17,6 +17,7 @@ class ModelFrameworkType(Enum):
  TF_V2 = "tensorflow_v2"
  PYTORCH = "pytorch"
  JAX = "jax"
  GGML = "ggml"


@dataclass(frozen=True)
5 changes: 3 additions & 2 deletions common_benchmark_suite/openxla/benchmark/devices/__init__.py
@@ -4,7 +4,8 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

-from . import gcp_devices, host_devices
+from . import gcp_devices, host_devices, mobile_devices

 # All defined device specs.
-ALL_DEVICES = gcp_devices.ALL_DEVICES + host_devices.ALL_DEVICES
+ALL_DEVICES = gcp_devices.ALL_DEVICES + host_devices.ALL_DEVICES + mobile_devices.ALL_DEVICES
 ALL_DEVICE_NAMES = [device.name for device in ALL_DEVICES]
22 changes: 22 additions & 0 deletions common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py
@@ -0,0 +1,22 @@
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from openxla.benchmark import def_types

MOBILE_PIXEL_6_PRO = def_types.DeviceSpec(
    name="pixel-6-pro",
    host_type="mobile",
    host_model="pixel-6-pro",
    host_environment="android",
    accelerator_type="cpu",
    accelerator_model="armv8.2-a",
    accelerator_architecture="armv8.2-a",
    accelerator_attributes={
        "num_of_cores": 8,
    },
)

ALL_DEVICES = [MOBILE_PIXEL_6_PRO]
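To show how the new spec surfaces through the registry extended in devices/__init__.py above, here is a small editor sketch (not part of this commit); it assumes the openxla.benchmark package from common_benchmark_suite is importable.

# Editor illustration only: looks up the Pixel 6 Pro spec by name via the
# aggregated device registry.
from openxla.benchmark import devices

pixel_6_pro = next(d for d in devices.ALL_DEVICES if d.name == "pixel-6-pro")
assert pixel_6_pro.host_environment == "android"
print(pixel_6_pro.accelerator_attributes["num_of_cores"])  # -> 8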
20 changes: 20 additions & 0 deletions devtools/docker/dockerfiles/android.Dockerfile
@@ -0,0 +1,20 @@
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# An image for cross-compiling towards Android.

FROM gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251

ARG NDK_VERSION=r25c
WORKDIR /install-ndk

ENV ANDROID_NDK "/usr/src/android-ndk-${NDK_VERSION}"

RUN wget -q "https://dl.google.com/android/repository/android-ndk-${NDK_VERSION}-linux.zip" \
    && unzip -q "android-ndk-${NDK_VERSION}-linux.zip" -d /usr/src/ \
    && rm -rf /install-ndk

WORKDIR /
3 changes: 2 additions & 1 deletion devtools/docker/image_deps.json
@@ -3,5 +3,6 @@
   "cuda11.8-cudnn8.9": ["base"],
   "db_import": [],
   "mmperf": ["base"],
-  "convperf": ["base"]
+  "convperf": ["base"],
+  "android": ["base"]
 }
1 change: 1 addition & 0 deletions devtools/docker/prod_digests.txt
@@ -3,3 +3,4 @@ gcr.io/iree-oss/openxla-benchmark/cuda11.8-cudnn8.9@sha256:f43984cd6c16ad1faad4d
gcr.io/iree-oss/openxla-benchmark/db_import@sha256:3de8a702b51ca1906fc2ef5bab2415a79e46bc132f2ceba994215539dd0ecdd4
gcr.io/iree-oss/openxla-benchmark/mmperf@sha256:c972ce5b2144de0786f103611fecbd88d93dd45ecd068f8c97d98c08677cee57
gcr.io/iree-oss/openxla-benchmark/convperf@sha256:0807d5e8144900752cfae72f3aa4d12530b408f73fc6f010a6cbad11cc09832c
gcr.io/iree-oss/openxla-benchmark/android@sha256:3211ade3856dfd46469e573f17baaf367f9c0830dfcc70c6d85891447cadc39e
Empty file added experimental/ggml/__init__.py
109 changes: 109 additions & 0 deletions experimental/ggml/benchmark_ggml.sh
@@ -0,0 +1,109 @@
#!/bin/bash
#
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# Environment variables:
# PYTHON: Python interpreter, default: /usr/bin/python3
# OOBI_VENV_DIR: path to create Python virtualenv, default: ggml-benchmarks.venv
# OOBI_TARGET_DEVICE: target benchmark device; can also be specified as the
#   first argument.
# OOBI_BUILD_DIR: path to the GGML build directory; can also be specified as
#   the second argument.
# OOBI_OUTPUT: path to output benchmark results; can also be specified as the
#   third argument.
#
# Example usage:
# ./benchmark_ggml.sh <target-device> <build-dir> <result-path>
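#
# For concreteness, invocations matching the two devices wired up in the
# GitHub Actions workflow above might look like the following (the output
# paths are illustrative, not taken from this commit):
#   ./benchmark_ggml.sh c2-standard-16 build-dir /tmp/ggml.json
#   ./benchmark_ggml.sh pixel-6-pro ggml-build /tmp/ggml-android.json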

set -xeuo pipefail

VENV_DIR="${OOBI_VENV_DIR:-ggml-benchmarks.venv}"
PYTHON="${PYTHON:-"$(which python3)"}"
TARGET_DEVICE_NAME="${1:-${OOBI_TARGET_DEVICE}}"
BUILD_DIR="${2:-${OOBI_BUILD_DIR}}"
OUTPUT_PATH="${3:-${OOBI_OUTPUT}}"

TD="$(cd "$(dirname "$0")" && pwd)"

# Setup virtual environment.
VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"

# Initialize results json.
OUTPUT_PATH="$(realpath "${OUTPUT_PATH}")"
"${TD}/../../comparative_benchmark/scripts/create_results_json.sh" "${OUTPUT_PATH}"

pushd "${BUILD_DIR}"

PROMPT="Once upon a time"
BENCHMARK_BINARY="$(realpath bin/gpt-2)"
WARMUP_ITERATIONS=2
NUM_ITERATIONS=10

MODEL="$(realpath models/gpt-2-117M/ggml-model-f32.bin)"

declare -a BENCHMARK_NAMES=(
  "models/GPT2LMHEAD_FP32_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
  "models/GPT2LMHEAD_FP16_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
  "models/GPT2LMHEAD_INT4_GGML/inputs/INPUT_DATA_MODEL_DEFAULT"
)
declare -a MODELS=(
  ggml-model-f32.bin
  ggml-model-f16.bin
  ggml-model-q4_0.bin
)
declare -a DATA_TYPES=(
  "fp32"
  "fp16"
  "int4"
)

declare -a args=(
  --warmup_iterations "${WARMUP_ITERATIONS}"
  --iterations "${NUM_ITERATIONS}"
  --benchmark_binary "${BENCHMARK_BINARY}"
  --prompt "${PROMPT}"
  --seed 0
  --output "${OUTPUT_PATH}"
  --target_device "${TARGET_DEVICE_NAME}"
  --verbose
)

if [[ "${TARGET_DEVICE_NAME}" =~ ^(pixel-4|pixel-6-pro|moto-edge-x30)$ ]]; then
  BENCHMARK_SCRIPT="run_benchmarks_android.py"
  # Pixel 6 has a maximum of 8 cores.
  THREADS="1,4,8"
  # taskset CPU affinity masks: 80 -> core 7 only, f0 -> cores 4-7, ff -> all 8 cores.
  TASKSETS="80,f0,ff"

  args+=(
    --threads "${THREADS}"
    --tasksets "${TASKSETS}"
  )

  # Setup mobile device for benchmarking.
  adb push "${TD}/set_android_scaling_governor.sh" "/data/local/tmp"
  adb shell "chmod +x /data/local/tmp/set_android_scaling_governor.sh"
  adb shell "su root sh /data/local/tmp/set_android_scaling_governor.sh performance"
else
  BENCHMARK_SCRIPT="run_benchmarks.py"
  # c2-standard-16 has 16 cores.
  THREADS="1,8,16"

  args+=(
    --threads "${THREADS}"
  )
fi

for i in "${!BENCHMARK_NAMES[@]}"; do
  MODEL="$(realpath "models/gpt-2-117M/${MODELS[$i]}")"
  # Build the per-benchmark argument list from a copy of the common args so
  # that --benchmark_name/--model/--data_type do not accumulate across
  # iterations.
  declare -a benchmark_args=(
    "${args[@]}"
    --benchmark_name "${BENCHMARK_NAMES[$i]}"
    --model "${MODEL}"
    --data_type "${DATA_TYPES[$i]}"
  )
  "${TD}/${BENCHMARK_SCRIPT}" "${benchmark_args[@]}"
done

popd # BUILD_DIR
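The run_benchmarks.py and run_benchmarks_android.py scripts that this driver invokes are not included in the excerpt above. As a rough guide to the interface they need to expose, the following is an editor sketch built only from the flags benchmark_ggml.sh passes; the names, defaults, and required-ness below are assumptions, not taken from the commit.

import argparse


def parse_arguments() -> argparse.Namespace:
  """Parses the flags that benchmark_ggml.sh passes to the benchmark runner."""
  parser = argparse.ArgumentParser(description="Run GGML GPT-2 benchmarks.")
  parser.add_argument("--benchmark_name", required=True)
  parser.add_argument("--benchmark_binary", required=True)
  parser.add_argument("--model", required=True)
  parser.add_argument("--data_type", choices=["fp32", "fp16", "int4"], required=True)
  parser.add_argument("--prompt", default="Once upon a time")
  parser.add_argument("--seed", type=int, default=0)
  parser.add_argument("--warmup_iterations", type=int, default=2)
  parser.add_argument("--iterations", type=int, default=10)
  parser.add_argument("--threads", default="1",
                      help="Comma-separated thread counts, e.g. 1,8,16.")
  parser.add_argument("--tasksets", default=None,
                      help="Comma-separated CPU affinity masks (Android only).")
  parser.add_argument("--target_device", required=True)
  parser.add_argument("--output", required=True)
  parser.add_argument("--verbose", action="store_true")
  return parser.parse_args()


if __name__ == "__main__":
  print(parse_arguments())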