
Commit

Add GGML
mariecwhite committed Sep 4, 2023
1 parent afad4da commit 7a2dacc
Showing 8 changed files with 493 additions and 0 deletions.
88 changes: 88 additions & 0 deletions .github/workflows/run_ggml_benchmark.yml
@@ -0,0 +1,88 @@
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# GGML Benchmarks Workflow.

name: GGML Benchmarks

on:
  workflow_dispatch:
  pull_request:

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit).
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

env:
  GCS_DIR: gs://openxla-github-actions-${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}-artifacts/${{ github.run_id }}/${{ github.run_attempt }}

jobs:
  setup:
    runs-on: ubuntu-22.04
    outputs:
      runner-group: ${{ steps.configure.outputs.runner-group }}
      benchmark-gcs-dir: ${{ steps.configure.outputs.benchmark-gcs-dir }}
    steps:
      - name: "Checking out PR repository"
        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
      - name: "Configuring CI options"
        id: configure
        env:
          RUNNER_GROUP: ${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
        run: |
          # Just informative logging. There should only be two commits in the
          # history here, but limiting the depth helps when copying from a local
          # repo instead of using checkout, e.g. with
          # https://github.com/nektos/act where there will be more.
          git log --oneline --graph --max-count=3
          # Workflow jobs can't access `env` in `runs-on`, so we need to make
          # `runner-group` a job output variable.
          echo "runner-group=${RUNNER_GROUP}" > "${GITHUB_OUTPUT}"
          # For presubmit testing, the result artifacts are uploaded to the
          # temporary workflow GCS dir. In postsubmit, the result artifacts are
          # uploaded to the comparative benchmark GCS dir.
          if [[ "${RUNNER_GROUP}" == "presubmit" ]]; then
            BENCHMARK_GCS_DIR="${GCS_DIR}/comparative-benchmark-artifacts"
          else
            BENCHMARK_GCS_DIR="gs://comparative-benchmark-artifacts/$(date +'%Y-%m-%d').$(date +'%s')"
          fi
          echo "benchmark-gcs-dir=${BENCHMARK_GCS_DIR}" >> "${GITHUB_OUTPUT}"

  benchmark_on_c2-standard-16:
    needs: [setup]
    runs-on:
      - self-hosted  # must come first
      - runner-group=${{ needs.setup.outputs.runner-group }}
      - environment=prod
      - machine-type=c2-standard-16
    env:
      BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
      RESULTS_DIR: results-dir
      TARGET_DEVICE: c2-standard-16
    steps:
      - name: "Checking out PR repository"
        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
      - name: "Setup"
        id: setup
        run: |
          echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
          mkdir "${RESULTS_DIR}"
      - name: "Benchmarking GGML CPU"
        env:
          GGML_RESULTS_JSON: ggml.json
          RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
        run: |
          RESULTS_PATH="${RESULTS_DIR}/${GGML_RESULTS_JSON}"
          docker run --mount="type=bind,src=${PWD},target=/work" --workdir="/work" \
            "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
            ./experimental/ggml/benchmark_ggml.sh \
            "${TARGET_DEVICE}" \
            "${RESULTS_PATH}"
          gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
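
Note (not part of the commit): the "Benchmarking GGML CPU" step boils down to running experimental/ggml/benchmark_ggml.sh inside the pinned Docker image with the checkout mounted at /work. A rough local equivalent, sketched in Python purely for illustration, is below; the image digest, script path, and arguments are copied from the step above, while the "results-dir" name simply mirrors RESULTS_DIR. The trailing gcloud upload is CI-specific and omitted here.

import subprocess
from pathlib import Path

IMAGE = ("gcr.io/iree-oss/openxla-benchmark/base@sha256:"
         "1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251")

# Run from the repository root. "results-dir" mirrors the workflow's
# RESULTS_DIR; the result path is relative to /work inside the container.
results_dir = Path("results-dir")
results_dir.mkdir(exist_ok=True)

subprocess.run(
    [
        "docker", "run",
        f"--mount=type=bind,src={Path.cwd()},target=/work",
        "--workdir=/work",
        IMAGE,
        "./experimental/ggml/benchmark_ggml.sh",
        "c2-standard-16",  # TARGET_DEVICE in the workflow
        str(results_dir / "ggml.json"),  # RESULTS_PATH in the workflow
    ],
    check=True)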
@@ -137,6 +137,17 @@
    template=EFFICIENTNETB7_FP32_TF_600X600X3XF32_BATCH_TEMPLATE,
    batch_sizes=[1, 64, 128])

# GPT2LMHead models.
# Model implementation from https://huggingface.co/docs/transformers/model_doc/gpt2#transformers.TFGPT2Model.
GPT2LMHEAD_TF_IMPL = def_types.ModelImplementation(
    name="GPT2_TF",
    tags=["transformer-decoder", "gpt2", "ggml"],
    framework_type=def_types.ModelFrameworkType.TF_V2,
    module_path=f"{utils.MODELS_MODULE_PATH}.tf.gpt2.gpt2lmhead_model",
    source_info="https://huggingface.co/docs/transformers/model_doc/gpt2#transformers.TFGPT2Model",
)

ALL_MODELS = list(
    itertools.chain(
        T5_LARGE_FP32_TF_512XI32_BATCHES.values(),
1 change: 1 addition & 0 deletions common_benchmark_suite/openxla/benchmark/def_types.py
@@ -17,6 +17,7 @@ class ModelFrameworkType(Enum):
  TF_V2 = "tensorflow_v2"
  PYTORCH = "pytorch"
  JAX = "jax"
  GGML = "ggml"


@dataclass(frozen=True)
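
A note for reading benchmark.py further down: it records str(def_types.ModelFrameworkType.GGML) in the result JSON, and str() on a plain Enum member yields the qualified member name rather than the value string. A standalone illustration (the toy enum here is redefined for the example and is not the repo's class):

from enum import Enum


class ModelFrameworkType(Enum):
  GGML = "ggml"


print(str(ModelFrameworkType.GGML))   # ModelFrameworkType.GGML
print(ModelFrameworkType.GGML.value)  # ggml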
Empty file added experimental/ggml/__init__.py
Empty file.
254 changes: 254 additions & 0 deletions experimental/ggml/benchmark.py
@@ -0,0 +1,254 @@
#!/usr/bin/env python3
#
# Copyright 2023 The OpenXLA Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import argparse
import dataclasses
import json
import pathlib
import re
import statistics
import subprocess
import sys

# Add comparative_benchmark dir to the search path.
sys.path.insert(
    0, str(pathlib.Path(__file__).parents[2] / "comparative_benchmark"))
import utils

# Add common_benchmark_suite dir to the search path.
sys.path.insert(
    0, str(pathlib.Path(__file__).parents[2] / "common_benchmark_suite"))
from openxla.benchmark import def_types, devices

ALL_DEVICE_NAMES = [device.name for device in devices.ALL_DEVICES]


def _parse_output(output_text):
  # Example output.
  # main: mem per token = 2011380 bytes
  # main: load time = 120.92 ms
  # main: sample time = 73.86 ms
  # main: first predict time = 14.71 ms
  # main: loop predict time = 2261.72 ms / 11.20 ms per token
  # main: predict time = 2276.43 ms / 11.21 ms per token
  # main: total time = 2494.66 ms

  LOAD_TIME_REGEXP = re.compile("main: load time = (.+) ms")
  match = LOAD_TIME_REGEXP.search(output_text)
  if not match:
    print("Unable to parse load time")
    return
  load_time_ms = float(match.group(1))

  SAMPLE_TIME_REGEXP = re.compile("main: sample time = (.+) ms")
  match = SAMPLE_TIME_REGEXP.search(output_text)
  if not match:
    print("Unable to parse sample time")
    return
  sample_time_ms = float(match.group(1))

  FIRST_PREDICTION_TIME_REGEXP = re.compile(
      "main: first predict time = (.+) ms")
  match = FIRST_PREDICTION_TIME_REGEXP.search(output_text)
  if not match:
    print("Unable to parse first prediction time")
    return
  first_prediction_ms = float(match.group(1))

  LOOP_PREDICTION_TIME_REGEXP = re.compile(
      "main: loop predict time = .+ ms / (.+) ms per token")
  match = LOOP_PREDICTION_TIME_REGEXP.search(output_text)
  if not match:
    print("Unable to parse loop prediction time")
    return
  loop_prediction_ms = float(match.group(1))

  TOTAL_PREDICTION_TIME_REGEXP = re.compile(
      "main: predict time = (.+) ms / .+ ms per token")
  match = TOTAL_PREDICTION_TIME_REGEXP.search(output_text)
  if not match:
    print("Unable to parse total prediction time")
    return
  total_prediction_ms = float(match.group(1))

  E2E_TIME_REGEXP = re.compile("main: total time = (.+) ms")
  match = E2E_TIME_REGEXP.search(output_text)
  if not match:
    print("Unable to parse total (end-to-end) time")
    return
  e2e_prediction_ms = float(match.group(1))

  return {
      "load_time_ms": load_time_ms,
      "first_prediction_ms": first_prediction_ms,
      "loop_prediction_ms": loop_prediction_ms,
      "total_prediction_ms": total_prediction_ms,
      "sample_time_ms": sample_time_ms,
      "e2e_prediction_ms": e2e_prediction_ms,
  }


def _parse_arguments() -> argparse.Namespace:
  parser = argparse.ArgumentParser(description="Run GGML benchmarks.")
  parser.add_argument("-name",
                      "--benchmark_name",
                      type=str,
                      required=True,
                      help="The regex pattern to match benchmark names.")
  parser.add_argument(
      "-b",
      "--benchmark_binary",
      type=pathlib.Path,
      help="Path to benchmark binary e.g. /tmp/ggml/build/bin/gpt2")
  parser.add_argument(
      "-m",
      "--model",
      type=str,
      help="The GGML model to benchmark e.g. "
      "/tmp/ggml/build/models/gpt-2-117M/ggml-model.bin")
  parser.add_argument("--data_type", type=str, help="The model data type.")
  parser.add_argument("-p",
                      "--prompt",
                      type=str,
                      default="Once upon a time",
                      help="The input prompt to the model.")
  parser.add_argument("-s",
                      "--seed",
                      type=int,
                      default=0,
                      help="The seed to use for the RNG.")
  parser.add_argument("-t",
                      "--threads",
                      type=int,
                      default=8,
                      help="The number of threads to use.")
  parser.add_argument("-o",
                      "--output",
                      type=pathlib.Path,
                      required=True,
                      help="JSON file path to merge the results.")
  parser.add_argument("-device",
                      "--target_device",
                      dest="target_device_name",
                      type=str,
                      required=True,
                      choices=ALL_DEVICE_NAMES,
                      help="The target device to benchmark.")
  parser.add_argument("-w",
                      "--warmup_iterations",
                      type=int,
                      default=5,
                      help="The number of warmup steps.")
  parser.add_argument("-iter",
                      "--iterations",
                      type=int,
                      default=100,
                      help="The number of iterations to benchmark.")
  parser.add_argument("--verbose",
                      action="store_true",
                      help="Show verbose messages.")
  return parser.parse_args()


def main(benchmark_name: str, benchmark_binary: pathlib.Path,
         warmup_iterations: int, iterations: int, model: str, data_type: str,
         prompt: str, seed: int, threads: int, output: pathlib.Path,
         target_device_name: str, verbose: bool):

  try:
    target_device = next(device for device in devices.ALL_DEVICES
                         if device.name == target_device_name)
  except StopIteration:
    raise ValueError(f'Target device "{target_device_name}" is not defined.'
                     f' Available device options:\n{ALL_DEVICE_NAMES}')

  benchmark_definition = {
      "benchmark_name": benchmark_name,
      "framework": str(def_types.ModelFrameworkType.GGML),
      "data_type": data_type,
      "batch_size": 1,
      "compiler": str(def_types.ModelFrameworkType.GGML),
      "device": target_device.name,
      "num_threads": threads,
      "warmup_iterations": warmup_iterations,
      "num_iterations": iterations,
      "tags": ["gpt2", "ggml"],
  }

  cmd = [
      benchmark_binary,
      "--model",
      f"{model}",
      "--prompt",
      f"{prompt}",
      "--seed",
      f"{seed}",
      "--threads",
      f"{threads}",
  ]

  # Run warmup iterations.
  for i in range(warmup_iterations):
    subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

  load_times = []
  first_prediction_times = []
  loop_prediction_times = []
  total_prediction_times = []
  sample_times = []
  e2e_prediction_times = []

  # Run iterations.
  for i in range(iterations):
    raw_result = subprocess.run(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
    raw_result = raw_result.stdout.decode("utf-8")
    metrics = _parse_output(raw_result)

    load_times.append(metrics["load_time_ms"])
    first_prediction_times.append(metrics["first_prediction_ms"])
    loop_prediction_times.append(metrics["loop_prediction_ms"])
    total_prediction_times.append(metrics["total_prediction_ms"])
    sample_times.append(metrics["sample_time_ms"])
    e2e_prediction_times.append(metrics["e2e_prediction_ms"])

  benchmark_metrics = {
      "median_load_time_ms":
          statistics.median(load_times) if load_times else None,
      "median_first_prediction_ms":
          statistics.median(first_prediction_times)
          if first_prediction_times else None,
      "median_loop_prediction_ms":
          statistics.median(loop_prediction_times)
          if loop_prediction_times else None,
      "median_total_prediction_ms":
          statistics.median(total_prediction_times)
          if total_prediction_times else None,
      "median_sample_time_ms":
          statistics.median(sample_times) if sample_times else None,
      "median_e2e_prediction_times":
          statistics.median(e2e_prediction_times)
          if e2e_prediction_times else None,
  }

  benchmark_result = utils.BenchmarkResult(
      definition=benchmark_definition,
      metrics={
          "compiler_level": benchmark_metrics,
      },
  )

  if verbose:
    print(json.dumps(dataclasses.asdict(benchmark_result), indent=2))
  utils.append_benchmark_result(output, benchmark_result)


if __name__ == "__main__":
  main(**vars(_parse_arguments()))
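
As a quick illustration of the parsing logic above, here is a small sanity check (not part of the commit) that feeds the example output quoted in _parse_output's comment back through the parser. It assumes the snippet is saved next to benchmark.py so the module is importable, and that real GGML output matches the single-space spacing shown in that comment.

import json

from benchmark import _parse_output  # assumes this file sits in experimental/ggml/

SAMPLE_OUTPUT = """\
main: mem per token = 2011380 bytes
main: load time = 120.92 ms
main: sample time = 73.86 ms
main: first predict time = 14.71 ms
main: loop predict time = 2261.72 ms / 11.20 ms per token
main: predict time = 2276.43 ms / 11.21 ms per token
main: total time = 2494.66 ms
"""

# Expected to print load_time_ms=120.92, sample_time_ms=73.86,
# first_prediction_ms=14.71, loop_prediction_ms=11.2,
# total_prediction_ms=2276.43 and e2e_prediction_ms=2494.66.
print(json.dumps(_parse_output(SAMPLE_OUTPUT), indent=2))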