Skip to content

Commit

Permalink
chore: Rename project
Browse files Browse the repository at this point in the history
  • Loading branch information
Hugoch committed Nov 5, 2024
1 parent 561e3c1 commit 604504c
Show file tree
Hide file tree
Showing 20 changed files with 80 additions and 77 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
uses: docker/metadata-action@v5
with:
images: |
registry.internal.huggingface.tech/api-inference/text-generation-inference-benchmark
registry.internal.huggingface.tech/api-inference/inference-benchmarker
tags: |
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
# If main, release or tag
Expand All @@ -62,8 +62,8 @@ jobs:
flavor: |
latest=auto
images: |
registry.internal.huggingface.tech/api-inference/text-generation-inference-benchmark
ghcr.io/huggingface/text-generation-inference-benchmark
registry.internal.huggingface.tech/api-inference/inference-benchmarker
ghcr.io/huggingface/inference-benchmarker
tags: |
type=semver,pattern={{version}}${{ env.LABEL }}
type=semver,pattern={{major}}.{{minor}}${{ env.LABEL }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,5 @@ jobs:
key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('app/Cargo.lock') }}
- name: Run unit tests
run: |
cargo test --package text-generation-inference-benchmark
cargo test --package inference-benchmarker
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "text-generation-inference-benchmark"
name = "inference-benchmarker"
version = "0.1.0"
edition = "2021"

Expand Down
12 changes: 6 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
FROM rust:1-bullseye AS builder
LABEL org.opencontainers.image.source=https://github.com/huggingface/text-generation-inference-benchmark
LABEL org.opencontainers.image.source=https://github.com/huggingface/inference-benchmarker
LABEL org.opencontainers.image.description="A benchmark tool for LLM inference engines"
LABEL org.opencontainers.image.licenses="Apache-2.0"
ARG GIT_SHA
WORKDIR /usr/src/text-generation-inference-benchmark
WORKDIR /usr/src/inference-benchmarker
COPY . .
RUN cargo install --path .
FROM debian:bullseye-slim
RUN mkdir -p /opt/text-generation-inference-benchmark/results
WORKDIR /opt/text-generation-inference-benchmark
COPY --from=builder /usr/local/cargo/bin/text-generation-inference-benchmark /usr/local/bin/text-generation-inference-benchmark
CMD ["text-generation-inference-benchmark"]
RUN mkdir -p /opt/inference-benchmarker/results
WORKDIR /opt/inference-benchmarker
COPY --from=builder /usr/local/cargo/bin/inference-benchmarker /usr/local/bin/inference-benchmarker
CMD ["inference-benchmarker"]
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
build:
cargo build --release --package text-generation-inference-benchmark --bin text-generation-inference-benchmark
cargo build --release --package inference-benchmarker --bin inference-benchmarker

run: build
cargo run --package text-generation-inference-benchmark --bin text-generation-inference-benchmark -- $@
cargo run --package inference-benchmarker --bin inference-benchmarker -- $@

test:
cargo test --package text-generation-inference-benchmark
cargo test --package inference-benchmarker

lint:
cargo +nightly fmt
cargo clippy --package text-generation-inference-benchmark
cargo clippy --package inference-benchmarker
27 changes: 15 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# TGI Benchmark: A High-Performance Tool for Text Generation Model Benchmarking
# Inference Benchmarker 🐢
## The best tool for benchmarking inference engines and LLM performance


Benchmarking inference servers for text generation models presents unique challenges.
The performance of these models can vary greatly depending on factors like input prompts,
The performance of LLMs can vary greatly depending on factors like input prompts,
decoding strategies, hardware specifications, and server configurations.

**TGI Benchmark** is designed to streamline this process by providing a comprehensive benchmarking tool
**Inference Benchmarker** is designed to streamline this process by providing a comprehensive benchmarking tool
that evaluates the real-world performance of text generation models and servers.
With **TGI Benchmark**, you can easily test your model's throughput and efficiency under various workloads,
With **Inference Benchmarker**, you can easily test your model's throughput and efficiency under various workloads,
identify performance bottlenecks, and optimize your deployment for production environments.

It can be used to benchmark any text generation server that exposes an OpenAI-compliant API.
Expand All @@ -24,7 +26,8 @@ It can be used to benchmark any text generation server that exposes an OpenAI-co
## Table of contents

<!-- TOC -->
* [TGI Benchmark: A High-Performance Tool for Text Generation Model Benchmarking](#tgi-benchmark-a-high-performance-tool-for-text-generation-model-benchmarking)
* [Inference Benchmarker 🐢](#inference-benchmarker-)
* [The best tool for benchmarking inference engines and LLM performance](#the-best-tool-for-benchmarking-inference-engines-and-llm-performance)
* [Features](#features)
* [Table of contents](#table-of-contents)
* [Get started](#get-started)
Expand Down Expand Up @@ -83,10 +86,10 @@ $ docker run \
--rm \
-it \
--net host \
-v $(pwd):/opt/text-generation-inference-benchmark/results \
-v $(pwd):/opt/inference-benchmarker/results \
-e "HF_TOKEN=$HF_TOKEN" \
ghcr.io/huggingface/text-generation-inference-benchmark:latest \
text-generation-inference-benchmark \
ghcr.io/huggingface/inference-benchmarker:latest \
inference-benchmarker \
--tokenizer-name "$MODEL" \
--max-vus 800 \
--url http://localhost:8080 \
Expand Down Expand Up @@ -120,10 +123,10 @@ $ docker run \
--rm \
-it \
--net host \
-v $(pwd):/opt/text-generation-inference-benchmark/results \
-v $(pwd):/opt/inference-benchmarker/results \
-e "HF_TOKEN=$HF_TOKEN" \
ghcr.io/huggingface/text-generation-inference-benchmark:latest \
text-generation-inference-benchmark \
ghcr.io/huggingface/inference-benchmarker:latest \
inference-benchmarker \
--tokenizer-name "meta-llama/Llama-3.1-8B-Instruct" \
--max-vus 800 \
--duration 120s \
Expand Down Expand Up @@ -202,7 +205,7 @@ You can deploy the benchmarking tool on Kubernetes using the provided Helm chart

Review the values (especially model, HF token and resources), and install the chart:
```shell
$ helm install tgi-benchmark ./extra/k8s/text-generation-inference-benchmark
$ helm install inference-benchmarker ./extra/k8s/inference-benchmarker
```

## Deploy on Slurm
Expand Down
8 changes: 4 additions & 4 deletions extra/dashboard/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def run(from_results_dir, datasource_bench, datasource_ci, github_token, github_
Benchmark are run with:
- Prompts: 200±10 tokens length (normal distribution)
- Generation: 200±10 tokens length (normal distribution)
- Generation: 800 max tokens length
- 120s duration
Each benchmark is run using a constant arrival rate of requests per second (QPS),
Expand Down Expand Up @@ -95,7 +95,7 @@ def update_ci(device_ci, model_ci, percentiles_ci, commit_ref, commit_compare):
return res + [compare_table(device_ci, commit_ref, commit_compare)]

def summary_table(device) -> pd.DataFrame:
rates = [4., 8., 16.]
rates = [4., 12., 20., 24.]
data = df_bench[(df_bench['device'] == device) & (df_bench['rate'].isin(rates))]
data = data.groupby(['model', 'rate', 'engine']).agg(
{'inter_token_latency_ms_p90': 'mean', 'time_to_first_token_ms_p90': 'mean',
Expand Down Expand Up @@ -226,7 +226,7 @@ def build_commit_list(df: pd.DataFrame) -> List[Tuple[str, str]]:
y_title="Time (ms)", percentiles=[0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]),
"e2e_latency_ms": PlotConfig(title="End to End Latency (lower is better)", x_title="QPS",
y_title="Time (ms)", percentiles=[0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]),
"token_throughput_secs": PlotConfig(title="Request Output Throughput P90 (higher is better)", x_title="QPS",
"token_throughput_secs": PlotConfig(title="Request Output Throughput (higher is better)", x_title="QPS",
y_title="Tokens/s"),
"successful_requests": PlotConfig(title="Successful requests (higher is better)", x_title="QPS",
y_title="Count"),
Expand Down Expand Up @@ -257,7 +257,7 @@ def build_commit_list(df: pd.DataFrame) -> List[Tuple[str, str]]:
percentiles = map(lambda p: f'p{int(float(p) * 100)}', percentiles)
percentiles = sorted(list(percentiles))
percentiles.append('avg')
with gr.Blocks(css=css, title="TGI benchmarks") as demo:
with gr.Blocks(css=css, title="Inference Benchmarker") as demo:
with gr.Row():
header = gr.Markdown("# TGI benchmarks\nBenchmark results for Hugging Face TGI 🤗")
with gr.Tab(label="TGI benchmarks"):
Expand Down
File renamed without changes.
6 changes: 6 additions & 0 deletions extra/k8s/inference-benchmarker/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Helm chart metadata for the inference-benchmarker chart.
apiVersion: v2
# Chart name; the chart's helper templates use .Chart.Name as the default
# resource name when no nameOverride is set.
name: inference-benchmarker
description: A Helm chart to run inference-benchmarker
type: application
# Chart version; combined with the chart name to build the helm.sh/chart label.
version: 0.1.0
# Emitted as the app.kubernetes.io/version label when set.
# NOTE(review): the Rust package is at version 0.1.0, so "2.3.0" presumably
# tracks a different component (e.g. the inference server image) - confirm.
appVersion: "2.3.0"
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "text-generation-inference-benchmark.name" -}}
{{- define "inference-benchmarker.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

Expand All @@ -10,7 +10,7 @@ Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "text-generation-inference-benchmark.fullname" -}}
{{- define "inference-benchmarker.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
Expand All @@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name.
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "text-generation-inference-benchmark.chart" -}}
{{- define "inference-benchmarker.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "text-generation-inference-benchmark.labels" -}}
helm.sh/chart: {{ include "text-generation-inference-benchmark.chart" . }}
{{ include "text-generation-inference-benchmark.selectorLabels" . }}
{{- define "inference-benchmarker.labels" -}}
helm.sh/chart: {{ include "inference-benchmarker.chart" . }}
{{ include "inference-benchmarker.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
Expand All @@ -45,17 +45,17 @@ app.kubernetes.io/managed-by: {{ .Release.Service }}
{{/*
Selector labels
*/}}
{{- define "text-generation-inference-benchmark.selectorLabels" -}}
app.kubernetes.io/name: {{ include "text-generation-inference-benchmark.name" . }}
{{- define "inference-benchmarker.selectorLabels" -}}
app.kubernetes.io/name: {{ include "inference-benchmarker.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "text-generation-inference-benchmark.serviceAccountName" -}}
{{- define "inference-benchmarker.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "text-generation-inference-benchmark.fullname" .) .Values.serviceAccount.name }}
{{- default (include "inference-benchmarker.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "text-generation-inference-benchmark.fullname" . }}-benchmark
name: {{ include "inference-benchmarker.fullname" . }}-benchmark
labels:
app.kubernetes.io/component: benchmark
{{- include "text-generation-inference-benchmark.labels" . | nindent 4 }}
{{- include "inference-benchmarker.labels" . | nindent 4 }}
spec:
template:
metadata:
Expand All @@ -14,7 +14,7 @@ spec:
{{- end }}
labels:
app.kubernetes.io/component: benchmark
{{- include "text-generation-inference-benchmark.labels" . | nindent 8 }}
{{- include "inference-benchmarker.labels" . | nindent 8 }}
{{- with .Values.benchmark.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
Expand All @@ -33,8 +33,8 @@ spec:
- sh
- -c
- |
until curl -s http://{{ include "text-generation-inference-benchmark.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local:8080/health; do
echo "Waiting for {{ include "text-generation-inference-benchmark.fullname" . }} service..."
until curl -s http://{{ include "inference-benchmarker.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local:8080/health; do
echo "Waiting for {{ include "inference-benchmarker.fullname" . }} service..."
sleep 1
done
exit 0
Expand All @@ -45,9 +45,9 @@ spec:
image: "{{ .Values.benchmark.image.repository }}:{{ .Values.benchmark.image.tag | default "latest" }}"
imagePullPolicy: {{ .Values.benchmark.image.pullPolicy }}
args:
- "text-generation-inference-benchmark"
- "inference-benchmarker"
- "--url"
- "http://{{ include "text-generation-inference-benchmark.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local:8080"
- "http://{{ include "inference-benchmarker.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local:8080"
- "--tokenizer-name"
- "{{ .Values.model_id }}"
- "--no-console"
Expand All @@ -56,15 +56,15 @@ spec:
- name: HF_TOKEN
valueFrom:
secretKeyRef:
name: {{ include "text-generation-inference-benchmark.fullname" . }}-hf-token
name: {{ include "inference-benchmarker.fullname" . }}-hf-token
key: HF_TOKEN
# Log filter for the benchmarker container. The filter target is the Rust
# crate path, i.e. the package name with dashes replaced by underscores.
# It must follow the package rename to inference-benchmarker, otherwise the
# directive matches no module and info-level logs are silently dropped.
- name: RUST_LOG
  value: "inference_benchmarker=info"
resources:
{{- toYaml .Values.benchmark.resources | nindent 12 }}
volumeMounts:
- name: results
mountPath: /opt/text-generation-inference-benchmark/results
mountPath: /opt/inference-benchmarker/results
- name: nginx
image: nginx
ports:
Expand All @@ -83,7 +83,7 @@ spec:
emptyDir: { }
- name: default
configMap:
name: {{ include "text-generation-inference-benchmark.fullname" . }}-nginx-config
name: {{ include "inference-benchmarker.fullname" . }}-nginx-config
{{- with .Values.benchmark.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "text-generation-inference-benchmark.fullname" . }}-nginx-config
name: {{ include "inference-benchmarker.fullname" . }}-nginx-config
data:
default: |
server {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apiVersion: v1
kind: Secret
metadata:
name: {{ include "text-generation-inference-benchmark.fullname" . }}-hf-token
name: {{ include "inference-benchmarker.fullname" . }}-hf-token
type: Opaque
stringData:
HF_TOKEN: "{{ .Values.hf_token }}"
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "text-generation-inference-benchmark.fullname" . }}
name: {{ include "inference-benchmarker.fullname" . }}
labels:
app.kubernetes.io/component: text-generation-inference
{{- include "text-generation-inference-benchmark.labels" . | nindent 4 }}
{{- include "inference-benchmarker.labels" . | nindent 4 }}
spec:
type: ClusterIP
ports:
Expand All @@ -14,15 +14,15 @@ spec:
name: http
selector:
app.kubernetes.io/component: text-generation-inference
{{- include "text-generation-inference-benchmark.selectorLabels" . | nindent 4 }}
{{- include "inference-benchmarker.selectorLabels" . | nindent 4 }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ include "text-generation-inference-benchmark.fullname" . }}-benchmark
name: {{ include "inference-benchmarker.fullname" . }}-benchmark
labels:
app.kubernetes.io/component: benchmark
{{- include "text-generation-inference-benchmark.labels" . | nindent 4 }}
{{- include "inference-benchmarker.labels" . | nindent 4 }}
spec:
type: ClusterIP
ports:
Expand All @@ -33,4 +33,4 @@ spec:
publishNotReadyAddresses: true
selector:
app.kubernetes.io/component: benchmark
{{- include "text-generation-inference-benchmark.selectorLabels" . | nindent 4 }}
{{- include "inference-benchmarker.selectorLabels" . | nindent 4 }}
Loading

0 comments on commit 604504c

Please sign in to comment.