Skip to content

Commit

Permalink
add bert benchmark,support knative and cpu cluster,cannot run on CI
Browse files Browse the repository at this point in the history
Signed-off-by: lrq619 <[email protected]>
  • Loading branch information
lrq619 authored and ustiugov committed Aug 16, 2023
1 parent aaa178d commit 03ea4cc
Show file tree
Hide file tree
Showing 15 changed files with 1,635 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ updates:
- dependency-name: "*"
update-types: [ "version-update:semver-patch" ]

- package-ecosystem: "gomod"
directory: "/benchmarks/bert"
schedule:
interval: "weekly"
ignore:
- dependency-name: "*"
update-types: [ "version-update:semver-patch" ]

- package-ecosystem: "gomod"
directory: "/benchmarks/hotel-app"
schedule:
Expand Down
134 changes: 134 additions & 0 deletions .github/workflows/e2e-bert.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
name: Bert End-to-End Tests

on:
  # Weekly scheduled run: Mondays at 09:00 UTC.
  schedule:
    - cron: "0 9 * * 1"
  workflow_dispatch:
  push:
    branches: [main]
    paths:
      - "benchmarks/bert/**"
      - "utils/**"
      - "tools/**"
      - "runner/**"

  pull_request:
    branches: [main]
    paths:
      - "benchmarks/bert/**"
      - "utils/**"
      - "tools/**"
      - "runner/**"

env:
  GOOS: linux
  GO111MODULE: on
  PORT: 50051
  PLATFORMS: linux/amd64,linux/arm64

jobs:
  build-and-push:
    name: Build and push all images
    runs-on: ubuntu-20.04
    strategy:
      fail-fast: false
      matrix:
        service:
          [
            bert-python
          ]

    steps:
      - name: Check out code into the Go module directory
        uses: actions/checkout@v3
        with:
          lfs: "true"

      - uses: actions/setup-go@v4
        with:
          go-version: '1.18'

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_HUB_USERNAME }}
          password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v2

      - name: Install AWS CLI
        uses: unfor19/install-aws-cli-action@master
        with:
          version: '2'

      - name: Set up Python version
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"

      - name: Set up python dependencies
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel ez_setup setuptools
          # NOTE(review): the assignment below is a no-op as written — the
          # shell exits immediately after it. If it is meant to influence a
          # grpcio source build, it must prefix (or be exported before) the
          # pip install that compiles grpcio. Confirm the intent.
          GRPC_PYTHON_BUILD_SYSTEM_ZLIB=true

      - name: Setup go dependencies
        # NOTE(review): working-directory points at benchmarks/auth although
        # this workflow is for the bert benchmark — this looks like a
        # copy-paste from another workflow; confirm whether it should be
        # benchmarks/bert.
        working-directory: benchmarks/auth
        env:
          GOPRIVATE_KEY: ${{ secrets.XDT_REPO_ACCESS_KEY }}
        run: |
          go install google.golang.org/protobuf/cmd/[email protected]
          go install google.golang.org/grpc/cmd/[email protected]

      # Downloading the model takes too much time on CI, so the build/push
      # steps below are disabled; the image would instead be pulled from
      # the Docker registry.
      # - name: Data Setup
      #   working-directory: benchmarks/bert
      #   run: make setup

      # - name: Build and push
      #   working-directory: benchmarks/bert
      #   run: make push-${{ matrix.service }}

      # - name: Pull the Image
      #   working-directory: benchmarks/bert
      #   run: make pull-${{ matrix.service }}

  test-compose:
    name: Test Docker Compose
    needs: build-and-push
    env:
      YAML_DIR: benchmarks/bert/yamls/docker-compose/
    runs-on: ubuntu-20.04
    strategy:
      fail-fast: false
      matrix:
        service:
          [
            bert-python
          ]

    steps:
      - name: Check out code into the Go module directory
        uses: actions/checkout@v3
        with:
          lfs: "true"

      # The compose test is disabled: there is not enough disk space on the
      # CI runner to hold the bert image.
      # - name: start docker-compose benchmark
      #   run: |
      #     docker-compose -f ${{ env.YAML_DIR }}/dc-${{ matrix.service }}.yaml pull
      #     docker-compose -f ${{ env.YAML_DIR }}/dc-${{ matrix.service }}.yaml up &> log_file &
      #     sleep 60s
      #     cat log_file

      # - name: invoke the chain
      #   run: |
      #     ./tools/bin/grpcurl -plaintext localhost:50000 helloworld.Greeter.SayHello

      # - name: show docker-compose log
      #   run: cat log_file
116 changes: 116 additions & 0 deletions benchmarks/bert/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Docker Hub namespace that all benchmark images are pushed to / pulled from.
DOCKER_HUB_ACCOUNT := vhiveease
SHELL := /bin/bash

# Path of this Makefile, used for recursive $(MAKE) -f invocations.
MAKEFILE_NAME := $(lastword $(MAKEFILE_LIST))
# Host user identity, forwarded into the image as build args so that files
# created in mounted volumes keep the caller's ownership.
UNAME := $(shell whoami)
UID := $(shell id -u)
GROUPNAME := $(shell id -gn)
GROUPID := $(shell id -g)

# NOTE(review): HOST_VOL, CONTAINER_VOL, MLPERF_CONF and FEATURE_CACHE are
# not referenced by any rule visible in this file — confirm whether they are
# used by recipes elsewhere or are dead.
HOST_VOL ?= ${PWD}
CONTAINER_VOL ?= /workspace

BUILD_DIR := build
DATA_DIR := $(BUILD_DIR)/data
BERT_DIR := $(DATA_DIR)/bert_tf_v1_1_large_fp32_384_v2
RESULT_DIR := $(BUILD_DIR)/result
MLPERF_CONF := $(BUILD_DIR)/mlperf.conf
FEATURE_CACHE := eval_features.pickle

# Docker build context: the repository root (the Dockerfile COPYs paths
# relative to it, e.g. benchmarks/bert/...).
ROOT := ../../

FUNCTIONS := bert-python
ALL_IMAGES := $(addsuffix -image, $(FUNCTIONS))

# Prefer nvidia-docker where installed; otherwise fall back to Docker's
# native GPU support (`--gpus=all`, Docker >= 19.03).
ifneq ($(wildcard /usr/bin/nvidia-docker),)
  DOCKER_RUN_CMD := nvidia-docker run
else
  DOCKER_RUN_CMD := docker run --gpus=all
endif

.PHONY: all all_image
all: all_image

all_image: $(ALL_IMAGES)

# One-shot preparation: creates the build tree, then fetches the SQuAD data
# and the BERT model checkpoints. May download several GB.
.PHONY: setup
setup:
	@mkdir -p $(BUILD_DIR)
	@$(MAKE) -f $(MAKEFILE_NAME) download_data
	@$(MAKE) -f $(MAKEFILE_NAME) download_model

# Fetch the SQuAD v1.1 dev set and its official evaluation script.
# Each download is skipped when the target file already exists, so the
# target is safe to re-run.
.PHONY: download_data
download_data:
	@mkdir -p $(DATA_DIR) $(BERT_DIR) $(RESULT_DIR)
	@if [ ! -e $(DATA_DIR)/dev-v1.1.json ]; then \
		wget -O $(DATA_DIR)/dev-v1.1.json https://github.com/rajpurkar/SQuAD-explorer/blob/master/dataset/dev-v1.1.json?raw=true; \
	fi
	@if [ ! -e $(DATA_DIR)/evaluate-v1.1.py ]; then \
		wget -O $(DATA_DIR)/evaluate-v1.1.py https://github.com/allenai/bi-att-flow/raw/master/squad/evaluate-v1.1.py; \
	fi

# Fetch the BERT model artifacts (TF checkpoint, frozen graph, PyTorch
# weights, vocabulary) from Zenodo. Each download is skipped when the file
# already exists, so the target is safe to re-run.
.PHONY: download_model
download_model:
	@mkdir -p $(BERT_DIR)
	@if [ ! -e $(BERT_DIR)/model.ckpt-5474.data-00000-of-00001 ]; then \
		wget -O $(BERT_DIR)/model.ckpt-5474.data-00000-of-00001 https://zenodo.org/record/3733868/files/model.ckpt-5474.data-00000-of-00001?download=1; \
	fi
	@if [ ! -e $(BERT_DIR)/model.ckpt-5474.index ]; then \
		wget -O $(BERT_DIR)/model.ckpt-5474.index https://zenodo.org/record/3733868/files/model.ckpt-5474.index?download=1; \
	fi
	@if [ ! -e $(BERT_DIR)/model.ckpt-5474.meta ]; then \
		wget -O $(BERT_DIR)/model.ckpt-5474.meta https://zenodo.org/record/3733868/files/model.ckpt-5474.meta?download=1; \
	fi
	@if [ ! -e $(BERT_DIR)/vocab.txt ]; then \
		wget -O $(BERT_DIR)/vocab.txt https://zenodo.org/record/3733868/files/vocab.txt?download=1; \
	fi
	@if [ ! -e $(BERT_DIR)/model.pb ]; then \
		wget -O $(BERT_DIR)/model.pb https://zenodo.org/record/3939747/files/model.pb?download=1; \
	fi
	@if [ ! -e $(BERT_DIR)/model.pytorch ]; then \
		wget -O $(BERT_DIR)/model.pytorch https://zenodo.org/record/3733896/files/model.pytorch?download=1; \
	fi
	# NOTE(review): the original contained a second, unreachable vocab.txt
	# download pointing at record 3733896 (guarded by the same existence
	# check as the 3733868 one above, so it could never run). Removed here;
	# confirm which record's vocab.txt is the intended one.


# Build the bert-python image with buildx, forwarding the host user's
# identity as build args, and load the result into the local Docker daemon.
# NOTE(review): the pulled base (tensorrtserver:19.08-py3) does not match
# the Dockerfile's default FROM (tensorflow:19.10-py3), and the BASE_IMAGE
# build-arg is not declared in docker/Dockerfile — confirm which base is
# actually intended.
bert-python-image: docker/Dockerfile python/server.py
	@docker pull nvcr.io/nvidia/tensorrtserver:19.08-py3
	DOCKER_BUILDKIT=1 docker buildx build \
		--build-arg GID=$(GROUPID) \
		--build-arg UID=$(UID) \
		--build-arg GROUP=$(GROUPNAME) \
		--build-arg USER=$(UNAME) \
		--build-arg BASE_IMAGE=mlperf-inference-bert \
		--tag $(DOCKER_HUB_ACCOUNT)/bert-python:latest \
		-f docker/Dockerfile \
		$(ROOT) --load




# Run the bert-python image interactively (GPU-enabled via DOCKER_RUN_CMD);
# the container is removed on exit.
.PHONY: launch_docker
launch_docker:
	$(DOCKER_RUN_CMD) --rm -it -w /workspace $(DOCKER_HUB_ACCOUNT)/bert-python

# Remove everything `setup` created (downloaded data, models, results).
.PHONY: clean
clean:
	@rm -rf $(BUILD_DIR)

# `make push-<function>` builds the image (via the %-image rule) and pushes
# it; `make pull-<function>` fetches a prebuilt image. `$*` is the pattern
# stem, i.e. the function name.
push-%: %-image
	docker push docker.io/$(DOCKER_HUB_ACCOUNT)/$*:latest

pull-%:
	docker pull docker.io/$(DOCKER_HUB_ACCOUNT)/$*:latest
22 changes: 22 additions & 0 deletions benchmarks/bert/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Bert Benchmark

The `bert` benchmark performs inference with BERT, a large language model.

The function currently is only implemented in one runtime, namely Python.


## Running this benchmark locally (using docker)

A detailed, general description of how to run benchmarks locally can be found [here](../../docs/running_locally.md). The following steps demonstrate the process for the `bert-python` function.
1. Download the data and model using `make setup`; this may take a long time.
2. Build or pull the function images using `make all` or `make pull`.
### Invoke once
3. Start the function with docker-compose
```bash
docker-compose -f ./yamls/docker-compose/dc-bert-python.yaml up
```
4. In a new terminal, invoke the interface function with grpcurl.
```bash
./tools/bin/grpcurl -plaintext localhost:50000 helloworld.Greeter.SayHello
```
This will output the min, max, and mean inference time of one inference; it may take a few seconds. You can change the default settings in `python/config/user.conf`.
35 changes: 35 additions & 0 deletions benchmarks/bert/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:19.10-py3
FROM ${FROM_IMAGE_NAME}

# System packages needed by the benchmark tooling; drop the apt lists
# afterwards so they do not bloat the layer.
RUN apt-get update \
    && apt-get install -y pbzip2 pv bzip2 libcurl4 curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Install the pybind11 third-party library (pinned commit) and MLPerf
# LoadGen, then remove the sources to keep the image small.
RUN mkdir /tmp/third_party \
    && cd /tmp/third_party \
    && git clone https://github.com/pybind/pybind11.git \
    && mv pybind11 pybind \
    && cd /tmp/third_party/pybind \
    && git reset --hard 25abf7efba
RUN cd /tmp/ \
    && git clone https://github.com/lrq619/loadgen.git \
    && cd /tmp/loadgen \
    && python3 setup.py install \
    && cd /tmp \
    && rm -rf /tmp/loadgen \
    && rm -rf /tmp/third_party

COPY benchmarks/bert/requirements.txt /workspace/
RUN python3 -m pip install -r requirements.txt

# The build context is the repository root; `build/` must already contain
# the data and model fetched by `make setup`.
COPY benchmarks/bert/build /workspace/build
COPY benchmarks/bert/python /workspace/python
RUN mv /workspace/python/config/bert_config.json /workspace/ && mv /workspace/python/config/user.conf /workspace/

# Fetch the generated gRPC stubs for the bert proto.
# NOTE(review): the two stubs land in different directories
# (/workspace/python vs /workspace/python/proto/bert/) — confirm this
# matches how server.py imports them.
ADD https://raw.githubusercontent.com/vhive-serverless/vSwarm-proto/add-bert/proto/bert/bert_pb2_grpc.py /workspace/python
ADD https://raw.githubusercontent.com/vhive-serverless/vSwarm-proto/add-bert/proto/bert/bert_pb2.py /workspace/python/proto/bert/

ENTRYPOINT [ "python3", "python/server.py" ,"--addr=0.0.0.0", "--port=50051"]
Loading

0 comments on commit 03ea4cc

Please sign in to comment.