feat: finalize docker compose

vhive-serverless · Aug 9, 2023 · 1233715 · 1233715
1 parent d7e1018
commit 1233715
Show file tree

Hide file tree

Showing 17 changed files with 487 additions and 50,149 deletions.
diff --git a/.github/workflows/e2e-image-classification.yml b/.github/workflows/e2e-image-classification.yml
@@ -0,0 +1,205 @@
+# End-to-end CI for the image classification benchmark.
+name: Image classification End-to-End Tests
+
+on:
+  # Scheduled run: 09:00 every Monday (GitHub evaluates cron in UTC).
+  schedule:
+    - cron: '0 9 * * 1'
+  # Allow manual runs from the Actions tab.
+  workflow_dispatch:
+  # Run on pushes to main that touch the benchmark or the shared tooling.
+  push:
+    branches:
+      - main
+    paths:
+      - 'benchmarks/image_classification/**'
+      - 'utils/**'
+      - 'tools/**'
+      - 'runner/**'
+
+  # Same path filter for pull requests targeting main.
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'benchmarks/image_classification/**'
+      - 'utils/**'
+      - 'tools/**'
+      - 'runner/**'
+
+# Workflow-wide environment variables. Boolean-like and numeric values are
+# quoted so that every YAML loader reads them as the strings the tools expect.
+env:
+  GOOS: linux
+  GO111MODULE: "on"
+  PORT: "50051"
+  PLATFORMS: linux/amd64,linux/arm64
+
+jobs:
+  # Builds the benchmark image(s) listed in the matrix; the test jobs below
+  # declare `needs: build-and-push`.
+  build-and-push:
+    name: Build and push all images
+    runs-on: ubuntu-20.04
+    strategy:
+      fail-fast: false
+      matrix:
+        service:
+          - image_classification-python
+
+    steps:
+      # Fetch the repository, including Git LFS objects.
+      - name: Check out code into the Go module directory
+        uses: actions/checkout@v3
+        with:
+          lfs: 'true'
+
+      - uses: actions/setup-go@v4
+        with:
+          go-version: "1.18"
+
+      # Registry credentials come from repository secrets.
+      - name: Login to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKER_HUB_USERNAME }}
+          password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
+
+      # QEMU + Buildx; PLATFORMS in the workflow env lists amd64 and arm64.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v2
+
+      # NOTE(review): this action is pinned to a moving branch (`master`);
+      # consider pinning to a tag or commit SHA.
+      - name: Install AWS CLI
+        uses: unfor19/install-aws-cli-action@master
+        with:
+          version: "2"
+
+      - name: Set up Python version
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+
+      # Installs the Python build tooling and the benchmark's requirements.
+      - name: Set up python dependencies
+        run: |
+          python3 -m pip install --upgrade pip
+          python3 -m pip install wheel ez_setup setuptools
+          # The variable is given as a command prefix so that it is placed in
+          # pip's environment; a bare `VAR=value` line is a shell-local
+          # assignment that child processes never see.
+          GRPC_PYTHON_BUILD_SYSTEM_ZLIB=true python3 -m pip install -r benchmarks/image_classification/requirements.txt
+
+      # Installs the protoc plugins for Go and Go-gRPC.
+      # NOTE(review): the `tool@version` suffixes were lost to e-mail
+      # obfuscation in the rendered page. The tool names follow from the
+      # module paths; the versions below are a reconstruction — confirm them
+      # against the repository before merging.
+      - name: Setup go dependencies
+        working-directory: benchmarks/image_classification
+        env:
+          GOPRIVATE_KEY: ${{ secrets.XDT_REPO_ACCESS_KEY }}
+        run: |
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.26
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.1
+
+      # Runs the benchmark Makefile's push target for the matrix service.
+      - name: Build and push
+        working-directory: benchmarks/image_classification
+        run: make push-${{ matrix.service }}
+
+  # Starts the benchmark with docker-compose and invokes it once via grpcurl.
+  test-compose:
+    name: Test Docker Compose
+    needs: build-and-push
+    env:
+      YAML_DIR: benchmarks/image_classification/yamls/docker-compose/
+    runs-on: ubuntu-20.04
+    strategy:
+      fail-fast: false
+      matrix:
+        service:
+          - image_classification-python
+
+    steps:
+      - name: Check out code into the Go module directory
+        uses: actions/checkout@v3
+        with:
+          lfs: "true"
+
+      # Compose runs in the background with its output captured in log_file;
+      # the fixed 60 s sleep gives the containers time to come up.
+      - name: start docker-compose benchmark
+        run: |
+          docker-compose -f ${{ env.YAML_DIR }}/dc-${{ matrix.service }}.yaml pull
+          docker-compose -f ${{ env.YAML_DIR }}/dc-${{ matrix.service }}.yaml up &> log_file &
+          sleep 60s
+          cat log_file
+
+      - name: invoke the chain
+        run: |
+          ./tools/bin/grpcurl -plaintext localhost:50000 helloworld.Greeter.SayHello
+
+      # Always dump the compose log: it matters most when the invocation
+      # above has failed, which would otherwise skip this step.
+      - name: show docker-compose log
+        if: ${{ always() }}
+        run: cat log_file
+
+  # Deploys the benchmark as a Knative service on a Kind cluster and tests it.
+  test-knative:
+    name: Test Knative Deployment
+    needs: build-and-push
+    env:
+      KIND_VERSION: v0.14.0
+      K8S_VERSION: v1.23
+      YAML_DIR: benchmarks/image_classification/yamls/knative/
+
+    runs-on: ubuntu-20.04
+    strategy:
+      fail-fast: false
+      matrix:
+        service:
+          - image_classification-python
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          lfs: "true"
+      - name: Checkout LFS objects
+        run: git lfs checkout
+
+      - uses: actions/setup-go@v4
+        with:
+          # Quoted so the version is always a string and never parsed as a
+          # float (an unquoted 1.20 would become 1.2).
+          go-version: "1.18"
+
+      ## Bring up a Knative cluster for the service under test
+      - name: Create k8s Kind Cluster
+        run: |
+          bash ./runner/scripts/01-kind.sh
+
+      - name: Install Serving
+        run: |
+          bash ./runner/scripts/02-serving.sh
+
+      - name: Install Kourier
+        run: |
+          bash ./runner/scripts/02-kourier.sh
+
+      # Registers <ingress-ip>.sslip.io as a domain key in Knative's
+      # config-domain ConfigMap.
+      - name: Setup domain
+        run: |
+          ingress_ip="127.0.0.1"
+          domain=$ingress_ip.sslip.io
+          kubectl patch configmap -n knative-serving config-domain -p "{\"data\": {\"$domain\": \"\"}}"
+
+      ## Deploy the service and wait for it
+      - name: Deploy knative
+        run: kubectl apply -f ${{ env.YAML_DIR }}/kn-${{ matrix.service }}.yaml
+
+      # Blocks for up to 120 s until the applied resources report Ready, then
+      # lists them for the job log.
+      - name: Check if service is ready
+        run: |
+          kubectl wait --for=condition=Ready -f ${{ env.YAML_DIR }}/kn-${{ matrix.service }}.yaml --timeout 120s
+          kubectl get service
+          kubectl get -f ${{ env.YAML_DIR }}/kn-${{ matrix.service }}.yaml
+
+      # Builds the Go test client and sends one request to the deployed service.
+      - name: Test invoking once
+        working-directory: tools/test-client
+        run: |
+          set -x
+          go build ./test-client.go
+
+          NODEPORT=80
+          # Take the URL column of the kservice listing and strip the scheme.
+          # (`:` needs no escaping in an awk regex; `\:` is an undefined escape.)
+          url=$(kubectl get kservice ${{ matrix.service }} | awk '$2 ~ /http/ {sub(/http:\/\//,""); print $2}')
+
+          # Fail with a clear message instead of dialing ":80" when no URL
+          # was found for the service.
+          if [ -z "$url" ]; then
+            echo "No URL found for kservice ${{ matrix.service }}" >&2
+            exit 1
+          fi
+
+          ./test-client --addr "$url:$NODEPORT" --name "Example text for CI"
+
+      # Runs even after a failure so the container logs are available.
+      - name: Print logs
+        if: ${{ always() }}
+        run: |
+          set -x
+          container_list=$(kubectl get pods -n default -o jsonpath="{.items[*].spec.containers[*].name}")
+          # $container_list is intentionally unquoted: it is split on
+          # whitespace into one container name per iteration.
+          for container_name in $container_list
+          do
+            kubectl logs -n default -c "$container_name" -l serving.knative.dev/service=${{ matrix.service }}
+          done
+
+      # Always remove the deployed resources, waiting for deletion to finish.
+      - name: Down
+        if: ${{ always() }}
+        run: |
+          kubectl delete -f ${{ env.YAML_DIR }}/kn-${{ matrix.service }}.yaml --namespace default --wait
diff --git a/benchmarks/image_classification/Makefile b/benchmarks/image_classification/Makefile
diff --git a/benchmarks/image_classification/README.md b/benchmarks/image_classification/README.md
@@ -1,17 +1,47 @@
 # Image Classification
-![Alt text](image.png)
 
-This benchmark consists of 2 functions:
+The benchmark implements `Resnet50 model inference` to do image classification.
 
-- The Query Generation function sends queries to the image classfication function. Currently it supports two types of queries: `Offline`(send all queries at one time) and `SingleStream`(send the next query as soon as previous one is completed). 
+## Running the benchmark locally (using Docker)
+1. Start function using docker compose
+```bash
+docker-compose -f ./yamls/docker-compose/dc-classification.yaml up
+```
 
-- The Image classification function uses `Resnet50` model, and currently only support `Imagenet2012 val` dataset.
+### Invoke once
+2. In a new terminal, invoke the interface function with grpcurl
+```bash
+./tools/bin/grpcurl -plaintext localhost:50000 helloworld.Greeter.SayHello
+```
 
-## Parameters
-- `scenario` - {'SingStream','Offline'}
-- `threads`
-- `qps`
-- `max-latency`
-- `time`: limit the time benchmark run
+The output includes actual QPS, mean and total inference time, number of queries and tiles.
+Here's an example of the benchmark output:
+```
+TestScenario.SingleStream qps=37.27, mean=0.0263, time=2.683, queries=100, tiles=50.0:0.0263,80.0:0.0264,90.0:0.0265,95.0:0.0268,99.0:0.0271,99.9:0.0290
+```
 
-This benchmark implements `Resnet50` model inference on `Imagenet2012` val dataset. Currently, the used scenrio is "SingleStream", which means that 
+### Invoke multiple times
+3. Run the invoker
+   ```bash
+   # build the invoker binary
+   cd ../../tools/invoker
+   make invoker
+
+   # Specify the hostname through "endpoints.json"
+   echo '[ { "hostname": "localhost" } ]' > endpoints.json
+
+   # Start the invoker with a chosen RPS rate and time
+   ./invoker -port 50000 -dbg -time 10 -rps 1
+   ```
+
+### Parameters
+Here are some parameters that can be modified in the `yamls/docker-compose/dc-classification.yaml` file:
+- `--count`: limits the number of items in the dataset used for accuracy pass
+- `--time`: limits the time the benchmark runs
+- `--scenario`: {'SingleStream','Offline'}. Offline means all queries are sent at once; SingleStream means the next query is sent as soon as the previous one has completed
+- `--threads`: number of worker threads to use (default: the number of processors in the system)
+- `--qps`: expected QPS
+- `--max-latency`: 
+comma-separated list of the latencies (in seconds) we try to reach at the 99th percentile (default: 0.01,0.05,0.100).
+- `--max-batchsize`: 
+maximum batchsize we generate to backend (default: 128)
diff --git a/benchmarks/image_classification/docker/Dockerfile.cpu b/benchmarks/image_classification/docker/Dockerfile.cpu
@@ -1,9 +1,32 @@
-FROM ubuntu:16.04
+# MIT License
+
+# Copyright (c) 2023 HyScale lab and vSwarm team
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+FROM ubuntu:20.04
 
 ENV PYTHON_VERSION=3.7
 ENV LANG C.UTF-8
 ENV LC_ALL C.UTF-8
 ENV PATH /opt/anaconda3/bin:$PATH
+ENV DEBIAN_FRONTEND=noninteractive
 
 WORKDIR /app
 ENV HOME /app
@@ -31,12 +54,12 @@ RUN cd /opt && \
     echo "conda activate base" >> ~/.bashrc && \
     conda config --set always_yes yes --set changeps1 no
 
-RUN conda install pytorch-cpu torchvision-cpu -c pytorch 
-RUN pip install --upgrade pip
-RUN pip install cmake
-RUN pip install future pillow onnx opencv-python-headless tensorflow onnxruntime
-RUN pip install Cython && pip install pycocotools
-RUN pip install protobuf==3.20.*
+RUN conda install pytorch-cpu torchvision-cpu -c pytorch && \
+    pip install --upgrade pip && \
+    pip install cmake && \
+    pip install future pillow onnx opencv-python-headless tensorflow && \
+    pip install Cython && pip install pycocotools && \
+    pip install protobuf==3.20.*
 
 # loadgen
 RUN cd /tmp && \
@@ -49,18 +72,22 @@ RUN cd /tmp && \
 COPY benchmarks/image_classification/python /app
 
 # Prepare dataset and model
-# RUN cd ./data_imagenet && \
-#     wget --quiet https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar && \
-#     tar xf ILSVRC2012_img_val.tar && \
-#     cd ../models && \
-#     wget --quiet https://zenodo.org/record/2535873/files/resnet50_v1.pb
-
+# Unpack the bundled dataset (500 images) and the ResNet50 model archive
 RUN cd ./data_imagenet && \
-    wget --quiet https://upload.wikimedia.org/wikipedia/commons/thumb/5/57/7weeks_old.JPG/800px-7weeks_old.JPG && \
-    echo "800px-7weeks_old.JPG 207" > val_map.txt
+    unzip ILSVRC2012_img_val.zip && \
+    rm -f ILSVRC2012_img_val.zip && \
+    cd ../models && \
+    unzip resnet50_v1.pb.zip && \
+    rm -rf resnet50_v1.pb.zip
+
+# Here's one image sample used to test Dockerfile
+# RUN cd ./data_imagenet && \
+#     wget --quiet https://upload.wikimedia.org/wikipedia/commons/thumb/5/57/7weeks_old.JPG/800px-7weeks_old.JPG && \
+#     echo "800px-7weeks_old.JPG 207" > val_map.txt && \
+    # cd ./models && \
+    # unzip resnet50_v1.pb.zip && \
+    # rm -rf resnet50_v1.pb.zip
 
- RUN mkdir ./models && cd ./models && \
-    wget --quiet https://zenodo.org/record/2535873/files/resnet50_v1.pb
 
 ADD https://raw.githubusercontent.com/manyiw99/vSwarm-proto/main/proto/image_classification/image_classification_pb2_grpc.py ./
 ADD https://raw.githubusercontent.com/manyiw99/vSwarm-proto/main/proto/image_classification/image_classification_pb2.py ./proto/image_classification/

diff --git a/benchmarks/image_classification/python/backend_tf.py b/benchmarks/image_classification/python/backend_tf.py
@@ -1,3 +1,20 @@
+
+# Copyright (c) MLPerf inference benchmark team. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
 """
 tensorflow backend (https://github.com/tensorflow/tensorflow)
 """