From 3e2707f21e776f132a5fb9f9dec7422d3be0cc29 Mon Sep 17 00:00:00 2001
From: Aurora <5505558+duggalsu@users.noreply.github.com>
Date: Fri, 9 Feb 2024 11:03:45 +0530
Subject: [PATCH 1/2] Optimize docker for multi-arch builds

- Added pytorch optimization for AWS graviton in dockerfile
- Modified requirements.txt to work with multi-arch support
- Modified docker vid vec github action with multi-arch build support
---
 .github/workflows/docker-push-vidvec.yml      |  7 ++++++-
 src/api/Dockerfile.vid_vec_rep_resnet         | 26 +++++++++++++++++++++-
 .../vid_vec_rep_resnet_requirements.txt       | 10 ++++-----
 3 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/docker-push-vidvec.yml b/.github/workflows/docker-push-vidvec.yml
index 952691ca..bb1104f1 100644
--- a/.github/workflows/docker-push-vidvec.yml
+++ b/.github/workflows/docker-push-vidvec.yml
@@ -4,7 +4,7 @@ on: workflow_dispatch
 
 jobs:
   api:
-    runs-on: macos-14
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
         with:
@@ -16,6 +16,10 @@ jobs:
           echo "setting variables"
           echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
 
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
       - uses: elgohr/Publish-Docker-Github-Action@master
         with:
           username: tattletech
@@ -24,6 +28,7 @@ jobs:
           workdir: src/api/
           dockerfile: Dockerfile.vid_vec_rep_resnet
           tags: ${{ steps.vars.outputs.sha_short }}
+          platforms: linux/amd64,linux/arm64
 
 #      - name: deploy to cluster
 #        uses: steebchen/kubectl@v2.0.0
diff --git a/src/api/Dockerfile.vid_vec_rep_resnet b/src/api/Dockerfile.vid_vec_rep_resnet
index 77a2ea31..e90ff153 100644
--- a/src/api/Dockerfile.vid_vec_rep_resnet
+++ b/src/api/Dockerfile.vid_vec_rep_resnet
@@ -1,4 +1,4 @@
-FROM arm64v8/python:3.11-slim-bullseye AS base
+FROM python:3.11-slim-bullseye AS base
 RUN apt-get update \
     && apt-get -y upgrade \
     && apt-get install -y \
@@ -14,6 +14,30 @@
 RUN pip install --no-cache-dir --upgrade pip
 WORKDIR /app
 COPY ./core/operators/vid_vec_rep_resnet_requirements.txt /app/core/operators/vid_vec_rep_resnet_requirements.txt
 RUN pip install --no-cache-dir --user -r /app/core/operators/vid_vec_rep_resnet_requirements.txt
+
+### AWS Graviton Optimization ###
+# NOTE: a Dockerfile cannot execute shell statements (grep/export) at the
+# top level, so only the static tuning knobs are baked in here via ENV.
+# The CPU-dependent settings must be computed when the container starts
+# (see the entrypoint snippet below), since build and run hosts may differ.
+# Enable oneDNN primitive caching to avoid redundant primitive allocation
+# latency. This increases the memory footprint; lower the capacity to
+# reduce the additional memory requirement.
+ENV LRU_CACHE_CAPACITY=1024
+# Enable transparent huge page allocations from the PyTorch C10 allocator.
+ENV THP_MEM_ALLOC_ENABLE=1
+ENV OMP_PROC_BIND=false
+ENV OMP_PLACES=cores
+# Entrypoint snippet (runtime-dependent settings):
+#   # Graviton3(E) supports BF16 for ML acceleration; enable it in oneDNN.
+#   grep -q bf16 /proc/cpuinfo && export DNNL_DEFAULT_FPMATH_MODE=BF16
+#   # Distribute OMP threads across app processes to avoid oversubscribing
+#   # vCPUs; with a single application process set num_processes=1.
+#   num_vcpus=$(getconf _NPROCESSORS_ONLN)
+#   num_processes=
+#   export OMP_NUM_THREADS=$(( num_vcpus/num_processes > 1 ? num_vcpus/num_processes : 1 ))
+###
+
 COPY ./core/operators/vid_vec_rep_resnet.py /app/core/operators/vid_vec_rep_resnet.py
 COPY ./core/operators/sample_data/sample-cat-video.mp4 /app/core/operators/sample_data/sample-cat-video.mp4
diff --git a/src/api/core/operators/vid_vec_rep_resnet_requirements.txt b/src/api/core/operators/vid_vec_rep_resnet_requirements.txt
index 4c177836..ad27c04f 100644
--- a/src/api/core/operators/vid_vec_rep_resnet_requirements.txt
+++ b/src/api/core/operators/vid_vec_rep_resnet_requirements.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile --find-links=https://download.pytorch.org/whl/torch_stable.html vid_vec_rep_resnet_requirements.in
 #
---find-links https://download.pytorch.org/whl/torch_stable.html
+--extra-index-url https://download.pytorch.org/whl/cpu
 certifi==2024.2.2
     # via requests
@@ -65,13 +65,13 @@ scipy==1.11.4
     # via -r vid_vec_rep_resnet_requirements.in
 sympy==1.12
     # via torch
-textual==0.48.2
+textual==0.50.0
     # via memray
-torch==2.1.2+cpu
+torch==2.1.2
     # via
     #   -r vid_vec_rep_resnet_requirements.in
     #   torchvision
-torchvision==0.16.2+cpu
+torchvision==0.16.2
     # via -r vid_vec_rep_resnet_requirements.in
 typing-extensions==4.9.0
     # via
@@ -79,5 +79,5 @@ typing-extensions==4.9.0
     #   torch
 uc-micro-py==1.0.2
     # via linkify-it-py
-urllib3==2.0.7
+urllib3==2.2.0
     # via requests

From c22bc5891c94d72541874dc6173fd00ce6a4a53e Mon Sep 17 00:00:00 2001
From: Aurora <5505558+duggalsu@users.noreply.github.com>
Date: Fri, 9 Feb 2024 11:11:13 +0530
Subject: [PATCH 2/2] - Fix num_processes flag

---
 src/api/Dockerfile.vid_vec_rep_resnet | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/api/Dockerfile.vid_vec_rep_resnet b/src/api/Dockerfile.vid_vec_rep_resnet
index e90ff153..e298aa0b 100644
--- a/src/api/Dockerfile.vid_vec_rep_resnet
+++ b/src/api/Dockerfile.vid_vec_rep_resnet
@@ -35,7 +35,7 @@ ENV OMP_PLACES=cores
 #   # Distribute OMP threads across app processes to avoid oversubscribing
 #   # vCPUs; with a single application process set num_processes=1.
 #   num_vcpus=$(getconf _NPROCESSORS_ONLN)
-#   num_processes=
+#   num_processes=1
 #   export OMP_NUM_THREADS=$(( num_vcpus/num_processes > 1 ? num_vcpus/num_processes : 1 ))
 ###
 