From 3e2707f21e776f132a5fb9f9dec7422d3be0cc29 Mon Sep 17 00:00:00 2001
From: Aurora <5505558+duggalsu@users.noreply.github.com>
Date: Fri, 9 Feb 2024 11:03:45 +0530
Subject: [PATCH 1/2] Optimize docker for multi-arch builds

- Added pytorch optimization for AWS graviton in dockerfile
- Modified requirements.txt to work with multi-arch support
- Modified docker vid vec github action with multi-arch build support
---
 .github/workflows/docker-push-vidvec.yml      |  7 ++++++-
 src/api/Dockerfile.vid_vec_rep_resnet         | 26 +++++++++++++++++++++-
 .../vid_vec_rep_resnet_requirements.txt       | 10 ++++-----
 3 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/docker-push-vidvec.yml b/.github/workflows/docker-push-vidvec.yml
index 952691ca..bb1104f1 100644
--- a/.github/workflows/docker-push-vidvec.yml
+++ b/.github/workflows/docker-push-vidvec.yml
@@ -4,7 +4,7 @@ on: workflow_dispatch
 
 jobs:
   api:
-    runs-on: macos-14
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
         with:
@@ -16,6 +16,10 @@ jobs:
           echo "setting variables"
           echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
 
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
       - uses: elgohr/Publish-Docker-Github-Action@master
         with:
           username: tattletech
@@ -24,6 +28,7 @@ jobs:
           workdir: src/api/
           dockerfile: Dockerfile.vid_vec_rep_resnet
           tags: ${{ steps.vars.outputs.sha_short }}
+          platforms: linux/amd64,linux/arm64
 
 #      - name: deploy to cluster
 #        uses: steebchen/kubectl@v2.0.0
diff --git a/src/api/Dockerfile.vid_vec_rep_resnet b/src/api/Dockerfile.vid_vec_rep_resnet
index 77a2ea31..e90ff153 100644
--- a/src/api/Dockerfile.vid_vec_rep_resnet
+++ b/src/api/Dockerfile.vid_vec_rep_resnet
@@ -1,4 +1,4 @@
-FROM arm64v8/python:3.11-slim-bullseye AS base
+FROM python:3.11-slim-bullseye AS base
 RUN apt-get update \
     && apt-get -y upgrade \
     && apt-get install -y \
@@ -14,6 +14,30 @@
 RUN pip install --no-cache-dir --upgrade pip
 WORKDIR /app
 COPY ./core/operators/vid_vec_rep_resnet_requirements.txt /app/core/operators/vid_vec_rep_resnet_requirements.txt
 RUN pip install --no-cache-dir --user -r /app/core/operators/vid_vec_rep_resnet_requirements.txt
+
+### AWS Graviton Optimization ###
+# NOTE: a Dockerfile cannot execute shell statements (grep/export) at the
+# top level, so only the static tuning knobs are baked in here via ENV.
+# The CPU-dependent settings must be computed when the container starts
+# (see the entrypoint snippet below), since build and run hosts may differ.
+# Enable oneDNN primitive caching to avoid redundant primitive allocation
+# latency. This increases the memory footprint; lower the capacity to
+# reduce the additional memory requirement.
+ENV LRU_CACHE_CAPACITY=1024
+# Enable transparent huge page allocations from the PyTorch C10 allocator.
+ENV THP_MEM_ALLOC_ENABLE=1
+ENV OMP_PROC_BIND=false
+ENV OMP_PLACES=cores
+# Entrypoint snippet (runtime-dependent settings):
+#   # Graviton3(E) supports BF16 for ML acceleration; enable it in oneDNN.
+#   grep -q bf16 /proc/cpuinfo && export DNNL_DEFAULT_FPMATH_MODE=BF16
+#   # Distribute OMP threads across app processes to avoid oversubscribing
+#   # vCPUs; with a single application process set num_processes=1.
+#   num_vcpus=$(getconf _NPROCESSORS_ONLN)
+#   num_processes=
+#   export OMP_NUM_THREADS=$(( num_vcpus/num_processes > 1 ? num_vcpus/num_processes : 1 ))
+###
+
 COPY ./core/operators/vid_vec_rep_resnet.py /app/core/operators/vid_vec_rep_resnet.py
 COPY ./core/operators/sample_data/sample-cat-video.mp4 /app/core/operators/sample_data/sample-cat-video.mp4
diff --git a/src/api/core/operators/vid_vec_rep_resnet_requirements.txt b/src/api/core/operators/vid_vec_rep_resnet_requirements.txt
index 4c177836..ad27c04f 100644
--- a/src/api/core/operators/vid_vec_rep_resnet_requirements.txt
+++ b/src/api/core/operators/vid_vec_rep_resnet_requirements.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile --find-links=https://download.pytorch.org/whl/torch_stable.html vid_vec_rep_resnet_requirements.in
 #
---find-links https://download.pytorch.org/whl/torch_stable.html
+--extra-index-url https://download.pytorch.org/whl/cpu
 certifi==2024.2.2
     # via requests
@@ -65,13 +65,13 @@ scipy==1.11.4
     # via -r vid_vec_rep_resnet_requirements.in
 sympy==1.12
     # via torch
-textual==0.48.2
+textual==0.50.0
     # via memray
-torch==2.1.2+cpu
+torch==2.1.2
     # via
     #   -r vid_vec_rep_resnet_requirements.in
     #   torchvision
-torchvision==0.16.2+cpu
+torchvision==0.16.2
     # via -r vid_vec_rep_resnet_requirements.in
 typing-extensions==4.9.0
     # via
@@ -79,5 +79,5 @@ typing-extensions==4.9.0
     #   torch
 uc-micro-py==1.0.2
     # via linkify-it-py
-urllib3==2.0.7
+urllib3==2.2.0
     # via requests

From c22bc5891c94d72541874dc6173fd00ce6a4a53e Mon Sep 17 00:00:00 2001
From: Aurora <5505558+duggalsu@users.noreply.github.com>
Date: Fri, 9 Feb 2024 11:11:13 +0530
Subject: [PATCH 2/2] - Fix num_processes flag

---
 src/api/Dockerfile.vid_vec_rep_resnet | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/api/Dockerfile.vid_vec_rep_resnet b/src/api/Dockerfile.vid_vec_rep_resnet
index e90ff153..e298aa0b 100644
--- a/src/api/Dockerfile.vid_vec_rep_resnet
+++ b/src/api/Dockerfile.vid_vec_rep_resnet
@@ -35,7 +35,7 @@ ENV OMP_PLACES=cores
 #   # Distribute OMP threads across app processes to avoid oversubscribing
 #   # vCPUs; with a single application process set num_processes=1.
 #   num_vcpus=$(getconf _NPROCESSORS_ONLN)
-#   num_processes=
+#   num_processes=1
 #   export OMP_NUM_THREADS=$(( num_vcpus/num_processes > 1 ? num_vcpus/num_processes : 1 ))
 ###
 