diff --git a/.github/workflows/docker-push-vidvec.yml b/.github/workflows/docker-push-vidvec.yml
index 952691ca..bb1104f1 100644
--- a/.github/workflows/docker-push-vidvec.yml
+++ b/.github/workflows/docker-push-vidvec.yml
@@ -4,7 +4,7 @@ on: workflow_dispatch
 
 jobs:
   api:
-    runs-on: macos-14
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
         with:
@@ -16,6 +16,8 @@ jobs:
           echo "setting variables"
           echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
 
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
       - uses: elgohr/Publish-Docker-Github-Action@master
         with:
          username: tattletech
@@ -24,6 +26,7 @@ jobs:
           workdir: src/api/
           dockerfile: Dockerfile.vid_vec_rep_resnet
           tags: ${{ steps.vars.outputs.sha_short }}
+          platforms: linux/amd64,linux/arm64
 
       # - name: deploy to cluster
       #   uses: steebchen/kubectl@v2.0.0
diff --git a/src/api/Dockerfile.vid_vec_rep_resnet b/src/api/Dockerfile.vid_vec_rep_resnet
index 77a2ea31..e298aa0b 100644
--- a/src/api/Dockerfile.vid_vec_rep_resnet
+++ b/src/api/Dockerfile.vid_vec_rep_resnet
@@ -1,4 +1,4 @@
-FROM arm64v8/python:3.11-slim-bullseye AS base
+FROM python:3.11-slim-bullseye AS base
 RUN apt-get update \
     && apt-get -y upgrade \
     && apt-get install -y \
@@ -14,6 +14,31 @@ RUN pip install --no-cache-dir --upgrade pip
 WORKDIR /app
 COPY ./core/operators/vid_vec_rep_resnet_requirements.txt /app/core/operators/vid_vec_rep_resnet_requirements.txt
 RUN pip install --no-cache-dir --user -r /app/core/operators/vid_vec_rep_resnet_requirements.txt
+
+### AWS Graviton Optimization ###
+# Graviton3(E) (e.g. c7g, c7gn and Hpc7g instances) supports the BF16 format for
+# ML acceleration. oneDNN enables it via DNNL_DEFAULT_FPMATH_MODE, but the flag
+# should only be set where the host CPU advertises bf16, i.e. at container start:
+#   grep -q bf16 /proc/cpuinfo && export DNNL_DEFAULT_FPMATH_MODE=BF16
+
+# Enable primitive caching to avoid the latency overhead of redundant primitive
+# allocation. Note that this caching increases the memory footprint; tune the
+# cache capacity to a lower value to reduce the additional memory requirement.
+ENV LRU_CACHE_CAPACITY=1024
+
+# Enable transparent huge page allocations from the PyTorch C10 allocator.
+ENV THP_MEM_ALLOC_ENABLE=1
+
+# Distribute the OpenMP threads across all processes of multi-process
+# applications to avoid over-subscribing the vCPUs. With a single application
+# process, num_processes is 1 and every vCPU maps one-to-one to an OMP thread.
+# The vCPU count is host-dependent, so compute this at container start:
+#   num_vcpus=$(getconf _NPROCESSORS_ONLN)
+#   num_processes=1
+#   export OMP_NUM_THREADS=$((num_vcpus / num_processes > 1 ? num_vcpus / num_processes : 1))
+ENV OMP_PROC_BIND=false
+ENV OMP_PLACES=cores
+###
 
 COPY ./core/operators/vid_vec_rep_resnet.py /app/core/operators/vid_vec_rep_resnet.py
 COPY ./core/operators/sample_data/sample-cat-video.mp4 /app/core/operators/sample_data/sample-cat-video.mp4
diff --git a/src/api/core/operators/vid_vec_rep_resnet_requirements.txt b/src/api/core/operators/vid_vec_rep_resnet_requirements.txt
index 4c177836..ad27c04f 100644
--- a/src/api/core/operators/vid_vec_rep_resnet_requirements.txt
+++ b/src/api/core/operators/vid_vec_rep_resnet_requirements.txt
@@ -4,7 +4,7 @@
 #
 # pip-compile --find-links=https://download.pytorch.org/whl/torch_stable.html vid_vec_rep_resnet_requirements.in
 #
---find-links https://download.pytorch.org/whl/torch_stable.html
+--find-links https://download.pytorch.org/whl/cpu
 
 certifi==2024.2.2
     # via requests
@@ -65,13 +65,13 @@ scipy==1.11.4
     # via -r vid_vec_rep_resnet_requirements.in
 sympy==1.12
     # via torch
-textual==0.48.2
+textual==0.50.0
     # via memray
-torch==2.1.2+cpu
+torch
     # via
     #   -r vid_vec_rep_resnet_requirements.in
     #   torchvision
-torchvision==0.16.2+cpu
+torchvision
     # via -r vid_vec_rep_resnet_requirements.in
 typing-extensions==4.9.0
     # via
@@ -79,5 +79,5 @@ typing-extensions==4.9.0
    #   torch
 uc-micro-py==1.0.2
     # via linkify-it-py
-urllib3==2.0.7
+urllib3==2.2.0
     # via requests
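The host-dependent Graviton settings referenced in the Dockerfile comments above (BF16 detection and the OpenMP thread count) cannot be fixed at build time with ENV, since they depend on the machine the container actually runs on. One way to apply them is an entrypoint script. Below is a minimal sketch, assuming a hypothetical docker-entrypoint.sh copied into the image and registered with ENTRYPOINT; neither exists in this diff.

```sh
#!/bin/sh
# docker-entrypoint.sh -- hypothetical helper, not part of this diff.
# Applies the host-dependent Graviton tunings before starting the application.

# Use BF16 fast math in oneDNN only if the CPU advertises bf16 (Graviton3/3E).
if grep -q bf16 /proc/cpuinfo; then
    export DNNL_DEFAULT_FPMATH_MODE=BF16
fi

# One OpenMP thread per vCPU, divided across application processes
# (a single process here), never dropping below 1.
num_vcpus=$(getconf _NPROCESSORS_ONLN)
num_processes=1
omp_threads=$((num_vcpus / num_processes))
[ "$omp_threads" -lt 1 ] && omp_threads=1
export OMP_NUM_THREADS="$omp_threads"

# Hand off to whatever command the image was started with.
exec "$@"
```

Wiring it up would then take something like `COPY docker-entrypoint.sh /app/` and `ENTRYPOINT ["/app/docker-entrypoint.sh"]` in the Dockerfile, ahead of the existing command.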
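To check the new multi-platform build locally before relying on the workflow, a Buildx invocation along these lines should work. The builder name, the binfmt setup step, and the image tag are placeholders, not part of this diff.

```sh
# One-time setup: QEMU emulation for the non-native architecture and a
# container-backed builder ("multiarch" is a placeholder name).
docker run --privileged --rm tonistiigi/binfmt --install all
docker buildx create --name multiarch --use

# Build the operator image for the same platforms the workflow targets.
# Run from the repository root; "feluda-vidvec:local" is a placeholder tag.
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  -f src/api/Dockerfile.vid_vec_rep_resnet \
  -t feluda-vidvec:local \
  src/api/
```

Without --push or --load the result stays in the build cache, which is enough to confirm that both platforms build cleanly.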