Commit: Add files via upload
justinjohn0306 authored Sep 7, 2023
1 parent 75de7f4 commit d175fa1
Showing 12 changed files with 589 additions and 215 deletions.
57 changes: 57 additions & 0 deletions Dockerfile
@@ -0,0 +1,57 @@
FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04

ARG DEBIAN_FRONTEND=noninteractive

# install python via pyenv
RUN apt-get update && apt-get install -y --no-install-recommends \
    make \
    build-essential \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    wget \
    curl \
    llvm \
    libncurses5-dev \
    libncursesw5-dev \
    xz-utils \
    tk-dev \
    libffi-dev \
    liblzma-dev \
    git \
    ca-certificates \
    libgl1 \
    && rm -rf /var/lib/apt/lists/*
ENV PATH="/root/.pyenv/shims:/root/.pyenv/bin:$PATH"
ARG PYTHON_VERSION=3.8
RUN curl -s -S -L https://raw.githubusercontent.com/pyenv/pyenv-installer/master/bin/pyenv-installer | bash && \
    pyenv install $PYTHON_VERSION && \
    pyenv global $PYTHON_VERSION

# install cog
RUN pip install cog

# install deps
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# copy to /src
ENV WORKDIR /src
RUN mkdir -p $WORKDIR
WORKDIR $WORKDIR

# install requirements
COPY requirements.txt .
RUN pip install -r requirements.txt
RUN pip install git+https://github.com/elliottzheng/batch-face.git@master

# copy sources
COPY . .

ENV PYTHONUNBUFFERED=1

# run cog
CMD python3 -m cog.server.http
2 changes: 1 addition & 1 deletion README.md
@@ -10,7 +10,7 @@ This code is part of the paper: _A Lip Sync Expert Is All You Need for Speech to

|📑 Original Paper|📰 Project Page|🌀 Demo|⚡ Live Testing|📔 Colab Notebook
|:-:|:-:|:-:|:-:|:-:|
-[Paper](http://arxiv.org/abs/2008.10010) | [Project Page](http://cvit.iiit.ac.in/research/projects/cvit-projects/a-lip-sync-expert-is-all-you-need-for-speech-to-lip-generation-in-the-wild/) | [Demo Video](https://youtu.be/0fXaDCZNOJc) | [Interactive Demo](https://bhaasha.iiit.ac.in/lipsync) | [Colab Notebook](https://colab.research.google.com/github/justinjohn0306/Wav2Lip/blob/master/Wav2Lip_simplified_v5.ipynb)
+[Paper](http://arxiv.org/abs/2008.10010) | [Project Page](http://cvit.iiit.ac.in/research/projects/cvit-projects/a-lip-sync-expert-is-all-you-need-for-speech-to-lip-generation-in-the-wild/) | [Demo Video](https://youtu.be/0fXaDCZNOJc) | [Interactive Demo](https://bhaasha.iiit.ac.in/lipsync) | [Colab Notebook](https://colab.research.google.com/drive/1tZpDWXz49W6wDcTprANRGLo2D_EbD5J8?usp=sharing) / [Updated Colab Notebook](https://colab.research.google.com/drive/1IjFW1cLevs6Ouyu4Yht4mnR4yeuMqO7Y#scrollTo=MH1m608OymLH)

<img src="https://drive.google.com/uc?export=view&id=1Wn0hPmpo4GRbCIJR8Tf20Akzdi1qjjG9"/>

2 changes: 1 addition & 1 deletion audio.py
@@ -97,7 +97,7 @@ def _linear_to_mel(spectogram):

def _build_mel_basis():
    assert hp.fmax <= hp.sample_rate // 2
-    return librosa.filters.mel(sr=hp.sample_rate, n_fft=hp.n_fft, n_mels=hp.num_mels,
+    return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels,
                                fmin=hp.fmin, fmax=hp.fmax)

def _amp_to_db(x):
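
Context for this one-line change: librosa 0.7.0, the version pinned in cog.yaml below, still accepts sr and n_fft positionally, while librosa >= 0.10 makes them keyword-only. A minimal sketch of the call, with literal values standing in for the hp.* fields (assumed from Wav2Lip's default hparams):

import librosa

# Positional form, as used after this change; valid under librosa 0.7.0.
# Under librosa >= 0.10 the same call raises a TypeError, since sr and n_fft
# became keyword-only. The literals below are assumptions standing in for
# hp.sample_rate, hp.n_fft, hp.num_mels, hp.fmin and hp.fmax.
mel_basis = librosa.filters.mel(16000, 800, n_mels=80, fmin=55, fmax=7600)
print(mel_basis.shape)  # (80, 401): num_mels x (1 + n_fft // 2)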
35 changes: 35 additions & 0 deletions cog.yaml
@@ -0,0 +1,35 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

image: r8.im/devxpy/cog-wav2lip

build:
  # set to true if your model requires a GPU
  gpu: true
  cuda: "11.6.2"

  # a list of ubuntu apt packages to install
  system_packages:
    - ffmpeg
    - cmake

  # python version in the form '3.8' or '3.8.12'
  python_version: "3.8"

  # a list of packages in the format <package-name>==<version>
  python_packages:
    - numpy==1.23.4
    - librosa==0.7.0
    - opencv-python==4.6.0.66
    - torch==1.12.1+cu116 --extra-index-url=https://download.pytorch.org/whl/cu116
    - torchvision==0.13.1+cu116 --extra-index-url=https://download.pytorch.org/whl/cu116
    - tqdm==4.45.0
    - numba==0.48
    - mediapipe==0.8.11

  # commands run after the environment is setup
  run:
    - pip install git+https://github.com/elliottzheng/batch-face.git@master

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
55 changes: 55 additions & 0 deletions face_detect.py
@@ -0,0 +1,55 @@
import cv2
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_detection = mp.solutions.face_detection


def face_rect(images):
    with mp_face_detection.FaceDetection(
        model_selection=1, min_detection_confidence=0.5
    ) as face_detection:
        for image_cv2 in images:
            # Convert the BGR image to RGB and process it with MediaPipe Face Detection.
            results = face_detection.process(cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB))

            # Yield one bounding rect per detected face.
            if not results.detections:
                yield None
                continue  # results.detections is None here; iterating it would raise TypeError
            for detection in results.detections:
                yield _get_bounding_rect(image_cv2, detection)


def _get_bounding_rect(
    image: mp_drawing.np.ndarray,
    detection: mp_drawing.detection_pb2.Detection,
):
    """
    Adapted from mediapipe.solutions.drawing_utils.draw_detection()
    """
    if not detection.location_data:
        return
    if image.shape[2] != mp_drawing._BGR_CHANNELS:
        raise ValueError("Input image must contain three channel bgr data.")
    image_rows, image_cols, _ = image.shape

    location = detection.location_data

    # get the bounding box, if one exists.
    if not location.HasField("relative_bounding_box"):
        return
    relative_bounding_box = location.relative_bounding_box
    rect_start_point = mp_drawing._normalized_to_pixel_coordinates(
        relative_bounding_box.xmin, relative_bounding_box.ymin, image_cols, image_rows
    )
    rect_end_point = mp_drawing._normalized_to_pixel_coordinates(
        relative_bounding_box.xmin + relative_bounding_box.width,
        relative_bounding_box.ymin + relative_bounding_box.height,
        image_cols,
        image_rows,
    )

    return *rect_start_point, *rect_end_point
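
As a usage sketch: face_rect consumes an iterable of BGR frames and lazily yields one (x1, y1, x2, y2) tuple per detected face, or None for a frame with no face. The video path and frame-reading loop below are illustrative assumptions, not code from this commit:

import cv2

from face_detect import face_rect

cap = cv2.VideoCapture("input.mp4")  # hypothetical input file
frames = []
while True:
    ok, frame = cap.read()  # OpenCV returns BGR frames, which face_rect expects
    if not ok:
        break
    frames.append(frame)
cap.release()

for rect in face_rect(frames):
    if rect is None:
        continue  # no face detected in this frame
    x1, y1, x2, y2 = rect
    print(f"face at ({x1}, {y1})-({x2}, {y2})")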

13 changes: 7 additions & 6 deletions face_detection/detection/sfd/sfd_detector.py
@@ -14,19 +14,20 @@


class SFDDetector(FaceDetector):
-    def __init__(self, device, path_to_detector=os.path.join(os.path.dirname(os.path.abspath(__file__)), 's3fd.pth'), verbose=False):
-        super(SFDDetector, self).__init__(device, verbose)
+    @classmethod
+    def load_model(cls, device):
+        path_to_detector = os.path.join(os.path.dirname(os.path.abspath(__file__)), 's3fd.pth')

        # Initialise the face detector
        if not os.path.isfile(path_to_detector):
            model_weights = load_url(models_urls['s3fd'])
        else:
            model_weights = torch.load(path_to_detector)

-        self.face_detector = s3fd()
-        self.face_detector.load_state_dict(model_weights)
-        self.face_detector.to(device)
-        self.face_detector.eval()
+        cls.face_detector = s3fd()
+        cls.face_detector.load_state_dict(model_weights)
+        cls.face_detector.to(device)
+        cls.face_detector.eval()

    def detect_from_image(self, tensor_or_path):
        image = self.tensor_or_path_to_ndarray(tensor_or_path)
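
The net effect of this hunk is that the s3fd weights move from per-instance to class-level state: load_model builds and caches the network on the class once, and later instances reuse it. A minimal usage sketch, assuming the base FaceDetector.__init__(device, verbose) seen in the removed lines is unchanged and gives verbose a default:

import torch

from face_detection.detection.sfd.sfd_detector import SFDDetector

device = "cuda" if torch.cuda.is_available() else "cpu"

SFDDetector.load_model(device)  # loads the s3fd weights once, onto the class itself
detector = SFDDetector(device)  # assumes FaceDetector.__init__ defaults verbose
faces = detector.detect_from_image("frame.jpg")  # hypothetical image path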