Commit: Add files via upload
justinjohn0306 authored Sep 7, 2023
1 parent 75de7f4 commit d175fa1
Showing 12 changed files with 589 additions and 215 deletions.
57 changes: 57 additions & 0 deletions Dockerfile
@@ -0,0 +1,57 @@
FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04

ARG DEBIAN_FRONTEND=noninteractive

# install python via pyenv
RUN apt-get update && apt-get install -y --no-install-recommends \
    make \
    build-essential \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    wget \
    curl \
    llvm \
    libncurses5-dev \
    libncursesw5-dev \
    xz-utils \
    tk-dev \
    libffi-dev \
    liblzma-dev \
    git \
    ca-certificates \
    libgl1 \
    && rm -rf /var/lib/apt/lists/*
ENV PATH="/root/.pyenv/shims:/root/.pyenv/bin:$PATH"
ARG PYTHON_VERSION=3.8
RUN curl -s -S -L https://raw.githubusercontent.com/pyenv/pyenv-installer/master/bin/pyenv-installer | bash && \
    pyenv install $PYTHON_VERSION && \
    pyenv global $PYTHON_VERSION

# install cog
RUN pip install cog

# install deps
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# copy to /src
ENV WORKDIR /src
RUN mkdir -p $WORKDIR
WORKDIR $WORKDIR

# install requirements
COPY requirements.txt .
RUN pip install -r requirements.txt
RUN pip install git+https://github.com/elliottzheng/batch-face.git@master

# copy sources
COPY . .

ENV PYTHONUNBUFFERED=1

# run cog
CMD python3 -m cog.server.http
2 changes: 1 addition & 1 deletion README.md
@@ -10,7 +10,7 @@ This code is part of the paper: _A Lip Sync Expert Is All You Need for Speech to

|📑 Original Paper|📰 Project Page|🌀 Demo|⚡ Live Testing|📔 Colab Notebook
|:-:|:-:|:-:|:-:|:-:|
-[Paper](http://arxiv.org/abs/2008.10010) | [Project Page](http://cvit.iiit.ac.in/research/projects/cvit-projects/a-lip-sync-expert-is-all-you-need-for-speech-to-lip-generation-in-the-wild/) | [Demo Video](https://youtu.be/0fXaDCZNOJc) | [Interactive Demo](https://bhaasha.iiit.ac.in/lipsync) | [Colab Notebook](https://colab.research.google.com/github/justinjohn0306/Wav2Lip/blob/master/Wav2Lip_simplified_v5.ipynb)
+[Paper](http://arxiv.org/abs/2008.10010) | [Project Page](http://cvit.iiit.ac.in/research/projects/cvit-projects/a-lip-sync-expert-is-all-you-need-for-speech-to-lip-generation-in-the-wild/) | [Demo Video](https://youtu.be/0fXaDCZNOJc) | [Interactive Demo](https://bhaasha.iiit.ac.in/lipsync) | [Colab Notebook](https://colab.research.google.com/drive/1tZpDWXz49W6wDcTprANRGLo2D_EbD5J8?usp=sharing) / [Updated Colab Notebook](https://colab.research.google.com/drive/1IjFW1cLevs6Ouyu4Yht4mnR4yeuMqO7Y#scrollTo=MH1m608OymLH)

<img src="https://drive.google.com/uc?export=view&id=1Wn0hPmpo4GRbCIJR8Tf20Akzdi1qjjG9"/>

2 changes: 1 addition & 1 deletion audio.py
@@ -97,7 +97,7 @@ def _linear_to_mel(spectogram):

def _build_mel_basis():
    assert hp.fmax <= hp.sample_rate // 2
-    return librosa.filters.mel(sr=hp.sample_rate, n_fft=hp.n_fft, n_mels=hp.num_mels,
+    return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels,
                                fmin=hp.fmin, fmax=hp.fmax)

def _amp_to_db(x):
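
Context for this one-line change: librosa 0.7.0, the version pinned in cog.yaml below, still accepts sr and n_fft positionally, while librosa >= 0.10 makes them keyword-only. A minimal sketch of the call, with literal values standing in for the hp.* fields (assumed from Wav2Lip's default hparams):

import librosa

# Positional form, as used after this change; valid under librosa 0.7.0.
# Under librosa >= 0.10 the same call raises a TypeError, since sr and n_fft
# became keyword-only. The literals below are assumptions standing in for
# hp.sample_rate, hp.n_fft, hp.num_mels, hp.fmin and hp.fmax.
mel_basis = librosa.filters.mel(16000, 800, n_mels=80, fmin=55, fmax=7600)
print(mel_basis.shape)  # (80, 401): num_mels x (1 + n_fft // 2)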
35 changes: 35 additions & 0 deletions cog.yaml
@@ -0,0 +1,35 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

image: r8.im/devxpy/cog-wav2lip

build:
  # set to true if your model requires a GPU
  gpu: true
  cuda: "11.6.2"

  # a list of ubuntu apt packages to install
  system_packages:
    - ffmpeg
    - cmake

  # python version in the form '3.8' or '3.8.12'
  python_version: "3.8"

  # a list of packages in the format <package-name>==<version>
  python_packages:
    - numpy==1.23.4
    - librosa==0.7.0
    - opencv-python==4.6.0.66
    - torch==1.12.1+cu116 --extra-index-url=https://download.pytorch.org/whl/cu116
    - torchvision==0.13.1+cu116 --extra-index-url=https://download.pytorch.org/whl/cu116
    - tqdm==4.45.0
    - numba==0.48
    - mediapipe==0.8.11

  # commands run after the environment is setup
  run:
    - pip install git+https://github.com/elliottzheng/batch-face.git@master

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
55 changes: 55 additions & 0 deletions face_detect.py
@@ -0,0 +1,55 @@
import cv2
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_detection = mp.solutions.face_detection


def face_rect(images):
    with mp_face_detection.FaceDetection(
        model_selection=1, min_detection_confidence=0.5
    ) as face_detection:
        for image_cv2 in images:
            # Convert the BGR image to RGB and process it with MediaPipe Face Detection.
            results = face_detection.process(cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB))

            # Yield one bounding rect per detected face.
            if not results.detections:
                yield None
                continue  # results.detections is None here; iterating it would raise TypeError
            for detection in results.detections:
                yield _get_bounding_rect(image_cv2, detection)


def _get_bounding_rect(
    image: mp_drawing.np.ndarray,
    detection: mp_drawing.detection_pb2.Detection,
):
    """
    Adapted from mediapipe.solutions.drawing_utils.draw_detection()
    """
    if not detection.location_data:
        return
    if image.shape[2] != mp_drawing._BGR_CHANNELS:
        raise ValueError("Input image must contain three channel bgr data.")
    image_rows, image_cols, _ = image.shape

    location = detection.location_data

    # get the bounding box, if one exists.
    if not location.HasField("relative_bounding_box"):
        return
    relative_bounding_box = location.relative_bounding_box
    rect_start_point = mp_drawing._normalized_to_pixel_coordinates(
        relative_bounding_box.xmin, relative_bounding_box.ymin, image_cols, image_rows
    )
    rect_end_point = mp_drawing._normalized_to_pixel_coordinates(
        relative_bounding_box.xmin + relative_bounding_box.width,
        relative_bounding_box.ymin + relative_bounding_box.height,
        image_cols,
        image_rows,
    )

    return *rect_start_point, *rect_end_point
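
As a usage sketch: face_rect consumes an iterable of BGR frames and lazily yields one (x1, y1, x2, y2) tuple per detected face, or None for a frame with no face. The video path and frame-reading loop below are illustrative assumptions, not code from this commit:

import cv2

from face_detect import face_rect

cap = cv2.VideoCapture("input.mp4")  # hypothetical input file
frames = []
while True:
    ok, frame = cap.read()  # OpenCV returns BGR frames, which face_rect expects
    if not ok:
        break
    frames.append(frame)
cap.release()

for rect in face_rect(frames):
    if rect is None:
        continue  # no face detected in this frame
    x1, y1, x2, y2 = rect
    print(f"face at ({x1}, {y1})-({x2}, {y2})")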

13 changes: 7 additions & 6 deletions face_detection/detection/sfd/sfd_detector.py
@@ -14,19 +14,20 @@


class SFDDetector(FaceDetector):
-    def __init__(self, device, path_to_detector=os.path.join(os.path.dirname(os.path.abspath(__file__)), 's3fd.pth'), verbose=False):
-        super(SFDDetector, self).__init__(device, verbose)
+    @classmethod
+    def load_model(cls, device):
+        path_to_detector = os.path.join(os.path.dirname(os.path.abspath(__file__)), 's3fd.pth')

        # Initialise the face detector
        if not os.path.isfile(path_to_detector):
            model_weights = load_url(models_urls['s3fd'])
        else:
            model_weights = torch.load(path_to_detector)

-        self.face_detector = s3fd()
-        self.face_detector.load_state_dict(model_weights)
-        self.face_detector.to(device)
-        self.face_detector.eval()
+        cls.face_detector = s3fd()
+        cls.face_detector.load_state_dict(model_weights)
+        cls.face_detector.to(device)
+        cls.face_detector.eval()

    def detect_from_image(self, tensor_or_path):
        image = self.tensor_or_path_to_ndarray(tensor_or_path)
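
The net effect of this hunk is that the s3fd weights move from per-instance to class-level state: load_model builds and caches the network on the class once, and later instances reuse it. A minimal usage sketch, assuming the base FaceDetector.__init__(device, verbose) seen in the removed lines is unchanged and gives verbose a default:

import torch

from face_detection.detection.sfd.sfd_detector import SFDDetector

device = "cuda" if torch.cuda.is_available() else "cpu"

SFDDetector.load_model(device)  # loads the s3fd weights once, onto the class itself
detector = SFDDetector(device)  # assumes FaceDetector.__init__ defaults verbose
faces = detector.detect_from_image("frame.jpg")  # hypothetical image path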