Skip to content

Commit

Permalink
ok
Browse files Browse the repository at this point in the history
  • Loading branch information
johndpope committed May 13, 2024
1 parent 9c8f4b7 commit 75ea7d8
Show file tree
Hide file tree
Showing 6 changed files with 1,222 additions and 5 deletions.
34 changes: 31 additions & 3 deletions EmoDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import decord
from typing import List, Tuple, Dict, Any
from decord import VideoReader,AVReader
import face_alignment

class EMODataset(Dataset):
def __init__(self, use_gpu:False, sample_rate: int, n_sample_frames: int, width: int, height: int, img_scale: Tuple[float, float], img_ratio: Tuple[float, float] = (0.9, 1.0), video_dir: str = ".", drop_ratio: float = 0.1, json_file: str = "", stage: str = 'stage1', transform: transforms.Compose = None):
Expand All @@ -24,8 +25,9 @@ def __init__(self, use_gpu:False, sample_rate: int, n_sample_frames: int, width:
self.video_dir = video_dir
self.transform = transform
self.stage = stage
# self.feature_extractor = Wav2VecFeatureExtractor(model_name='facebook/wav2vec2-base-960h', device='cuda')
self.face_alignment = face_alignment.FaceAlignment(face_alignment.LandmarksType.TWO_D, device='cpu')

# self.feature_extractor = Wav2VecFeatureExtractor(model_name='facebook/wav2vec2-base-960h', device='cuda')
# self.face_mask_generator = FaceHelper()
self.pixel_transform = transforms.Compose(
[
Expand Down Expand Up @@ -62,6 +64,22 @@ def __init__(self, use_gpu:False, sample_rate: int, n_sample_frames: int, width:
decord.bridge.set_bridge('torch') # Optional: This line sets decord to directly output PyTorch tensors.
self.ctx = decord.cpu()

# DRIVING VIDEO
# Decode a single driving clip with decord and pass every frame through
# self.augmentation using self.pixel_transform.
# NOTE(review): the clip path is a hard-coded relative path, so it is
# resolved against the process working directory -- confirm this is intended
# rather than something that should come from a constructor argument.
video_drv_reader = VideoReader("./junk/-2KGPYEFnsU_8.mp4", ctx=self.ctx)
video_length = len(video_drv_reader)

# NOTE(review): this list is bound to a local name only. __getitem__ builds
# its sample from `self.driving_vid_pil_image_list`, and no assignment to
# that attribute is visible in this hunk -- confirm it is stored on `self`
# somewhere, otherwise that read raises AttributeError.
driving_vid_pil_image_list = []
# keypoints_list = []

# Every frame of the clip is decoded and transformed here, so the cost of
# this loop grows with the clip length.
for frame_idx in range(video_length):
# Read frame and convert to PIL Image
frame = Image.fromarray(video_drv_reader[frame_idx].numpy())


# Transform the frame
# The current global torch RNG state is captured and handed to
# self.augmentation together with the frame and the transform.
# NOTE(review): presumably so random transforms can be replayed
# deterministically -- self.augmentation is not visible here, confirm.
state = torch.get_rng_state()
pixel_values_frame = self.augmentation(frame, self.pixel_transform, state)
driving_vid_pil_image_list.append(pixel_values_frame)

def __len__(self) -> int:

Expand All @@ -87,12 +105,20 @@ def __getitem__(self, index: int) -> Dict[str, Any]:


# Per-frame accumulators for the source video. keypoints_list receives
# exactly one entry per frame index (a landmark entry or None), so its
# indices line up with the frame indices iterated below.
vid_pil_image_list = []

keypoints_list = []

for frame_idx in range(video_length):
# Read frame and convert to PIL Image
frame = Image.fromarray(video_reader[frame_idx].numpy())


# Detect keypoints using face_alignment
# NOTE(review): video_reader[frame_idx].numpy() is evaluated a second time
# here (it was already evaluated to build `frame` above), so each frame is
# fetched from the reader twice per iteration.
# NOTE(review): landmarks are computed on the raw decoded frame, while the
# pixels come from self.augmentation -- if that transform changes geometry
# (not visible here), the keypoints will not match the transformed frame.
keypoints = self.face_alignment.get_landmarks(video_reader[frame_idx].numpy())
if keypoints is not None:
# Only the first entry of the result is kept.
# NOTE(review): presumably one entry per detected face -- confirm
# against the face_alignment documentation.
keypoints_list.append(keypoints[0])
else:
# Keep a None placeholder so the list stays aligned with frame indices.
# NOTE(review): presumably None means no face was detected -- confirm.
keypoints_list.append(None)

# Transform the frame
# The current global torch RNG state is captured and handed to
# self.augmentation together with the frame and the transform.
state = torch.get_rng_state()
pixel_values_frame = self.augmentation(frame, self.pixel_transform, state)
Expand All @@ -101,6 +127,8 @@ def __getitem__(self, index: int) -> Dict[str, Any]:
# Convert list of lists to a tensor
sample = {
"video_id": video_id,
"images": vid_pil_image_list
"source_frames": vid_pil_image_list,
"driving_frames": self.driving_vid_pil_image_list,
"keypoints": keypoints_list
}
return sample
3 changes: 3 additions & 0 deletions data/driving_video.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"meta_info": {"appearance_mapping": ["blurry", "male", "young", "chubby", "pale_skin", "rosy_cheeks", "oval_face", "receding_hairline", "bald", "bangs", "black_hair", "blonde_hair", "gray_hair", "brown_hair", "straight_hair", "wavy_hair", "long_hair", "arched_eyebrows", "bushy_eyebrows", "bags_under_eyes", "eyeglasses", "sunglasses", "narrow_eyes", "big_nose", "pointy_nose", "high_cheekbones", "big_lips", "double_chin", "no_beard", "5_o_clock_shadow", "goatee", "mustache", "sideburns", "heavy_makeup", "wearing_earrings", "wearing_hat", "wearing_lipstick", "wearing_necklace", "wearing_necktie", "wearing_mask"], "action_mapping": ["blow", "chew", "close_eyes", "cough", "cry", "drink", "eat", "frown", "gaze", "glare", "head_wagging", "kiss", "laugh", "listen_to_music", "look_around", "make_a_face", "nod", "play_instrument", "read", "shake_head", "shout", "sigh", "sing", "sleep", "smile", "smoke", "sneer", "sneeze", "sniff", "talk", "turn", "weep", "whisper", "wink", "yawn"]}, "clips": {"-2KGPYEFnsU_8": {"ytb_id": "-2KGPYEFnsU", "duration": {"start_sec": 102.6, "end_sec": 106.52}, "bbox": {"top": 0.0991, "bottom": 0.612, "left": 0.1234, "right": 0.412}, "attributes": {"appearance": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "action": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "emotion": {"sep_flag": false, "labels": "neutral"}}, "version": "v0.1"}}}


Binary file added junk/-2KGPYEFnsU_8.mp4
Binary file not shown.
Binary file added junk/M2Ohb0FAaJU_1.mp4
Binary file not shown.
Loading

0 comments on commit 75ea7d8

Please sign in to comment.