Hand gestures (#2)
* 2 windows for left and right hand

* using mediapipe, but only showing hand 1/2 instead of left/right

* better display, still only hand 1 or 2

* linting

---------

Co-authored-by: Skaiste Motiejunaite <[email protected]>
SkaisteMot and SkaisteMotiejunaite authored Oct 26, 2024
1 parent 72f0ae0 commit 5af615e
Showing 2 changed files with 170 additions and 59 deletions.
106 changes: 106 additions & 0 deletions Algorithms/Body/hand_gesture_test.py
@@ -0,0 +1,106 @@
"""Hand Gesture and Image display for Hand 1 and Hand 2, no distinction between left or right"""
import sys
import cv2
import mediapipe as mp
import numpy as np
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# Define paths in a separate dictionary
icon_paths = {
    'Thumb_Up': '../../Datasets/HandIcons/thumbs_up.png',
    'Thumb_Down': '../../Datasets/HandIcons/thumbs_down.png',
    'Pointing_Up': '../../Datasets/HandIcons/point_up.png',
    'Victory': '../../Datasets/HandIcons/peace.png',
    'Closed_Fist': '../../Datasets/HandIcons/fist.png',
    'Open_Palm': '../../Datasets/HandIcons/wave.png',
    'ILoveYou': '../../Datasets/HandIcons/rock.png'
}

# Preload icons into a dictionary
gesture_icons = {}
for gesture_name, path in icon_paths.items():
    icon = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if icon is None:
        print(f"Warning: Unable to load image at {path}")  # Debugging print
    else:
        gesture_icons[gesture_name] = icon

# Create a GestureRecognizer object.
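# The relative 'gesture_recognizer.task' path assumes MediaPipe's downloadable
# pre-trained gesture model bundle has been placed in the working directory.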
base_options = python.BaseOptions(model_asset_path='gesture_recognizer.task')
options = vision.GestureRecognizerOptions(base_options=base_options, num_hands=2)

# Create the Gesture Recognizer instance
recognizer = vision.GestureRecognizer.create_from_options(options)

# Initialize video capture from webcam (0 for the default camera)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Could not open video device.")
    sys.exit()  # Use sys.exit() instead of exit()

blank_image = 255 * np.ones((500, 600, 3), dtype=np.uint8) # White blank image

def display_gesture_info(input_frame, gesture_data):
    """Draw gesture info and load corresponding icons."""
    emoji_images = [blank_image.copy(), blank_image.copy()]  # One emoji canvas per hand

    # Sort hands by the wrist's normalized x-coordinate (leftmost hand first)
    sorted_hands = sorted(gesture_data, key=lambda hand: hand[1][0].x)

    for index, (current_hand_gestures, current_hand_landmarks) in enumerate(sorted_hands):
        # Get the top gesture for the hand
        top_gesture = current_hand_gestures[0]
        current_gesture_name = top_gesture.category_name  # Recognized gesture name

        # Prepare the text to display
        gesture_text = f"Hand {index + 1}: {current_gesture_name} ({top_gesture.score:.2f})"
        cv2.putText(input_frame, gesture_text, (10, 30 + index * 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Load the icon for the recognized gesture
        if current_gesture_name in gesture_icons:
            emoji_images[index] = gesture_icons[current_gesture_name]  # Icon for Hand 1 or Hand 2

        # Draw landmarks for each hand
        for landmark in current_hand_landmarks:  # Each landmark is a NormalizedLandmark
            x = int(landmark.x * input_frame.shape[1])
            y = int(landmark.y * input_frame.shape[0])
            cv2.circle(input_frame, (x, y), 5, (0, 255, 0), -1)

    return emoji_images

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Error: Frame not captured.")
        continue

    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Create a MediaPipe image from the RGB frame
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

    # Recognize gestures in the input frame
    recognition_result = recognizer.recognize(mp_image)

    # Collect (gestures, landmarks) pairs for each detected hand; note that
    # recognition_result.handedness is also available if left/right labels are wanted
    gestures_and_landmarks = []
    if recognition_result.gestures:
        for hand_gestures, hand_landmarks in zip(recognition_result.gestures,
                                                 recognition_result.hand_landmarks):
            gestures_and_landmarks.append((hand_gestures, hand_landmarks))

    # Update emoji images with the recognized gestures
    hand_images = display_gesture_info(frame, gestures_and_landmarks)

    # Show emojis for both hands in separate windows
    cv2.imshow('Hand 1 Emoji', hand_images[0])  # Show Hand 1 emoji
    cv2.imshow('Hand 2 Emoji', hand_images[1])  # Show Hand 2 emoji
    cv2.imshow('Hand Recognition with Emoji', frame)  # Original frame with overlays

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture and close the window
cap.release()
cv2.destroyAllWindows()
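
Note: icons loaded with cv2.IMREAD_UNCHANGED come back as 4-channel BGRA when the PNG has transparency, and cv2.imshow displays them without compositing the alpha channel. Below is a minimal sketch of flattening such an icon over a white background before display, assuming nothing beyond OpenCV and NumPy; the helper name flatten_icon is illustrative, not part of this commit.

import cv2
import numpy as np

def flatten_icon(icon):
    """Composite a BGRA icon over a white background; pass BGR icons through."""
    if icon is None or icon.ndim != 3 or icon.shape[2] != 4:
        return icon  # already 3-channel (or failed to load)
    bgr = icon[:, :, :3].astype(np.float32)
    alpha = icon[:, :, 3:4].astype(np.float32) / 255.0  # per-pixel opacity in [0, 1]
    white = np.full_like(bgr, 255.0)
    blended = bgr * alpha + white * (1.0 - alpha)  # standard "over" compositing
    return blended.astype(np.uint8)

One way to use it would be gesture_icons[gesture_name] = flatten_icon(icon) at preload time.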
123 changes: 64 additions & 59 deletions Algorithms/Body/hand_gestures.py
@@ -6,22 +6,24 @@
https://colab.research.google.com/github/googlesamples/mediapipe/blob/main/examples/gesture_recognizer/python/gesture_recognizer.ipynb#scrollTo=Iy4r2_ePylIa
https://towardsdatascience.com/real-time-hand-tracking-and-gesture-recognition-with-mediapipe-rerun-showcase-9ec57cb0c831
"""
-from cv2 import cv2
+import cv2
import mediapipe as mp
+import numpy as np
+import sys  # Import sys for using sys.exit()

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

mp_drawing = mp.solutions.drawing_utils

emoji_dict = {
-    'thumb_up': cv2.imread('DevCode/Datasets/HandIcons/thumbs_up.png'),
-    'thumb_down': cv2.imread('DevCode/Datasets/HandIcons/thumbs_down.png'),
-    'point_up': cv2.imread('DevCode/Datasets/HandIcons/point_up.png'),
-    'peace': cv2.imread('DevCode/Datasets/HandIcons/peace.png'),
-    'fist': cv2.imread('DevCode/Datasets/HandIcons/fist.png'),
-    'wave': cv2.imread('DevCode/Datasets/HandIcons/wave.png'),
-    'rock': cv2.imread('DevCode/Datasets/HandIcons/rock.png')
+    'thumb_up': cv2.imread('../../Datasets/HandIcons/thumbs_up.png'),
+    'thumb_down': cv2.imread('../../Datasets/HandIcons/thumbs_down.png'),
+    'point_up': cv2.imread('../../Datasets/HandIcons/point_up.png'),
+    'peace': cv2.imread('../../Datasets/HandIcons/peace.png'),
+    'fist': cv2.imread('../../Datasets/HandIcons/fist.png'),
+    'wave': cv2.imread('../../Datasets/HandIcons/wave.png'),
+    'rock': cv2.imread('../../Datasets/HandIcons/rock.png')
}

for gesture, img in emoji_dict.items():
@@ -34,7 +36,10 @@

if not cap.isOpened():
    print("Error: Could not open video device.")
-    exit()
+    sys.exit()  # Use sys.exit() instead of exit()

+# Create a blank image (assuming the emoji images are 100x100)
+blank_image = 255 * np.ones((100, 100, 3), dtype=np.uint8)  # White blank image

def recognize_gesture(landmarks):
"""Recognize the gestures and return the equivalent image/emoji"""
@@ -44,52 +49,42 @@ def recognize_gesture(landmarks):
    ring_tip = landmarks[mp_hands.HandLandmark.RING_FINGER_TIP]
    pinky_tip = landmarks[mp_hands.HandLandmark.PINKY_TIP]

-    if (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
-        index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-        middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-        ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-        pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['thumb_up']
-
-    if (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
-        index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
-        middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
-        ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
-        pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_MCP].y):
-        return emoji_dict['thumb_down']
-
-    if (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-        middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-        ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-        pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['peace']
-
-    if (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-        middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-        ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-        pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['point_up']
-
-    if (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
-        index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
-        middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
-        ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
-        pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_MCP].y):
-        return emoji_dict['fist']
-
-    if (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
-        index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-        middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-        ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-        pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['wave']
-
-    if (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
-        index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-        middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-        ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-        pinky_tip.y < landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['rock']
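+    # Image y-coordinates grow downward, so tip.y < joint.y means the fingertip
+    # sits above that joint, i.e. the finger is extended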
+    # Define gesture conditions
+    gestures = {
+        'thumb_up': (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
+                     index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                     middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                     ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                     pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y),
+        'thumb_down': (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
+                       index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
+                       middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
+                       ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
+                       pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_MCP].y),
+        'peace': (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                  middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                  ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                  pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y),
+        'point_up': (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                     middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                     ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                     pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y),
+        'fist': (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
+                 index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
+                 middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
+                 ring_tip.y < landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
+                 pinky_tip.y < landmarks[mp_hands.HandLandmark.PINKY_MCP].y),
+        'wave': (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
+                 index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                 middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                 ring_tip.y < landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                 pinky_tip.y < landmarks[mp_hands.HandLandmark.PINKY_PIP].y)
+    }
+
+    for gesture, condition in gestures.items():
+        if condition:
+            print(f"Detected Gesture: {gesture}")  # Debug print
+            return emoji_dict[gesture]

    return None

Expand All @@ -100,19 +95,29 @@ def recognize_gesture(landmarks):
        continue

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    results = hands.process(frame_rgb)

+    left_emoji_img = blank_image  # Initialize emoji images for both hands
+    right_emoji_img = blank_image

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
+            # Determine which hand was detected
+            handedness = results.multi_handedness[results.multi_hand_landmarks.index(hand_landmarks)].classification[0].label

            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

-            emoji_img = recognize_gesture(hand_landmarks.landmark)
+            detected_emoji_img = recognize_gesture(hand_landmarks.landmark)

-            if emoji_img is not None:
-                cv2.imshow('Emoji', emoji_img)
+            # Assign the detected emoji to the correct hand's variable
+            if handedness == 'Left':
+                left_emoji_img = detected_emoji_img if detected_emoji_img is not None else blank_image
+            elif handedness == 'Right':
+                right_emoji_img = detected_emoji_img if detected_emoji_img is not None else blank_image

+    # Show emojis for both hands in separate windows; MediaPipe assigns handedness
+    # assuming a mirrored (selfie) image, so on this unflipped frame the labels are
+    # reversed relative to what the viewer sees, hence the swap
+    cv2.imshow('Left Hand Emoji', right_emoji_img)  # Show right hand in left window
+    cv2.imshow('Right Hand Emoji', left_emoji_img)  # Show left hand in right window
    cv2.imshow('Hand Recognition with Emoji', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
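MediaPipe's Hands solution determines handedness on the assumption that the input is mirrored (selfie view), which is why the two emoji windows above are deliberately swapped for this unflipped frame. A minimal sketch of the alternative, assuming the same cap/hands setup as above: mirror the frame before processing, and the reported labels then match what the viewer sees, so no swap is needed.

frame = cv2.flip(frame, 1)  # mirror horizontally (selfie view)
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
results = hands.process(frame_rgb)
# With mirrored input, results.multi_handedness 'Left'/'Right' labels now
# correspond directly to the on-screen left and right hands, so
# cv2.imshow('Left Hand Emoji', left_emoji_img) would be correct as written.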
