From 5af615e530134ee6c28cc6ebd48403dd47a36f23 Mon Sep 17 00:00:00 2001
From: SkaisteMot <118757021+SkaisteMot@users.noreply.github.com>
Date: Sat, 26 Oct 2024 16:22:46 +0100
Subject: [PATCH] Hand gestures (#2)

* 2 windows for left and right hand

* using mediapipe fr but only showing hand1/2 instead of left/right

* better display, still only hand 1 or 2

* linting

---------

Co-authored-by: Skaiste Motiejunaite
---
 Algorithms/Body/hand_gesture_test.py | 106 +++++++++++++++++++++++
 Algorithms/Body/hand_gestures.py     | 123 ++++++++++++++-------------
 2 files changed, 170 insertions(+), 59 deletions(-)
 create mode 100644 Algorithms/Body/hand_gesture_test.py

diff --git a/Algorithms/Body/hand_gesture_test.py b/Algorithms/Body/hand_gesture_test.py
new file mode 100644
index 0000000..d1cd1ef
--- /dev/null
+++ b/Algorithms/Body/hand_gesture_test.py
@@ -0,0 +1,106 @@
+"""Hand Gesture and Image display for Hand 1 and Hand 2, no distinction between left or right"""
+import sys
+import cv2
+import mediapipe as mp
+import numpy as np
+from mediapipe.tasks import python
+from mediapipe.tasks.python import vision
+
+# Define paths in a separate dictionary
+icon_paths = {
+    'Thumb_Up': '../../Datasets/HandIcons/thumbs_up.png',
+    'Thumb_Down': '../../Datasets/HandIcons/thumbs_down.png',
+    'Pointing_Up': '../../Datasets/HandIcons/point_up.png',
+    'Victory': '../../Datasets/HandIcons/peace.png',
+    'Closed_Fist': '../../Datasets/HandIcons/fist.png',
+    'Open_Palm': '../../Datasets/HandIcons/wave.png',
+    'ILoveYou': '../../Datasets/HandIcons/rock.png'
+}
+
+# Preload icons into a dictionary
+gesture_icons = {}
+for gesture_name, path in icon_paths.items():
+    icon = cv2.imread(path, cv2.IMREAD_UNCHANGED)
+    if icon is None:
+        print(f"Warning: Unable to load image at {path}")  # Debugging print
+    else:
+        gesture_icons[gesture_name] = icon
+
+# Create a GestureRecognizer object.
+base_options = python.BaseOptions(model_asset_path='gesture_recognizer.task')
+options = vision.GestureRecognizerOptions(base_options=base_options, num_hands=2)
+
+# Create the Gesture Recognizer instance
+recognizer = vision.GestureRecognizer.create_from_options(options)
+
+# Initialize video capture from webcam (0 for the default camera)
+cap = cv2.VideoCapture(0)
+if not cap.isOpened():
+    print("Error: Could not open video device.")
+    sys.exit()  # Use sys.exit() instead of exit()
+
+blank_image = 255 * np.ones((500, 600, 3), dtype=np.uint8)  # White blank image
+
+def display_gesture_info(input_frame, gesture_data):
+    """Draw gesture info and load corresponding icons."""
+    emoji_images = [blank_image.copy(), blank_image.copy()]  # Initialize emoji for both hands
+
+    # Sort hands based on their x-coordinates (leftmost hand first)
+    sorted_hands = sorted(gesture_data, key=lambda x: x[1][0].x)
+
+    for index, (current_hand_gestures, current_hand_landmarks) in enumerate(sorted_hands):
+        # Get the top gesture for the hand
+        top_gesture = current_hand_gestures[0]
+        current_gesture_name = top_gesture.category_name  # Get the recognized gesture name
+
+        # Prepare the text to display
+        gesture_text = f"Hand {index + 1}: {current_gesture_name} ({top_gesture.score:.2f})"
+        cv2.putText(input_frame, gesture_text, (10, 30 + index * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
+
+        # Load the icon for the recognized gesture
+        if current_gesture_name in gesture_icons:
+            icon_image = gesture_icons[current_gesture_name]
+            emoji_images[index] = icon_image  # Load the icon for Hand 1 or Hand 2
+
+        # Draw landmarks for each hand
+        for landmark in current_hand_landmarks:  # Each landmark is a NormalizedLandmark
+            x = int(landmark.x * input_frame.shape[1])
+            y = int(landmark.y * input_frame.shape[0])
+            cv2.circle(input_frame, (x, y), 5, (0, 255, 0), -1)
+
+    return emoji_images
+
+while cap.isOpened():
+    ret, frame = cap.read()
+    if not ret:
+        print("Error: Frame not captured.")
+        continue
+
+    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+    # Create a MediaPipe image from the RGB frame
+    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
+
+    # Recognize gestures in the input frame
+    recognition_result = recognizer.recognize(mp_image)
+
+    # Process the result for both hands
+    gestures_and_landmarks = []
+    if recognition_result.gestures:
+        for hand_gestures, hand_landmarks in zip(recognition_result.gestures, recognition_result.hand_landmarks):
+            gestures_and_landmarks.append((hand_gestures, hand_landmarks))
+
+    # Update emoji images with the recognized gestures
+    hand_images = display_gesture_info(frame, gestures_and_landmarks)
+
+    # Show emojis for both hands in separate windows
+    cv2.imshow('Hand 1 Emoji', hand_images[0])  # Show Hand 1 emoji
+    cv2.imshow('Hand 2 Emoji', hand_images[1])  # Show Hand 2 emoji
+    cv2.imshow('Hand Recognition with Emoji', frame)  # Show the original frame
+
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+# Release the video capture and close the window
+cap.release()
+cv2.destroyAllWindows()
diff --git a/Algorithms/Body/hand_gestures.py b/Algorithms/Body/hand_gestures.py
index 12d336d..6cc318e 100644
--- a/Algorithms/Body/hand_gestures.py
+++ b/Algorithms/Body/hand_gestures.py
@@ -6,8 +6,10 @@
 https://colab.research.google.com/github/googlesamples/mediapipe/blob/main/examples/gesture_recognizer/python/gesture_recognizer.ipynb#scrollTo=Iy4r2_ePylIa
 https://towardsdatascience.com/real-time-hand-tracking-and-gesture-recognition-with-mediapipe-rerun-showcase-9ec57cb0c831
 """
-from cv2 import cv2
+import cv2
 import mediapipe as mp
+import numpy as np
+import sys  # Import sys for using sys.exit()
 
 mp_hands = mp.solutions.hands
 hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
@@ -15,13 +17,13 @@
 mp_drawing = mp.solutions.drawing_utils
 
 emoji_dict = {
-    'thumb_up': cv2.imread('DevCode/Datasets/HandIcons/thumbs_up.png'),
-    'thumb_down': cv2.imread('DevCode/Datasets/HandIcons/thumbs_down.png'),
-    'point_up': cv2.imread('DevCode/Datasets/HandIcons/point_up.png'),
-    'peace': cv2.imread('DevCode/Datasets/HandIcons/peace.png'),
-    'fist': cv2.imread('DevCode/Datasets/HandIcons/fist.png'),
-    'wave': cv2.imread('DevCode/Datasets/HandIcons/wave.png'),
-    'rock': cv2.imread('DevCode/Datasets/HandIcons/rock.png')
+    'thumb_up': cv2.imread('../../Datasets/HandIcons/thumbs_up.png'),
+    'thumb_down': cv2.imread('../../Datasets/HandIcons/thumbs_down.png'),
+    'point_up': cv2.imread('../../Datasets/HandIcons/point_up.png'),
+    'peace': cv2.imread('../../Datasets/HandIcons/peace.png'),
+    'fist': cv2.imread('../../Datasets/HandIcons/fist.png'),
+    'wave': cv2.imread('../../Datasets/HandIcons/wave.png'),
+    'rock': cv2.imread('../../Datasets/HandIcons/rock.png')
 }
 
 for gesture, img in emoji_dict.items():
@@ -34,7 +36,10 @@
 
 if not cap.isOpened():
     print("Error: Could not open video device.")
-    exit()
+    sys.exit()  # Use sys.exit() instead of exit()
+
+# Create a blank image (assuming your emoji images are 100x100)
+blank_image = 255 * np.ones((100, 100, 3), dtype=np.uint8)  # White blank image
 
 def recognize_gesture(landmarks):
     """Recognize the gestures and return the equivalent image/emoji"""
@@ -44,52 +49,42 @@ def recognize_gesture(landmarks):
     ring_tip = landmarks[mp_hands.HandLandmark.RING_FINGER_TIP]
     pinky_tip = landmarks[mp_hands.HandLandmark.PINKY_TIP]
 
-    if (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
-            index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-            middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['thumb_up']
-
-    if (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
-            index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
-            middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_MCP].y):
-        return emoji_dict['thumb_down']
-
-    if (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-            middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['peace']
-
-    if (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-            middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['point_up']
-
-    if (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
-            index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
-            middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_MCP].y):
-        return emoji_dict['fist']
-
-    if (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
-            index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-            middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['wave']
-
-    if (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
-            index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-            middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-            pinky_tip.y < landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['rock']
+    # Define gesture conditions
+    gestures = {
+        'thumb_up': (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
+                     index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                     middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                     ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                     pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y),
+        'thumb_down': (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
+                       index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
+                       middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
+                       ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
+                       pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_MCP].y),
+        'peace': (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                  middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                  ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                  pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y),
+        'point_up': (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                     middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                     ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                     pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y),
+        'fist': (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
+                 index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
+                 middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
+                 ring_tip.y < landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
+                 pinky_tip.y < landmarks[mp_hands.HandLandmark.PINKY_MCP].y),
+        'wave': (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
+                 index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                 middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                 ring_tip.y < landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                 pinky_tip.y < landmarks[mp_hands.HandLandmark.PINKY_PIP].y)
+    }
+
+    for gesture, condition in gestures.items():
+        if condition:
+            print(f"Detected Gesture: {gesture}")  # Debug print
+            return emoji_dict[gesture]
 
     return None
 
@@ -100,19 +95,30 @@ def recognize_gesture(landmarks):
         continue
 
     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     results = hands.process(frame_rgb)
 
+    left_emoji_img = blank_image  # Initialize emoji images for both hands
+    right_emoji_img = blank_image
+
     if results.multi_hand_landmarks:
         for hand_landmarks in results.multi_hand_landmarks:
+            # Determine which hand is detected
+            handedness = results.multi_handedness[results.multi_hand_landmarks.index(hand_landmarks)].classification[0].label
             mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
-            emoji_img = recognize_gesture(hand_landmarks.landmark)
+            detected_emoji_img = recognize_gesture(hand_landmarks.landmark)
 
-            if emoji_img is not None:
-                cv2.imshow('Emoji', emoji_img)
+            # Assign the detected emoji to the correct hand's variable
+            if handedness == 'Left':
+                left_emoji_img = detected_emoji_img if detected_emoji_img is not None else blank_image
+            elif handedness == 'Right':
+                right_emoji_img = detected_emoji_img if detected_emoji_img is not None else blank_image
 
+    # Show emojis for both hands in separate windows
+    cv2.imshow('Left Hand Emoji', right_emoji_img)  # Show right hand in left window
+    cv2.imshow('Right Hand Emoji', left_emoji_img)  # Show left hand in right window
     cv2.imshow('Hand Recognition with Emoji', frame)
 
     if cv2.waitKey(1) & 0xFF == ord('q'):
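
Note on the commit message's remaining limitation ("only showing hand1/2 instead of left/right"): a possible follow-up, not part of this patch, is to key the emoji windows in hand_gesture_test.py by handedness rather than by hand index. The sketch below is a minimal, hypothetical illustration; it assumes the MediaPipe Tasks GestureRecognizerResult exposes parallel gestures and handedness lists (one entry per detected hand), and the helper name emojis_by_handedness is made up for this example.

import numpy as np

BLANK = 255 * np.ones((500, 600, 3), dtype=np.uint8)  # white placeholder, same size as blank_image

def emojis_by_handedness(recognition_result, gesture_icons):
    """Return a {'Left': image, 'Right': image} mapping for the current frame."""
    emoji_by_hand = {'Left': BLANK.copy(), 'Right': BLANK.copy()}
    for hand_gestures, hand_info in zip(recognition_result.gestures,
                                        recognition_result.handedness):
        top_gesture = hand_gestures[0]       # highest-scoring gesture for this hand
        label = hand_info[0].category_name   # expected to be 'Left' or 'Right'
        icon = gesture_icons.get(top_gesture.category_name)
        if icon is not None:
            emoji_by_hand[label] = icon
    return emoji_by_hand

Inside the capture loop this could replace the index-based windows, e.g. images = emojis_by_handedness(recognition_result, gesture_icons) followed by cv2.imshow('Left Hand Emoji', images['Left']) and cv2.imshow('Right Hand Emoji', images['Right']). As the comments in hand_gestures.py already note, the reported label refers to the person's hand, so a mirrored webcam preview may still want the two windows swapped.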