From 5af615e530134ee6c28cc6ebd48403dd47a36f23 Mon Sep 17 00:00:00 2001
From: SkaisteMot <118757021+SkaisteMot@users.noreply.github.com>
Date: Sat, 26 Oct 2024 16:22:46 +0100
Subject: [PATCH] Hand gestures (#2)

* 2 windows for left and right hand

* using mediapipe fr but only showing hand1/2 instead of left/right

* better display, still only hand 1 or 2

* linting

---------

Co-authored-by: Skaiste Motiejunaite
---
 Algorithms/Body/hand_gesture_test.py | 106 +++++++++++++++++++++++
 Algorithms/Body/hand_gestures.py     | 123 ++++++++++++++-------------
 2 files changed, 170 insertions(+), 59 deletions(-)
 create mode 100644 Algorithms/Body/hand_gesture_test.py

diff --git a/Algorithms/Body/hand_gesture_test.py b/Algorithms/Body/hand_gesture_test.py
new file mode 100644
index 0000000..d1cd1ef
--- /dev/null
+++ b/Algorithms/Body/hand_gesture_test.py
@@ -0,0 +1,106 @@
+"""Hand Gesture and Image display for Hand 1 and Hand 2, no distinction between left or right"""
+import sys
+import cv2
+import mediapipe as mp
+import numpy as np
+from mediapipe.tasks import python
+from mediapipe.tasks.python import vision
+
+# Define paths in a separate dictionary
+icon_paths = {
+    'Thumb_Up': '../../Datasets/HandIcons/thumbs_up.png',
+    'Thumb_Down': '../../Datasets/HandIcons/thumbs_down.png',
+    'Pointing_Up': '../../Datasets/HandIcons/point_up.png',
+    'Victory': '../../Datasets/HandIcons/peace.png',
+    'Closed_Fist': '../../Datasets/HandIcons/fist.png',
+    'Open_Palm': '../../Datasets/HandIcons/wave.png',
+    'ILoveYou': '../../Datasets/HandIcons/rock.png'
+}
+
+# Preload icons into a dictionary
+gesture_icons = {}
+for gesture_name, path in icon_paths.items():
+    icon = cv2.imread(path, cv2.IMREAD_UNCHANGED)
+    if icon is None:
+        print(f"Warning: Unable to load image at {path}")  # Debugging print
+    else:
+        gesture_icons[gesture_name] = icon
+
+# Create a GestureRecognizer object.
+base_options = python.BaseOptions(model_asset_path='gesture_recognizer.task')
+options = vision.GestureRecognizerOptions(base_options=base_options, num_hands=2)
+
+# Create the Gesture Recognizer instance
+recognizer = vision.GestureRecognizer.create_from_options(options)
+
+# Initialize video capture from webcam (0 for the default camera)
+cap = cv2.VideoCapture(0)
+if not cap.isOpened():
+    print("Error: Could not open video device.")
+    sys.exit()  # Use sys.exit() instead of exit()
+
+blank_image = 255 * np.ones((500, 600, 3), dtype=np.uint8)  # White blank image
+
+def display_gesture_info(input_frame, gesture_data):
+    """Draw gesture info and load corresponding icons."""
+    emoji_images = [blank_image.copy(), blank_image.copy()]  # Initialize emoji for both hands
+
+    # Sort hands based on their x-coordinates (leftmost hand first)
+    sorted_hands = sorted(gesture_data, key=lambda x: x[1][0].x)
+
+    for index, (current_hand_gestures, current_hand_landmarks) in enumerate(sorted_hands):
+        # Get the top gesture for the hand
+        top_gesture = current_hand_gestures[0]
+        current_gesture_name = top_gesture.category_name  # Get the recognized gesture name
+
+        # Prepare the text to display
+        gesture_text = f"Hand {index + 1}: {current_gesture_name} ({top_gesture.score:.2f})"
+        cv2.putText(input_frame, gesture_text, (10, 30 + index * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
+
+        # Load the icon for the recognized gesture
+        if current_gesture_name in gesture_icons:
+            icon_image = gesture_icons[current_gesture_name]
+            emoji_images[index] = icon_image  # Load the icon for Hand 1 or Hand 2
+
+        # Draw landmarks for each hand
+        for landmark in current_hand_landmarks:  # Each landmark is a NormalizedLandmark
+            x = int(landmark.x * input_frame.shape[1])
+            y = int(landmark.y * input_frame.shape[0])
+            cv2.circle(input_frame, (x, y), 5, (0, 255, 0), -1)
+
+    return emoji_images
+
+while cap.isOpened():
+    ret, frame = cap.read()
+    if not ret:
+        print("Error: Frame not captured.")
+        continue
+
+    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+    # Create a MediaPipe image from the RGB frame
+    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
+
+    # Recognize gestures in the input frame
+    recognition_result = recognizer.recognize(mp_image)
+
+    # Process the result for both hands
+    gestures_and_landmarks = []
+    if recognition_result.gestures:
+        for hand_gestures, hand_landmarks in zip(recognition_result.gestures, recognition_result.hand_landmarks):
+            gestures_and_landmarks.append((hand_gestures, hand_landmarks))
+
+    # Update emoji images with the recognized gestures
+    hand_images = display_gesture_info(frame, gestures_and_landmarks)
+
+    # Show emojis for both hands in separate windows
+    cv2.imshow('Hand 1 Emoji', hand_images[0])  # Show Hand 1 emoji
+    cv2.imshow('Hand 2 Emoji', hand_images[1])  # Show Hand 2 emoji
+    cv2.imshow('Hand Recognition with Emoji', frame)  # Show the original frame
+
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+# Release the video capture and close the window
+cap.release()
+cv2.destroyAllWindows()
diff --git a/Algorithms/Body/hand_gestures.py b/Algorithms/Body/hand_gestures.py
index 12d336d..6cc318e 100644
--- a/Algorithms/Body/hand_gestures.py
+++ b/Algorithms/Body/hand_gestures.py
@@ -6,8 +6,10 @@
 https://colab.research.google.com/github/googlesamples/mediapipe/blob/main/examples/gesture_recognizer/python/gesture_recognizer.ipynb#scrollTo=Iy4r2_ePylIa
 https://towardsdatascience.com/real-time-hand-tracking-and-gesture-recognition-with-mediapipe-rerun-showcase-9ec57cb0c831
 """
-from cv2 import cv2
+import cv2
 import mediapipe as mp
+import numpy as np
+import sys  # Import sys for using sys.exit()
 
 mp_hands = mp.solutions.hands
 hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
@@ -15,13 +17,13 @@
 mp_drawing = mp.solutions.drawing_utils
 
 emoji_dict = {
-    'thumb_up': cv2.imread('DevCode/Datasets/HandIcons/thumbs_up.png'),
-    'thumb_down': cv2.imread('DevCode/Datasets/HandIcons/thumbs_down.png'),
-    'point_up': cv2.imread('DevCode/Datasets/HandIcons/point_up.png'),
-    'peace': cv2.imread('DevCode/Datasets/HandIcons/peace.png'),
-    'fist': cv2.imread('DevCode/Datasets/HandIcons/fist.png'),
-    'wave': cv2.imread('DevCode/Datasets/HandIcons/wave.png'),
-    'rock': cv2.imread('DevCode/Datasets/HandIcons/rock.png')
+    'thumb_up': cv2.imread('../../Datasets/HandIcons/thumbs_up.png'),
+    'thumb_down': cv2.imread('../../Datasets/HandIcons/thumbs_down.png'),
+    'point_up': cv2.imread('../../Datasets/HandIcons/point_up.png'),
+    'peace': cv2.imread('../../Datasets/HandIcons/peace.png'),
+    'fist': cv2.imread('../../Datasets/HandIcons/fist.png'),
+    'wave': cv2.imread('../../Datasets/HandIcons/wave.png'),
+    'rock': cv2.imread('../../Datasets/HandIcons/rock.png')
 }
 
 for gesture, img in emoji_dict.items():
@@ -34,7 +36,10 @@
 
 if not cap.isOpened():
     print("Error: Could not open video device.")
-    exit()
+    sys.exit()  # Use sys.exit() instead of exit()
+
+# Create a blank image (assuming your emoji images are 100x100)
+blank_image = 255 * np.ones((100, 100, 3), dtype=np.uint8)  # White blank image
 
 def recognize_gesture(landmarks):
     """Recognize the gestures and return the equivalent image/emoji"""
@@ -44,52 +49,42 @@ def recognize_gesture(landmarks):
     ring_tip = landmarks[mp_hands.HandLandmark.RING_FINGER_TIP]
     pinky_tip = landmarks[mp_hands.HandLandmark.PINKY_TIP]
 
-    if (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
-            index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-            middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['thumb_up']
-
-    if (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
-            index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
-            middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_MCP].y):
-        return emoji_dict['thumb_down']
-
-    if (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-            middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['peace']
-
-    if (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-            middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['point_up']
-
-    if (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
-            index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
-            middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_MCP].y):
-        return emoji_dict['fist']
-
-    if (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
-            index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-            middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-            pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['wave']
-
-    if (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
-            index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
-            middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
-            ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
-            pinky_tip.y < landmarks[mp_hands.HandLandmark.PINKY_PIP].y):
-        return emoji_dict['rock']
+    # Define gesture conditions
+    gestures = {
+        'thumb_up': (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
+                     index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                     middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                     ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                     pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y),
+        'thumb_down': (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
+                       index_tip.y > landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
+                       middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
+                       ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
+                       pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_MCP].y),
+        'peace': (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                  middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                  ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                  pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y),
+        'point_up': (index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                     middle_tip.y > landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                     ring_tip.y > landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                     pinky_tip.y > landmarks[mp_hands.HandLandmark.PINKY_PIP].y),
+        'fist': (thumb_tip.y > landmarks[mp_hands.HandLandmark.THUMB_MCP].y and
+                 index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_MCP].y and
+                 middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y and
+                 ring_tip.y < landmarks[mp_hands.HandLandmark.RING_FINGER_MCP].y and
+                 pinky_tip.y < landmarks[mp_hands.HandLandmark.PINKY_MCP].y),
+        'wave': (thumb_tip.y < landmarks[mp_hands.HandLandmark.THUMB_IP].y and
+                 index_tip.y < landmarks[mp_hands.HandLandmark.INDEX_FINGER_PIP].y and
+                 middle_tip.y < landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y and
+                 ring_tip.y < landmarks[mp_hands.HandLandmark.RING_FINGER_PIP].y and
+                 pinky_tip.y < landmarks[mp_hands.HandLandmark.PINKY_PIP].y)
+    }
+
+    for gesture, condition in gestures.items():
+        if condition:
+            print(f"Detected Gesture: {gesture}")  # Debug print
+            return emoji_dict[gesture]
 
     return None
 
@@ -100,19 +95,30 @@ def recognize_gesture(landmarks):
         continue
 
     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     results = hands.process(frame_rgb)
 
+    left_emoji_img = blank_image  # Initialize emoji images for both hands
+    right_emoji_img = blank_image
+
     if results.multi_hand_landmarks:
         for hand_landmarks in results.multi_hand_landmarks:
+            # Determine which hand is detected
+            handedness = results.multi_handedness[results.multi_hand_landmarks.index(hand_landmarks)].classification[0].label
             mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
-            emoji_img = recognize_gesture(hand_landmarks.landmark)
+            detected_emoji_img = recognize_gesture(hand_landmarks.landmark)
 
-            if emoji_img is not None:
-                cv2.imshow('Emoji', emoji_img)
+            # Assign the detected emoji to the correct hand's variable
+            if handedness == 'Left':
+                left_emoji_img = detected_emoji_img if detected_emoji_img is not None else blank_image
+            elif handedness == 'Right':
+                right_emoji_img = detected_emoji_img if detected_emoji_img is not None else blank_image
 
+    # Show emojis for both hands in separate windows
+    cv2.imshow('Left Hand Emoji', right_emoji_img)  # Show right hand in left window
+    cv2.imshow('Right Hand Emoji', left_emoji_img)  # Show left hand in right window
     cv2.imshow('Hand Recognition with Emoji', frame)
 
     if cv2.waitKey(1) & 0xFF == ord('q'):
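
Note on the commit message's remaining limitation ("only showing hand1/2 instead of left/right"): a possible follow-up, not part of this patch, is to key the emoji windows in hand_gesture_test.py by handedness rather than by hand index. The sketch below is a minimal, hypothetical illustration; it assumes the MediaPipe Tasks GestureRecognizerResult exposes parallel gestures and handedness lists (one entry per detected hand), and the helper name emojis_by_handedness is made up for this example.

import numpy as np

BLANK = 255 * np.ones((500, 600, 3), dtype=np.uint8)  # white placeholder, same size as blank_image

def emojis_by_handedness(recognition_result, gesture_icons):
    """Return a {'Left': image, 'Right': image} mapping for the current frame."""
    emoji_by_hand = {'Left': BLANK.copy(), 'Right': BLANK.copy()}
    for hand_gestures, hand_info in zip(recognition_result.gestures,
                                        recognition_result.handedness):
        top_gesture = hand_gestures[0]       # highest-scoring gesture for this hand
        label = hand_info[0].category_name   # expected to be 'Left' or 'Right'
        icon = gesture_icons.get(top_gesture.category_name)
        if icon is not None:
            emoji_by_hand[label] = icon
    return emoji_by_hand

Inside the capture loop this could replace the index-based windows, e.g. images = emojis_by_handedness(recognition_result, gesture_icons) followed by cv2.imshow('Left Hand Emoji', images['Left']) and cv2.imshow('Right Hand Emoji', images['Right']). As the comments in hand_gestures.py already note, the reported label refers to the person's hand, so a mirrored webcam preview may still want the two windows swapped.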