Left and Right Hand Detection Using Python
In this article, we will see how to detect hands using Python.
We will use the mediapipe and OpenCV libraries in Python to detect the right and left hands. We will use the Hands model from mediapipe solutions to detect hands; it is built on a palm detection model that operates on the full image and returns an oriented hand bounding box.
Required libraries
- Mediapipe is Google's open-source framework for media processing. It is cross-platform, or we can say it is platform friendly: it runs on Android, iOS, and the web, which is what cross-platform means, running everywhere.
- OpenCV is a Python library designed to solve computer vision problems. OpenCV supports multiple programming languages such as C++, Python, and Java, and runs on multiple platforms including Windows, Linux, and macOS.
Installing the required libraries
pip install mediapipe
pip install opencv-python
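After installation, an optional quick check (not part of the original article, and assuming a mediapipe release that exposes __version__) can confirm that both libraries import correctly:
Python3
# Optional sanity check: print the installed versions of both libraries
import cv2
import mediapipe

print(cv2.__version__)
print(mediapipe.__version__)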
Step-by-step implementation
Step 1: Import all required libraries
Python3
# Importing Libraries
import cv2
import mediapipe as mp
# Used to convert protobuf message
# to a dictionary.
from google.protobuf.json_format import MessageToDict
Step 2: Initialize the Hands model
Python3
# Initializing the Model
mpHands = mp.solutions.hands
hands = mpHands.Hands(
    static_image_mode=False,
    model_complexity=1,
    min_detection_confidence=0.75,
    min_tracking_confidence=0.75,
    max_num_hands=2)
Let's look at the parameters of the Hands model (a configuration sketch for static images follows the parameter list below):
Hands(static_image_mode=False, model_complexity=1, min_detection_confidence=0.75, min_tracking_confidence=0.75, max_num_hands=2)
Where:
- static_image_mode: Specifies whether the input should be treated as static images or as a video stream. The default value is False.
- model_complexity: Complexity of the hand landmark model: 0 or 1. Landmark accuracy, as well as inference latency, generally go up with the model complexity. Defaults to 1.
- min_detection_confidence: The minimum confidence value from the hand detection model for the detection to be considered successful. Can be a value in [0.0, 1.0]. The default value is 0.5.
- min_tracking_confidence: The minimum confidence value from the landmark tracking model for the hand landmarks to be considered tracked successfully. Can be a value in [0.0, 1.0]. The default value is 0.5.
- max_num_hands: Maximum number of hands to detect. Defaults to 2.
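As a supplement (not part of the original article), here is a minimal sketch of how the same parameters could be configured for a single static photo instead of a webcam stream; the file name 'hand_photo.jpg' is a hypothetical placeholder:
Python3
# A minimal sketch: configuring Hands for a single static image.
# The file name 'hand_photo.jpg' is a hypothetical placeholder.
import cv2
import mediapipe as mp

mpHands = mp.solutions.hands
with mpHands.Hands(
        static_image_mode=True,      # treat the input as an independent image
        model_complexity=1,
        min_detection_confidence=0.5,
        max_num_hands=2) as hands:
    img = cv2.imread('hand_photo.jpg')
    results = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # Print 'Left' or 'Right' for every detected hand
    if results.multi_handedness:
        for hand in results.multi_handedness:
            print(hand.classification[0].label)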
Step 3: Process the image with the Hands model and detect the hands
Capture frames continuously from the camera using OpenCV, flip each image around the y-axis, i.e. cv2.flip(image, flipCode), convert the BGR image to an RGB image, and make predictions with the initialized Hands model.
The predictions made by the model are stored in the results variable, from which we can access the landmarks and handedness with results.multi_hand_landmarks and results.multi_handedness respectively. If hands are present in the frame, we check whether both hands are detected; if so, we put the text 'Both Hands' on the image. Otherwise, for the single detected hand, we store the label returned by the MessageToDict() function in a label variable. If the label is 'Left', we put the text 'Left Hand' on the left side of the window; if the label is 'Right', we put the text 'Right Hand' on the right side of the window.
Python3
# Start capturing video from webcam
cap = cv2.VideoCapture(0)

while True:
    # Read video frame by frame
    success, img = cap.read()

    # Flip the image(frame)
    img = cv2.flip(img, 1)

    # Convert BGR image to RGB image
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Process the RGB image
    results = hands.process(imgRGB)

    # If hands are present in image(frame)
    if results.multi_hand_landmarks:

        # Both Hands are present in image(frame)
        if len(results.multi_handedness) == 2:
            # Display 'Both Hands' on the image
            cv2.putText(img, 'Both Hands', (250, 50),
                        cv2.FONT_HERSHEY_COMPLEX, 0.9,
                        (0, 255, 0), 2)

        # If any hand present
        else:
            for i in results.multi_handedness:
                # Return whether it is Right or Left Hand
                label = MessageToDict(i)['classification'][0]['label']

                if label == 'Left':
                    # Display 'Left Hand' on left side of window
                    cv2.putText(img, label + ' Hand', (20, 50),
                                cv2.FONT_HERSHEY_COMPLEX, 0.9,
                                (0, 255, 0), 2)

                if label == 'Right':
                    # Display 'Right Hand' on right side of window
                    cv2.putText(img, label + ' Hand', (460, 50),
                                cv2.FONT_HERSHEY_COMPLEX, 0.9,
                                (0, 255, 0), 2)

    # Display Video and exit the loop when 'q' is pressed
    cv2.imshow('Image', img)
    if cv2.waitKey(1) & 0xff == ord('q'):
        break
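For reference (this sketch is not part of the original article), MessageToDict turns each handedness protobuf message into a plain Python dictionary, which is why the label can be read with ['classification'][0]['label']. The score value below is illustrative:
Python3
# Shape of the dictionary produced by MessageToDict for one detected hand.
# The 'score' value is illustrative; real values depend on the frame.
handedness_dict = {
    'classification': [
        {'index': 0, 'score': 0.97, 'label': 'Left'}
    ]
}

# The label is read the same way as in the loop above
label = handedness_dict['classification'][0]['label']
print(label)  # 'Left'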
Below is the complete implementation:
Python3
# Importing Libraries
import cv2
import mediapipe as mp

# Used to convert protobuf message to a dictionary.
from google.protobuf.json_format import MessageToDict

# Initializing the Model
mpHands = mp.solutions.hands
hands = mpHands.Hands(
    static_image_mode=False,
    model_complexity=1,
    min_detection_confidence=0.75,
    min_tracking_confidence=0.75,
    max_num_hands=2)

# Start capturing video from webcam
cap = cv2.VideoCapture(0)

while True:
    # Read video frame by frame
    success, img = cap.read()

    # Flip the image(frame)
    img = cv2.flip(img, 1)

    # Convert BGR image to RGB image
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Process the RGB image
    results = hands.process(imgRGB)

    # If hands are present in image(frame)
    if results.multi_hand_landmarks:

        # Both Hands are present in image(frame)
        if len(results.multi_handedness) == 2:
            # Display 'Both Hands' on the image
            cv2.putText(img, 'Both Hands', (250, 50),
                        cv2.FONT_HERSHEY_COMPLEX, 0.9,
                        (0, 255, 0), 2)

        # If any hand present
        else:
            for i in results.multi_handedness:
                # Return whether it is Right or Left Hand
                label = MessageToDict(i)['classification'][0]['label']

                if label == 'Left':
                    # Display 'Left Hand' on left side of window
                    cv2.putText(img, label + ' Hand', (20, 50),
                                cv2.FONT_HERSHEY_COMPLEX, 0.9,
                                (0, 255, 0), 2)

                if label == 'Right':
                    # Display 'Right Hand' on right side of window
                    cv2.putText(img, label + ' Hand', (460, 50),
                                cv2.FONT_HERSHEY_COMPLEX, 0.9,
                                (0, 255, 0), 2)

    # Display Video and exit the loop when 'q' is pressed
    cv2.imshow('Image', img)
    if cv2.waitKey(1) & 0xff == ord('q'):
        break

# Release the webcam and close the window
cap.release()
cv2.destroyAllWindows()
Output: