Eye blink detection with OpenCV, Python, and dlib
In this article, we will see how to detect eye blinks using OpenCV, Python, and dlib. This is a fairly simple task, but it requires a basic understanding of OpenCV and of how to implement a facial landmark detection program with OpenCV and dlib, since we will use that as the foundation of today's project.
Step-by-step implementation
Step 1: Installing all required packages
In this step we will install all the dependencies. We will use OpenCV for computer vision, the dlib library for face and landmark detection, SciPy for computing the distance between the eye landmarks, and the imutils package, whose helper functions convert the landmarks into a NumPy array and make them easy to work with. Let's install these first (note that installing dlib via pip compiles it from source, so CMake and a C++ compiler need to be available).
pip install opencv-python numpy dlib imutils scipy
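Once the installation finishes, a quick sanity check (a minimal sketch, not part of the original tutorial) confirms that every package imports and reports its version:
Python3
# Quick sanity check: verify that all dependencies import correctly
import cv2
import dlib
import imutils
import scipy

print("OpenCV:", cv2.__version__)
print("dlib:", dlib.__version__)
print("imutils:", imutils.__version__)
print("SciPy:", scipy.__version__)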
Step 2: Initializing and reading from the webcam
Python3
# Initializing the webcam and displaying its feed frame by frame
import cv2

cam = cv2.VideoCapture(0)
while True:
    _, frame = cam.read()
    cv2.imshow('Camera Feed', frame)
    # press 'q' to quit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cam.release()
cv2.destroyAllWindows()
Step 3: Facial landmark detection with dlib
Note: The facial landmark detector included in the dlib library is an implementation of the paper One Millisecond Face Alignment with an Ensemble of Regression Trees by Kazemi and Sullivan (2014).
Facial landmarks are the key attributes of a face in an image, such as the eyes, eyebrows, nose, mouth, and jaw. Since steps 1-3 are not the main focus of this article, we won't go into them in depth; instead, the code is commented to make it easy to follow.
Here is the basic facial landmark detection code that we will later use for blink detection.
Python3
# Importing the required dependencies
import cv2  # for video rendering
import dlib  # for face and landmark detection
import imutils
# for calculating dist b/w the eye landmarks
from scipy.spatial import distance as dist
# to get the landmark ids of the left
# and right eyes ----you can do this
# manually too
from imutils import face_utils

cam = cv2.VideoCapture('assets/Video.mp4')

# Initializing the Models for Landmark and
# face Detection
detector = dlib.get_frontal_face_detector()
landmark_predict = dlib.shape_predictor(
    'Model/shape_predictor_68_face_landmarks.dat')

while True:
    # If the video is finished then reset it
    # to the start
    if cam.get(cv2.CAP_PROP_POS_FRAMES) == cam.get(
            cv2.CAP_PROP_FRAME_COUNT):
        cam.set(cv2.CAP_PROP_POS_FRAMES, 0)
    else:
        _, frame = cam.read()
        frame = imutils.resize(frame, width=640)

        # converting frame to gray scale to pass
        # to detector
        img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # detecting the faces
        faces = detector(img_gray)
        for face in faces:
            # dlib returns a rectangle object, so we draw it
            # using its corner coordinates
            cv2.rectangle(frame, (face.left(), face.top()),
                          (face.right(), face.bottom()),
                          (200, 0, 0), 1)

        cv2.imshow("Video", frame)
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

cam.release()
cv2.destroyAllWindows()
Now the question arises: how are we going to use these landmarks for eye detection?
Eye landmarks
We have seen that we can extract any facial structure from the 68 facial landmarks we detected. So, for any given face in the image, we will extract the eye landmarks, i.e. six (x, y) coordinates for each eye (the index ranges are shown in the snippet below), and then calculate the EAR of these landmarks.
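For reference, imutils exposes the index ranges of every facial structure in the 68-point model, so the eye coordinates can be sliced directly out of the landmark array. A minimal sketch (the shape array is assumed to come from face_utils.shape_to_np, as in the full code later on):
Python3
from imutils import face_utils

# Index ranges of the eyes in the 68-point model:
# left eye -> points 42-47, right eye -> points 36-41
(L_start, L_end) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
(R_start, R_end) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
print(L_start, L_end)  # 42 48
print(R_start, R_end)  # 36 42

# Given a (68, 2) NumPy array of landmarks named `shape`,
# slicing yields the six (x, y) coordinates of each eye:
# lefteye = shape[L_start:L_end]
# righteye = shape[R_start:R_end]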
Eye aspect ratio (EAR)
This method is very simple and efficient, and it doesn't require any image processing. Essentially, the ratio gives us a relation between the horizontal and vertical measurements of the eye. With the six eye landmarks labeled p1 to p6 (p1 and p4 at the eye corners, p2/p6 and p3/p5 forming the vertical pairs), the formula used here, matching the code below, is:

EAR = (||p2 - p6|| + ||p3 - p5||) / ||p1 - p4||

(The canonical EAR formula divides by 2||p1 - p4||; this implementation folds that factor of 2 into the blink threshold instead.)

We can use the following function to calculate the EAR:
Python3
def calculate_EAR(eye):
    # calculate the vertical distances
    # (the euclidean distance is the straight-line
    # distance between two points, like the
    # hypotenuse of a right triangle)
    y1 = dist.euclidean(eye[1], eye[5])
    y2 = dist.euclidean(eye[2], eye[4])

    # calculate the horizontal distance
    x1 = dist.euclidean(eye[0], eye[3])

    # calculate the EAR (the factor of 2 from the
    # canonical formula is folded into the threshold)
    EAR = (y1 + y2) / x1
    return EAR
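To see the ratio in action, here is a quick worked example with made-up eye coordinates (illustrative values only, not from the tutorial): an open eye yields a noticeably higher EAR than a nearly closed one.
Python3
from scipy.spatial import distance as dist

# Illustrative (x, y) landmark coordinates for one eye,
# ordered p1..p6 as in the 68-point model
open_eye = [(0, 3), (2, 0), (4, 0), (6, 3), (4, 6), (2, 6)]
closed_eye = [(0, 3), (2, 2), (4, 2), (6, 3), (4, 4), (2, 4)]

for label, eye in [("open", open_eye), ("closed", closed_eye)]:
    y1 = dist.euclidean(eye[1], eye[5])
    y2 = dist.euclidean(eye[2], eye[4])
    x1 = dist.euclidean(eye[0], eye[3])
    print(label, (y1 + y2) / x1)

# open   -> 2.0   (large vertical distances)
# closed -> ~0.67 (vertical distances shrink, EAR drops)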
What is so magical about this EAR?
This is the most important part: when you calculate the EAR of an eye, it stays roughly constant while the eye is open, but drops suddenly during a blink. Plotted over time, the EAR forms a nearly flat line with a sharp dip at the moment of each blink, which makes this one of the simplest and most efficient ways to detect a blink.
Since we get a separate EAR for each eye, we will take the average of the right-eye and left-eye EARs and check whether it falls below a certain threshold (we will create a variable to set its value). This threshold can vary: for me it worked at 0.4 or 0.5, but in some cases 0.25 or 0.3 also works. It depends on the FPS of your video or webcam.
Next step: while the EAR is below the threshold we keep a frame count, and if the count reaches about 3 (or 5, depending on the FPS; the complete code below uses 2) consecutive frames, we consider a blink detected. A small calibration sketch follows.
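Since the right threshold depends on your footage, one practical way to pick it (a suggested heuristic, not part of the original tutorial) is to average the EAR over a stretch of open-eye frames and set the threshold somewhat below that baseline:
Python3
def pick_blink_threshold(ear_samples, factor=0.75):
    # Derive a blink threshold from open-eye EAR samples.
    # `factor` is a heuristic: a blink drops the EAR well
    # below the open-eye baseline, so ~75% of the baseline
    # is a reasonable starting point to tune from.
    baseline = sum(ear_samples) / len(ear_samples)
    return factor * baseline

# Usage: while running the detection loop on open-eye footage,
# append avg = (left_EAR + right_EAR) / 2 to a list for a few
# seconds, then call:
# blink_thresh = pick_blink_threshold(ear_samples)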
Below is the complete implementation.
Python3
# Importing the required dependencies
import cv2  # for video rendering
import dlib  # for face and landmark detection
import imutils
# for calculating dist b/w the eye landmarks
from scipy.spatial import distance as dist
# to get the landmark ids of the left and right eyes
# you can do this manually too
from imutils import face_utils

cam = cv2.VideoCapture('assets/my_blink.mp4')

# defining a function to calculate the EAR
def calculate_EAR(eye):
    # calculate the vertical distances
    y1 = dist.euclidean(eye[1], eye[5])
    y2 = dist.euclidean(eye[2], eye[4])

    # calculate the horizontal distance
    x1 = dist.euclidean(eye[0], eye[3])

    # calculate the EAR
    EAR = (y1 + y2) / x1
    return EAR

# Variables
blink_thresh = 0.45
succ_frame = 2
count_frame = 0

# Eye landmarks
(L_start, L_end) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
(R_start, R_end) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

# Initializing the Models for Landmark and
# face Detection
detector = dlib.get_frontal_face_detector()
landmark_predict = dlib.shape_predictor(
    'Model/shape_predictor_68_face_landmarks.dat')

while True:
    # If the video is finished then reset it
    # to the start
    if cam.get(cv2.CAP_PROP_POS_FRAMES) == cam.get(
            cv2.CAP_PROP_FRAME_COUNT):
        cam.set(cv2.CAP_PROP_POS_FRAMES, 0)
    else:
        _, frame = cam.read()
        frame = imutils.resize(frame, width=640)

        # converting frame to gray scale to
        # pass to detector
        img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # detecting the faces
        faces = detector(img_gray)
        for face in faces:
            # landmark detection
            shape = landmark_predict(img_gray, face)

            # converting the shape class directly
            # to a list of (x,y) coordinates
            shape = face_utils.shape_to_np(shape)

            # parsing the landmarks list to extract
            # lefteye and righteye landmarks
            lefteye = shape[L_start:L_end]
            righteye = shape[R_start:R_end]

            # Calculate the EAR
            left_EAR = calculate_EAR(lefteye)
            right_EAR = calculate_EAR(righteye)

            # Avg of left and right eye EAR
            avg = (left_EAR + right_EAR) / 2
            if avg < blink_thresh:
                count_frame += 1  # incrementing the frame count
            else:
                if count_frame >= succ_frame:
                    cv2.putText(frame, 'Blink Detected', (30, 30),
                                cv2.FONT_HERSHEY_DUPLEX, 1, (0, 200, 0), 1)
                # reset the counter once the eyes reopen, so a
                # single blink is not reported repeatedly
                count_frame = 0

        cv2.imshow("Video", frame)
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

cam.release()
cv2.destroyAllWindows()
Output:
If you use a different video or a webcam, your FPS will be different, so you may want to experiment with the values of the variables we defined, although they work fine in most cases. A sketch of the webcam variant follows.
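For a live webcam instead of a video file, the setup changes slightly (a minimal sketch, assuming the rest of the loop body stays as above): the frame-reset check is dropped, since CAP_PROP_FRAME_COUNT is not meaningful for a live stream, and the loop simply reads frames until 'q' is pressed.
Python3
# Webcam variant: replaces the video-file capture and the
# frame-reset logic of the full implementation above
cam = cv2.VideoCapture(0)  # 0 = default webcam

while True:
    ret, frame = cam.read()
    if not ret:  # camera disconnected or read failed
        break
    frame = imutils.resize(frame, width=640)

    # ... the rest of the processing (grayscale conversion,
    # face detection, EAR computation, blink check) is
    # identical to the loop body of the full implementation

    cv2.imshow("Video", frame)
    if cv2.waitKey(5) & 0xFF == ord('q'):
        break

cam.release()
cv2.destroyAllWindows()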