MediaPipe 是 Google 的一个开源项目,用于构建机器学习管道(pipeline)。
提供了16个预训练模型的案例:人脸检测、Face Mesh、虹膜、手、姿态、人体、人物分割、头发分割、目标检测、Box Tracking、Instant Motion Tracking、3D目标检测、特征匹配、AutoFlip、MediaSequence、YouTube-8M
肢体识别本质上还是分类任务,该技术有很多应用场景,比如手势识别控制类应用、动作检测类应用、动作评测类应用、以及一些移动设备AR视频合成类应用
手部关键点检测示例:检测一只手的 21 个关键点坐标,每个关键点对应一个名称。
import cv2
import mediapipe as mp
# Drawing helpers used to render the landmarks onto an image.
mp_drawing = mp.solutions.drawing_utils
# Predefined drawing styles for landmarks and their connections.
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands
# max_num_hands: maximum number of hands to detect (the original note says
# the default is 2); this script limits detection to a single hand.
hands = mp_hands.Hands(max_num_hands=1)
if __name__ == '__main__':
    # Detect hand landmarks in a still image and save an annotated copy
    # to '0.png'.
    file = '1.png'
    source = cv2.imread(file)
    # cv2.imread signals failure by returning None instead of raising, so
    # fail here with a clear message rather than inside cv2.flip.
    if source is None:
        raise FileNotFoundError(f'Cannot read image: {file}')

    # Mirror the image horizontally (flip code 1) before detection; the
    # result is mirrored back before it is written out.
    image = cv2.flip(source, 1)
    # OpenCV loads images as BGR, while the detector is fed RGB.
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    image_height, image_width, _ = image.shape
    annotated_image = image.copy()

    # multi_hand_landmarks is None when no hand is detected; iterate over
    # an empty list in that case so the image is saved un-annotated
    # instead of crashing with a TypeError.
    for hand_landmarks in results.multi_hand_landmarks or []:
        # Scale each landmark's x/y by the image size to get pixel
        # coordinates. The list is not used further below; it is kept to
        # show how the coordinates are obtained.
        pixel_points = [
            (lm.x * image_width, lm.y * image_height)
            for lm in hand_landmarks.landmark
        ]
        # Draw the landmarks and their connections for this hand.
        mp_drawing.draw_landmarks(
            annotated_image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())

    # Undo the horizontal flip so the output matches the input orientation.
    cv2.imwrite('0.png', cv2.flip(annotated_image, 1))
姿态(Pose)关键点检测示例:
import cv2
import mediapipe as mp
# Drawing helpers used to render the landmarks onto an image.
mp_drawing = mp.solutions.drawing_utils
# Predefined drawing styles for landmarks and their connections.
mp_drawing_styles = mp.solutions.drawing_styles
mpPose = mp.solutions.pose
# Pose estimator created with the library's default options.
pose = mpPose.Pose()
if __name__ == '__main__':
    # Detect body pose landmarks in a still image and save an annotated
    # copy to '0.png'.
    file = '1.png'
    image = cv2.imread(file)
    # cv2.imread signals failure by returning None instead of raising, so
    # fail here with a clear message rather than on image.shape.
    if image is None:
        raise FileNotFoundError(f'Cannot read image: {file}')

    image_height, image_width, _ = image.shape
    # OpenCV loads images as BGR, while the estimator is fed RGB.
    results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    # Create the output image up front so it exists even when no pose is
    # found.
    annotated_image = image.copy()

    # MediaPipe pose detection handles a single person only, so there is
    # at most one landmark set.
    if results.pose_landmarks:
        # Scale each landmark's x/y by the image size and truncate to
        # integer pixel coordinates. The list is not used further below;
        # it is kept to show how the coordinates are obtained.
        pixel_points = [
            (int(lm.x * image_width), int(lm.y * image_height))
            for lm in results.pose_landmarks.landmark
        ]
        # Draw the landmarks and the connections between them.
        mp_drawing.draw_landmarks(
            annotated_image,
            results.pose_landmarks,
            mpPose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())

    cv2.imwrite('0.png', annotated_image)
其他案例的详细介绍可以查看官网:
https://google.github.io/mediapipe/solutions/pose.html