深蹲是一项健身运动,是锻炼大腿肌肉的王牌动作,坚持练习还能起到减肥的作用。深蹲被认为是增强腿部和臀部力量与围度,以及发展核心力量(core strength)必不可少的练习。深蹲是一种具有明确阶段和大幅度姿态变化的基本动作,因此可以通过机器视觉技术来实现对深蹲的检测识别。
- import io
- from PIL import ImageFont
- from PIL import ImageDraw
- import csv
- import cv2
- from matplotlib import pyplot as plt
- import numpy as np
- import os
- from PIL import Image
- import sys
- import tqdm
- from mediapipe.python.solutions import drawing_utils as mp_drawing
- from mediapipe.python.solutions import pose as mp_pose
要将样本转换为 k-NN 分类器的训练集,我们可以在给定图像上运行 BlazePose 模型,并将预测得到的关键点(landmarks)转储到 CSV 文件中。此外,Pose Classification Colab (Extended) 通过针对整个训练集对每个样本进行分类,提供了有用的工具来查找异常值(例如,被错误预测的姿势)和代表性不足的类别(例如,未覆盖所有摄像机角度)。
# Human pose encoding module.
class FullBodyPoseEmbedder(object):
    """Converts 3D pose landmarks into a 3D embedding.

    The embedder first normalizes the landmarks (translation so that the
    midpoint of the hips is the origin, then scale by the pose size) and
    then builds the embedding from signed offset vectors between selected
    pairs of landmarks.
    """

    def __init__(self, torso_size_multiplier=2.5):
        """Creates the embedder.

        Args:
          torso_size_multiplier: Factor applied to the torso length when
            computing the minimal body size used for scale normalization.
        """
        # Multiplier to apply to the torso to get minimal body size.
        self._torso_size_multiplier = torso_size_multiplier

        # Names of the landmarks as they appear in the prediction. The position
        # of a name in this list is the row index of that landmark.
        self._landmark_names = [
            'nose',
            'left_eye_inner', 'left_eye', 'left_eye_outer',
            'right_eye_inner', 'right_eye', 'right_eye_outer',
            'left_ear', 'right_ear',
            'mouth_left', 'mouth_right',
            'left_shoulder', 'right_shoulder',
            'left_elbow', 'right_elbow',
            'left_wrist', 'right_wrist',
            'left_pinky_1', 'right_pinky_1',
            'left_index_1', 'right_index_1',
            'left_thumb_2', 'right_thumb_2',
            'left_hip', 'right_hip',
            'left_knee', 'right_knee',
            'left_ankle', 'right_ankle',
            'left_heel', 'right_heel',
            'left_foot_index', 'right_foot_index',
        ]

    def __call__(self, landmarks):
        """Normalizes pose landmarks and converts them to an embedding.

        Args:
          landmarks: NumPy array with 3D landmarks of shape (N, 3), where `N`
            must equal the number of names in `_landmark_names`.

        Result:
          NumPy array with pose embedding of shape (M, 3) where `M` is the
          number of pairwise offsets defined in `_get_pose_distance_embedding`.

        Raises:
          AssertionError: If the number of landmarks is not the expected one.
        """
        assert landmarks.shape[0] == len(self._landmark_names), (
            'Unexpected number of landmarks: {}'.format(landmarks.shape[0]))

        # Work on a copy so the caller's array is never modified.
        landmarks = np.copy(landmarks)

        # Normalize landmarks.
        landmarks = self._normalize_pose_landmarks(landmarks)

        # Get embedding.
        embedding = self._get_pose_distance_embedding(landmarks)

        return embedding

    def _normalize_pose_landmarks(self, landmarks):
        """Normalizes landmarks translation and scale.

        Returns a new array; the input array is left untouched.
        """
        landmarks = np.copy(landmarks)

        # Normalize translation: move the pose center to the origin.
        pose_center = self._get_pose_center(landmarks)
        landmarks -= pose_center

        # Normalize scale.
        pose_size = self._get_pose_size(landmarks, self._torso_size_multiplier)
        landmarks /= pose_size
        # Multiplication by 100 is not required, but makes it easier to debug.
        landmarks *= 100

        return landmarks

    def _get_pose_center(self, landmarks):
        """Calculates pose center as the point halfway between the hips.

        This method was referenced by `_normalize_pose_landmarks` and
        `_get_pose_size` but was missing from the class, which made every call
        to the embedder fail with AttributeError.
        """
        return self._get_average_by_names(landmarks, 'left_hip', 'right_hip')

    def _get_pose_size(self, landmarks, torso_size_multiplier):
        """Calculates pose size.

        It is the maximum of two values:
          * Torso size multiplied by `torso_size_multiplier`
          * Maximum distance from pose center to any pose landmark
        """
        # This approach uses only 2D landmarks to compute pose size.
        landmarks = landmarks[:, :2]

        # Hips center (this is also the pose center).
        hips = self._get_pose_center(landmarks)

        # Shoulders center.
        shoulders = self._get_average_by_names(
            landmarks, 'left_shoulder', 'right_shoulder')

        # Torso size as the minimum body size.
        torso_size = np.linalg.norm(shoulders - hips)

        # Max dist to pose center.
        max_dist = np.max(np.linalg.norm(landmarks - hips, axis=1))

        return max(torso_size * torso_size_multiplier, max_dist)

    def _get_pose_distance_embedding(self, landmarks):
        """Converts pose landmarks into 3D embedding.

        Several pairwise offsets form the pose embedding. Every row is the
        signed per-axis offset `lmk_to - lmk_from` (not a scalar length). We
        use different types of pairs to cover different pose classes.

        Args:
          landmarks: NumPy array with 3D landmarks of shape (N, 3).

        Result:
          NumPy array with pose embedding of shape (M, 3) where `M` is the
          number of pairwise offsets listed below.
        """
        embedding = np.array([
            # One joint.
            self._get_distance(
                self._get_average_by_names(landmarks, 'left_hip', 'right_hip'),
                self._get_average_by_names(
                    landmarks, 'left_shoulder', 'right_shoulder')),

            self._get_distance_by_names(landmarks, 'left_shoulder', 'left_elbow'),
            self._get_distance_by_names(landmarks, 'right_shoulder', 'right_elbow'),

            self._get_distance_by_names(landmarks, 'left_elbow', 'left_wrist'),
            self._get_distance_by_names(landmarks, 'right_elbow', 'right_wrist'),

            self._get_distance_by_names(landmarks, 'left_hip', 'left_knee'),
            self._get_distance_by_names(landmarks, 'right_hip', 'right_knee'),

            self._get_distance_by_names(landmarks, 'left_knee', 'left_ankle'),
            self._get_distance_by_names(landmarks, 'right_knee', 'right_ankle'),

            # Two joints.
            self._get_distance_by_names(landmarks, 'left_shoulder', 'left_wrist'),
            self._get_distance_by_names(landmarks, 'right_shoulder', 'right_wrist'),

            self._get_distance_by_names(landmarks, 'left_hip', 'left_ankle'),
            self._get_distance_by_names(landmarks, 'right_hip', 'right_ankle'),

            # Four joints.
            self._get_distance_by_names(landmarks, 'left_hip', 'left_wrist'),
            self._get_distance_by_names(landmarks, 'right_hip', 'right_wrist'),

            # Five joints.
            self._get_distance_by_names(landmarks, 'left_shoulder', 'left_ankle'),
            self._get_distance_by_names(landmarks, 'right_shoulder', 'right_ankle'),

            # NOTE: these two rows repeat the hip -> wrist pairs listed under
            # "Four joints". They are kept on purpose: removing them would
            # change the embedding shape and make it incompatible with
            # embeddings computed earlier.
            self._get_distance_by_names(landmarks, 'left_hip', 'left_wrist'),
            self._get_distance_by_names(landmarks, 'right_hip', 'right_wrist'),

            # Cross body.
            self._get_distance_by_names(landmarks, 'left_elbow', 'right_elbow'),
            self._get_distance_by_names(landmarks, 'left_knee', 'right_knee'),

            self._get_distance_by_names(landmarks, 'left_wrist', 'right_wrist'),
            self._get_distance_by_names(landmarks, 'left_ankle', 'right_ankle'),

            # Body bent direction (disabled).
            # self._get_distance(
            #     self._get_average_by_names(landmarks, 'left_wrist', 'left_ankle'),
            #     landmarks[self._landmark_names.index('left_hip')]),
            # self._get_distance(
            #     self._get_average_by_names(landmarks, 'right_wrist', 'right_ankle'),
            #     landmarks[self._landmark_names.index('right_hip')]),
        ])

        return embedding

    def _get_average_by_names(self, landmarks, name_from, name_to):
        """Returns the midpoint of the two named landmarks."""
        lmk_from = landmarks[self._landmark_names.index(name_from)]
        lmk_to = landmarks[self._landmark_names.index(name_to)]
        return (lmk_from + lmk_to) * 0.5

    def _get_distance_by_names(self, landmarks, name_from, name_to):
        """Returns the signed offset from landmark `name_from` to `name_to`."""
        lmk_from = landmarks[self._landmark_names.index(name_from)]
        lmk_to = landmarks[self._landmark_names.index(name_to)]
        return self._get_distance(lmk_from, lmk_to)

    def _get_distance(self, lmk_from, lmk_to):
        """Returns the signed per-axis offset `lmk_to - lmk_from`."""
        return lmk_to - lmk_from
用于姿势分类的 k-NN 算法需要每个样本的特征向量表示和一个度量来计算两个这样的向量之间的距离,以找到最接近目标的姿势样本。
为了获得更好的分类结果,使用不同的距离度量调用了两次 k-NN 搜索:
最后,我们应用指数移动平均(EMA)平滑来抑制姿势预测或分类带来的噪声。为此,我们不仅搜索最近的姿势簇,还计算每个姿势簇的概率,并利用该概率随时间进行平滑处理。
# Smoothing of pose classification results.
class EMADictSmoothing(object):
    """Smoothes pose classification with an exponential moving average."""

    def __init__(self, window_size=10, alpha=0.2):
        """Creates the smoother.

        Args:
          window_size: Maximum number of most recent classifications kept.
          alpha: EMA coefficient; the weight of every older entry in the
            window is multiplied by `1 - alpha`.
        """
        self._window_size = window_size
        self._alpha = alpha
        self._data_in_window = []

    def __call__(self, data):
        """Smoothes given pose classification.

        For every pose class seen anywhere in the current window, the result
        is the weighted mean of its values, with the newest entry weighted 1
        and each older entry weighted `1 - alpha` times the previous one. A
        class that is missing from an entry counts as 0 for that entry.

        Args:
          data: Dictionary with pose classification. Sample:
              {
                'pushups_down': 8,
                'pushups_up': 2,
              }

        Result:
          Dictionary in the same format but with smoothed and float instead of
          integer values. Sample:
              {
                'pushups_down': 8.3,
                'pushups_up': 1.7,
              }
        """
        # Newest entry goes first; anything beyond the window size is dropped.
        self._data_in_window = ([data] + self._data_in_window)[:self._window_size]
        window = self._data_in_window

        # Every class name that occurs in at least one entry of the window.
        class_names = {name for entry in window for name in entry}

        decay = 1.0 - self._alpha
        smoothed = {}
        for name in class_names:
            weight = 1.0
            weighted_total = 0.0
            weight_total = 0.0
            for entry in window:
                weighted_total += weight * entry.get(name, 0.0)
                weight_total += weight
                # Older entries contribute less.
                weight *= decay
            smoothed[name] = weighted_total / weight_total

        return smoothed