• 3D Vision - 3. Human Pose Estimation (Pose Estimation): Algorithm Comparison and Demos - MediaPipe vs. OpenPose


    Previous post

    3D Vision - 2. Human Pose Estimation (Pose Estimation) Primer: OpenPose installation, build, and usage (single frames and real-time video)


    1. MediaPipe

    Code

    import cv2
    import time
    import numpy as np
    from tqdm import tqdm
    import mediapipe as mp

    mp_pose = mp.solutions.pose
    pose = mp_pose.Pose(static_image_mode=True,
                        model_complexity=2,
                        smooth_landmarks=True,
                        min_detection_confidence=0.5,
                        min_tracking_confidence=0.5)
    drawing = mp.solutions.drawing_utils


    def process_frame(img):
        height, width, channels = img.shape
        start = time.time()
        # MediaPipe expects RGB input, while OpenCV frames are BGR
        results = pose.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        if results.pose_landmarks:
            drawing.draw_landmarks(img, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
            coords = np.array(results.pose_landmarks.landmark)
            for index, each in enumerate(coords):
                cx = int(each.x * width)
                cy = int(each.y * height)
                cz = each.z
                radius = 5
                if index == 0:  # nose
                    img = cv2.circle(img, (cx, cy), radius, (0, 0, 255), -1)
                elif index in [11, 12]:  # shoulders
                    img = cv2.circle(img, (cx, cy), radius, (193, 182, 255), -1)
                elif index in [23, 24]:  # hip joints
                    img = cv2.circle(img, (cx, cy), radius, (16, 144, 247), -1)
                elif index in [13, 14]:  # elbows
                    img = cv2.circle(img, (cx, cy), radius, (1, 240, 255), -1)
                elif index in [25, 26]:  # knees
                    img = cv2.circle(img, (cx, cy), radius, (140, 47, 240), -1)
                elif index in [15, 16, 27, 28]:  # wrists and ankles
                    img = cv2.circle(img, (cx, cy), radius, (223, 155, 60), -1)
                elif index in [17, 19, 21]:  # left hand
                    img = cv2.circle(img, (cx, cy), radius, (16, 144, 247), -1)
                elif index in [18, 20, 22]:  # right hand
                    img = cv2.circle(img, (cx, cy), radius, (1, 240, 255), -1)
                elif index in [27, 29, 31]:  # left foot (27 already matched the ankle branch above)
                    img = cv2.circle(img, (cx, cy), radius, (140, 47, 240), -1)
                elif index in [28, 30, 32]:  # right foot (28 already matched the ankle branch above)
                    img = cv2.circle(img, (cx, cy), radius, (223, 155, 6), -1)
                elif index in [9, 10]:  # mouth
                    img = cv2.circle(img, (cx, cy), radius, (16, 144, 247), -1)
                elif index in [1, 2, 3, 4, 5, 6, 7, 8]:  # face and eyes
                    img = cv2.circle(img, (cx, cy), radius, (1, 240, 255), -1)
                else:  # other landmarks
                    img = cv2.circle(img, (cx, cy), radius, (140, 47, 240), -1)
        else:
            fail = "fail detection"
            img = cv2.putText(img, fail, (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
        FPS = 1 / (time.time() - start)
        img = cv2.putText(img, "FPS " + str(int(FPS)), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
        return img


    def out_video(input):
        file = input.split("/")[-1]
        output = "out-optim-" + file
        print("It will start processing video: {}".format(input))
        cap = cv2.VideoCapture(input)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_size = (cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # create the VideoWriter; VideoWriter_fourcc selects the video codec
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        fps = cap.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(output, fourcc, fps, (int(frame_size[0]), int(frame_size[1])))
        # the progress bar
        with tqdm(range(frame_count)) as pbar:
            while cap.isOpened():
                success, frame = cap.read()
                if not success:
                    break
                try:
                    frame = process_frame(frame)
                    out.write(frame)
                    pbar.update(1)
                except Exception:
                    print("ERROR")
            pbar.close()
        cv2.destroyAllWindows()
        out.release()
        cap.release()
        print("{} finished!".format(output))


    if __name__ == '__main__':
        video_dirs = "1.mp4"
        out_video(video_dirs)
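
    For live video, MediaPipe is normally run with static_image_mode=False so the tracker carries landmarks between frames instead of re-running detection on every frame. A minimal webcam sketch (my own addition, not part of the script above; it assumes a camera at index 0):

    import cv2
    import mediapipe as mp

    mp_pose = mp.solutions.pose
    drawing = mp.solutions.drawing_utils
    # static_image_mode=False enables cross-frame tracking, which is faster for video
    pose = mp_pose.Pose(static_image_mode=False, model_complexity=1,
                        min_detection_confidence=0.5, min_tracking_confidence=0.5)

    cap = cv2.VideoCapture(0)  # assumed default webcam
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if results.pose_landmarks:
            drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        cv2.imshow('pose', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()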

    Results

    [Video: MediaPipe human pose estimation demo]


    2. OpenPose

    Code that writes the output with ffmpeg (this is the PyTorch port, which only uses OpenPose's pretrained models)

    demo_video.py

    import copy
    import math
    import time
    import numpy as np
    import cv2
    from glob import glob
    import os
    import argparse
    import json

    # video file processing setup
    # from: https://stackoverflow.com/a/61927951
    import subprocess
    import sys
    from pathlib import Path
    from typing import NamedTuple


    class FFProbeResult(NamedTuple):
        return_code: int
        json: str
        error: str


    def ffprobe(file_path) -> FFProbeResult:
        command_array = ["ffprobe",
                         "-v", "quiet",
                         "-print_format", "json",
                         "-show_format",
                         "-show_streams",
                         file_path]
        result = subprocess.run(command_array, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                universal_newlines=True)
        return FFProbeResult(return_code=result.returncode,
                             json=result.stdout,
                             error=result.stderr)


    # openpose setup
    from src import model
    from src import util
    from src.body import Body
    from src.hand import Hand

    body_estimation = Body('model/body_pose_model.pth')
    hand_estimation = Hand('model/hand_pose_model.pth')


    def process_frame(frame, body=True, hands=True):
        start = time.time()
        canvas = copy.deepcopy(frame)
        if body:
            candidate, subset = body_estimation(frame)
            canvas = util.draw_bodypose(canvas, candidate, subset)
        # hand detection relies on the body keypoints, so skip it when body is disabled
        if body and hands:
            hands_list = util.handDetect(candidate, subset, frame)
            all_hand_peaks = []
            for x, y, w, is_left in hands_list:
                peaks = hand_estimation(frame[y:y + w, x:x + w, :])
                peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
                peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
                all_hand_peaks.append(peaks)
            canvas = util.draw_handpose(canvas, all_hand_peaks)
        FPS = math.ceil(1 / (time.time() - start))
        canvas = cv2.putText(canvas, "FPS " + str(int(FPS)), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
        return canvas


    # writing video with ffmpeg because the cv2 writer failed
    # https://stackoverflow.com/questions/61036822/opencv-videowriter-produces-cant-find-starting-number-error
    import ffmpeg

    # open the specified video
    parser = argparse.ArgumentParser(description="Process a video annotating poses detected.")
    parser.add_argument('--file', type=str, default='video/1.avi', help='Video file location to process.')
    parser.add_argument('--no_hands', action='store_true', help='No hand pose')
    parser.add_argument('--no_body', action='store_true', help='No body pose')
    args = parser.parse_args()
    video_file = args.file
    cap = cv2.VideoCapture(video_file)

    # get video file info
    ffprobe_result = ffprobe(args.file)
    info = json.loads(ffprobe_result.json)
    videoinfo = [i for i in info["streams"] if i["codec_type"] == "video"][0]
    input_fps = videoinfo["avg_frame_rate"]
    # input_fps = float(input_fps[0]) / float(input_fps[1])
    input_pix_fmt = videoinfo["pix_fmt"]
    input_vcodec = videoinfo["codec_name"]

    # define a writer object to write to a modified file
    postfix = info["format"]["format_name"].split(",")[0]
    output_file = ".".join(video_file.split(".")[:-1]) + ".processed." + postfix


    class Writer():
        def __init__(self, output_file, input_fps, input_framesize, input_pix_fmt, input_vcodec):
            if os.path.exists(output_file):
                os.remove(output_file)
            self.ff_proc = (
                ffmpeg
                .input('pipe:',
                       format='rawvideo',
                       pix_fmt="bgr24",
                       s='%sx%s' % (input_framesize[1], input_framesize[0]),
                       r=input_fps)
                .output(output_file, pix_fmt=input_pix_fmt, vcodec=input_vcodec)
                .overwrite_output()
                .run_async(pipe_stdin=True)
            )

        def __call__(self, frame):
            self.ff_proc.stdin.write(frame.tobytes())

        def close(self):
            self.ff_proc.stdin.close()
            self.ff_proc.wait()


    writer = None
    iteration = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if frame is None:
            break
        posed_frame = process_frame(frame, body=not args.no_body, hands=not args.no_hands)
        if writer is None:
            input_framesize = posed_frame.shape[:2]
            writer = Writer(output_file, input_fps, input_framesize, input_pix_fmt, input_vcodec)
        # cv2.imshow('frame', posed_frame)
        # write the frame
        writer(posed_frame)
        iteration += 1
        print("iteration: {}".format(iteration))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    writer.close()
    cv2.destroyAllWindows()
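
    Assuming ffmpeg and ffprobe are on your PATH and the two .pth weight files sit in model/ (these are the script's defaults; adjust to your layout), a typical invocation looks like:

    python demo_video.py --file video/1.avi
    python demo_video.py --file video/1.avi --no_hands   # body only, noticeably faster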

    src/util.py

    import numpy as np
    import math
    import cv2
    import matplotlib
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
    from matplotlib.figure import Figure
    import matplotlib.pyplot as plt


    def padRightDownCorner(img, stride, padValue):
        h = img.shape[0]
        w = img.shape[1]

        pad = 4 * [None]
        pad[0] = 0  # up
        pad[1] = 0  # left
        pad[2] = 0 if (h % stride == 0) else stride - (h % stride)  # down
        pad[3] = 0 if (w % stride == 0) else stride - (w % stride)  # right

        img_padded = img
        pad_up = np.tile(img_padded[0:1, :, :] * 0 + padValue, (pad[0], 1, 1))
        img_padded = np.concatenate((pad_up, img_padded), axis=0)
        pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padValue, (1, pad[1], 1))
        img_padded = np.concatenate((pad_left, img_padded), axis=1)
        pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padValue, (pad[2], 1, 1))
        img_padded = np.concatenate((img_padded, pad_down), axis=0)
        pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padValue, (1, pad[3], 1))
        img_padded = np.concatenate((img_padded, pad_right), axis=1)

        return img_padded, pad


    # transfer the caffe model weights to pytorch, matching the layer names
    def transfer(model, model_weights):
        transfered_model_weights = {}
        for weights_name in model.state_dict().keys():
            transfered_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])]
        return transfered_model_weights


    # draw the body keypoints and limbs
    def draw_bodypose(canvas, candidate, subset):
        stickwidth = 4
        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
                   [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
                   [1, 16], [16, 18], [3, 17], [6, 18]]
        colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
                  [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
                  [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
        for i in range(18):
            for n in range(len(subset)):
                index = int(subset[n][i])
                if index == -1:
                    continue
                x, y = candidate[index][0:2]
                cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
        for i in range(17):
            for n in range(len(subset)):
                index = subset[n][np.array(limbSeq[i]) - 1]
                if -1 in index:
                    continue
                cur_canvas = canvas.copy()
                Y = candidate[index.astype(int), 0]
                X = candidate[index.astype(int), 1]
                mX = np.mean(X)
                mY = np.mean(Y)
                length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
                angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
                polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
                cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
                canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
        # plt.imsave("preview.jpg", canvas[:, :, [2, 1, 0]])
        # plt.imshow(canvas[:, :, [2, 1, 0]])
        return canvas


    def draw_handpose(canvas, all_hand_peaks, show_number=False):
        edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
                 [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
        fig = Figure(figsize=plt.figaspect(canvas))
        fig.subplots_adjust(0, 0, 1, 1)
        fig.subplots_adjust(bottom=0, top=1, left=0, right=1)
        bg = FigureCanvas(fig)
        ax = fig.subplots()
        ax.axis('off')
        ax.imshow(canvas)
        width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()
        for peaks in all_hand_peaks:
            for ie, e in enumerate(edges):
                # only draw an edge if neither endpoint is the (0, 0) "not detected" marker
                if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                    x1, y1 = peaks[e[0]]
                    x2, y2 = peaks[e[1]]
                    ax.plot([x1, x2], [y1, y2],
                            color=matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]))
            for i, keypoint in enumerate(peaks):
                x, y = keypoint
                ax.plot(x, y, 'r.')
                if show_number:
                    ax.text(x, y, str(i))
        bg.draw()
        # np.frombuffer replaces the deprecated np.fromstring
        canvas = np.frombuffer(bg.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)
        return canvas


    # the image drawn by opencv is not as good
    def draw_handpose_by_opencv(canvas, peaks, show_number=False):
        edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
                 [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
        # cv2.rectangle(canvas, (x, y), (x+w, y+w), (0, 255, 0), 2, lineType=cv2.LINE_AA)
        # cv2.putText(canvas, 'left' if is_left else 'right', (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        for ie, e in enumerate(edges):
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                cv2.line(canvas, (x1, y1), (x2, y2),
                         matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2)
        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
            if show_number:
                cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA)
        return canvas


    # detect hands according to the body pose keypoints
    # see https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
    def handDetect(candidate, subset, oriImg):
        # right hand: wrist 4, elbow 3, shoulder 2
        # left hand: wrist 7, elbow 6, shoulder 5
        ratioWristElbow = 0.33
        detect_result = []
        image_height, image_width = oriImg.shape[0:2]
        for person in subset.astype(int):
            # skip a side if any of its three joints was not detected
            has_left = np.sum(person[[5, 6, 7]] == -1) == 0
            has_right = np.sum(person[[2, 3, 4]] == -1) == 0
            if not (has_left or has_right):
                continue
            hands = []
            # left hand
            if has_left:
                left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
                x1, y1 = candidate[left_shoulder_index][:2]
                x2, y2 = candidate[left_elbow_index][:2]
                x3, y3 = candidate[left_wrist_index][:2]
                hands.append([x1, y1, x2, y2, x3, y3, True])
            # right hand
            if has_right:
                right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
                x1, y1 = candidate[right_shoulder_index][:2]
                x2, y2 = candidate[right_elbow_index][:2]
                x3, y3 = candidate[right_wrist_index][:2]
                hands.append([x1, y1, x2, y2, x3, y3, False])
            for x1, y1, x2, y2, x3, y3, is_left in hands:
                # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbow)
                # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
                # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
                # const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow);
                # const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder);
                # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
                x = x3 + ratioWristElbow * (x3 - x2)
                y = y3 + ratioWristElbow * (y3 - y2)
                distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
                distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
                width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
                # x, y refer to the box center --> offset to the top-left point
                x -= width / 2
                y -= width / 2  # width = height
                # clip the box to the image
                if x < 0: x = 0
                if y < 0: y = 0
                width1 = width
                width2 = width
                if x + width > image_width: width1 = image_width - x
                if y + width > image_height: width2 = image_height - y
                width = min(width1, width2)
                # discard hand boxes smaller than 20 pixels
                if width >= 20:
                    detect_result.append([int(x), int(y), int(width), is_left])
        '''
        return value: [[x, y, w, True if left hand else False]].
        width = height since the network requires a squared input.
        x, y is the coordinate of the top-left corner.
        '''
        return detect_result


    # get the index of the max element of a 2d array
    def npmax(array):
        arrayindex = array.argmax(1)
        arrayvalue = array.max(1)
        i = arrayvalue.argmax()
        j = arrayindex[i]
        return i, j
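
    The hand box in handDetect is extrapolated past the wrist along the elbow-to-wrist direction, with a side length tied to the forearm and upper-arm lengths. A quick numeric sanity check of that formula, using made-up coordinates (not part of util.py):

    import math

    # hypothetical keypoints: shoulder (100, 100), elbow (150, 150), wrist (200, 200)
    x1, y1 = 100.0, 100.0   # shoulder
    x2, y2 = 150.0, 150.0   # elbow
    x3, y3 = 200.0, 200.0   # wrist
    ratioWristElbow = 0.33

    # box center extrapolated beyond the wrist: (216.5, 216.5)
    x = x3 + ratioWristElbow * (x3 - x2)
    y = y3 + ratioWristElbow * (y3 - y2)

    distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)      # ~70.7
    distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)   # ~70.7
    width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)   # ~106.1

    # shift from the center to the top-left corner of the square crop
    x -= width / 2
    y -= width / 2
    print(int(x), int(y), int(width))  # -> 163 163 106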
    src/hand.py
    
    import cv2
    import json
    import numpy as np
    import math
    import time
    from scipy.ndimage import gaussian_filter
    import matplotlib.pyplot as plt
    import matplotlib
    import torch
    from skimage.measure import label

    from src.model import handpose_model
    from src import util


    class Hand(object):
        def __init__(self, model_path):
            self.model = handpose_model()
            if torch.cuda.is_available():
                self.model = self.model.cuda()
            model_dict = util.transfer(self.model, torch.load(model_path))
            self.model.load_state_dict(model_dict)
            self.model.eval()

        def __call__(self, oriImg):
            scale_search = [0.5, 1.0, 1.5, 2.0]
            # scale_search = [0.5]
            boxsize = 368
            stride = 8
            padValue = 128
            thre = 0.05
            multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
            heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
            # paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

            for m in range(len(multiplier)):
                scale = multiplier[m]
                imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
                imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
                im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
                im = np.ascontiguousarray(im)

                data = torch.from_numpy(im).float()
                if torch.cuda.is_available():
                    data = data.cuda()
                # data = data.permute([2, 0, 1]).unsqueeze(0).float()
                with torch.no_grad():
                    output = self.model(data).cpu().numpy()

                # extract outputs, resize, and remove padding
                heatmap = np.transpose(np.squeeze(output), (1, 2, 0))  # output 1 is heatmaps
                heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
                heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
                heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

                heatmap_avg += heatmap / len(multiplier)

            all_peaks = []
            for part in range(21):
                map_ori = heatmap_avg[:, :, part]
                one_heatmap = gaussian_filter(map_ori, sigma=3)
                binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
                # the whole map is below the threshold
                if np.sum(binary) == 0:
                    all_peaks.append([0, 0])
                    continue
                label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
                max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
                label_img[label_img != max_index] = 0
                map_ori[label_img == 0] = 0
                y, x = util.npmax(map_ori)
                all_peaks.append([x, y])
            return np.array(all_peaks)


    if __name__ == "__main__":
        hand_estimation = Hand('../model/hand_pose_model.pth')
        test_image = '../images/hand.jpg'
        oriImg = cv2.imread(test_image)  # B, G, R order
        peaks = hand_estimation(oriImg)
        # draw_handpose expects a list of per-hand peak arrays
        canvas = util.draw_handpose(oriImg, [peaks], True)
        cv2.imshow('', canvas)
        cv2.waitKey(0)
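
    The per-keypoint peak extraction in Hand.__call__ (smooth, threshold, keep the strongest connected blob, take its argmax) can be checked in isolation on a synthetic heatmap. A sketch of the same steps, my own addition:

    import numpy as np
    from scipy.ndimage import gaussian_filter
    from skimage.measure import label

    def extract_peak(map_ori, thre=0.05):
        # same steps as Hand.__call__ for a single heatmap channel
        one_heatmap = gaussian_filter(map_ori, sigma=3)
        binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
        if np.sum(binary) == 0:
            return [0, 0]  # keypoint not found
        label_img, n = label(binary, return_num=True, connectivity=binary.ndim)
        # keep only the connected component with the largest total score
        best = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, n + 1)]) + 1
        masked = np.where(label_img == best, map_ori, 0)
        y, x = np.unravel_index(masked.argmax(), masked.shape)
        return [x, y]

    # synthetic heatmap with a single bump peaked at (row=40, col=25)
    heat = np.zeros((64, 64), dtype=np.float32)
    heat[40, 25] = 100.0
    heat = gaussian_filter(heat, sigma=2)
    print(extract_peak(heat))  # -> [25, 40]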

    src/body.py

    import cv2
    import numpy as np
    import math
    import time
    from scipy.ndimage import gaussian_filter
    import matplotlib.pyplot as plt
    import matplotlib
    import torch
    from torchvision import transforms

    from src import util
    from src.model import bodypose_model


    class Body(object):
        def __init__(self, model_path):
            self.model = bodypose_model()
            if torch.cuda.is_available():
                self.model = self.model.cuda()
            model_dict = util.transfer(self.model, torch.load(model_path))
            self.model.load_state_dict(model_dict)
            self.model.eval()

        def __call__(self, oriImg):
            # scale_search = [0.5, 1.0, 1.5, 2.0]
            scale_search = [0.5]
            boxsize = 368
            stride = 8
            padValue = 128
            thre1 = 0.1
            thre2 = 0.05
            multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
            heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
            paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

            for m in range(len(multiplier)):
                scale = multiplier[m]
                imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
                imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
                im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
                im = np.ascontiguousarray(im)

                data = torch.from_numpy(im).float()
                if torch.cuda.is_available():
                    data = data.cuda()
                # data = data.permute([2, 0, 1]).unsqueeze(0).float()
                with torch.no_grad():
                    Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
                Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
                Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()

                # extract outputs, resize, and remove padding
                heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
                heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
                heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
                heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

                paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
                paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
                paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
                paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

                # average over scales (the widely circulated version adds heatmap_avg
                # to itself here; with a single scale the result is the same, but this
                # form is correct for multi-scale search as well)
                heatmap_avg += heatmap / len(multiplier)
                paf_avg += paf / len(multiplier)

            all_peaks = []
            peak_counter = 0
            for part in range(18):
                map_ori = heatmap_avg[:, :, part]
                one_heatmap = gaussian_filter(map_ori, sigma=3)

                map_left = np.zeros(one_heatmap.shape)
                map_left[1:, :] = one_heatmap[:-1, :]
                map_right = np.zeros(one_heatmap.shape)
                map_right[:-1, :] = one_heatmap[1:, :]
                map_up = np.zeros(one_heatmap.shape)
                map_up[:, 1:] = one_heatmap[:, :-1]
                map_down = np.zeros(one_heatmap.shape)
                map_down[:, :-1] = one_heatmap[:, 1:]

                peaks_binary = np.logical_and.reduce(
                    (one_heatmap >= map_left, one_heatmap >= map_right,
                     one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
                peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
                peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
                peak_id = range(peak_counter, peak_counter + len(peaks))
                peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]

                all_peaks.append(peaks_with_score_and_id)
                peak_counter += len(peaks)

            # find connections in the specified sequence; center 29 is in position 15
            limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
                       [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
                       [1, 16], [16, 18], [3, 17], [6, 18]]
            # the middle joints heatmap correspondence
            mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22],
                      [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52],
                      [55, 56], [37, 38], [45, 46]]

            connection_all = []
            special_k = []
            mid_num = 10

            for k in range(len(mapIdx)):
                score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
                candA = all_peaks[limbSeq[k][0] - 1]
                candB = all_peaks[limbSeq[k][1] - 1]
                nA = len(candA)
                nB = len(candB)
                indexA, indexB = limbSeq[k]
                if nA != 0 and nB != 0:
                    connection_candidate = []
                    for i in range(nA):
                        for j in range(nB):
                            vec = np.subtract(candB[j][:2], candA[i][:2])
                            norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                            norm = max(0.001, norm)
                            vec = np.divide(vec, norm)

                            startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
                                                np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                            vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
                                              for I in range(len(startend))])
                            vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
                                              for I in range(len(startend))])

                            score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                            score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
                                0.5 * oriImg.shape[0] / norm - 1, 0)
                            criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
                            criterion2 = score_with_dist_prior > 0
                            if criterion1 and criterion2:
                                connection_candidate.append(
                                    [i, j, score_with_dist_prior,
                                     score_with_dist_prior + candA[i][2] + candB[j][2]])

                    connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
                    connection = np.zeros((0, 5))
                    for c in range(len(connection_candidate)):
                        i, j, s = connection_candidate[c][0:3]
                        if i not in connection[:, 3] and j not in connection[:, 4]:
                            connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                            if len(connection) >= min(nA, nB):
                                break
                    connection_all.append(connection)
                else:
                    special_k.append(k)
                    connection_all.append([])

            # last number in each row is the total number of parts of that person
            # the second-to-last number in each row is the score of the overall configuration
            subset = -1 * np.ones((0, 20))
            candidate = np.array([item for sublist in all_peaks for item in sublist])

            for k in range(len(mapIdx)):
                if k not in special_k:
                    partAs = connection_all[k][:, 0]
                    partBs = connection_all[k][:, 1]
                    indexA, indexB = np.array(limbSeq[k]) - 1

                    for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                        found = 0
                        subset_idx = [-1, -1]
                        for j in range(len(subset)):  # 1:size(subset,1):
                            if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                                subset_idx[found] = j
                                found += 1

                        if found == 1:
                            j = subset_idx[0]
                            if subset[j][indexB] != partBs[i]:
                                subset[j][indexB] = partBs[i]
                                subset[j][-1] += 1
                                subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                        elif found == 2:  # if found 2 and disjoint, merge them
                            j1, j2 = subset_idx
                            membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                            if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                                subset[j1][:-2] += (subset[j2][:-2] + 1)
                                subset[j1][-2:] += subset[j2][-2:]
                                subset[j1][-2] += connection_all[k][i][2]
                                subset = np.delete(subset, j2, 0)
                            else:  # same as found == 1
                                subset[j1][indexB] = partBs[i]
                                subset[j1][-1] += 1
                                subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                        # if no partA was found in the subset, create a new one
                        elif not found and k < 17:
                            row = -1 * np.ones(20)
                            row[indexA] = partAs[i]
                            row[indexB] = partBs[i]
                            row[-1] = 2
                            row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + \
                                connection_all[k][i][2]
                            subset = np.vstack([subset, row])

            # delete rows of subset in which too few parts occur
            deleteIdx = []
            for i in range(len(subset)):
                if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
                    deleteIdx.append(i)
            subset = np.delete(subset, deleteIdx, axis=0)

            # subset: n*20 array; slots 0-17 are indices into candidate, 18 is the total score, 19 is the part count
            # candidate: x, y, score, id
            return candidate, subset


    if __name__ == "__main__":
        body_estimation = Body('../model/body_pose_model.pth')
        test_image = '../images/ski.jpg'
        oriImg = cv2.imread(test_image)  # B, G, R order
        candidate, subset = body_estimation(oriImg)
        canvas = util.draw_bodypose(oriImg, candidate, subset)
        plt.imshow(canvas[:, :, [2, 1, 0]])
        plt.show()
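
    As the comments at the end of Body.__call__ note, candidate is an (N, 4) array of x, y, score, id over all detected parts, and subset has one 20-element row per person (18 part slots indexing into candidate, then the total score and the part count). A small sketch of walking that structure, assuming the loaded model and test image from the __main__ block above:

    candidate, subset = body_estimation(oriImg)

    print("people detected:", len(subset))
    for n, person in enumerate(subset):
        n_parts = int(person[-1])      # slot 19: number of parts found
        score = person[-2]             # slot 18: overall configuration score
        print(f"person {n}: {n_parts} parts, score {score:.2f}")
        for part_id in range(18):      # slots 0-17 index into candidate, -1 = missing
            idx = int(person[part_id])
            if idx == -1:
                continue
            x, y, part_score = candidate[idx][:3]
            print(f"  part {part_id}: ({x:.0f}, {y:.0f}) score {part_score:.2f}")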

    Results

    [Video: OpenPose demo]


    Code that writes the output with cv2

    This requires a full OpenPose installation; see my other post:

    3D Vision - 2. Human Pose Estimation (Pose Estimation) Primer: OpenPose installation, build, and usage (single frames and real-time video)

    import os
    import time
    import cv2
    import sys
    from tqdm import tqdm
    from sys import platform

    BASE_DIR = os.path.dirname(os.path.realpath(__file__))

    if platform == 'win32':
        lib_dir = 'Release'
        bin_dir = 'bin'
        x64_dir = 'x64'
        lib_path = os.path.join(BASE_DIR, lib_dir)
        bin_path = os.path.join(BASE_DIR, bin_dir)
        x64_path = os.path.join(BASE_DIR, x64_dir)
        sys.path.append(lib_path)
        os.environ['PATH'] += ';' + bin_path + ';' + os.path.join(x64_path, 'Release') + ';'
        try:
            import pyopenpose as op
            print("successful, import pyopenpose!")
        except ImportError as e:
            print("fail to import pyopenpose!")
            raise e
    else:
        # this script only handles the Windows build layout
        print(f"Unsupported platform:\n{platform}\n")
        sys.exit(-1)


    def out_video(input):
        datum = op.Datum()
        opWrapper = op.WrapperPython()
        params = dict()
        params["model_folder"] = os.path.join(BASE_DIR, "models")
        params["model_pose"] = "BODY_25"
        params["number_people_max"] = 3
        params["disable_blending"] = False
        opWrapper.configure(params)
        opWrapper.start()
        file = input.split("/")[-1]
        output = "video/out-optim-" + file
        print("It will start processing video: {}".format(input))
        cap = cv2.VideoCapture(input)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        # create the VideoWriter; VideoWriter_fourcc selects the video codec
        fourcc = cv2.VideoWriter_fourcc('D', 'I', 'V', 'X')
        fps = cap.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(output, fourcc, fps, frame_size)
        # the progress bar
        with tqdm(range(frame_count)) as pbar:
            while cap.isOpened():
                start = time.time()
                success, frame = cap.read()
                if success:
                    datum.cvInputData = frame
                    opWrapper.emplaceAndPop(op.VectorDatum([datum]))
                    opframe = datum.cvOutputData
                    FPS = 1 / (time.time() - start)
                    opframe = cv2.putText(opframe, "FPS " + str(int(FPS)), (25, 50),
                                          cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
                    out.write(opframe)
                    pbar.update(1)
                else:
                    break
            pbar.close()
        cv2.destroyAllWindows()
        out.release()
        cap.release()
        print("{} finished!".format(output))


    if __name__ == "__main__":
        video_dir = "video/2.avi"
        out_video(video_dir)
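
    If you need the raw coordinates rather than the rendered frame, the Datum object also exposes them. With BODY_25, datum.poseKeypoints should be a (num_people, 25, 3) array of x, y, confidence, and None when nobody is detected; a hedged sketch, reusing the datum/opWrapper setup from out_video above:

    # after opWrapper.emplaceAndPop(op.VectorDatum([datum])):
    keypoints = datum.poseKeypoints  # None if no person was detected
    if keypoints is not None:
        print("people:", keypoints.shape[0])
        for p, person in enumerate(keypoints):
            nose_x, nose_y, nose_conf = person[0]  # BODY_25 index 0 is the nose
            print(f"person {p}: nose at ({nose_x:.0f}, {nose_y:.0f}), conf {nose_conf:.2f}")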

    Results

    [Video: OpenPose results]


    Discussion

    In practice:

    1. MediaPipe is mature and works out of the box, with nothing extra to download or configure. OpenPose, by contrast, requires setting up a pile of dependencies, which gets messy, unless you convert its Caffe models to PyTorch as done above; I am personally more at home with PyTorch.

    2. MediaPipe's inference is much faster than OpenPose's, and not just by a little (a rough way to measure this on your own clips is sketched after this list).

    3. OpenPose's keypoints are more accurate than MediaPipe's, and when several people are in frame OpenPose detects all of them, whereas MediaPipe detects only one person.
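
    To quantify point 2, a rough timing harness (my own sketch, not part of the scripts above; pass in either pipeline's per-frame function, e.g. the MediaPipe or OpenPose process_frame):

    import time
    import cv2

    def mean_fps(video_path, process_frame, warmup=5, max_frames=200):
        """Rough per-frame throughput of a pose pipeline on one clip."""
        cap = cv2.VideoCapture(video_path)
        times, n = [], 0
        while cap.isOpened() and n < warmup + max_frames:
            ok, frame = cap.read()
            if not ok:
                break
            t0 = time.time()
            process_frame(frame)
            if n >= warmup:  # skip warm-up frames (model init, caches)
                times.append(time.time() - t0)
            n += 1
        cap.release()
        return len(times) / sum(times) if times else 0.0

    # e.g. print(mean_fps("1.mp4", mediapipe_process_frame))
    #      print(mean_fps("1.mp4", openpose_process_frame))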


    Next post

    MediaPipe gesture recognition

  • Original post: https://blog.csdn.net/XiaoyYidiaodiao/article/details/125571632