• HigherHRNet 源码分析


    HigherHRNet 是一个 bottom-up 模型

    Bottom-up的人体姿势估计方法由于尺度变化的挑战,对于小人物的尺寸预测效果并不是太好。

    HigherHRNet 训练时采用多分辨率监督,简单来说就是输出多分辨率的heatmap

    实现也就是在HRNet的后面 添加一个反卷积分支,让其输出一个更大尺寸的heatmap

    关于HRNet 可以参考:

    HRNet 源码分析_那时那月那人的博客-CSDN博客

    下面分析模型代码:基本上和HRNet一致,仅仅在后面添加一个反卷积分支

    1. class PoseHigherResolutionNet(nn.Module):
    2. def __init__(self, cfg, **kwargs):
    3. pass
    4. def forward(self, x):
    5. # 模型和HRNet基本一致 只是在最后添加一个返回更高分辨率的的输出
    6. x = self.conv1(x)
    7. x = self.bn1(x)
    8. x = self.relu(x)
    9. x = self.conv2(x)
    10. x = self.bn2(x)
    11. x = self.relu(x)
    12. x = self.layer1(x)
    13. x_list = []
    14. for i in range(self.stage2_cfg['NUM_BRANCHES']):
    15. if self.transition1[i] is not None:
    16. x_list.append(self.transition1[i](x))
    17. else:
    18. x_list.append(x)
    19. y_list = self.stage2(x_list)
    20. x_list = []
    21. for i in range(self.stage3_cfg['NUM_BRANCHES']):
    22. if self.transition2[i] is not None:
    23. x_list.append(self.transition2[i](y_list[-1]))
    24. else:
    25. x_list.append(y_list[i])
    26. y_list = self.stage3(x_list)
    27. x_list = []
    28. for i in range(self.stage4_cfg['NUM_BRANCHES']):
    29. if self.transition3[i] is not None:
    30. x_list.append(self.transition3[i](y_list[-1]))
    31. else:
    32. x_list.append(y_list[i])
    33. y_list = self.stage4(x_list)
    34. final_outputs = []
    35. x = y_list[0]
    36. y = self.final_layers[0](x)
    37. final_outputs.append(y)
    38. # 在最后添加一个反卷积增大分辨率 如果需要更大的分比率可以加添加反卷积
    39. # concat([features, heatmaps])
    40. # 论文表面 添加一个反卷积在coco上达到最优
    41. for i in range(self.num_deconvs):
    42. if self.deconv_config.CAT_OUTPUT[i]:
    43. x = torch.cat((x, y), 1)
    44. x = self.deconv_layers[i](x)
    45. y = self.final_layers[i + 1](x)
    46. final_outputs.append(y)
    47. return final_outputs

    下面分析下heatmap label的生成代码,

    1. class CocoKeypoints(CocoDataset):
    2. def __init__(self,
    3. cfg,
    4. dataset_name,
    5. remove_images_without_annotations,
    6. heatmap_generator,
    7. joints_generator,
    8. transforms=None):
    9. super().__init__(cfg.DATASET.ROOT,
    10. dataset_name,
    11. cfg.DATASET.DATA_FORMAT)
    12. pass
    13. self.heatmap_generator = heatmap_generator
    14. self.joints_generator = joints_generator
    15. def __getitem__(self, idx):
    16. # 调用COCODataset的__getitem()__得到 img, target
    17. img, anno = super().__getitem__(idx)
    18. if img is None:
    19. img_info = self.coco.loadImgs(self.ids[idx])[0]
    20. img = np.zeros((3, img_info['height'], img_info['width']))
    21. mask = self.get_mask(anno, idx)
    22. anno = [
    23. obj for obj in anno
    24. if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
    25. ]
    26. # TODO(bowen): to generate scale-aware sigma, modify `get_joints` to associate a sigma to each joint
    27. joints = self.get_joints(anno)
    28. mask_list = [mask.copy() for _ in range(self.num_scales)]
    29. joints_list = [joints.copy() for _ in range(self.num_scales)]
    30. target_list = list()
    31. if self.transforms:
    32. img, mask_list, joints_list = self.transforms(
    33. img, mask_list, joints_list
    34. )
    35. for scale_id in range(self.num_scales):
    36. # 对多尺寸生成heatmap
    37. target_t = self.heatmap_generator[scale_id](joints_list[scale_id])
    38. joints_t = self.joints_generator[scale_id](joints_list[scale_id])
    39. target_list.append(target_t.astype(np.float32))
    40. mask_list[scale_id] = mask_list[scale_id].astype(np.float32)
    41. joints_list[scale_id] = joints_t.astype(np.int32)
    42. return img, target_list, mask_list, joints_list
    1. class HeatmapGenerator():
    2. def __init__(self, output_res, num_joints, sigma=-1):
    3. # heatmap输出的尺寸
    4. self.output_res = output_res
    5. # 17
    6. self.num_joints = num_joints
    7. # w32_512_adam_lr1e-3.yaml sigma = 2
    8. if sigma < 0:
    9. sigma = self.output_res/64
    10. self.sigma = sigma
    11. # 高斯核的大小
    12. size = 6*sigma + 3
    13. x = np.arange(0, size, 1, float)
    14. y = x[:, np.newaxis]
    15. x0, y0 = 3*sigma + 1, 3*sigma + 1
    16. # 用2d高斯函数生成 label
    17. self.g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
    18. def __call__(self, joints):
    19. # heatmaps 尺寸
    20. hms = np.zeros((self.num_joints, self.output_res, self.output_res),
    21. dtype=np.float32)
    22. sigma = self.sigma
    23. for p in joints:
    24. for idx, pt in enumerate(p):
    25. if pt[2] > 0:
    26. # 对每个点用高斯函数来生成label
    27. x, y = int(pt[0]), int(pt[1])
    28. if x < 0 or y < 0 or \
    29. x >= self.output_res or y >= self.output_res:
    30. continue
    31. ul = int(np.round(x - 3 * sigma - 1)), int(np.round(y - 3 * sigma - 1))
    32. br = int(np.round(x + 3 * sigma + 2)), int(np.round(y + 3 * sigma + 2))
    33. c, d = max(0, -ul[0]), min(br[0], self.output_res) - ul[0]
    34. a, b = max(0, -ul[1]), min(br[1], self.output_res) - ul[1]
    35. cc, dd = max(0, ul[0]), min(br[0], self.output_res)
    36. aa, bb = max(0, ul[1]), min(br[1], self.output_res)
    37. # 高斯函数赋值
    38. hms[idx, aa:bb, cc:dd] = np.maximum(
    39. hms[idx, aa:bb, cc:dd], self.g[a:b, c:d])
    40. return hms

    论文中提出一种 尺寸感知 (scale-aware)的训练方法 下面分析下具体的实现

    从 scale-aware 字面意思 尺寸感知,根据不同尺寸得到不同的高斯函数。从代码中可以看出 sigma 取自 joints 数组的第 4 列(p[0, 3])——按代码注释的猜测它与可见性有关,但更可能是每个人单独存储的 sigma 值,尺寸与可见性本身并无必然联系。

    1. class ScaleAwareHeatmapGenerator():
    2. def __init__(self, output_res, num_joints):
    3. self.output_res = output_res
    4. self.num_joints = num_joints
    5. def get_gaussian_kernel(self, sigma):
    6. size = 6*sigma + 3
    7. x = np.arange(0, size, 1, float)
    8. y = x[:, np.newaxis]
    9. x0, y0 = 3*sigma + 1, 3*sigma + 1
    10. g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
    11. return g
    12. def __call__(self, joints):
    13. hms = np.zeros((self.num_joints, self.output_res, self.output_res),
    14. dtype=np.float32)
    15. for p in joints:
    16. sigma = p[0, 3]
    17. # 根据 可见性生成不同的高斯函数
    18. # simga:
    19. # 0: 未标注 1: 标注不可见 2: 标注可见
    20. # 这不同的尺寸感知 和 可见性有关联? 越小的的尺寸越不可见?
    21. g = self.get_gaussian_kernel(sigma)
    22. for idx, pt in enumerate(p):
    23. if pt[2] > 0:
    24. x, y = int(pt[0]), int(pt[1])
    25. if x < 0 or y < 0 or \
    26. x >= self.output_res or y >= self.output_res:
    27. continue
    28. ul = int(np.round(x - 3 * sigma - 1)), int(np.round(y - 3 * sigma - 1))
    29. br = int(np.round(x + 3 * sigma + 2)), int(np.round(y + 3 * sigma + 2))
    30. c, d = max(0, -ul[0]), min(br[0], self.output_res) - ul[0]
    31. a, b = max(0, -ul[1]), min(br[1], self.output_res) - ul[1]
    32. cc, dd = max(0, ul[0]), min(br[0], self.output_res)
    33. aa, bb = max(0, ul[1]), min(br[1], self.output_res)
    34. hms[idx, aa:bb, cc:dd] = np.maximum(
    35. hms[idx, aa:bb, cc:dd], g[a:b, c:d])
    36. return hms

    接下来分析损失函数

    一般bottom-up是先找点,然后在分组。openpose 采用的PAF 来进行划分。

    HigherHRNet使用 associate embedding 方法来进行分组的。

    如果不了解 associate embedding 请关注下面这篇论文

    Associative Embedding: End-to-End Learning for Joint Detection and Grouping

    https://arxiv.org/abs/1611.05424

     Lg损失函数由上面两部分组成。

    第一部分表示 同一个人所有关键点之间的损失函数,让所有关键点 embedding 向 reference embedding(平均 embedding)靠近。

    第二部分表示 不同人之间的损失,两者 reference embedding 距离越大 损失函数越小 exp(-d**2)

    接下来分析下损失函数的代码实现

    1. class MultiLossFactory(nn.Module):
    2. def __init__(self, cfg):
    3. super().__init__()
    4. # init check
    5. self._init_check(cfg)
    6. self.num_joints = cfg.MODEL.NUM_JOINTS
    7. self.num_stages = cfg.LOSS.NUM_STAGES
    8. # heatmap 损失 MSE 损失
    9. # WITH_HEATMAPS_LOSS: [True, True]
    10. # [HeatmapLoss(), HeatmapLoss()]
    11. self.heatmaps_loss = \
    12. nn.ModuleList(
    13. [
    14. HeatmapLoss()
    15. if with_heatmaps_loss else None
    16. for with_heatmaps_loss in cfg.LOSS.WITH_HEATMAPS_LOSS
    17. ]
    18. )
    19. self.heatmaps_loss_factor = cfg.LOSS.HEATMAPS_LOSS_FACTOR
    20. # associative embedding 损失
    21. # WITH_AE_LOSS: [True, False]
    22. # [AELoss(), None]
    23. self.ae_loss = \
    24. nn.ModuleList(
    25. [
    26. AELoss(cfg.LOSS.AE_LOSS_TYPE) if with_ae_loss else None
    27. for with_ae_loss in cfg.LOSS.WITH_AE_LOSS
    28. ]
    29. )
    30. self.push_loss_factor = cfg.LOSS.PUSH_LOSS_FACTOR
    31. self.pull_loss_factor = cfg.LOSS.PULL_LOSS_FACTOR
    32. def forward(self, outputs, heatmaps, masks, joints):
    33. # loss_factory(outputs, heatmaps, masks, joints)
    34. # forward check
    35. self._forward_check(outputs, heatmaps, masks, joints)
    36. heatmaps_losses = []
    37. push_losses = []
    38. pull_losses = []
    39. # len(outputs) == 2
    40. for idx in range(len(outputs)):
    41. offset_feat = 0
    42. # [HeatmapLoss(), HeatmapLoss()]
    43. if self.heatmaps_loss[idx]:
    44. heatmaps_pred = outputs[idx][:, :self.num_joints]
    45. offset_feat = self.num_joints
    46. # 计算 heatmap 损失
    47. heatmaps_loss = self.heatmaps_loss[idx](
    48. heatmaps_pred, heatmaps[idx], masks[idx]
    49. )
    50. # self.heatmaps_loss_factor[idx] 损失函数权重系数 1
    51. heatmaps_loss = heatmaps_loss * self.heatmaps_loss_factor[idx]
    52. heatmaps_losses.append(heatmaps_loss)
    53. else:
    54. heatmaps_losses.append(None)
    55. # Associative Embedding
    56. # [AELoss(), None]
    57. if self.ae_loss[idx]:
    58. tags_pred = outputs[idx][:, offset_feat:]
    59. batch_size = tags_pred.size()[0]
    60. tags_pred = tags_pred.contiguous().view(batch_size, -1, 1)
    61. # 计算 associative embedding loss
    62. push_loss, pull_loss = self.ae_loss[idx](
    63. tags_pred, joints[idx]
    64. )
    65. push_loss = push_loss * self.push_loss_factor[idx]
    66. pull_loss = pull_loss * self.pull_loss_factor[idx]
    67. push_losses.append(push_loss)
    68. pull_losses.append(pull_loss)
    69. else:
    70. push_losses.append(None)
    71. pull_losses.append(None)
    72. return heatmaps_losses, push_losses, pull_losses

    下面分析ae (associative embedding) 损失

    1. class AELoss(nn.Module):
    2. def __init__(self, loss_type):
    3. super().__init__()
    4. self.loss_type = loss_type
    5. def singleTagLoss(self, pred_tag, joints):
    6. """
    7. associative embedding loss for one image
    8. """
    9. # pred_tag (17 * h * w, 1)
    10. # joints (30, 17, 2)
    11. tags = []
    12. pull = 0
    13. # 遍历人数
    14. for joints_per_person in joints:
    15. tmp = []
    16. # 遍历所有关节点
    17. for joint in joints_per_person:
    18. if joint[1] > 0:
    19. # joint 里面存的值 [关节点在heatmap的位置, 1]
    20. # 该关节点可见 加入tmp
    21. tmp.append(pred_tag[joint[0]])
    22. # 如果没有可见关键点 continue
    23. if len(tmp) == 0:
    24. continue
    25. tmp = torch.stack(tmp)
    26. tags.append(torch.mean(tmp, dim=0))
    27. # 这里是 对单个人得损失 让每个关节点 embedding 向 reference embedding (均值靠拢)
    28. pull = pull + torch.mean((tmp - tags[-1].expand_as(tmp))**2)
    29. num_tags = len(tags)
    30. if num_tags == 0:
    31. return make_input(torch.zeros(1).float()), \
    32. make_input(torch.zeros(1).float())
    33. elif num_tags == 1:
    34. return make_input(torch.zeros(1).float()), \
    35. pull/(num_tags)
    36. tags = torch.stack(tags)
    37. # 不同人之间得损失
    38. size = (num_tags, num_tags)
    39. A = tags.expand(*size)
    40. B = A.permute(1, 0)
    41. diff = A - B
    42. if self.loss_type == 'exp':
    43. # 根据损失函数 希望 不同人之间reference embedding的距离越来越远
    44. diff = torch.pow(diff, 2)
    45. push = torch.exp(-diff)
    46. # 着里 -num_tags 可有可无 仅仅是添加一个margin
    47. push = torch.sum(push) - num_tags
    48. elif self.loss_type == 'max':
    49. diff = 1 - torch.abs(diff)
    50. push = torch.clamp(diff, min=0).sum() - num_tags
    51. else:
    52. raise ValueError('Unkown ae loss type')
    53. return push/((num_tags - 1) * num_tags) * 0.5, \
    54. pull/(num_tags)
    55. def forward(self, tags, joints):
    56. """
    57. accumulate the tag loss for each image in the batch
    58. """
    59. # tags (B, 17 * h * w, 1)
    60. # joints (B, 30, 17, 2)
    61. pushes, pulls = [], []
    62. joints = joints.cpu().data.numpy()
    63. batch_size = tags.size(0)
    64. for i in range(batch_size):
    65. push, pull = self.singleTagLoss(tags[i], joints[i])
    66. pushes.append(push)
    67. pulls.append(pull)
    68. return torch.stack(pushes), torch.stack(pulls)

    接下来分析inference代码  论文中指出使用一个

    Heatmap Aggregation for Inference
    We propose a heatmap aggregation strategy during inference. We use bilinear interpolation to upsample all the predicted heatmaps with different resolutions to the resolution of the input image and average the heatmaps from all scales for final prediction. This strategy is quite different from previous methods [3, 30, 33] which only use heatmaps from a single scale or single stage for prediction.

    简单来说 使用多尺度的heatmap来进行预测,对于不同尺寸的heatmap采用双线性插值的上采样来进行处理。和其他的模型处理方法不同,其他模型处理方法一般只取最后一个stage的最大尺寸heatmap来进行预测。HigherHRNet之所以使用不同尺寸的heatmap来进行预测,是因为不同的heatmap尺寸对不同尺度的关键点起到的作用也不同。低分辨率的heatmap容易漏掉小尺寸人的关键点,但是小尺寸人的关键点可以在高分辨率的heatmap中恢复。

    下面我们分析下inference代码  这里不同尺度的融合是采用取平均值的方式实现

    1. def get_multi_stage_outputs(
    2. cfg, model, image, with_flip=False,
    3. project2image=False, size_projected=None
    4. ):
    5. # outputs = []
    6. heatmaps_avg = 0
    7. num_heatmaps = 0
    8. heatmaps = []
    9. tags = []
    10. outputs = model(image)
    11. for i, output in enumerate(outputs):
    12. if len(outputs) > 1 and i != len(outputs) - 1:
    13. # 对小尺寸的heatmap用双线性插值放大
    14. output = torch.nn.functional.interpolate(
    15. output,
    16. size=(outputs[-1].size(2), outputs[-1].size(3)),
    17. mode='bilinear',
    18. align_corners=False
    19. )
    20. offset_feat = cfg.DATASET.NUM_JOINTS \
    21. if cfg.LOSS.WITH_HEATMAPS_LOSS[i] else 0
    22. if cfg.LOSS.WITH_HEATMAPS_LOSS[i] and cfg.TEST.WITH_HEATMAPS[i]:
    23. # 这里进行累加 后面计算平均值
    24. heatmaps_avg += output[:, :cfg.DATASET.NUM_JOINTS]
    25. num_heatmaps += 1
    26. if cfg.LOSS.WITH_AE_LOSS[i] and cfg.TEST.WITH_AE[i]:
    27. # 这里是分组信息
    28. tags.append(output[:, offset_feat:])
    29. if num_heatmaps > 0:
    30. heatmaps.append(heatmaps_avg/num_heatmaps)

    HigherHRNet 主要代码分析完。

  • 相关阅读:
    APP自动化测试 ---- Appium介绍及运行原理
    Leetcode 1331. 数组序号转换
    有关于脉动调查的这些问题你都知道吗
    保姆级银河麒麟V10高级服务器离线安装mysql5.7数据库
    AttributeError: Can only use .dt accessor with datetimelike values
    enumerate内置函数的使用
    匿名内部类的概念和使用详解
    php 计算工作时间 排除节假日可设置补班
    C++之5|组合与继承
    淘宝/天猫邻家好货 API 返回值说明
  • 原文地址:https://blog.csdn.net/xiaoxu1025/article/details/127850559