• YOLOX加强特征提取网络Panet分析


    在上一篇文章中,分享了YOLOX的CSPDarknet网络,详见YOLOX backbone——CSPDarknet的实现

    在CSPDarknet中,有三个层次的输出, 分别是dark5(20x20x1024)、dark4(40x40x512)、dark3(80x80x256)。这三个层次的输出,会进入一个加强特征提取网络Panet,进一步进行特征提取,见下图红框标出来的部分:

    Panet基本思想是,将深层特征进行上采样,并与浅层特征进行融合(见图上1~6标注部分),融合后的浅层特征再进行下采样,然后再与深层特征融合(见图上6~10部分)。

    在YOLOX的官方实现代码中,Panet的实现位于yolo_pafpn.py文件。结合上面的数字标注,对官方代码进行了注释:

    class YOLOPAFPN(nn.Module):
        """
        YOLOX feature-enhancement neck (PAFPN / PANet).

        Consumes the three CSPDarknet outputs — dark3 (80x80x256),
        dark4 (40x40x512) and dark5 (20x20x1024) — and fuses them twice:
        a top-down path upsamples deep features and concatenates them with
        shallower ones, then a bottom-up path downsamples the fused shallow
        features and concatenates them back with the deeper ones. The three
        resulting maps (strides 8, 16, 32) feed the YOLO heads.
        """

        def __init__(
            self,
            depth=1.0,
            width=1.0,
            in_features=("dark3", "dark4", "dark5"),
            in_channels=None,
            depthwise=False,
            act="silu",
        ):
            """
            Args:
                depth: depth multiplier — scales the bottleneck count of each CSPLayer.
                width: width multiplier — scales every channel count.
                in_features: backbone output names to use, ordered shallow to deep.
                in_channels: base channel counts of the three backbone outputs;
                    defaults to [256, 512, 1024]. A None sentinel is used to
                    avoid a mutable default argument.
                depthwise: if True, use depthwise-separable convs (DWConv) for
                    the bottom-up downsampling convs and inside the CSP layers.
                act: activation name forwarded to every conv block.
            """
            super().__init__()
            if in_channels is None:
                in_channels = [256, 512, 1024]
            self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act)
            self.in_features = in_features
            self.in_channels = in_channels
            Conv = DWConv if depthwise else BaseConv
            self.upsample = nn.Upsample(scale_factor=2, mode="nearest")

            # 20x20x1024 -> 20x20x512
            self.lateral_conv0 = BaseConv(
                int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act
            )
            # 40x40x1024 (upsampled dark5 concat dark4) -> 40x40x512
            self.C3_p4 = CSPLayer(
                int(2 * in_channels[1] * width),
                int(in_channels[1] * width),
                round(3 * depth),
                False,
                depthwise=depthwise,
                act=act,
            )  # cat
            # 40x40x512 -> 40x40x256
            self.reduce_conv1 = BaseConv(
                int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act
            )
            # 80x80x512 (upsampled concat dark3) -> 80x80x256
            self.C3_p3 = CSPLayer(
                int(2 * in_channels[0] * width),  # 2*256
                int(in_channels[0] * width),  # 256
                round(3 * depth),
                False,
                depthwise=depthwise,
                act=act,
            )
            # bottom-up conv: 80x80x256 -> 40x40x256 (stride-2 3x3)
            self.bu_conv2 = Conv(
                int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act
            )
            # 40x40x512 (downsampled concat fpn_out1) -> 40x40x512
            self.C3_n3 = CSPLayer(
                int(2 * in_channels[0] * width),  # 2*256
                int(in_channels[1] * width),  # 512
                round(3 * depth),
                False,
                depthwise=depthwise,
                act=act,
            )
            # bottom-up conv: 40x40x512 -> 20x20x512 (stride-2 3x3)
            self.bu_conv1 = Conv(
                int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act
            )
            # 20x20x1024 (downsampled concat fpn_out0) -> 20x20x1024
            self.C3_n4 = CSPLayer(
                int(2 * in_channels[1] * width),  # 2*512
                int(in_channels[2] * width),  # 1024
                round(3 * depth),
                False,
                depthwise=depthwise,
                act=act,
            )

        def forward(self, input):
            """
            Args:
                input: input images.

            Returns:
                Tuple[Tensor]: three PAFPN feature maps, shallow to deep
                (strides 8, 16, 32), ready for the YOLO heads.
            """
            # backbone
            out_features = self.backbone(input)
            features = [out_features[f] for f in self.in_features]
            [x2, x1, x0] = features

            # Step 1: 1x1 conv on the deepest map, 20x20x1024 -> 20x20x512
            fpn_out0 = self.lateral_conv0(x0)  # 1024->512/32
            # Step 2: upsample, 20x20x512 -> 40x40x512
            f_out0 = self.upsample(fpn_out0)  # 512/16
            # Step 3: concat with dark4 + CSP layer
            # 40x40x512 + 40x40x512 -> 40x40x1024
            f_out0 = torch.cat([f_out0, x1], 1)  # 512->1024/16
            # 40x40x1024 -> 40x40x512
            f_out0 = self.C3_p4(f_out0)  # 1024->512/16
            # Step 4: 1x1 conv, 40x40x512 -> 40x40x256
            fpn_out1 = self.reduce_conv1(f_out0)  # 512->256/16
            # Step 5: upsample again, 40x40x256 -> 80x80x256
            f_out1 = self.upsample(fpn_out1)  # 256/8
            # Step 6: concat with dark3 + CSP layer; output goes to a YOLO head
            # 80x80x256 + 80x80x256 -> 80x80x512
            f_out1 = torch.cat([f_out1, x2], 1)  # 256->512/8
            # 80x80x512 -> 80x80x256
            pan_out2 = self.C3_p3(f_out1)  # 512->256/8
            # Step 7: downsample, 80x80x256 -> 40x40x256
            p_out1 = self.bu_conv2(pan_out2)  # 256->256/16
            # Step 8: concat + CSP layer; output goes to a YOLO head
            # 40x40x256 + 40x40x256 -> 40x40x512
            p_out1 = torch.cat([p_out1, fpn_out1], 1)  # 256->512/16
            # 40x40x512 -> 40x40x512
            pan_out1 = self.C3_n3(p_out1)  # 512->512/16
            # Step 9: downsample again, 40x40x512 -> 20x20x512
            p_out0 = self.bu_conv1(pan_out1)  # 512->512/32
            # Step 10: concat + CSP layer; output goes to a YOLO head
            # 20x20x512 + 20x20x512 -> 20x20x1024
            p_out0 = torch.cat([p_out0, fpn_out0], 1)  # 512->1024/32
            # 20x20x1024 -> 20x20x1024
            pan_out0 = self.C3_n4(p_out0)  # 1024->1024/32

            outputs = (pan_out2, pan_out1, pan_out0)
            return outputs

    参考:Pytorch 搭建自己的YoloX目标检测平台(Bubbliiiing 深度学习 教程)_哔哩哔哩_bilibili

  • 相关阅读:
    刘韧:接近聪明人最容易变聪明
    如何对低代码/无代码平台进行分类?
    Python成为打工人必备技能,悄悄告诉你还能追对象哦
    Spring必背面试题
    实战讲解SpringCloud网关接口限流SpringCloudGateway+Redis(图+文)
    学习笔记:物理渲染-间接光照
    25.Xaml DateGrid控件---->默认单选,可以多项选择的网格控件
    SpringMVC--HttpMessageConverter
    计组 | 交叉编址 & 流水线
    linux内核面试题(2)
  • 原文地址:https://blog.csdn.net/DeliaPu/article/details/125450196