• 【目标检测】|yolov6 结构代码分析


    yolov6s整体:
    model/yolo.py

    Model:

    # NOTE(review): abridged excerpt from yolov6 model/yolo.py — __init__ is shown
    # without self/params here; see the original source for the full signature.
    class Model(nn.Module):
        """Top-level YOLOv6 model: data flows backbone -> neck -> detect head."""
        def __init__():
            # build_network constructs all three sub-modules in one call
            self.backbone, self.neck, self.detect = build_network(config, channels, num_classes, anchors, num_layers)
    
        def forward(self, x):
    
            x = self.backbone(x)  # multi-scale features (tuple of three maps)
            x = self.neck(x)      # Rep-PAN feature fusion
            x = self.detect(x)    # decoupled detection head
            return x 
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10

    在build_network中生成backbone,neck detect (其中,backbone为EfficientRep)

    EfficientRep位于yolov6/model/efficientrep.py

    
    # NOTE(review): abridged excerpt — channels_list, num_repeat, block, num_anchors,
    # reg_max and use_dfl come from the parsed config in the original source, and the
    # body indentation is uneven in this excerpt.
    def build_network(config, channels, num_classes, anchors, num_layers):
    
            # backbone: EfficientRep built from RepVGG-style blocks
            backbone = EfficientRep(
                in_channels=channels,
                channels_list=channels_list,
                num_repeats=num_repeat,
                block=block
            )
    
            # neck: Rep-PAN fusing the three backbone scales
            neck = NECK(
                channels_list=channels_list,
                num_repeats=num_repeat,
                block=block
            )
    
        # head layers are built separately, then wrapped by the Detect module
        head_layers = build_effidehead_layer(channels_list, num_anchors, num_classes, reg_max)
    
        head = Detect(num_classes, anchors, num_layers, head_layers=head_layers, use_dfl=use_dfl)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19

    []

    backbone

        block=RepVGGBlock
    
    • 1
    # NOTE(review): abridged excerpt — the RepVGGBlock(...) calls are shown with
    # empty argument lists; channels_list / num_repeats come from the config in
    # the original source.
    class EfficientRep(nn.Module):
        """YOLOv6 backbone: a stem plus four ERBlock stages built from
        RepVGGBlock units, with SimSPPF appended to the last stage."""
    
        def __init__(
            self,
            in_channels=3,
            block=RepVGGBlock
        ):
            super().__init__()
    
    
            # stem: first RepVGG conv applied to the raw image
            self.stem = RepVGGBlock( )
            # each ERBlock = one RepVGGBlock (downsampling, per the article text)
            # followed by a RepBlock stacking num_repeats[i] blocks at fixed size
            self.ERBlock_2 = nn.Sequential(
                RepVGGBlock(),
                RepBlock(
                    in_channels=channels_list[1],
                    out_channels=channels_list[1],
                    n=num_repeats[1],
                    block=RepVGGBlock,
                )
            )
    
            self.ERBlock_3 = nn.Sequential(
                RepVGGBlock(  ),
                RepBlock(
                    in_channels=channels_list[2],
                    out_channels=channels_list[2],
                    n=num_repeats[2],
                    block=RepVGGBlock,
                )
            )
    
            self.ERBlock_4 = nn.Sequential(
                RepVGGBlock(  ),
                RepBlock(
                    in_channels=channels_list[3],
                    out_channels=channels_list[3],
                    n=num_repeats[3],
                    block=RepVGGBlock,
                )
            )
    
            # the deepest stage additionally ends with SimSPPF multi-scale pooling
            self.ERBlock_5 = nn.Sequential(
                RepVGGBlock(  ),
                RepBlock(
                    in_channels=channels_list[4],
                    out_channels=channels_list[4],
                    n=num_repeats[4],
                    block=RepVGGBlock,
                ),
                SimSPPF(
                    in_channels=channels_list[4],
                    out_channels=channels_list[4],
                    kernel_size=5
                )
            )
    
        def forward(self, x):
    
            # collect the outputs of stages 3, 4 and 5 — the three scales the
            # neck consumes
            outputs = []
            x = self.stem(x)
            x = self.ERBlock_2(x)
            x = self.ERBlock_3(x)
            outputs.append(x)
            x = self.ERBlock_4(x)
            outputs.append(x)
            x = self.ERBlock_5(x)
            outputs.append(x)
    
            return tuple(outputs)
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70

    stem=RepVGGBlock

    RepVGGBlock RVB

    RepVGGBlock(
          (nonlinearity): ReLU(inplace=True)
          (se): Identity()
          (rbr_dense): Sequential(
            (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
            (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          )
          (rbr_1x1): Sequential(
            (conv): Conv2d(3, 32, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          )
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12

    RepVGGBlock
    在yolov6/layer/common.py中,deploy表示推理过程。

    # NOTE(review): abridged excerpt — the `def __init__` line below is missing its
    # parameter list/colon, and `id_out` in forward (the rbr_identity branch output,
    # or 0 when rbr_identity is None) is computed in code omitted from this excerpt.
    class RepVGGBlock(nn.Module):
        '''RepVGGBlock is a basic rep-style block, including training and deploy status
        This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
        '''
        def __init__
            if deploy:
                # deploy: the three branches are re-parameterized into one fused conv
                self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride,
                                             padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode)
            else:
                # training: identity-BN branch exists only when shapes allow a skip
                self.rbr_identity = nn.BatchNorm2d(num_features=in_channels) if out_channels == in_channels and stride == 1 else None
                self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups)
                self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups)
                
        def forward(self, inputs):
            # sum of dense (3x3), 1x1 and identity branches, then SE + nonlinearity
            return self.nonlinearity(self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out))
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16

    训练过程包含3*3卷积、1*1卷积及bn恒等映射三个分支,三个分支相加后输出;在部署时,为了方便部署,将三个分支重参数化融合为单个3*3卷积输出。
    在这里插入图片描述
    请添加图片描述

    RepBlock为N个RepVGG Block
    RB为几个RVB的串联,其中第一个RVB用于特征层的size变化(stride=2,下采样),后面N个RVB 用于特征层的融合,size保持不变。

    ERBlock2
    一个RVB +RB(1RVB+1 RVB)
    ERBlock3
    一个RVB +RB(1RVB+3 RVB)
    ERBlock4
    一个RVB +RB(1RVB+5 RVB)
    ERBlock_5
    一个RVB +RB (1RVB+1 RVB)+SimSPPF

    class SimSPPF(nn.Module):
        """Simplified SPPF: 1x1 reduce, three chained max-pools of the same
        kernel, channel-concat of all four tensors, then a 1x1 projection."""

        def __init__(self, in_channels, out_channels, kernel_size=5):
            super().__init__()
            hidden = in_channels // 2  # bottleneck width after the first 1x1
            self.cv1 = SimConv(in_channels, hidden, 1, 1)
            # concat of [x, p1, p2, p3] feeds 4 * hidden channels into the last 1x1
            self.cv2 = SimConv(hidden * 4, out_channels, 1, 1)
            # stride-1 pool with same padding: spatial size is preserved
            self.m = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=kernel_size // 2)

        def forward(self, x):
            x = self.cv1(x)
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                # three cascaded pools; each result is kept as a branch
                feats = [x]
                for _ in range(3):
                    feats.append(self.m(feats[-1]))
                return self.cv2(torch.cat(feats, 1))
    
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18

    其中SimConv是由Conv+BN+ReLU组成, 先通过一个SimConv层,特征图h,w的size不变,out_channel变成in_channel的一半

    # NOTE(review): abridged excerpt — __init__ is shown without its parameters
    # (self, in_channels, out_channels, kernel_size, stride, ...) and the
    # nn.Conv2d(...) arguments are omitted.
    class SimConv(nn.Module):
        '''Normal Conv with ReLU activation'''
        def __init__():
            # "same"-style padding for odd kernel sizes
            padding = kernel_size // 2
            self.conv = nn.Conv2d( )
            self.bn = nn.BatchNorm2d(out_channels)
            self.act = nn.ReLU()
    
        def forward(self, x):
            # Conv -> BN -> ReLU
            return self.act(self.bn(self.conv(x)))
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10

    在这里插入图片描述

    在这里插入图片描述

    simconv输出做为一个分支,而后经过3个maxpooling层,每个maxpooling的kernel=5,s=1,padding=kernel//2, 每经过一个maxpooling后,fm size均不变,并做为分支。而后通过cat将几个分支在channel维度上相加,得到的size较于SPPF的输入,h,w不变,channel为输入的2倍,最后再通过一个SConv层,通道减半,使得输入和输出的fm size不变。

    Neck

    在这里插入图片描述
    Sconv就是1*1
    在这里插入图片描述

    # NOTE(review): abridged excerpt — __init__ is missing self/most parameters
    # and super().__init__(); channels_list / num_repeats come from the config.
    class RepPANNeck(nn.Module):
        """RepPANNeck Module
        EfficientRep is the default backbone of this model.
        RepPANNeck has the balance of feature fusion ability and hardware efficiency.
        """
    
        def __init__(
            block=RepVGGBlock
        ):
            # fusion blocks for the top-down (FPN) path
            self.Rep_p4 = RepBlock(
                in_channels=channels_list[3] + channels_list[5],
                out_channels=channels_list[5],
                n=num_repeats[5],
                block=block
            )
    
            self.Rep_p3 = RepBlock(
                in_channels=channels_list[2] + channels_list[6],
                out_channels=channels_list[6],
                n=num_repeats[6],
                block=block
            )
    
            # fusion blocks for the bottom-up (PAN) path
            self.Rep_n3 = RepBlock(
                in_channels=channels_list[6] + channels_list[7],
                out_channels=channels_list[8],
                n=num_repeats[7],
                block=block
            )
    
            self.Rep_n4 = RepBlock(
                in_channels=channels_list[5] + channels_list[9],
                out_channels=channels_list[10],
                n=num_repeats[8],
                block=block
            )
    
            # 1x1 convs that shrink channels before each upsample
            self.reduce_layer0 = SimConv(
                in_channels=channels_list[4],
                out_channels=channels_list[5],
                kernel_size=1,
                stride=1
            )
    
            # transposed-conv upsampling (channels unchanged)
            self.upsample0 = Transpose(
                in_channels=channels_list[5],
                out_channels=channels_list[5],
            )
    
            self.reduce_layer1 = SimConv(
                in_channels=channels_list[5],
                out_channels=channels_list[6],
                kernel_size=1,
                stride=1
            )
    
            self.upsample1 = Transpose(
                in_channels=channels_list[6],
                out_channels=channels_list[6]
            )
    
            # stride-2 convs for the bottom-up downsampling steps
            self.downsample2 = SimConv(
                in_channels=channels_list[6],
                out_channels=channels_list[7],
                kernel_size=3,
                stride=2
            )
    
            self.downsample1 = SimConv(
                in_channels=channels_list[8],
                out_channels=channels_list[9],
                kernel_size=3,
                stride=2
            )
    
    
        def forward(self, input):
    
            # x2/x1/x0: backbone outputs from shallow to deep
            (x2, x1, x0) = input
    
            # top-down step 1: reduce deepest map, upsample, concat with x1, fuse
            fpn_out0 = self.reduce_layer0(x0)
            upsample_feat0 = self.upsample0(fpn_out0)
            f_concat_layer0 = torch.cat([upsample_feat0, x1], 1)
            f_out0 = self.Rep_p4(f_concat_layer0)
    
            # top-down step 2: same recipe one level up; yields the finest output
            fpn_out1 = self.reduce_layer1(f_out0)
            upsample_feat1 = self.upsample1(fpn_out1)
            f_concat_layer1 = torch.cat([upsample_feat1, x2], 1)
            pan_out2 = self.Rep_p3(f_concat_layer1)
    
            # bottom-up step 1: downsample finest output, concat with fpn_out1, fuse
            down_feat1 = self.downsample2(pan_out2)
            p_concat_layer1 = torch.cat([down_feat1, fpn_out1], 1)
            pan_out1 = self.Rep_n3(p_concat_layer1)
    
            # bottom-up step 2: produce the coarsest output
            down_feat0 = self.downsample1(pan_out1)
            p_concat_layer2 = torch.cat([down_feat0, fpn_out0], 1)
            pan_out0 = self.Rep_n4(p_concat_layer2)
    
            # finest to coarsest, matching the head's expected order
            outputs = [pan_out2, pan_out1, pan_out0]
    
            return outputs
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101

    Neck层美团官方称其为Rep-PAN,是基于PAN的拓扑方法,如上图所示,类似一种“U”型结构,其中U型左侧从上到下fm的h,w增大,右侧从下到上fm的h,w减小,其中Upsample上采样基于torch官方自带的转置卷积实现

    整个neck层的流程为,U型左侧,从ERB5输出20x20x512的fm,通过SConv 变成20x20x128大小,上采样后h,w较之前增大一倍后与ERB4的输出在channel层上concat后fm变成40x40x384,通过一个RB(s=1, o≠i)后,输出 40x40x128,重复上述步骤后,输出80x80x64的fm。U型右侧,将80x80x64的fm先SConv下采样,得到40x40x64的fm,与U型左侧h,w一致的fm在channel层上concat后,通过一个RB(s=1, o≠i),输出第二个fm,重复U型右侧以上步骤,输出第三个fm。至此,neck层输出三个fm分别为(20x20x256, 40x40x128, 80x80x64).

    从结构上来看,其实还是用了YOLO V5这PANET结构,只不过将其中的CSPDarknet换做了RepBlock。

    head在这里插入图片描述

    在这里插入图片描述

    在这里插入图片描述
    在这里插入图片描述

    整个head借鉴了yolox中的解耦头设计,并对其做了改进,head流程如下:从neck层输出三个分支,对于每个分支,先对输出fm通过BConv层(1*1),做fm的特征融合后,分成两个分支,一个分支通过Conv(3*3)+BConv完成分类任务的预测,另外一个分支先通过Conv融合特征后再分成两个分支,一个分支通过BConv完成边框的回归,一个分支通过BConv完成前后背景的分类,至此三个分支再通过concat在channel层上融合,输出未经后处理的预测结果。

    # NOTE(review): excerpt shows only forward; self.nl / stems / cls_convs /
    # reg_convs / preds / stride / proj_conv etc. are set up in the omitted
    # __init__, and generate_anchors / dist2bbox / F are module-level helpers.
    class Detect(nn.Module):
        def forward(self, x):
            """Decoupled head. Training: return raw features plus flattened class
            scores and regression distributions. Eval: decode (optionally via DFL)
            into boxes scaled to input resolution, concatenated with scores."""
            if self.training:
                cls_score_list = []
                reg_distri_list = []
    
                for i in range(self.nl):
                    # shared stem, then separate classification / regression towers
                    x[i] = self.stems[i](x[i])
                    cls_x = x[i]
                    reg_x = x[i]
                    cls_feat = self.cls_convs[i](cls_x)
                    cls_output = self.cls_preds[i](cls_feat)
                    reg_feat = self.reg_convs[i](reg_x)
                    reg_output = self.reg_preds[i](reg_feat)
    
                    cls_output = torch.sigmoid(cls_output)
                    # flatten spatial dims: (b, c, h, w) -> (b, h*w, c)
                    cls_score_list.append(cls_output.flatten(2).permute((0, 2, 1)))
                    reg_distri_list.append(reg_output.flatten(2).permute((0, 2, 1)))
                
                # concatenate anchors from all levels along dim 1
                cls_score_list = torch.cat(cls_score_list, axis=1)
                reg_distri_list = torch.cat(reg_distri_list, axis=1)
    
                return x, cls_score_list, reg_distri_list
            else:
                cls_score_list = []
                reg_dist_list = []
                anchor_points, stride_tensor = generate_anchors(
                    x, self.stride, self.grid_cell_size, self.grid_cell_offset, device=x[0].device, is_eval=True)
    
                for i in range(self.nl):
                    b, _, h, w = x[i].shape
                    l = h * w
                    x[i] = self.stems[i](x[i])
                    cls_x = x[i]
                    reg_x = x[i]
                    cls_feat = self.cls_convs[i](cls_x)
                    cls_output = self.cls_preds[i](cls_feat)
                    reg_feat = self.reg_convs[i](reg_x)
                    reg_output = self.reg_preds[i](reg_feat)
                    
                    if self.use_dfl:
                        # DFL decode: softmax over the reg_max+1 bins, then
                        # proj_conv projects the distribution to a scalar distance
                        reg_output = reg_output.reshape([-1, 4, self.reg_max + 1, l]).permute(0, 2, 1, 3)
                        reg_output = self.proj_conv(F.softmax(reg_output, dim=1))
                    
                    cls_output = torch.sigmoid(cls_output)
                    cls_score_list.append(cls_output.reshape([b, self.nc, l]))
                    reg_dist_list.append(reg_output.reshape([b, 4, l]))
                
                # merge levels, then move anchors to dim 1: (b, total_anchors, c)
                cls_score_list = torch.cat(cls_score_list, axis=-1).permute(0, 2, 1)
                reg_dist_list = torch.cat(reg_dist_list, axis=-1).permute(0, 2, 1)
    
    
                # distances -> boxes at anchor points, then scale back by stride
                pred_bboxes = dist2bbox(reg_dist_list, anchor_points, box_format='xywh')
                pred_bboxes *= stride_tensor
                # the column of ones stands in for an objectness score
                return torch.cat(
                    [
                        pred_bboxes,
                        torch.ones((b, pred_bboxes.shape[1], 1), device=pred_bboxes.device, dtype=pred_bboxes.dtype),
                        cls_score_list
                    ],
                    axis=-1)
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62

    REPVGG融合方法

    在这里插入图片描述

    ref
    https://blog.csdn.net/zqwwwm/article/details/125635594
    https://mp.weixin.qq.com/s/RrQCP4pTSwpTmSgvly9evg
    https://zhuanlan.zhihu.com/p/353697121

  • 相关阅读:
    AMAZINGIC晶焱科技推出低操作电压ESD保护元件
    [2023.09.12]: Yew应用开发的第一个hook--use_state
    DJ12-2-3 逻辑运算指令与移位指令
    flutter系列之:Material主题的基础-MaterialApp
    神仙打架!腾讯云阿里云谁更棋高一着?
    shopify独立站的运营
    MLC-LLM 部署RWKV World系列模型实战(3B模型Mac M2解码可达26tokens/s)
    七、【React-Router5】嵌套路由
    【Vue3.0移动端项目--旅游网】--项目初始化搭建
    【数据处理】如何在图片中随机采样
  • 原文地址:https://blog.csdn.net/qq_35608277/article/details/126850486