U2Net——U-Net套U-Net——套娃式图像分割算法

U2Net

1 相关参考

论文名称： U2-Net: Going Deeper with Nested U-Structure for Salient Object Detection
论文地址： https://arxiv.org/abs/2005.09007
官方源码： https://github.com/xuebinqin/U-2-Net
参考代码： Pytorch UNet
参考博客： https://blog.csdn.net/qq_37541097/article/details/126255483
参考视频： bilibili 我为霹导举大旗

建议大家可以先看霹导的原理讲解视频和代码讲解视频，代码写的真的太优雅了，以下内容作为自己对重点的记录和一些代码中的修改！

2 $U^2-Net$ 网络结构

整体结构：
在这里插入图片描述

保留了原始的U-Net网络结构，只是将每一个Block的内部结构做了很大的调整，换成了一个U-Net，同时针对整个结构的输出做出调整：在训练时，对六个侧边输出以及它们融合后的输出分别进行loss计算；在测试时只得到融合后的一个输出。

Block结构RSU：
在这里插入图片描述

这里的Block，除了输入和输出的通道会发生变化，在中间层进行卷积时，使用的通道数都是mid_channels，同时在最下层的卷积中，使用的是膨胀卷积。这里的L=7，指的是RSU-7，是En_1和De_1的内部结构；在前四层（En_1～En_4）中，使用的都是RSU结构；

在后面的两层中，使用的是RSU-4F，其中的卷积层使用的是膨胀卷积，避免因为深度太深，导致图像尺寸太小，丢失特征，RSU-4F结构如下：
RSU-4F:
在这里插入图片描述

这里向下使用了两层的膨胀卷积，进行特征恢复，避免因为网络深度太深，导致特征丢失的问题！

损失函数：
网络在训练的时候，是对六个侧边输出以及融合输出分别和GT进行BCE（二值交叉熵）计算，然后对损失求和进行反向传播，公式如下：
$L=\sum_{m=1}^{M} w_{\text {side }}^{(m)} l_{\text {side }}^{(m)}+w_{\text {fuse }} l_{\text {fuse }}$

在本网络中 $M=6$，公式的第一部分是六个侧边输出和GT的损失，第二部分是最后的融合图像和GT的损失；下面的代码中所有权重 $w$ 都取1，代码如下：

import torch
import torch.nn as nn
from torch.nn import functional as F
class U2criterion(nn.Module):
    """Training loss for U2-Net: equally weighted sum of BCE-with-logits terms.

    Every prediction passed in ``inputs`` is compared against the same
    ``target`` and the individual losses are added together.
    """

    def __init__(self):
        super().__init__()

    def forward(self, inputs, target):
        """Return the summed binary cross-entropy (with logits) loss.

        Args:
            inputs: Sequence of raw (pre-sigmoid) prediction tensors.
            target: Ground-truth tensor every prediction is compared with.

        Returns:
            The sum of one ``binary_cross_entropy_with_logits`` value per
            prediction.
        """
        total_loss = 0
        for prediction in inputs:
            total_loss = total_loss + F.binary_cross_entropy_with_logits(prediction, target)
        return total_loss
1
2
3
4
5
6
7
8
9
10
11

3 网络代码和测试

from typing import Union, List
import torch
import torch.nn as nn
import torch.nn.functional as F


class ConvBNReLU(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU block that keeps the spatial size.

    The convolution uses stride 1 and a padding chosen so that, for odd
    kernel sizes, the output height/width equal the input height/width.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        kernel_size: Side length of the square convolution kernel.
        dilation: Dilation rate of the convolution.
    """

    def __init__(self, in_ch, out_ch, kernel_size=3, dilation=1):
        super().__init__()

        if dilation == 1:
            padding = kernel_size // 2
        else:
            # A dilated kernel spans dilation * (kernel_size - 1) + 1 pixels, so
            # half of that extent (minus the centre) is needed on each side.
            # For a 3x3 kernel this is exactly `dilation`.
            padding = dilation * (kernel_size - 1) // 2

        self.conv = nn.Sequential(
            # No bias: the BatchNorm that follows has its own shift term.
            nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding, dilation=dilation, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Apply convolution, batch normalisation and ReLU to ``x``."""
        return self.conv(x)

class DownConvBNReLu(ConvBNReLU):
    """ConvBNReLU preceded by an optional 2x max-pool downsampling step.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        kernel_size: Convolution kernel size, forwarded to ``ConvBNReLU``.
        dilation: Convolution dilation, forwarded to ``ConvBNReLU``.
        flag: When true, the input is max-pooled before the convolution.
    """

    def __init__(self, in_ch, out_ch, kernel_size=3, dilation=1, flag=True):
        super().__init__(in_ch, out_ch, kernel_size, dilation)
        self.down_flag = flag

    def forward(self, x):
        """Optionally halve the resolution of ``x``, then run the conv block."""
        if not self.down_flag:
            return self.conv(x)

        # ceil_mode=True rounds odd sizes up instead of dropping the last row/column.
        pooled = F.max_pool2d(x, kernel_size=2, stride=2, ceil_mode=True)
        return self.conv(pooled)


class UpConvBNReLU(ConvBNReLU):
    """ConvBNReLU applied to a (optionally upsampled) input joined with a skip input.

    Args:
        in_ch: Number of channels of the concatenated input.
        out_ch: Number of output channels.
        kernel_size: Convolution kernel size, forwarded to ``ConvBNReLU``.
        dilation: Convolution dilation, forwarded to ``ConvBNReLU``.
        flag: When true, the first input is resized to the skip input's
            spatial size before concatenation.
    """

    def __init__(self, in_ch, out_ch, kernel_size=3, dilation=1, flag=True):
        super().__init__(in_ch, out_ch, kernel_size, dilation)
        self.up_flag = flag

    def forward(self, x1, x2):
        """Fuse ``x1`` (from the level below) with ``x2`` (the skip connection)."""
        lower = x1
        if self.up_flag:
            # Resize to the skip tensor's height/width so the two can be concatenated.
            lower = F.interpolate(lower, size=x2.shape[2:], mode="bilinear", align_corners=False)
        merged = torch.cat([lower, x2], dim=1)
        return self.conv(merged)

class RSU(nn.Module):
    """Residual U-block: a small encoder/decoder with a residual connection.

    The block first maps the input to ``out_ch`` channels, runs that result
    through ``height`` encoder layers (the last one is a dilated convolution
    without pooling) and ``height - 1`` decoder layers, and finally adds the
    decoder output to the ``out_ch`` feature map it started from.

    Args:
        height: Number of encoder layers; the input convolution is not
            counted. Must be at least 2.
        in_ch: Number of input channels.
        mid_ch: Number of channels used inside the block.
        out_ch: Number of output channels.
    """

    def __init__(self, height, in_ch, mid_ch, out_ch):
        super().__init__()
        assert height >= 2
        self.conv_in = ConvBNReLU(in_ch, out_ch)  # not counted in `height`

        # With height == 2 the loop below adds no further decoders, so this
        # first decoder is also the topmost one and must emit out_ch channels
        # for the residual addition in forward() to be well defined.
        first_decode_out = mid_ch if height > 2 else out_ch

        encode_list = [DownConvBNReLu(out_ch, mid_ch, flag=False)]
        decode_list = [UpConvBNReLU(mid_ch * 2, first_decode_out, flag=False)]

        for i in range(height - 2):  # layers that down-/up-sample
            encode_list.append(DownConvBNReLu(mid_ch, mid_ch))
            # The last decoder appended is the topmost one and outputs out_ch.
            decode_list.append(UpConvBNReLU(mid_ch * 2, mid_ch if i < height - 3 else out_ch))

        encode_list.append(ConvBNReLU(mid_ch, mid_ch, dilation=2))
        self.encode_modules = nn.ModuleList(encode_list)
        self.decode_modules = nn.ModuleList(decode_list)

    def forward(self, x):
        """Run the nested U-structure and add the residual branch."""
        x_in = self.conv_in(x)

        x = x_in
        encode_outputs = []
        for m in self.encode_modules:
            x = m(x)
            encode_outputs.append(x)

        # The last encoder output comes from the dilated convolution; it seeds the decoder.
        x = encode_outputs.pop()
        for m in self.decode_modules:
            # Pair the running decoder feature with the deepest remaining encoder output.
            x2 = encode_outputs.pop()
            x = m(x, x2)
        return x + x_in  # residual connection at the top level

class RSU4F(nn.Module):
    """Residual U-block variant built only from (dilated) convolutions.

    There is no pooling or interpolation inside this block: the encoder uses
    dilation rates 1, 2, 4, 8 and the decoder 4, 2, 1. The decoder output is
    added to the ``out_ch`` feature map produced by the input convolution.

    Args:
        in_ch: Number of input channels.
        mid_ch: Number of channels used inside the block.
        out_ch: Number of output channels.
    """

    def __init__(self, in_ch, mid_ch, out_ch):
        super().__init__()
        self.conv_in = ConvBNReLU(in_ch, out_ch)

        encoder = [ConvBNReLU(out_ch, mid_ch)]
        encoder += [ConvBNReLU(mid_ch, mid_ch, dilation=rate) for rate in (2, 4, 8)]
        self.encode_modules = nn.ModuleList(encoder)

        # Each decoder layer consumes the running feature concatenated with a skip.
        decoder = [ConvBNReLU(mid_ch * 2, mid_ch, dilation=rate) for rate in (4, 2)]
        decoder.append(ConvBNReLU(mid_ch * 2, out_ch))
        self.decode_modules = nn.ModuleList(decoder)

    def forward(self, x):
        """Run the dilated encoder/decoder and add the residual branch."""
        residual = self.conv_in(x)

        features = []
        current = residual
        for layer in self.encode_modules:
            current = layer(current)
            features.append(current)

        # Start decoding from the deepest feature; remaining ones act as skips.
        current = features.pop()
        for layer in self.decode_modules:
            skip = features.pop()
            current = layer(torch.cat([current, skip], dim=1))

        return current + residual

class U2Net(nn.Module):
    """U2-Net: an outer encoder/decoder whose stages are RSU / RSU4F blocks.

    Args:
        cfg: Dict with the keys ``"encode"`` and ``"decode"``. Each value is a
            list of 6-element rows
            ``[height, in_ch, mid_ch, out_ch, RSU4F, side]``; ``RSU4F``
            selects the block type and ``side`` adds a 3x3 side-output
            convolution for that stage.
        out_ch: Number of channels of every side output and of the fused
            output.
    """

    def __init__(self, cfg, out_ch=1):
        super().__init__()

        assert "encode" in cfg
        assert "decode" in cfg

        self.encode_num = len(cfg["encode"])
        side_heads = []

        def build_stages(rows):
            """Create the stage blocks for ``rows``, collecting side heads on the way."""
            stages = []
            for row in rows:
                # [height, in_ch, mid_ch, out_ch, RSU4F, side]
                assert len(row) == 6
                if row[4] is False:
                    stages.append(RSU(*row[:4]))
                else:
                    stages.append(RSU4F(*row[1:4]))
                if row[5] is True:
                    side_heads.append(nn.Conv2d(row[3], out_ch, kernel_size=3, padding=1))
            return stages

        self.encode_modules = nn.ModuleList(build_stages(cfg["encode"]))
        self.decode_modules = nn.ModuleList(build_stages(cfg["decode"]))
        self.side_modules = nn.ModuleList(side_heads)
        # 1x1 convolution that fuses the concatenated side outputs.
        self.out_conv = nn.Conv2d(self.encode_num * out_ch, out_ch, kernel_size=1)

    def forward(self, x):
        """Compute the fused prediction (and, in training mode, the side outputs).

        Returns:
            In training mode, a list ``[fused, side_1, ..., side_k]`` of raw
            (pre-sigmoid) maps resized to the input height/width. Otherwise
            the sigmoid of the fused map.
        """
        _, _, height, width = x.shape

        skips = []
        last_stage = self.encode_num - 1
        for idx, stage in enumerate(self.encode_modules):
            x = stage(x)
            skips.append(x)
            if idx == last_stage:
                continue  # the final encoder stage is not followed by pooling
            x = F.max_pool2d(x, kernel_size=2, stride=2, ceil_mode=True)

        # Stage outputs ordered from the deepest stage to the shallowest one.
        x = skips.pop()
        deep_first = [x]
        for stage in self.decode_modules:
            skip = skips.pop()
            x = F.interpolate(x, size=skip.shape[2:], mode="bilinear", align_corners=False)
            x = stage(torch.cat([x, skip], dim=1))
            deep_first.append(x)

        side_maps = []
        for idx, head in enumerate(self.side_modules):
            logits = head(deep_first[idx])
            side_maps.append(F.interpolate(logits, size=[height, width], mode="bilinear", align_corners=False))
        side_maps.reverse()  # shallowest stage first

        fused = self.out_conv(torch.cat(side_maps, dim=1))

        if not self.training:
            return torch.sigmoid(fused)
        # Training needs every output so that a loss can be computed on each.
        return [fused] + side_maps

def u2net_full(in_ch=3, out_ch=1):
    """Build the full-size U2-Net.

    Args:
        in_ch: Number of channels of the input image.
        out_ch: Number of channels of the predicted map(s).

    Returns:
        A ``U2Net`` instance configured with the full-size channel widths.
    """
    # Row layout: height, in_ch, mid_ch, out_ch, RSU4F, side
    encoder_cfg = [
        [7, in_ch, 32, 64, False, False],  # En1
        [6, 64, 32, 128, False, False],    # En2
        [5, 128, 64, 256, False, False],   # En3
        [4, 256, 128, 512, False, False],  # En4
        [4, 512, 256, 512, True, False],   # En5
        [4, 512, 256, 512, True, True],    # En6
    ]
    decoder_cfg = [
        [4, 1024, 256, 512, True, True],   # De5
        [4, 1024, 128, 256, False, True],  # De4
        [5, 512, 64, 128, False, True],    # De3
        [6, 256, 32, 64, False, True],     # De2
        [7, 128, 16, 64, False, True],     # De1
    ]
    return U2Net({"encode": encoder_cfg, "decode": decoder_cfg}, out_ch)

def u2net_lite(in_ch=3, out_ch=1):
    """Build the lightweight U2-Net.

    Every stage uses 16 internal channels and 64 output channels; only the
    first encoder stage takes ``in_ch`` input channels.

    Args:
        in_ch: Number of channels of the input image.
        out_ch: Number of channels of the predicted map(s).

    Returns:
        A ``U2Net`` instance configured with the lightweight channel widths.
    """
    # Row layout: height, in_ch, mid_ch, out_ch, RSU4F, side
    encoder_cfg = [
        [7, in_ch, 16, 64, False, False],  # En1
        [6, 64, 16, 64, False, False],     # En2
        [5, 64, 16, 64, False, False],     # En3
        [4, 64, 16, 64, False, False],     # En4
        [4, 64, 16, 64, True, False],      # En5
        [4, 64, 16, 64, True, True],       # En6
    ]
    decoder_cfg = [
        [4, 128, 16, 64, True, True],      # De5
        [4, 128, 16, 64, False, True],     # De4
        [5, 128, 16, 64, False, True],     # De3
        [6, 128, 16, 64, False, True],     # De2
        [7, 128, 16, 64, False, True],     # De1
    ]
    return U2Net({"encode": encoder_cfg, "decode": decoder_cfg}, out_ch)


# net = u2net_full(1,1)
# x = torch.randn(16,1,256,256)
# net.eval()
# print(net(x))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212

这里u2net_full指的是完整的U2Net，u2net_lite，指的是轻量级的U2Net，这两个的唯一区别是，模块中的通道数不同；
指定网络的时候，需要指定网络输入通道和输出通道，这里的修改是为了自己使用网络的便利，原始网络中，默认输入通道是3；
轻量级模型的参数量约为完整模型参数量的1/40；

贴一个网络参数计算代码：

def count_parameters(model):
    """Print and return the number of trainable parameters of ``model``.

    Args:
        model: A model instance exposing ``parameters()``; only parameters
            with ``requires_grad`` set are counted.

    Returns:
        The total number of trainable scalar parameters.
    """
    total = sum(p.numel() for p in model.parameters() if p.requires_grad)
    # `:>16` right-aligns the total in a 16-character wide column.
    print(f'________\n{total:>16}')
    return total
1
2
3
4
5

网络测试：
在这里插入图片描述

再说一下，霹导写的代码真的很优雅，可以去看霹导的代码讲解和网络结构讲解！！

相关阅读:
拓端tecdat|数据预处理之异常值处理
真正牛的项目经理，都做到了这几点
【云原生K8S】Kubernetes之探针
qml中，实时改变TextField中的内容
【PyTorch深度学习项目实战100例】—— 基于MnasNet实现垃圾分类任务 | 第47例
FlinkModule加载HiveModule异常
css知识学习系列（6）-每天10个知识点
【Shell 系列教程】shell介绍（一）
北京君正应用案例：联想新款Yoga Book 9i亮相 CES
算法——哈希王

原文地址：https://blog.csdn.net/qq_44864833/article/details/128076194

U2Net——U-Net套U-Net——套娃式图像分割算法

U2Net

1 相关参考

2 $U^2-Net$ 网络结构

3 网络代码和测试

2 $U^2-Net$ 网络结构