Darknet is a classic deep network. By borrowing ideas from ResNet, it achieves very strong feature representation while avoiding the gradient problems that come with excessive depth. Its main variants are Darknet-19 and Darknet-53; of course, if that still isn't deep enough for you, you can extend it to 99, 199, 999, … layers, hardware permitting.

The block structure from Table 1 is shown in the figure below:
As the figure above shows, the input first passes through a 1×1 convolution layer Conv(1×1, stride=1), which halves the channel count to In_channels/2. It then goes through a 3×3 convolution layer Conv(3×3, stride=1) for feature extraction, which restores the channel count from In_channels/2 back to In_channels. Finally, the 3×3 convolution's output is added to the input Input carried over by the shortcut to produce the final Output (at this point the 3×3 convolution's output has the same shape (In_channels, h, w) as Input, so the two can be added directly). In other words, the Residual operation leaves the shape of the input feature map unchanged.
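A minimal PyTorch sketch of this residual unit (my illustration, not the post's code; BN and activation are omitted for brevity) confirms that the output shape matches the input shape:

import torch
import torch.nn as nn

class Residual(nn.Module):
    def __init__(self, in_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, in_channels // 2, kernel_size=1)            # halve the channels
        self.conv2 = nn.Conv2d(in_channels // 2, in_channels, kernel_size=3, padding=1) # restore the channels
    def forward(self, x):
        return x + self.conv2(self.conv1(x))  # shortcut addition

x = torch.randn(1, 64, 52, 52)
print(Residual(64)(x).shape)  # torch.Size([1, 64, 52, 52]): shape unchanged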
As the figure above shows, Darknet-53 contains 6 standalone convolution layers and 23 Residual units, and each Residual unit contains 2 convolution layers (one 1×1 and one 3×3), giving 52 convolution layers in total. So why is it called Darknet-53? In YOLO v3 the first 52 layers are used purely for feature extraction, and the final layer produces the output predictions; counting that output layer brings the total to 53, hence the name Darknet-53.
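To sanity-check that count, a quick back-of-the-envelope computation (the per-stage block counts [1, 2, 8, 8, 4] are read off the architecture table):

standalone_convs = 1 + 5           # the initial 3x3 conv plus one stride-2 downsampling conv per stage
residual_blocks = [1, 2, 8, 8, 4]  # residual units per stage
conv_layers = standalone_convs + 2 * sum(residual_blocks)  # each residual unit = one 1x1 + one 3x3
print(conv_layers)      # 52
print(conv_layers + 1)  # 53, counting the final output layer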
Design philosophy of the network architecture
Darknet-53 adds a large number of residual units (Residual) on top of Darknet-19, and replaces the Maxpooling2D pooling layers with 3×3 convolution layers (Conv2D) of stride 2. Why did the author make these two changes?
Residual structure
First, the residual units are added to increase the depth of the network, which lets it extract higher-level semantic features, while the residual connections help avoid vanishing or exploding gradients. Writing the residual unit as y = x + F(x), differentiation gives ∂y/∂x = 1 + ∂F/∂x: the identity term lets gradients flow directly to layers much earlier in the network during backpropagation, weakening the attenuating chain of products in the backward pass.
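A tiny autograd experiment (my illustration, not from the original post) makes this concrete: even when the residual branch F(x) contributes almost no gradient, the identity shortcut keeps the gradient at x close to 1.

import torch

x = torch.randn(4, requires_grad=True)
f = lambda t: 1e-4 * t   # stand-in for a branch whose gradient has nearly vanished
y = (x + f(x)).sum()     # residual form: y = x + F(x)
y.backward()
print(x.grad)            # all values ~1.0001: the shortcut keeps gradients alive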
Second, inside the residual unit the input first passes through a 1×1 convolution that halves the number of channels before the 3×3 convolution is applied. This reduces the amount of computation considerably, making the network faster and more efficient.
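A rough multiply-accumulate count (a back-of-the-envelope sketch, assuming C input and C output channels at each spatial position) shows the saving of the 1×1 bottleneck over a plain 3×3 convolution at full width:

C = 256
plain_3x3 = 3 * 3 * C * C                                 # a single 3x3 conv at full width
bottleneck = 1 * 1 * C * (C // 2) + 3 * 3 * (C // 2) * C  # 1x1 down to C/2, then 3x3 back up
print(plain_3x3, bottleneck, bottleneck / plain_3x3)      # 589824 327680 ~0.56, roughly a 44% saving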
Replacing pooling layers with stride-2 convolutions
Functionally, a stride-2 convolution can do the same downsampling job as a pooling layer, and in practice pooling layers have become fairly rare in modern networks, with other downsampling methods such as stride-2 convolutions taking their place. So why make this substitution? See the article "Why don't CNNs need pooling layers for downsampling anymore?".
My understanding of the difference between a pooling layer and a stride-2 convolution layer is this: pooling is an a-priori form of downsampling, in which the sampling rule is fixed by hand (take the largest value within the window, on the assumption that the maximum carries the most information). A stride-2 convolution, by contrast, has parameters that are learned, so the sampling rule is not fixed in advance, and this flexibility increases the network's capacity to learn.
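A minimal comparison I added (the module choices are illustrative) shows that the two options produce identical output shapes, while only the convolution carries learnable weights:

import torch
import torch.nn as nn

x = torch.randn(1, 64, 208, 208)
pool = nn.MaxPool2d(kernel_size=2, stride=2)                  # fixed rule: keep the max in each window
conv = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)  # learned downsampling
print(pool(x).shape, conv(x).shape)                # both: torch.Size([1, 64, 104, 104])
print(sum(p.numel() for p in pool.parameters()))   # 0 learnable parameters
print(sum(p.numel() for p in conv.parameters()))   # 36928 learnable parameters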
Method 1:

import torch
from torchsummary import summary
from torch.nn.modules import Conv2d, BatchNorm2d, AdaptiveAvgPool2d, Flatten, LeakyReLU, Linear

# 1. Define the basic convolution block: Conv2d + BatchNorm2d + LeakyReLU
class conv(torch.nn.Module):
    def __init__(self, in_channel, out_channel, kernel_size=(3, 3), strides=(1, 1), padding=1):
        super(conv, self).__init__()
        self.seq = torch.nn.Sequential(
            Conv2d(in_channels=in_channel, out_channels=out_channel,
                   kernel_size=kernel_size, stride=strides, padding=padding),
            BatchNorm2d(out_channel),
            LeakyReLU(0.1),
        )

    def forward(self, x):
        return self.seq(x)

# 2. Assemble the residual unit: a 1x1 conv halves the channels, a 3x3 conv
#    restores them, then the input is added back through the shortcut
class convblock(torch.nn.Module):
    def __init__(self, in_channel):
        super(convblock, self).__init__()
        out_channel = in_channel // 2
        self.conv1 = torch.nn.Sequential(
            conv(in_channel, out_channel, (1, 1), (1, 1), 0),
            conv(out_channel, in_channel, (3, 3), (1, 1), 1),
        )

    def forward(self, x):
        return x + self.conv1(x)

# 3. Stack the convolutions and residual units following the architecture diagram
class darknet53(torch.nn.Module):
    def __init__(self):
        super(darknet53, self).__init__()
        self.conv1 = conv(3, 32, (3, 3), (1, 1), 1)
        self.conv2 = conv(32, 64, (3, 3), (2, 2), 1)      # downsample
        self.conv3_4 = convblock(64)                      # 1 residual unit
        self.conv5 = conv(64, 128, (3, 3), (2, 2), 1)     # downsample
        self.conv6_9 = torch.nn.Sequential(*[convblock(128) for _ in range(2)])    # 2 residual units
        self.conv10 = conv(128, 256, (3, 3), (2, 2), 1)   # downsample
        self.conv11_26 = torch.nn.Sequential(*[convblock(256) for _ in range(8)])  # 8 residual units
        self.conv27 = conv(256, 512, (3, 3), (2, 2), 1)   # downsample
        self.conv28_43 = torch.nn.Sequential(*[convblock(512) for _ in range(8)])  # 8 residual units
        self.conv44 = conv(512, 1024, (3, 3), (2, 2), 1)  # downsample
        self.conv45_53 = torch.nn.Sequential(*[convblock(1024) for _ in range(4)]) # 4 residual units
        # Global average pooling, (1024, h, w) -> (1024, 1, 1), as in the
        # Darknet-53 classification head (AvgPool2d((1, 1)) would be a no-op)
        self.avg_pool = AdaptiveAvgPool2d(1)
        self.flat = Flatten()
        self.fc = Linear(1024, 1000)
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3_4(x)
        x = self.conv5(x)
        x = self.conv6_9(x)
        x = self.conv10(x)
        x = self.conv11_26(x)
        x = self.conv27(x)
        x = self.conv28_43(x)
        x = self.conv44(x)
        x = self.conv45_53(x)
        x = self.avg_pool(x)
        x = self.flat(x)
        x = self.fc(x)
        x = self.softmax(x)
        return x

model = darknet53()
summary(model, (3, 256, 256))
Method 2:

import math
from collections import OrderedDict
import torch.nn as nn

#---------------------------------------------------------------------#
#   Residual block:
#   a 1x1 convolution reduces the channel count, then a 3x3 convolution
#   extracts features and restores the channel count; finally the
#   shortcut (residual) connection is added.
#---------------------------------------------------------------------#
class BasicBlock(nn.Module):
    def __init__(self, inplanes, planes):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes[0], kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(planes[0])
        self.relu1 = nn.LeakyReLU(0.1)
        self.conv2 = nn.Conv2d(planes[0], planes[1], kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes[1])
        self.relu2 = nn.LeakyReLU(0.1)

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu2(out)
        out += residual
        return out

class DarkNet(nn.Module):
    def __init__(self, layers):
        super(DarkNet, self).__init__()
        self.inplanes = 32
        # 416,416,3 -> 416,416,32
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.relu1 = nn.LeakyReLU(0.1)
        # 416,416,32 -> 208,208,64
        self.layer1 = self._make_layer([32, 64], layers[0])
        # 208,208,64 -> 104,104,128
        self.layer2 = self._make_layer([64, 128], layers[1])
        # 104,104,128 -> 52,52,256
        self.layer3 = self._make_layer([128, 256], layers[2])
        # 52,52,256 -> 26,26,512
        self.layer4 = self._make_layer([256, 512], layers[3])
        # 26,26,512 -> 13,13,1024
        self.layer5 = self._make_layer([512, 1024], layers[4])
        self.layers_out_filters = [64, 128, 256, 512, 1024]
        # Weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    #---------------------------------------------------------------------#
    #   Each layer first downsamples with a 3x3 convolution of stride 2,
    #   then stacks the residual blocks.
    #---------------------------------------------------------------------#
    def _make_layer(self, planes, blocks):
        layers = []
        # Downsampling: stride 2, kernel size 3
        layers.append(("ds_conv", nn.Conv2d(self.inplanes, planes[1], kernel_size=3, stride=2, padding=1, bias=False)))
        layers.append(("ds_bn", nn.BatchNorm2d(planes[1])))
        layers.append(("ds_relu", nn.LeakyReLU(0.1)))
        # Stack the residual blocks
        self.inplanes = planes[1]
        for i in range(0, blocks):
            layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes)))
        return nn.Sequential(OrderedDict(layers))

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        out3 = self.layer3(x)
        out4 = self.layer4(out3)
        out5 = self.layer5(out4)
        return out3, out4, out5

def darknet53():
    model = DarkNet([1, 2, 8, 8, 4])
    return model
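A quick usage check of this version (a sketch I added, not part of the original code): the backbone returns the three feature maps that YOLO v3's detection head consumes.

import torch

model = darknet53()
x = torch.randn(1, 3, 416, 416)
out3, out4, out5 = model(x)
print(out3.shape)  # torch.Size([1, 256, 52, 52])
print(out4.shape)  # torch.Size([1, 512, 26, 26])
print(out5.shape)  # torch.Size([1, 1024, 13, 13])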
This post has analyzed Darknet-53, the feature-extraction backbone of YOLO v3, in detail from the perspectives of its network structure, its novel design choices, and the motivation behind its improvements, and finally implemented it in PyTorch. Analyzing classic networks is always instructive. Deep learning is not just about casually assembling models with a framework; we should understand the design rationale behind each part of a network and its deeper statistical meaning, so that building neural networks stops feeling like mechanically stacking blocks or blindly tuning hyperparameters. Only once we understand the underlying principles of each component can we innovate and create something of our own.