• A CNN based on the ResNet framework


    Data preparation

    DATA_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz'
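
    The archive can be fetched and unpacked with the Python standard library. A minimal
    sketch (my addition, not from the original post) that extracts the archive into
    flower_data/, where the split script below expects it:

    import os
    import tarfile
    import urllib.request

    DATA_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz'
    os.makedirs('flower_data', exist_ok=True)
    archive, _ = urllib.request.urlretrieve(DATA_URL, 'flower_photos.tgz')
    with tarfile.open(archive, 'r:gz') as tar:
        tar.extractall('flower_data')  # creates flower_data/flower_photos/<class folders>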

    一、Splitting the training and validation sets

    # spile_data.py
    import os
    from shutil import copy
    import random

    def mkfile(file):
        if not os.path.exists(file):
            os.makedirs(file)

    file = 'flower_data/flower_photos'
    # Class folders, e.g. ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']
    flower_class = [cla for cla in os.listdir(file) if ".txt" not in cla]
    mkfile('flower_data/train')  # create the train folder
    for cla in flower_class:
        mkfile('flower_data/train/' + cla)  # one sub-folder per class under train
    mkfile('flower_data/val')
    for cla in flower_class:
        mkfile('flower_data/val/' + cla)

    split_rate = 0.1
    for cla in flower_class:
        cla_path = file + '/' + cla + '/'
        images = os.listdir(cla_path)
        num = len(images)
        # randomly sample 10% of this class's images for the validation set
        eval_index = random.sample(images, k=int(num * split_rate))
        for index, image in enumerate(images):
            if image in eval_index:
                image_path = cla_path + image
                new_path = 'flower_data/val/' + cla
                copy(image_path, new_path)
            else:
                image_path = cla_path + image
                new_path = 'flower_data/train/' + cla
                copy(image_path, new_path)
            print("\r[{}] processing [{}/{}]".format(cla, index + 1, num), end="")  # progress bar
        print()
    print("processing done!")
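
    After running the script you can sanity-check the result. A small sketch (my
    addition): with split_rate = 0.1, roughly 10% of each class should land in val.

    import os
    for split in ('train', 'val'):
        for cla in sorted(os.listdir('flower_data/' + split)):
            n = len(os.listdir('flower_data/{}/{}'.format(split, cla)))
            print(split, cla, n)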

    二、The ResNet network

    The file below defines the ResNet backbone (exposed as resnet50) together with a
    DANNet wrapper that adds an MMD domain-adaptation loss between source and target
    features; mmd and attention are the author's project-local modules.

    import torch
    import torch.nn as nn
    import math
    import torch.utils.model_zoo as model_zoo
    import mmd                              # project-local module providing the MMD losses
    from attention import ChannelAttention  # project-local attention modules (optional, see below)
    from attention import SpatialAttention

    __all__ = ['ResNet', 'resnet50']

    model_urls = {
        'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    }

    def conv3x3(in_planes, out_planes, stride=1, groups=1):
        """3x3 convolution with padding"""
        return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                         padding=1, groups=groups, bias=False)

    def conv1x1(in_planes, out_planes, stride=1):
        """1x1 convolution"""
        return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
    '''
    The BasicBlock used by the shallower ResNets: two successive 3x3 convolutions.
    '''
    class BasicBlock(nn.Module):
        expansion = 1  # ratio by which the block expands its output channels

        # BatchNorm speeds up convergence/training and mitigates vanishing or exploding
        # gradients. It standardizes all elements of the same channel across the batch
        # (one set of parameters per batch): with C channels it normalizes over the
        # N*H*W elements, C times in total.
        def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                     base_width=64, norm_layer=None):
            # groups/base_width/norm_layer are accepted (and ignored) so that
            # _make_layer can call BasicBlock and Bottleneck with the same arguments
            super(BasicBlock, self).__init__()
            self.conv1 = conv3x3(inplanes, planes, stride)
            self.bn1 = nn.BatchNorm2d(planes)
            self.relu = nn.ReLU(inplace=True)
            self.conv2 = conv3x3(planes, planes)
            self.bn2 = nn.BatchNorm2d(planes)
            self.downsample = downsample
            self.stride = stride

        def forward(self, x):
            residual = x
            out = self.conv1(x)
            out = self.bn1(out)
            out = self.relu(out)
            out = self.conv2(out)
            out = self.bn2(out)
            # downsample is a 1x1 convolution that matches the shortcut's channel
            # count; if H/W also differ, it carries the stride as well
            if self.downsample is not None:
                residual = self.downsample(x)
            out += residual
            out = self.relu(out)
            return out
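
    As an aside, the per-channel normalization described in the comments above can be
    verified directly. A minimal sketch (my addition): BatchNorm2d with affine=False
    should match a manual normalization over the (N, H, W) dimensions.

    import torch
    import torch.nn as nn

    x = torch.randn(8, 3, 4, 4)           # N=8, C=3, H=W=4
    bn = nn.BatchNorm2d(3, affine=False)  # pure normalization, no learnable gamma/beta
    y = bn(x)

    mean = x.mean(dim=(0, 2, 3), keepdim=True)                # one mean per channel
    var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True)  # one variance per channel
    manual = (x - mean) / torch.sqrt(var + bn.eps)
    print(torch.allclose(y, manual, atol=1e-6))               # True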
    '''
    The Bottleneck used by the deeper ResNets. It reduces the parameter count by working
    in three steps: 1) reduce the channel dimension (1x1), 2) a regular 3x3 convolution,
    3) expand the channel dimension again (1x1).
    The spatial size is unchanged (for stride=1) and the channel count grows 4x.
    '''
    class Bottleneck(nn.Module):
        expansion = 4  # ratio by which the block expands its output channels

        def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                     base_width=64, norm_layer=None):
            super(Bottleneck, self).__init__()
            self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
            # BatchNorm after the convolution normalizes per channel (see BasicBlock above)
            self.bn1 = nn.BatchNorm2d(planes)
            self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                                   padding=1, bias=False)
            self.bn2 = nn.BatchNorm2d(planes)
            self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
            self.bn3 = nn.BatchNorm2d(planes * 4)
            self.relu = nn.ReLU(inplace=True)
            self.downsample = downsample
            self.dropout = nn.Dropout()
            self.stride = stride

        def forward(self, x):
            residual = x
            out = self.conv1(x)
            out = self.bn1(out)
            out = self.relu(out)
            out = self.conv2(out)
            out = self.bn2(out)
            out = self.relu(out)
            out = self.conv3(out)
            out = self.bn3(out)
            # downsample is a 1x1 convolution matching the shortcut's channel count
            # (and stride, if H/W differ)
            if self.downsample is not None:
                residual = self.downsample(x)
            out += residual
            out = self.relu(out)
            return out
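
    A quick shape check (my addition) of the Bottleneck contract, assuming the
    definitions above are in scope: with planes=64 the block emits 64 * expansion = 256
    channels, so the shortcut needs a 1x1 downsample when the input has 64 channels.

    import torch
    block = Bottleneck(64, 64, stride=1,
                       downsample=nn.Sequential(conv1x1(64, 256), nn.BatchNorm2d(256)))
    out = block(torch.randn(1, 64, 56, 56))
    print(out.shape)  # torch.Size([1, 256, 56, 56])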
    '''
    ResNet consists of:
    1. conv1, norm1, relu (replaced by a stem when deep_stem is specified)
    2. maxpool
    3. layer1~layer4 (ResLayer stages, each built from several BasicBlocks or Bottlenecks)
    '''
    class ResNet(nn.Module):
        # block selects the two-layer (BasicBlock) or three-layer (Bottleneck) residual
        # block; layers gives the number of blocks per stage; num_classes is the number
        # of output classes; zero_init_residual zero-initializes the last BN of each block
        def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                     groups=1, width_per_group=64, norm_layer=None):
            super(ResNet, self).__init__()
            if norm_layer is None:
                norm_layer = nn.BatchNorm2d
            self.inplanes = 64
            self.groups = groups
            self.base_width = width_per_group
            # NOTE: the stock torchvision ResNet takes 3 input channels (RGB); the 12
            # here is specific to this author's multi-channel input data
            self.conv1 = nn.Conv2d(12, self.inplanes, kernel_size=7, stride=2, padding=3,
                                   bias=False)
            self.bn1 = norm_layer(self.inplanes)
            self.relu = nn.ReLU(inplace=True)
            # attention can be inserted after the first layer of the network
            # self.ca = ChannelAttention(self.inplanes)
            # self.sa = SpatialAttention()
            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
            self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer)
            self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
            self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer)
            self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer)
            # attention can also be inserted after the last convolutional stage
            # self.ca1 = ChannelAttention(self.inplanes)
            # self.sa1 = SpatialAttention()
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # adaptive average pooling to a fixed output (H, W)
            self.fc = nn.Linear(512 * block.expansion, num_classes)
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)
            if zero_init_residual:
                for m in self.modules():
                    if isinstance(m, Bottleneck):
                        nn.init.constant_(m.bn3.weight, 0)
                    elif isinstance(m, BasicBlock):
                        nn.init.constant_(m.bn2.weight, 0)

        # Builds one ResLayer stage (layer1~layer4).
        # block: BasicBlock/Bottleneck; planes: base channel count of the stage;
        # blocks: number of residual blocks in the stage
        def _make_layer(self, block, planes, blocks, stride=1, norm_layer=None):
            if norm_layer is None:
                norm_layer = nn.BatchNorm2d
            # downsample adapts the shortcut so its channel count (and spatial size)
            # matches the block's output for the residual addition
            downsample = None
            if stride != 1 or self.inplanes != planes * block.expansion:
                downsample = nn.Sequential(
                    conv1x1(self.inplanes, planes * block.expansion, stride),
                    norm_layer(planes * block.expansion),
                )
            layers = []
            layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                                self.base_width, norm_layer))
            self.inplanes = planes * block.expansion
            for _ in range(1, blocks):
                layers.append(block(self.inplanes, planes, groups=self.groups,
                                    base_width=self.base_width, norm_layer=norm_layer))
            return nn.Sequential(*layers)

        '''
        After convolution/pooling the tensor has shape (batchsize, channels, x, y), where
        x.size(0) is the batch size. x.view(x.size(0), -1) reshapes it to
        (batchsize, channels*x*y), i.e. flattens (channels, x, y) so it can feed the
        fc layer. Because avgpool outputs a 1*1 spatial size with 512*4 channels,
        channels*x*y = 2048 here.
        '''
        def forward(self, x):
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.relu(x)
            # x = self.ca(x) * x
            # x = self.sa(x) * x
            x = self.maxpool(x)
            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.layer4(x)
            # x = self.ca1(x) * x
            # x = self.sa1(x) * x
            x = self.avgpool(x)
            x = x.view(x.size(0), -1)
            # x = self.fc(x)
            return x
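
    A small sketch (my addition) of what _make_layer produces, assuming the classes
    above are in scope: only the first block of a strided stage carries the 1x1
    downsample on its shortcut; the remaining blocks use identity shortcuts.

    net = ResNet(Bottleneck, [3, 4, 6, 3])
    print(net.layer2[0].downsample)  # Sequential(Conv2d(256, 512, 1x1, stride=2), BatchNorm2d(512))
    print(net.layer2[1].downsample)  # None -> identity shortcut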
    class DANNet(nn.Module):
        def __init__(self, num_classes=2):
            super(DANNet, self).__init__()
            self.sharedNet = resnet50(False)
            self.cls_fc = nn.Linear(2048, num_classes)  # channels*x*y = 2048*1*1, see the note above

        def forward(self, source, target):
            loss = 0
            source = self.sharedNet(source)
            if self.training == True:
                target = self.sharedNet(target)
                # loss += mmd.mmd_rbf_accelerate(source, target)
                loss += mmd.mmd_rbf_noaccelerate(source, target)
            source = self.cls_fc(source)
            # target = self.cls_fc(target)
            return source, loss
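
    During training, DANNet adds an MMD loss between the source and target features.
    The author's mmd module is not shown in the post; as a rough stand-in, here is a
    generic single-kernel RBF MMD^2 sketch (my addition, not the author's mmd.py,
    which may use a multi-kernel variant):

    import torch

    def mmd_rbf(source, target, sigma=1.0):
        """Biased MMD^2 estimate between two (n, d) feature batches, single RBF kernel."""
        x = torch.cat([source, target], dim=0)
        d2 = torch.cdist(x, x).pow(2)          # pairwise squared Euclidean distances
        k = torch.exp(-d2 / (2 * sigma ** 2))  # RBF kernel matrix over both batches
        n = source.size(0)
        return k[:n, :n].mean() + k[n:, n:].mean() - 2 * k[:n, n:].mean()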
    def resnet50(pretrained=False, **kwargs):
        """Constructs a ResNet-50 model.

        Args:
            pretrained (bool): If True, returns a model pre-trained on ImageNet
        """
        model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
        if pretrained:
            # NOTE: with the 12-channel conv1 above, the ImageNet weights will not
            # load as-is (conv1.weight shape mismatch)
            model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
        return model
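
    A usage sketch (my addition): forward() returns the pooled features because
    self.fc is commented out, so the backbone's output is (N, 2048). The input must
    have 12 channels to match conv1 above (use 3 for RGB images).

    import torch
    model = resnet50(pretrained=False)
    feats = model(torch.randn(2, 12, 224, 224))
    print(feats.shape)  # torch.Size([2, 2048]) -- features only; DANNet.cls_fc applies the classifier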

    三、Training the model

    Note that train.py below trains a resnet34 imported from a separate model.py (not
    shown in the post), rather than the resnet50/DANNet variant defined in section 二.

    # train.py
    import torch
    import torch.nn as nn
    from torchvision import transforms, datasets
    import json
    import matplotlib.pyplot as plt
    import os
    import torch.optim as optim
    from model import resnet34, resnet101
    import torchvision.models.resnet

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    # Data augmentation. Training set: RandomResizedCrop, RandomHorizontalFlip,
    # ToTensor and Normalize.
    # Validation set: Resize, CenterCrop, ToTensor and Normalize.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),  # official ImageNet statistics
        "val": transforms.Compose([transforms.Resize(256),  # scale the shorter side to 256
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    data_root = os.getcwd()
    image_path = data_root + "/flower_data/"  # flower data set path
    train_dataset = datasets.ImageFolder(root=image_path + "train",
                                         transform=data_transform["train"])
    train_num = len(train_dataset)  # 3306
    flower_list = train_dataset.class_to_idx  # {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4}
    cla_dict = dict((val, key) for key, val in flower_list.items())  # {0: 'daisy', 1: 'dandelion', 2: 'roses', 3: 'sunflowers', 4: 'tulips'}
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)  # serialize cla_dict as JSON with 4-space indentation
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 16
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=0)
    validate_dataset = datasets.ImageFolder(root=image_path + "val",
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)  # 364
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=0)

    # net = resnet34()
    net = resnet34(num_classes=5)
    # load pretrain weights
    # model_weight_path = "./resnet34-pre.pth"
    # missing_keys, unexpected_keys = net.load_state_dict(torch.load(model_weight_path), strict=False)  # load pretrained parameters
    # for param in net.parameters():
    #     param.requires_grad = False
    # change fc layer structure
    # inchannel = net.fc.in_features
    # net.fc = nn.Linear(inchannel, 5)
    net.to(device)  # move the model to the chosen device so it runs on GPU/CPU

    loss_function = nn.CrossEntropyLoss()  # loss function
    optimizer = optim.Adam(net.parameters(), lr=0.0001)  # optimizer
    best_acc = 0.0
    save_path = './resNet34.pth'
    # one epoch is one full pass over the training set
    for epoch in range(3):
        # train
        net.train()
        running_loss = 0.0
        # step is the current batch index; data is the batch loaded from train_loader
        for step, data in enumerate(train_loader, start=0):
            images, labels = data  # images: (16, 3, 224, 224), labels: (16,)
            optimizer.zero_grad()
            logits = net(images.to(device))  # logits: (16, 5), the network's raw outputs
            # CrossEntropyLoss applies softmax to the logits, computes the cross entropy
            # between each prediction and its true label, and averages over the batch
            loss = loss_function(logits, labels.to(device))  # e.g. 1.6871
            loss.backward()
            optimizer.step()
            # print statistics
            running_loss += loss.item()  # accumulate the per-batch loss
            # print train progress
            rate = (step + 1) / len(train_loader)
            a = "*" * int(rate * 50)
            b = "." * int((1 - rate) * 50)
            print("\rtrain loss: {:^3.0f}%[{}->{}]{:.4f}".format(int(rate * 100), a, b, loss), end="")
        print()

        # validate
        net.eval()
        acc = 0.0  # accumulate the number of correct predictions per epoch
        with torch.no_grad():
            for val_data in validate_loader:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))  # eval model only has the last output layer
                # loss = loss_function(outputs, test_labels)
                # torch.max returns (values, indices); [1] takes the index of the max logit
                predict_y = torch.max(outputs, dim=1)[1]
                acc += (predict_y == val_labels.to(device)).sum().item()  # correct predictions per batch; .item() converts to a scalar
            val_accurate = acc / val_num
            if val_accurate > best_acc:
                best_acc = val_accurate
                torch.save(net.state_dict(), save_path)  # state_dict() returns the parameter dict; save_path is the checkpoint file
            print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
                  (epoch + 1, running_loss / (step + 1), val_accurate))  # average loss over the step+1 batches
    print('Finished Training')
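
    The commented-out block above shows the transfer-learning path. A hedged sketch
    (my addition) of enabling it, assuming model.py follows the usual torchvision-style
    defaults so that resnet34() builds a 1000-class head matching the resnet34-pre.pth
    checkpoint named in the comments:

    net = resnet34()  # default 1000-class head so the checkpoint shapes match
    missing_keys, unexpected_keys = net.load_state_dict(
        torch.load("./resnet34-pre.pth"), strict=False)
    for param in net.parameters():
        param.requires_grad = False  # optionally freeze the backbone
    inchannel = net.fc.in_features
    net.fc = nn.Linear(inchannel, 5)  # new 5-class head; its parameters require grad
    net.to(device)
    # build the optimizer after replacing the head so it sees the new fc parameters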

    四、Prediction

    # predict.py
    import torch
    from model import resnet34
    from PIL import Image
    from torchvision import transforms
    import matplotlib.pyplot as plt
    import json

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img = Image.open("./roses.jpg")
    plt.imshow(img)
    # [N, C, H, W]
    img = data_transform(img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    try:
        json_file = open('./class_indices.json', 'r')
        class_indict = json.load(json_file)
    except Exception as e:
        print(e)
        exit(-1)

    # create model
    model = resnet34(num_classes=5)
    # load model weights
    model_weight_path = "./resNet34.pth"
    model.load_state_dict(torch.load(model_weight_path))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img))
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()
    print(class_indict[str(predict_cla)], predict[predict_cla].numpy())
    plt.show()
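
    To inspect more than the top-1 class, the probabilities computed above can be
    ranked. A small sketch (my addition), reusing predict and class_indict from
    predict.py:

    topk = torch.topk(predict, k=3)
    for prob, idx in zip(topk.values, topk.indices):
        print("{}: {:.3f}".format(class_indict[str(idx.item())], prob.item()))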

  • Original article: https://blog.csdn.net/qq_46458188/article/details/134517999