365-Day Deep Learning Training Camp - Week P2: Color Image Recognition


Contents

I. Preface

II. My Environment

III. Code Implementation

1. Data download and visualization

2. CNN model

3. Visualizing the training results

4. Predicting random images

IV. Model Optimization

1. CNN model

2. VGG-16 model

3. AlexNet model

4. ResNet model

I. Preface

>- **🍨 This post is a learning-record blog for the [🔗365天深度学习训练营](https://mp.weixin.qq.com/s/xLjALoOD8HPZcH563En8bQ)**
>- **🍦 Reference: 365-Day Deep Learning Training Camp - Week P2: Color Image Recognition (readable by training-camp members only)**
>- **🍖 Original author: [K同学啊|接辅导、项目定制](https://mtyjkh.blog.csdn.net/)**

● Difficulty: consolidating the basics ⭐⭐
● Language: Python 3, PyTorch
● Time: November 26 to December 2
🍺 Requirements:
1. Build your own CNN network
2. Use the official VGG-16 network
🍻 Stretch goals (optional):
1. Reach 85% accuracy on the validation set
2. Draw the VGG-16 architecture diagram in PPT

II. My Environment

Language: Python 3.7

Editor: Jupyter Notebook

Deep learning framework: PyTorch

III. Code Implementation

    # Set up the GPU
    import copy
    import random

    import torch
    import torch.nn as nn
    import matplotlib.pyplot as plt
    import torchvision
    from torchvision import datasets, transforms, models

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device

    # Load the data
    train_ds = torchvision.datasets.CIFAR10('data',
                                            train=True,
                                            transform=torchvision.transforms.ToTensor(),  # convert the data to Tensor
                                            download=True)

    test_ds = torchvision.datasets.CIFAR10('data',
                                           train=False,
                                           transform=torchvision.transforms.ToTensor(),  # convert the data to Tensor
                                           download=True)

    batch_size = 32

    train_dl = torch.utils.data.DataLoader(train_ds,
                                           batch_size=batch_size,
                                           shuffle=True)
    test_dl = torch.utils.data.DataLoader(test_ds,
                                          batch_size=batch_size)

    # Take one batch to inspect the data format.
    # The shape is [batch_size, channel, height, width]:
    # batch_size is user-defined; channel, height and width are the images'
    # channel count, height and width.
    imgs, labels = next(iter(train_dl))
    imgs.shape

    import numpy as np

    # Set the figure size: 20 inches wide, 5 inches tall
    plt.figure(figsize=(20, 5))
    for i, img in enumerate(imgs[:20]):
        # Rearrange the dimensions from (C, H, W) to (H, W, C)
        npimg = img.numpy().transpose((1, 2, 0))
        # Split the figure into 2 rows and 10 columns and draw the (i+1)-th subplot
        plt.subplot(2, 10, i + 1)
        plt.imshow(npimg, cmap=plt.cm.binary)
        plt.axis('off')

    # Build the CNN
    import torch.nn.functional as F

    num_classes = 10  # number of image classes

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            # Feature-extraction network
            self.conv1 = nn.Conv2d(3, 64, kernel_size=3)    # first conv layer, 3x3 kernel
            self.pool1 = nn.MaxPool2d(kernel_size=2)        # pooling layer, 2x2 kernel
            self.conv2 = nn.Conv2d(64, 64, kernel_size=3)   # second conv layer, 3x3 kernel
            self.pool2 = nn.MaxPool2d(kernel_size=2)
            self.conv3 = nn.Conv2d(64, 128, kernel_size=3)  # third conv layer, 3x3 kernel
            self.pool3 = nn.MaxPool2d(kernel_size=2)
            # Classification network
            self.fc1 = nn.Linear(512, 256)
            self.fc2 = nn.Linear(256, num_classes)

        # Forward pass
        def forward(self, x):
            x = self.pool1(F.relu(self.conv1(x)))
            x = self.pool2(F.relu(self.conv2(x)))
            x = self.pool3(F.relu(self.conv3(x)))
            x = torch.flatten(x, start_dim=1)
            x = F.relu(self.fc1(x))
            x = self.fc2(x)
            return x

    from torchinfo import summary

    # Move the model to the GPU (all model computation runs on the GPU)
    model = Model().to(device)
    summary(model)

    # Set the hyperparameters
    loss_fn = nn.CrossEntropyLoss()  # loss function
    learn_rate = 1e-2                # learning rate
    opt = torch.optim.SGD(model.parameters(), lr=learn_rate)

    # Training function
    def train(dataloader, model, loss_fn, optimizer):
        size = len(dataloader.dataset)  # training-set size: 50,000 images
        num_batches = len(dataloader)   # number of batches: 1563 (50000/32 = 1562.5, rounded up)
        train_loss, train_acc = 0, 0    # initialize training loss and accuracy

        for X, y in dataloader:  # fetch the images and their labels
            X, y = X.to(device), y.to(device)

            # Compute the prediction error
            pred = model(X)          # network output
            loss = loss_fn(pred, y)  # loss between the network output and the ground truth

            # Backpropagation
            optimizer.zero_grad()  # zero out the gradients
            loss.backward()        # backpropagate
            optimizer.step()       # update the parameters

            # Record acc and loss
            train_acc += (pred.argmax(1) == y).type(torch.float).sum().item()
            train_loss += loss.item()

        train_acc /= size
        train_loss /= num_batches
        return train_acc, train_loss

    # Test function
    def test(dataloader, model, loss_fn):
        size = len(dataloader.dataset)  # test-set size: 10,000 images
        num_batches = len(dataloader)   # number of batches: 313 (10000/32 = 312.5, rounded up)
        test_loss, test_acc = 0, 0

        # Disable gradient tracking while not training, saving compute and memory
        with torch.no_grad():
            for imgs, target in dataloader:
                imgs, target = imgs.to(device), target.to(device)

                # Compute the loss
                target_pred = model(imgs)
                loss = loss_fn(target_pred, target)

                test_loss += loss.item()
                test_acc += (target_pred.argmax(1) == target).type(torch.float).sum().item()

        test_acc /= size
        test_loss /= num_batches
        return test_acc, test_loss

    epochs = 10
    train_loss = []
    train_acc = []
    test_loss = []
    test_acc = []
    best_acc = 0  # best test accuracy so far (must be initialized before the loop)

    for epoch in range(epochs):
        model.train()
        epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, opt)

        model.eval()
        epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)

        # Keep the best model
        if epoch_test_acc > best_acc:
            best_acc = epoch_test_acc
            best_model = copy.deepcopy(model)

        train_acc.append(epoch_train_acc)
        train_loss.append(epoch_train_loss)
        test_acc.append(epoch_test_acc)
        test_loss.append(epoch_test_loss)

        template = 'Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}'
        print(template.format(epoch + 1, epoch_train_acc * 100, epoch_train_loss,
                              epoch_test_acc * 100, epoch_test_loss))

    PATH = './best_model.pth'
    torch.save(best_model.state_dict(), PATH)  # save the best model rather than the last one
    print('Done')

    # Plot the training results
    import matplotlib.pyplot as plt
    # Hide warnings
    import warnings
    warnings.filterwarnings("ignore")             # ignore warning messages
    plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
    plt.rcParams['axes.unicode_minus'] = False    # display minus signs correctly
    plt.rcParams['figure.dpi'] = 100              # resolution

    epochs_range = range(epochs)

    plt.figure(figsize=(12, 3))
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, train_acc, label='Training Accuracy')
    plt.plot(epochs_range, test_acc, label='Test Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, train_loss, label='Training Loss')
    plt.plot(epochs_range, test_loss, label='Test Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.show()

    # Predict on random test images
    plt.figure(figsize=(16, 14))
    for i in range(10):
        img_data, label_id = random.choice(list(zip(test_ds.data, test_ds.targets)))
        img = transforms.ToPILImage()(img_data)
        # ToTensor() replaces the undefined `transform` from the original post
        input_tensor = transforms.ToTensor()(img).to(device).unsqueeze(0)
        predict_id = torch.argmax(model(input_tensor))
        predict = test_ds.classes[predict_id]
        label = test_ds.classes[label_id]

        plt.subplot(3, 4, i + 1)
        plt.imshow(img)
        plt.title(f'truth:{label}\npredict:{predict}')
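The loop above keeps the best weights in best_model.pth, so they can be restored later for inference. A minimal sketch of reloading them (my addition, assuming the Model class defined above):

    # Rebuild the network and load the saved best weights (hypothetical usage)
    loaded_model = Model().to(device)
    loaded_model.load_state_dict(torch.load('./best_model.pth', map_location=device))
    loaded_model.eval()  # switch to evaluation mode before predicting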

1. Data download and visualization

     

2. CNN model

     

3. Visualizing the training results

Plotting the acc and loss curves for the training and test sets shows that the results are not very satisfactory, so the model is improved later in this post.

4. Predicting random images

IV. Model Optimization

1. CNN model

The main idea is to add more convolution and pooling layers; BN layers can be inserted between them.

How BN essentially works: a normalization layer is inserted before each layer of the network, so the input is first normalized (to mean 0, variance 1) and then passed on to the next layer. The normalization layer in the paper is not as simple as one might imagine, though: it is a learnable layer with its own parameters (γ, β).
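A minimal sketch of this behavior with PyTorch's nn.BatchNorm2d, whose learnable γ and β are exposed as weight and bias:

    import torch
    import torch.nn as nn

    bn = nn.BatchNorm2d(12)        # one learnable (gamma, beta) pair per channel
    print(bn.weight.shape)         # gamma: torch.Size([12])
    print(bn.bias.shape)           # beta:  torch.Size([12])

    x = torch.randn(8, 12, 28, 28) * 5 + 3   # inputs with mean ~3, std ~5
    y = bn(x)                                # training mode: normalize with batch statistics
    print(y.mean().item(), y.std().item())   # roughly 0 and 1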

    class Model(nn.Module):
        def __init__(self):
            super(Model, self).__init__()
            self.conv1 = nn.Sequential(
                nn.Conv2d(3, 12, kernel_size=5, padding=0),   # 12*220*220
                nn.BatchNorm2d(12),
                nn.ReLU()
            )
            self.conv2 = nn.Sequential(
                nn.Conv2d(12, 12, kernel_size=5, padding=0),  # 12*216*216
                nn.BatchNorm2d(12),
                nn.ReLU()
            )
            self.pool3 = nn.Sequential(
                nn.MaxPool2d(2),  # 12*108*108
                nn.Dropout(0.15)
            )
            self.conv4 = nn.Sequential(
                nn.Conv2d(12, 24, kernel_size=5, padding=0),  # 24*104*104
                nn.BatchNorm2d(24),
                nn.ReLU()
            )
            self.conv5 = nn.Sequential(
                nn.Conv2d(24, 24, kernel_size=5, padding=0),  # 24*100*100
                nn.BatchNorm2d(24),
                nn.ReLU()
            )
            self.pool6 = nn.Sequential(
                nn.MaxPool2d(2),  # 24*50*50
                nn.Dropout(0.15)
            )
            self.fc = nn.Sequential(
                nn.Linear(24 * 50 * 50, num_classes)
            )

        def forward(self, x):
            batch_size = x.size(0)
            x = self.conv1(x)  # conv-BN-ReLU
            x = self.conv2(x)  # conv-BN-ReLU
            x = self.pool3(x)  # pool-dropout
            x = self.conv4(x)  # conv-BN-ReLU
            x = self.conv5(x)  # conv-BN-ReLU
            x = self.pool6(x)  # pool-dropout
            x = x.view(batch_size, -1)  # flatten to (batch, 24*50*50) for the fully connected layer
            x = self.fc(x)
            return x
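Note that the shape comments above (12\*220\*220 and so on) assume 224x224 inputs, which is also what the nn.Linear(24 * 50 * 50, num_classes) layer expects; CIFAR-10's 32x32 images would therefore need resizing first. The original post doesn't show that preprocessing step, so the transform below is an assumption of mine:

    # Hypothetical preprocessing: upscale CIFAR-10 to 224x224 so the flattened
    # feature map matches nn.Linear(24 * 50 * 50, num_classes)
    resize_tf = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    train_ds_224 = torchvision.datasets.CIFAR10('data', train=True,
                                                transform=resize_tf, download=True)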

The model structure diagram can be drawn with NN SVG (alexlenail.me).

2. VGG-16 model

    class Vgg16_net(nn.Module):
        def __init__(self):
            super(Vgg16_net, self).__init__()

            self.layer1 = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),   # (32-3+2)/1+1=32  32*32*64
                nn.BatchNorm2d(64),
                # inplace=True modifies the tensor passed down from the Conv2d above
                # in place instead of allocating a new one, which saves memory.
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),  # (32-3+2)/1+1=32  32*32*64
                # Batch Normalization pulls the activations back to a distribution with
                # mean 0 and variance 1, keeping the distribution consistent across
                # layers and helping to avoid vanishing gradients.
                nn.BatchNorm2d(64),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2)  # (32-2)/2+1=16  16*16*64
            )

            self.layer2 = nn.Sequential(
                nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),   # (16-3+2)/1+1=16  16*16*128
                nn.BatchNorm2d(128),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),  # (16-3+2)/1+1=16  16*16*128
                nn.BatchNorm2d(128),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2)  # (16-2)/2+1=8  8*8*128
            )

            self.layer3 = nn.Sequential(
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),  # (8-3+2)/1+1=8  8*8*256
                nn.BatchNorm2d(256),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),  # (8-3+2)/1+1=8  8*8*256
                nn.BatchNorm2d(256),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),  # (8-3+2)/1+1=8  8*8*256
                nn.BatchNorm2d(256),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2)  # (8-2)/2+1=4  4*4*256
            )

            self.layer4 = nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),  # (4-3+2)/1+1=4  4*4*512
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # (4-3+2)/1+1=4  4*4*512
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # (4-3+2)/1+1=4  4*4*512
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2)  # (4-2)/2+1=2  2*2*512
            )

            self.layer5 = nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # (2-3+2)/1+1=2  2*2*512
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # (2-3+2)/1+1=2  2*2*512
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),  # (2-3+2)/1+1=2  2*2*512
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2)  # (2-2)/2+1=1  1*1*512
            )

            self.conv = nn.Sequential(
                self.layer1,
                self.layer2,
                self.layer3,
                self.layer4,
                self.layer5
            )

            self.fc = nn.Sequential(
                # nn.Linear(in_features, out_features, bias) computes y = xA^T + b,
                # where x is the input, A the weight matrix and b the bias.
                # in_features: number of columns of the input x; input shape is [batch_size, in_features]
                # out_features: number of columns of the output y; output shape is [batch_size, out_features]
                # bias: bool, defaults to True
                # A linear layer changes only the number of columns of x, never the number of rows.
                nn.Linear(512, 512),
                nn.ReLU(inplace=True),
                nn.Dropout(0.5),
                nn.Linear(512, 256),
                nn.ReLU(inplace=True),
                nn.Dropout(0.5),
                nn.Linear(256, 10)
            )

        def forward(self, x):
            x = self.conv(x)
            # -1 marks the dimension PyTorch should infer: each sample flattens to
            # 512 columns, so the row count (the batch size) is worked out automatically.
            # x = x.view(x.size(0), -1) would be equivalent, since x.size(0) is the batch size.
            x = x.view(-1, 512)
            x = self.fc(x)
            return x
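As a quick sanity check (my addition, not from the original post), the network can be run on a dummy CIFAR-sized batch; as the shape comments show, a 32x32 input flattens to exactly 512 features after layer5:

    vgg = Vgg16_net().to(device)
    dummy = torch.randn(2, 3, 32, 32).to(device)
    print(vgg(dummy).shape)  # expected: torch.Size([2, 10])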

The model structure diagram is roughly as follows:

3. AlexNet model

The network can be defined with torchvision.models:

    # Define the network using torchvision.models
    net_a = models.alexnet(num_classes=10)
    print(net_a)

    # Define the loss function
    loss_fn = nn.CrossEntropyLoss()
    print(loss_fn)

    # Define the optimizer
    net = net_a
    Learning_rate = 0.01  # learning rate

    # optimizer = SGD: plain stochastic gradient descent
    # parameters: the list of parameters to optimize
    # lr: the learning rate
    # optimizer = torch.optim.Adam(net.parameters(), lr=Learning_rate)
    optimizer = torch.optim.SGD(net.parameters(), lr=Learning_rate, momentum=0.9)
    print(optimizer)
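One caveat (my note, not from the original post): torchvision's AlexNet is designed for large inputs such as 224x224, and CIFAR-10's 32x32 images are too small for its conv/pool stack, so they should be upscaled (e.g. with transforms.Resize) before the forward pass. A minimal sketch:

    # Hypothetical sanity check: feed AlexNet a 224x224 batch rather than raw 32x32 images
    dummy = torch.randn(2, 3, 224, 224)
    print(net_a(dummy).shape)  # expected: torch.Size([2, 10])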

     

     

Model structure diagram

4. ResNet model

    # 3x3 convolution helper (not shown in the original post; added so the code runs)
    def conv3x3(in_channels, out_channels, stride=1):
        return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                         stride=stride, padding=1, bias=False)

    class ResidualBlock(nn.Module):
        def __init__(self, in_channels, out_channels, stride=1, shortcut=None):
            super(ResidualBlock, self).__init__()
            self.conv1 = conv3x3(in_channels, out_channels, stride)
            self.bn1 = nn.BatchNorm2d(out_channels)
            self.relu = nn.ReLU(inplace=True)
            self.conv2 = conv3x3(out_channels, out_channels)
            self.bn2 = nn.BatchNorm2d(out_channels)
            self.shortcut = shortcut

        def forward(self, x):
            residual = x
            out = self.conv1(x)
            out = self.bn1(out)
            out = self.relu(out)
            out = self.conv2(out)
            out = self.bn2(out)
            # Project the input when the shortcut needs to change shape
            if self.shortcut is not None:
                residual = self.shortcut(x)
            out += residual
            out = self.relu(out)
            return out

    class ResNet(nn.Module):
        def __init__(self, block, layer, num_classes=10):
            super(ResNet, self).__init__()
            self.in_channels = 16
            self.conv = conv3x3(3, 16)
            self.bn = nn.BatchNorm2d(16)
            self.relu = nn.ReLU(inplace=True)
            self.layer1 = self.make_layer(block, 16, layer[0])
            self.layer2 = self.make_layer(block, 32, layer[1], 2)
            self.layer3 = self.make_layer(block, 64, layer[2], 2)
            self.avg_pool = nn.AvgPool2d(8)
            self.fc = nn.Linear(64, num_classes)

        def make_layer(self, block, out_channels, blocks, stride=1):
            # Use a projection shortcut when the spatial size or channel count changes
            shortcut = None
            if (stride != 1) or (self.in_channels != out_channels):
                shortcut = nn.Sequential(
                    nn.Conv2d(self.in_channels, out_channels, kernel_size=3,
                              stride=stride, padding=1),
                    nn.BatchNorm2d(out_channels))
            layers = []
            layers.append(block(self.in_channels, out_channels, stride, shortcut))
            for i in range(1, blocks):
                layers.append(block(out_channels, out_channels))
            self.in_channels = out_channels
            return nn.Sequential(*layers)

        def forward(self, x):
            x = self.conv(x)
            x = self.bn(x)
            x = self.relu(x)
            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.avg_pool(x)
            x = x.view(x.size(0), -1)
            x = self.fc(x)
            return x
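The post doesn't show how this network is instantiated; a minimal sketch, where the [2, 2, 2] block counts per stage are my assumption:

    # Build a small ResNet for 32x32 CIFAR-10 images; with two stride-2 stages,
    # the 32x32 input reaches the 8x8 average pool as a 64-channel feature map
    res_net = ResNet(ResidualBlock, [2, 2, 2]).to(device)
    dummy = torch.randn(2, 3, 32, 32).to(device)
    print(res_net(dummy).shape)  # expected: torch.Size([2, 10])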

The model diagram is reproduced from Zhihu.

     

Original article: https://blog.csdn.net/m0_58585940/article/details/128132374