• AlexNet: Training on a Flower Dataset


    Contents

    I. Network Structure

    II. Dataset

    III. Highlights

    IV. Key Points

    1. nn.ReLU(inplace)

    2. os.getcwd and os.path.abspath

    3. The torchvision datasets package

    4. items() and dict()

    5. JSON files

    6. tqdm

    7. net.train() and net.eval()

    V. Code


    AlexNet is a classic convolutional neural network proposed by Alex Krizhevsky, Ilya Sutskever and Geoffrey Hinton for the 2012 ImageNet image-classification competition. Among other techniques, AlexNet uses Dropout layers to reduce overfitting.

    I. Network Structure

    AlexNet consists of five convolutional layers (interleaved with ReLU activations and three max-pooling layers) followed by three fully connected layers; the exact configuration used in this post can be read from model.py in the Code section.

    II. Dataset

    File layout:

    data_set -> flower_data -> flower_photos

    Then run split_data.py to split the dataset into a training set and a validation set according to a given ratio (a minimal sketch of such a script follows the link below).

    For details, see the Bilibili uploader 霹雳吧啦Wz: https://github.com/WZMIAOMIAO/deep-learning-for-image-processing/blob/master/pytorch_classification
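    The real split_data.py lives in the repository linked above; the sketch below only illustrates the idea and is not the original script. It assumes flower_photos contains one sub-folder per class; the 0.1 validation ratio and the output folder names are illustrative.

    import os
    import random
    import shutil


    def split_dataset(src_dir, dst_dir, val_ratio=0.1):
        """Copy images from src_dir/<class>/ into dst_dir/train/<class>/ and dst_dir/val/<class>/."""
        random.seed(0)
        classes = [c for c in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, c))]
        for cls in classes:
            images = os.listdir(os.path.join(src_dir, cls))
            random.shuffle(images)
            num_val = int(len(images) * val_ratio)
            for i, name in enumerate(images):
                subset = 'val' if i < num_val else 'train'
                target_dir = os.path.join(dst_dir, subset, cls)
                os.makedirs(target_dir, exist_ok=True)
                shutil.copy(os.path.join(src_dir, cls, name), os.path.join(target_dir, name))


    if __name__ == '__main__':
        # the paths are assumptions that match the layout described above
        split_dataset('data_set/flower_data/flower_photos', 'data_set/flower_data', val_ratio=0.1)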

    III. Highlights

    1. A deeper network structure

        By increasing the depth of the network, AlexNet can learn the features of the dataset better and improve classification accuracy.

    2. The ReLU activation function, which alleviates the vanishing-gradient problem and is much cheaper to differentiate than sigmoid or tanh.

    3. Local Response Normalization (LRN)

        LRN is a normalization step added between the convolution and pooling layers. Each convolution kernel produces one feature map, and LRN normalizes across these feature maps: for every spatial position, the activation in a given channel is divided by a term computed from the sum of squares of the activations at the same position in a few neighbouring channels. This lateral inhibition suppresses the responses of neighbouring neurons, which helps avoid overfitting to some extent and improves the network's ability to generalize. A minimal usage sketch follows.
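    A minimal sketch of LRN in PyTorch using torch.nn.LocalResponseNorm. The hyperparameters below are the ones reported in the AlexNet paper (size=5, alpha=1e-4, beta=0.75, k=2); note that the model.py in this post omits LRN, as is common in modern re-implementations.

    import torch
    import torch.nn as nn

    # normalize each activation by a term built from the sum of squares over `size` neighbouring channels
    lrn = nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0)

    x = torch.randn(1, 96, 55, 55)  # e.g. the output of AlexNet's first conv layer
    y = lrn(x)
    print(y.shape)                  # torch.Size([1, 96, 55, 55]); the shape is unchanged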

    4. Dropout layers

        Dropout randomly drops a fraction of the neurons during training to reduce overfitting. Dropout layers are usually placed between fully connected layers, as in the classifier of model.py below; a small demo of the behaviour follows.
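    A small demo of nn.Dropout (not from the original post): in training mode it zeroes roughly a fraction p of the elements and rescales the survivors by 1/(1-p), so the expected value of the output matches the input; in evaluation mode it is a no-op.

    import torch
    import torch.nn as nn

    drop = nn.Dropout(p=0.5)
    x = torch.ones(2, 8)

    drop.train()    # training mode: elements are randomly zeroed
    print(drop(x))  # roughly half the entries are 0, the rest are 2.0 (scaled by 1/(1-p))

    drop.eval()     # evaluation mode: Dropout does nothing
    print(drop(x))  # all entries are 1.0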

    IV. Key Points

    1. nn.ReLU(inplace): the default is inplace=False

    inplace=False: the input tensor is not modified; a new tensor is created and returned, so the printed memory addresses of input and output differ (pass by value).

    inplace=True: the input tensor itself is modified, so input and output share the same memory address; this saves the time and memory of allocating and freeing an extra tensor (pass by reference).

    import torch
    import numpy as np
    import torch.nn as nn

    # id() returns the memory address of an object
    relu1 = nn.ReLU(inplace=False)
    relu2 = nn.ReLU(inplace=True)
    data = np.random.randn(2, 4)
    input = torch.from_numpy(data)  # convert to a tensor
    print("input address:", id(input))
    output1 = relu1(input)
    print("inplace=False -- output address:", id(output1))
    output2 = relu2(input)
    print("inplace=True -- output address:", id(output2))
    # input address: 1669839583200
    # inplace=False -- output address: 1669817512352
    # inplace=True -- output address: 1669839583200

    2. os.getcwd and os.path.abspath

    os.getcwd(): returns the current working directory.

    os.path.abspath('xxx.py'): returns the absolute path of the given file.

    import os
    print(os.getcwd())                 # D:\Code
    print(os.path.abspath('test.py'))  # D:\Code\test.py

    3. The torchvision datasets package

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, 'train'), transform=data_transform['train'])

    ImageFolder builds a dataset from a directory that contains one sub-folder per class. From the returned object we can read, among other things, the class names (train_dataset.classes), the class-to-index mapping (train_dataset.class_to_idx, which is used below to write class_indices.json), and the number of samples (len(train_dataset)).
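    A minimal sketch, assuming the data_set/flower_data/train directory from the Dataset section exists under the current working directory:

    import os
    from torchvision import datasets, transforms

    image_path = os.path.join(os.getcwd(), 'data_set', 'flower_data')
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, 'train'),
                                         transform=transforms.ToTensor())

    print(train_dataset.classes)       # ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']
    print(train_dataset.class_to_idx)  # {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4}
    print(len(train_dataset))          # number of training images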

    4. items() and dict()

    items(): returns the key/value pairs of a dictionary as tuples (a dict_items view), which can be iterated over like a list of tuples.

    obj = {
        'dog': 0,
        'cat': 1,
        'fish': 2
    }
    print(obj)          # {'dog': 0, 'cat': 1, 'fish': 2}
    print(obj.items())  # dict_items([('dog', 0), ('cat', 1), ('fish', 2)])
    # swap keys and values, as done for class_indices.json in train.py
    print(dict((v, k) for k, v in obj.items()))  # {0: 'dog', 1: 'cat', 2: 'fish'}

    5. JSON files

    (1) json.dumps: encodes a Python object into a JSON string.

    (2) json.loads: decodes a JSON string back into a Python object.

    import json
    data = [1, 2, 3]
    data_json = json.dumps(data)  # '[1, 2, 3]' (a str)
    data = json.loads(data_json)
    print(type(data))             # <class 'list'>

    6. tqdm

    train_bar = tqdm(train_loader, file=sys.stdout)

    Wrapping train_loader in tqdm and iterating over the result prints a progress bar to standard output (sys.stdout), which makes it easy to follow the training progress.

    from tqdm import tqdm
    import time

    for i in tqdm(range(10)):
        time.sleep(0.1)

    7. net.train() and net.eval()

    net.train(): puts the model in training mode, so Dropout is active and Batch Normalization (if present) updates its running statistics.

    net.eval(): puts the model in evaluation mode, so Dropout is disabled and Batch Normalization uses its stored running statistics. A small demo follows.
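    A small demo (not from the original post), using the AlexNet defined in model.py in the Code section below: calling train() or eval() sets the training flag recursively on the model and all of its submodules, which is what switches Dropout (and BatchNorm, if present) between the two behaviours.

    from model import AlexNet  # assumes model.py from the Code section is importable

    net = AlexNet(num_classes=5)

    net.train()                        # training behaviour: Dropout is active
    print(net.training)                # True
    print(net.classifier[0].training)  # True; the flag is set recursively (classifier[0] is a Dropout layer)

    net.eval()                         # evaluation/inference behaviour: Dropout is disabled
    print(net.training)                # False
    print(net.classifier[0].training)  # False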

    V. Code

    model.py

    import torch
    import torch.nn as nn


    class AlexNet(nn.Module):
        def __init__(self, num_classes=1000):
            super(AlexNet, self).__init__()
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=11, padding=2, stride=4),  # input[3,224,224]  output[96,55,55]
                nn.ReLU(inplace=True),                                  # inplace=True: modify the input tensor in place
                nn.MaxPool2d(kernel_size=3, stride=2),                  # output[96,27,27]
                nn.Conv2d(96, 256, kernel_size=5, padding=2),           # output[256,27,27]
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2),                  # output[256,13,13]
                nn.Conv2d(256, 384, kernel_size=3, padding=1),          # output[384,13,13]
                nn.ReLU(inplace=True),
                nn.Conv2d(384, 384, kernel_size=3, padding=1),          # output[384,13,13]
                nn.ReLU(inplace=True),
                nn.Conv2d(384, 256, kernel_size=3, padding=1),          # output[256,13,13]
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2),                  # output[256,6,6]
            )
            self.classifier = nn.Sequential(
                nn.Dropout(p=0.5),
                nn.Linear(256 * 6 * 6, 2048),
                nn.ReLU(inplace=True),
                nn.Dropout(p=0.5),
                nn.Linear(2048, 2048),
                nn.ReLU(inplace=True),
                nn.Linear(2048, num_classes)
            )

        def forward(self, x):
            x = self.features(x)
            x = torch.flatten(x, start_dim=1)  # keep the batch dimension, flatten from the channel dimension onward
            x = self.classifier(x)
            return x
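    A quick sanity check (not part of the original post): feed a dummy batch through the network and confirm the output shape matches the number of classes.

    import torch
    from model import AlexNet  # assumes the model.py above is in the same directory

    net = AlexNet(num_classes=5)
    dummy = torch.randn(2, 3, 224, 224)  # a batch of two 3x224x224 images
    out = net(dummy)
    print(out.shape)                     # torch.Size([2, 5])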

    train.py 

    import os
    import json
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torchvision import transforms, datasets
    from tqdm import tqdm

    from model import AlexNet


    def main():
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        print("using:{}".format(device))

        data_transform = {
            'train': transforms.Compose([
                # randomly crop a region with random size and aspect ratio, then resize it to 224x224
                transforms.RandomResizedCrop(224),
                # random horizontal flip
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ]),
            'val': transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])
        }

        # get data root path
        data_root = os.path.abspath(os.getcwd())  # D:\Code\AlexNet
        # get flower data set path
        image_path = os.path.join(data_root, 'data_set', 'flower_data')  # D:\Code\AlexNet\data_set\flower_data
        # assert raises an exception if the condition does not hold
        assert os.path.exists(image_path), '{} path does not exist!'.format(image_path)

        train_dataset = datasets.ImageFolder(root=os.path.join(image_path, 'train'), transform=data_transform['train'])
        val_dataset = datasets.ImageFolder(root=os.path.join(image_path, 'val'), transform=data_transform['val'])
        train_num = len(train_dataset)
        val_num = len(val_dataset)

        # write the index-to-class mapping into a json file
        flower_list = train_dataset.class_to_idx
        class_dict = dict((v, k) for k, v in flower_list.items())
        json_str = json.dumps(class_dict)
        with open('class_indices.json', 'w') as file:
            file.write(json_str)

        batch_size = 32
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
        val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=0)

        net = AlexNet(num_classes=5)
        net.to(device)
        loss_function = nn.CrossEntropyLoss()
        optimizer = optim.Adam(net.parameters(), lr=0.0002)

        epochs = 5
        save_path = './model/AlexNet.pth'
        os.makedirs(os.path.dirname(save_path), exist_ok=True)  # make sure the save directory exists
        best_acc = 0.0
        train_steps = len(train_loader)  # number of batches per epoch (train_num / batch_size)

        for epoch in range(epochs):
            # train
            net.train()
            epoch_loss = 0.0
            # wrap the loader in a progress bar
            train_bar = tqdm(train_loader)
            for step, data in enumerate(train_bar):
                images, labels = data
                optimizer.zero_grad()
                outputs = net(images.to(device))
                loss = loss_function(outputs, labels.to(device))
                loss.backward()
                optimizer.step()  # update the parameters
                # print statistics
                epoch_loss += loss.item()
                train_bar.desc = 'train epoch[{}/{}] loss:{:.3f}'.format(epoch + 1, epochs, loss)

            # validate
            net.eval()
            acc = 0.0
            with torch.no_grad():
                val_bar = tqdm(val_loader)
                for val_data in val_bar:
                    val_images, val_labels = val_data
                    outputs = net(val_images.to(device))
                    predict_y = torch.max(outputs, dim=1)[1]  # [1] takes the index of the max value in each row
                    acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
            val_acc = acc / val_num
            print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' % (epoch + 1, epoch_loss / train_steps, val_acc))

            # keep the weights with the best validation accuracy
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(net.state_dict(), save_path)

        print('Train finished!')


    if __name__ == '__main__':
        main()

    class_indices.json

    {"0": "daisy", "1": "dandelion", "2": "roses", "3": "sunflowers", "4": "tulips"}

    predict.py 

    import os
    import json
    import torch
    from torchvision import transforms
    from PIL import Image
    import matplotlib.pyplot as plt

    from model import AlexNet


    def main():
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

        # load image
        img_path = './2.jpg'
        assert os.path.exists(img_path), "file:'{}' does not exist".format(img_path)
        img = Image.open(img_path)
        plt.imshow(img)
        # expand to the expected input shape [N,C,H,W]
        img = transform(img)
        img = torch.unsqueeze(img, dim=0)

        # read class_indices
        json_path = './class_indices.json'
        assert os.path.exists(json_path), "file:'{}' does not exist".format(json_path)
        with open(json_path, 'r') as file:
            class_dict = json.load(file)  # {'0': 'daisy', '1': 'dandelion', '2': 'roses', '3': 'sunflowers', '4': 'tulips'}

        # load model
        net = AlexNet(num_classes=5).to(device)
        # load model weights
        weight_path = './model/AlexNet.pth'
        assert os.path.exists(weight_path), "file:'{}' does not exist".format(weight_path)
        net.load_state_dict(torch.load(weight_path))

        # predict
        net.eval()
        with torch.no_grad():
            output = torch.squeeze(net(img.to(device))).cpu()
            predict = torch.softmax(output, dim=0)
            predict_class = torch.argmax(predict).numpy()

        print_res = 'class:{} probability:{:.3}'.format(class_dict[str(predict_class)], predict[predict_class].numpy())
        plt.title(print_res)
        plt.show()
        for i in range(len(predict)):
            print('class:{:10} probability:{:.3}'.format(class_dict[str(i)], predict[i]))


    if __name__ == '__main__':
        main()

    Result:

    (The original post shows the prediction figure here: the test image with its predicted class and probability in the title, and the per-class probabilities printed to the console.)

  • Original post: https://blog.csdn.net/qq_61706112/article/details/132895183