• Training and recognizing handwritten digits with a LeNet-5 network built in PyTorch


This post implements a LeNet-5 network with the PyTorch API, trains the model on the MNIST dataset, and then uses the resulting model for prediction. It consists of two main parts: training and prediction.

1. Training part:

(1). Load the MNIST dataset through the interfaces of the TorchVision module; each image is resized to 32*32 and the mini-batch size is set to 32;

(2). Fix the initial values of the network parameters, so that they are the same on every training run, which makes problems easier to reproduce and locate;

(3). Design the LeNet-5 network and instantiate a network object. The class overrides the __init__ and forward functions; the layers used include Conv2d, AvgPool2d, and Linear, and the activation function is Tanh (a dummy-input shape check is sketched after the code block);

(4). Specify the optimization algorithm, here Adam;

(5). Specify the loss function, here CrossEntropyLoss (see the note after this list);

(6). Train: epochs is set to 10, and the results of each epoch are printed;

(7). Save the model; using state_dict is recommended.
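
A note on step (5): CrossEntropyLoss expects raw, unnormalized logits and applies log-softmax internally, which is why the forward function below returns both the logits (fed to the loss) and the softmax probabilities (used only for reporting). As a minimal, self-contained illustration (not part of the project code):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    criterion = nn.CrossEntropyLoss()
    logits = torch.tensor([[2.0, 0.5, -1.0]]) # raw scores for 3 classes, batch size 1
    target = torch.tensor([0])                # ground-truth class index
    loss = criterion(logits, target)
    # By definition, this equals the negative log of the softmax probability of the true class
    manual = -torch.log(F.softmax(logits, dim=1)[0, 0])
    print(loss.item(), manual.item())         # the two values match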

The code is as follows:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch import Tensor
    from torch.utils.data import DataLoader
    from torchvision import datasets, transforms
    from datetime import datetime

    def load_mnist_dataset(img_size, batch_size):
        '''Download and load the MNIST dataset
        img_size: image size; width and height are equal
        batch_size: number of samples in a mini-batch
        '''
        # Resize the PIL image first, then convert it to a tensor
        transforms_ = transforms.Compose([transforms.Resize(size=(img_size, img_size)), transforms.ToTensor()])
        '''Download the MNIST dataset
        root: directory in which the MNIST data is stored
        train: optional, default True; if True, create the dataset from MNIST/processed/training.pt; if False, from MNIST/processed/test.pt
        transform: optional, default None; receives a PIL image and transforms it
        target_transform: optional, default None
        download: optional, default False; if True, download the dataset from the internet into the directory given by root
        '''
        train_dataset = datasets.MNIST(root="mnist_data", train=True, transform=transforms_, target_transform=None, download=True)
        valid_dataset = datasets.MNIST(root="mnist_data", train=False, transform=transforms_, target_transform=None, download=False)
        # Load the MNIST dataset; with shuffle=True the order is reshuffled at every epoch
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False)
        return train_loader, valid_loader, train_dataset, valid_dataset

    class LeNet5(nn.Module):
        '''Build the LeNet-5 network'''
        def __init__(self, n_classes: int) -> None:
            super(LeNet5, self).__init__() # call the constructor of the parent class Module
            # n_classes: number of classes
            # nn.Sequential: an ordered container; modules are added in the order they are passed to the constructor, and the whole container can be treated as a single module
            self.feature_extractor = nn.Sequential( # input 32*32
                nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=0), # convolution layer, 28*28*6
                nn.Tanh(), # Tanh activation, maps values into (-1, 1)
                nn.AvgPool2d(kernel_size=2, stride=None, padding=0), # average pooling layer, 14*14*6
                nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0), # 10*10*16
                nn.Tanh(),
                nn.AvgPool2d(kernel_size=2, stride=None, padding=0), # 5*5*16
                nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1, padding=0), # 1*1*120
                nn.Tanh()
            )
            self.classifier = nn.Sequential( # input 1*1*120
                nn.Linear(in_features=120, out_features=84), # fully-connected layer, 84
                nn.Tanh(),
                nn.Linear(in_features=84, out_features=n_classes) # 10
            )

        # Since LeNet5 inherits from nn.Module, once forward is defined the backward function is implemented automatically through Autograd
        # Calling a LeNet5 instance with an input x automatically invokes forward
        def forward(self, x: Tensor):
            x = self.feature_extractor(x)
            x = torch.flatten(input=x, start_dim=1) # flatten; with start_dim=1 the first (batch) dimension is kept and the remaining ones are flattened
            logits = self.classifier(x)
            probs = F.softmax(input=logits, dim=1) # softmax: maps every element into (0, 1) such that all elements sum to 1
            return logits, probs

    def validate(valid_loader, model, criterion, device):
        '''Function for the validation step of the training loop'''
        model.eval() # set the network to evaluation mode
        running_loss = 0
        for X, y_true in valid_loader:
            X = X.to(device) # move the data onto the specified device (CPU or GPU)
            y_true = y_true.to(device)
            # Forward pass and record loss
            y_hat, _ = model(X) # forward pass: Module's __call__ method invokes the forward method of the given network (here LeNet5)
            loss = criterion(y_hat, y_true) # compute the loss; likewise, __call__ invokes the forward method of the given loss class (here CrossEntropyLoss)
            running_loss += loss.item() * X.size(0)
        epoch_loss = running_loss / len(valid_loader.dataset)
        return model, epoch_loss

    def get_accuracy(model, data_loader, device):
        '''Function for computing the accuracy of the predictions over the entire data_loader'''
        correct_pred = 0
        n = 0
        with torch.no_grad(): # temporarily sets requires_grad to False for all tensors inside the block, so no gradients (autograd) are computed
            model.eval() # set the network to evaluation mode
            for X, y_true in data_loader:
                X = X.to(device) # move the data onto the specified device (CPU or GPU)
                y_true = y_true.to(device)
                _, y_prob = model(X) # y_prob.size(): torch.Size([32, 10]): [batch_size, n_classes]
                # torch.max(input): returns the maximum over all elements of the tensor
                # torch.max(input, dim): returns the maximum along dimension dim, together with its index
                # dim=0: the maximum element of each column, with its index
                # dim=1: the maximum element of each row, with its index
                _, predicted_labels = torch.max(y_prob, 1)
                n += y_true.size(0)
                correct_pred += (predicted_labels == y_true).sum()
        return correct_pred.float() / n

    def train(train_loader, model, criterion, optimizer, device):
        '''Function for the training step of the training loop'''
        model.train() # set the network to training mode
        running_loss = 0
        for X, y_true in train_loader: # first calls DataLoader's __iter__, then repeatedly calls the iterator's __next__
            # X.size (shape [n,c,h,w]): torch.Size([32, 1, 32, 32]); y_true.size(): torch.Size([32]); n is the batch_size
            optimizer.zero_grad() # reset the gradients to 0; this must be called before computing the gradients of the next mini-batch, otherwise the new gradients are accumulated into the existing ones
            # Move the tensors onto the specified device (CPU or GPU)
            X = X.to(device)
            y_true = y_true.to(device)
            y_hat, _ = model(X) # forward pass: Module's __call__ method invokes the forward method of the given network (here LeNet5)
            # y_hat.size(): torch.Size([32, 10]); _.size(): torch.Size([32, 10])
            loss = criterion(y_hat, y_true) # compute the loss on the raw logits; as above, __call__ invokes the forward method of the given loss class (here CrossEntropyLoss)
            running_loss += loss.item() * X.size(0)
            loss.backward() # backward pass; Autograd computes the current gradient of the scalar loss
            optimizer.step() # update the network parameters; the optimizer adjusts each parameter according to the gradient stored in .grad
        epoch_loss = running_loss / len(train_loader.dataset)
        return model, optimizer, epoch_loss

    def training_loop(model, criterion, optimizer, train_loader, valid_loader, epochs, device, print_every=1):
        '''Function defining the entire training loop
        model: network object
        criterion: loss function object
        optimizer: optimization algorithm object
        train_loader: training dataset loader
        valid_loader: validation dataset loader
        epochs: number of passes over the entire training dataset
        device: whether to run on the CPU or on the GPU
        print_every: print the training results every print_every epochs
        '''
        train_losses = []
        valid_losses = []
        for epoch in range(0, epochs):
            model, optimizer, train_loss = train(train_loader, model, criterion, optimizer, device)
            train_losses.append(train_loss)
            # After each epoch, evaluate on the validation dataset
            with torch.no_grad(): # temporarily sets requires_grad to False for all tensors inside the block, so no gradients (autograd) are computed
                model, valid_loss = validate(valid_loader, model, criterion, device)
                valid_losses.append(valid_loss)
            if epoch % print_every == (print_every - 1):
                train_acc = get_accuracy(model, train_loader, device=device)
                valid_acc = get_accuracy(model, valid_loader, device=device)
                print(f' {datetime.now().time().replace(microsecond=0)}:'
                      f' Epoch: {epoch}', f' Train loss: {train_loss:.4f}', f' Valid loss: {valid_loss:.4f}',
                      f' Train accuracy: {100 * train_acc:.2f}', f' Valid accuracy: {100 * valid_acc:.2f}')
        return model, optimizer, (train_losses, valid_losses)

    def train_and_save_model():
        print("#### start training ... ####")
        print("1. load mnist dataset")
        train_loader, valid_loader, _, _ = load_mnist_dataset(img_size=32, batch_size=32)
        print("2. fixed random init value")
        # Fix the random seed; without it the network initialization is different on every training run and the results are not reproducible; with it the initialization is the same every time
        torch.manual_seed(seed=42)
        #print("value:", torch.rand(1), torch.rand(1), torch.rand(1)) # run it several times: the printed values, in [0, 1), are identical each time
        print("3. instantiate lenet net object")
        model = LeNet5(n_classes=10).to('cpu') # run on the CPU
        print("4. specify the optimization algorithm: Adam")
        optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001) # define the optimization algorithm: Adam is a gradient-based optimization algorithm
        print("5. specify the loss function: CrossEntropyLoss")
        criterion = nn.CrossEntropyLoss() # define the loss function: cross-entropy loss
        print("6. repeated training")
        model, _, _ = training_loop(model, criterion, optimizer, train_loader, valid_loader, epochs=10, device='cpu') # epochs is the number of passes over the entire training dataset
        print("7. save model")
        model_name = "../../../data/Lenet-5.pth"
        #torch.save(model, model_name) # save the whole model; corresponds to loading with model = torch.load
        torch.save(model.state_dict(), model_name) # recommended: save only the trained parameters; corresponds to loading with model.load_state_dict(torch.load)
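
Before launching a full training run, it can be worth confirming the layer-by-layer sizes annotated in the comments above with a dummy forward pass. The following is a quick sanity-check sketch, not part of the original code, assuming the LeNet5 class defined above is in scope:

    import torch

    model = LeNet5(n_classes=10)
    dummy = torch.randn(1, 1, 32, 32) # one fake grayscale image, shape [n, c, h, w]
    logits, probs = model(dummy)
    print(logits.shape, probs.shape)  # expected: torch.Size([1, 10]) for both
    print(probs.sum().item())         # the softmax outputs sum to 1 (up to rounding)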

The execution results are as follows:

2. Handwritten digit recognition part:

(1). Load the model; load_state_dict is recommended, matching the state_dict used when saving the model (see the note after this list);

(2). Set the network to evaluation mode;

(3). Prepare the test images: 10 in total, one per digit from 0 to 9, as shown in the figure below. Note that the training images have a black background while the test images have a white background (a cv2-free preprocessing alternative is sketched after the code block):

(4). Recognize each image in turn.
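
A note on step (1): load_state_dict consumes the dictionary produced by state_dict when the model was saved. If a checkpoint was written on one device and is loaded on another (for example trained on a GPU, predicted on a CPU), the map_location argument of torch.load remaps the tensors; a minimal sketch under that assumption:

    import torch

    model = LeNet5(n_classes=10)
    # map_location='cpu' remaps GPU-saved tensors onto the CPU; it is harmless for CPU-saved checkpoints
    state = torch.load("../../../data/Lenet-5.pth", map_location='cpu')
    model.load_state_dict(state)
    model.eval()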

The code is as follows:

    import os
    import cv2
    import torch
    from torchvision import transforms
    # the LeNet5 class defined in the training part is assumed to be in scope (e.g. imported from that module)

    def list_files(filepath, filetype):
        '''Traverse the specified directory and collect the files of the specified type'''
        paths = []
        for root, dirs, files in os.walk(filepath):
            for file in files:
                if file.lower().endswith(filetype.lower()):
                    paths.append(os.path.join(root, file))
        return paths

    def get_image_label(image_name, image_name_suffix):
        '''Get the ground-truth label of a test image from its file name; assumes '/' as the path separator'''
        index = image_name.rfind("/")
        if index == -1:
            print(f"Error: image name {image_name} is not supported")
        sub = image_name[index+1:]
        label = sub[:len(sub)-len(image_name_suffix)]
        return label

    def image_predict():
        print("#### start predicting ... ####")
        print("1. load model")
        model_name = "../../../data/Lenet-5.pth"
        model = LeNet5(n_classes=10).to('cpu') # instantiate a network object
        model.load_state_dict(torch.load(model_name)) # load the trained parameters
        print("2. set net to evaluate mode")
        model.eval()
        print("3. prepare test images")
        image_path = "../../../data/image/handwritten_digits/"
        image_name_suffix = ".png"
        images_name = list_files(image_path, image_name_suffix)
        print("4. image recognition")
        with torch.no_grad():
            for image_name in images_name:
                #print("image name:", image_name)
                label = get_image_label(image_name, image_name_suffix)
                img = cv2.imread(image_name, cv2.IMREAD_GRAYSCALE)
                img = cv2.resize(img, (32, 32))
                # The MNIST images have a black background while the test images have a white one, so invert before recognition
                img = cv2.bitwise_not(img)
                #print("img shape:", img.shape)
                # Convert the OpenCV image to a PyTorch tensor
                transform = transforms.ToTensor()
                tensor = transform(img) # tensor shape: torch.Size([1, 32, 32])
                tensor = tensor.unsqueeze(0) # tensor shape: torch.Size([1, 1, 32, 32])
                #print("tensor shape:", tensor.shape)
                _, y_prob = model(tensor)
                _, predicted_label = torch.max(y_prob, 1)
                print(f" predicted label: {predicted_label.item()}, ground truth label: {label}")

The execution results are shown in the figure below:

          GitHub: https://github.com/fengbingchun/PyTorch_Test

• Original article: https://blog.csdn.net/fengbingchun/article/details/125462001