【PyTorch】Training Model


    7、Training Model

    1、Model Training

    The CIFAR10 dataset is used as the example.

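    The training script imports its network from a separate Model.py via `from Model import *`. A minimal sketch of what that file might contain, consistent with the Liang architecture printed in Section 3 (the file name and exact layout are assumptions, not necessarily the author's original module):

    # Model.py -- assumed contents, matching the architecture shown in Section 3
    import torch
    from torch import nn


    class Liang(nn.Module):
        def __init__(self):
            super(Liang, self).__init__()
            self.module = nn.Sequential(
                nn.Conv2d(3, 32, 5, 1, 2),
                nn.MaxPool2d(2),
                nn.Conv2d(32, 32, 5, 1, 2),
                nn.MaxPool2d(2),
                nn.Conv2d(32, 64, 5, 1, 2),
                nn.MaxPool2d(2),
                nn.Flatten(),
                nn.Linear(64 * 4 * 4, 64),
                nn.Linear(64, 10)
            )

        def forward(self, x):
            return self.module(x)


    if __name__ == '__main__':
        # Quick shape check: a dummy CIFAR10-sized batch should map to (64, 10)
        liang = Liang()
        x = torch.ones((64, 3, 32, 32))
        print(liang(x).shape)

    The full training script:
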
    import torch
    import torchvision
    from torch import nn
    from torch.utils.data import DataLoader
    from torch.utils.tensorboard import SummaryWriter
    import time

    from Model import *

    # Prepare the datasets
    train_data = torchvision.datasets.CIFAR10("../data", train=True, transform=torchvision.transforms.ToTensor(),
                                              download=True)
    test_data = torchvision.datasets.CIFAR10("../data", train=False, transform=torchvision.transforms.ToTensor(),
                                             download=True)

    # Dataset lengths
    train_data_len = len(train_data)
    test_data_len = len(test_data)
    print("Training set size: {}".format(train_data_len))
    print("Test set size: {}".format(test_data_len))

    # Load the datasets with DataLoader
    train_dataloader = DataLoader(train_data, batch_size=64)
    test_dataloader = DataLoader(test_data, batch_size=64)

    # Create the network model
    liang = Liang()

    # Loss function
    loss_fn = nn.CrossEntropyLoss()

    # Optimizer
    # learning_rate = 0.01
    learning_rate = 1e-2  # 1e-2 = 1 * 10^(-2) = 1/100
    optimizer = torch.optim.SGD(liang.parameters(), lr=learning_rate)

    # Training bookkeeping
    total_train_step = 0  # number of training steps so far
    total_test_step = 0  # number of test evaluations so far
    epoch = 10  # number of training epochs

    # TensorBoard writer
    writer = SummaryWriter("../logs")
    start_time = time.time()

    for i in range(epoch):
        print("-----------Epoch {} training starts----------".format(i + 1))

        # Training phase
        liang.train()
        for data in train_dataloader:
            imgs, targets = data
            outputs = liang(imgs)
            loss = loss_fn(outputs, targets)

            # Backpropagate and update the parameters
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_step += 1
            if total_train_step % 100 == 0:
                end_time = time.time()
                print(end_time - start_time)
                print("Training step: {}, Loss: {}".format(total_train_step, loss.item()))
                writer.add_scalar("train_loss", loss.item(), total_train_step)

        # Evaluation phase
        liang.eval()
        total_test_loss = 0
        total_accuracy = 0
        with torch.no_grad():
            for data in test_dataloader:
                imgs, targets = data
                outputs = liang(imgs)
                loss = loss_fn(outputs, targets)
                total_test_loss += loss.item()

                # Number of correctly classified samples in this batch
                accuracy = (outputs.argmax(1) == targets).sum()
                total_accuracy += accuracy

        print("Total loss on the test set: {}".format(total_test_loss))
        print("Accuracy on the test set: {}".format(total_accuracy / test_data_len))
        writer.add_scalar("test_loss", total_test_loss, total_test_step)
        writer.add_scalar("test_accuracy", total_accuracy / test_data_len, total_test_step)
        total_test_step += 1

        # Save the model after every epoch
        torch.save(liang, "../model/liang_{}.pth".format(i))
        print("Model saved")

    writer.close()
    
    Files already downloaded and verified
    Files already downloaded and verified
    Training set size: 50000
    Test set size: 10000
    -----------Epoch 1 training starts----------
    6.537519931793213
    Training step: 100, Loss: 2.288882255554199
    13.001430749893188
    Training step: 200, Loss: 2.271170139312744
    19.13790225982666
    Training step: 300, Loss: 2.247511148452759
    25.20561981201172
    Training step: 400, Loss: 2.168041706085205
    31.378580570220947
    Training step: 500, Loss: 2.049440383911133
    37.541871309280396
    Training step: 600, Loss: 2.054497241973877
    43.90901756286621
    Training step: 700, Loss: 1.9997793436050415
    Total loss on the test set: 309.624484539032
    Accuracy on the test set: 0.2912999987602234
    Model saved
    ...
    
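    The script above saves the entire module with torch.save(liang, ...). A common alternative (not what the script above uses) is to save only the parameters via state_dict and load them back into a freshly constructed Liang; a minimal sketch:

    # Save parameters only
    torch.save(liang.state_dict(), "../model/liang_{}.pth".format(i))

    # Load later: rebuild the network, then restore the parameters
    model = Liang()
    model.load_state_dict(torch.load("../model/liang_{}.pth".format(i)))

    Saving the whole module pickles a reference to the Liang class, so loading requires that class to be importable from the same location; the state_dict approach stores only tensors and is therefore the generally recommended option.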

    2、GPU Training

    Moving the network, the loss function, and the data to CUDA (the GPU) makes training noticeably faster than running on the CPU.

    2.1 .cuda()

    # Network
    liang = Liang()
    if torch.cuda.is_available():
        liang = liang.cuda()
    # Loss function
    loss_fn = nn.CrossEntropyLoss()
    if torch.cuda.is_available():
        loss_fn = loss_fn.cuda()
    # Data (inside the training and test loops)
    imgs, targets = data
    if torch.cuda.is_available():
        imgs = imgs.cuda()
        targets = targets.cuda()
    
    Files already downloaded and verified
    Files already downloaded and verified
    Training set size: 50000
    Test set size: 10000
    -----------Epoch 1 training starts----------
    10.994545936584473
    Training step: 100, Loss: 2.2849647998809814
    12.99094533920288
    Training step: 200, Loss: 2.2762258052825928
    14.33635950088501
    Training step: 300, Loss: 2.230626106262207
    16.00475764274597
    Training step: 400, Loss: 2.1230242252349854
    17.964726209640503
    Training step: 500, Loss: 2.022688150405884
    19.61249876022339
    Training step: 600, Loss: 2.01230788230896
    20.96266460418701
    Training step: 700, Loss: 1.9741096496582031
    Total loss on the test set: 305.68632411956787
    Accuracy on the test set: 0.29739999771118164
    Model saved
    ...
    

    2.2 .to(device)

    # Define the training device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    # Network
    liang = Liang()
    liang = liang.to(device)
    # Loss function
    loss_fn = nn.CrossEntropyLoss()
    loss_fn = loss_fn.to(device)
    # Data (inside the training and test loops)
    imgs, targets = data
    imgs = imgs.to(device)
    targets = targets.to(device)
    
    Files already downloaded and verified
    Files already downloaded and verified
    cuda
    Training set size: 50000
    Test set size: 10000
    -----------Epoch 1 training starts----------
    9.563657283782959
    Training step: 100, Loss: 2.2956345081329346
    10.768706560134888
    Training step: 200, Loss: 2.2770333290100098
    11.968295335769653
    Training step: 300, Loss: 2.26665997505188
    13.181000471115112
    Training step: 400, Loss: 2.2037200927734375
    14.387518167495728
    Training step: 500, Loss: 2.0665152072906494
    15.585152387619019
    Training step: 600, Loss: 2.0054214000701904
    16.81506586074829
    Training step: 700, Loss: 2.0446667671203613
    Total loss on the test set: 320.6275497674942
    Accuracy on the test set: 0.2667999863624573
    Model saved
    ...
    
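    Comparing the timestamps in the logs: the CPU run in Section 1 needs about 44 s to reach training step 700, while the two GPU variants above need roughly 17-21 s, which is the speed-up referred to at the start of this section.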

    2.3 Google Colab

    We can also use Google's Colab for GPU training: https://colab.research.google.com/ (access may require a VPN).

    To train on a GPU in Colab, select GPU as the hardware accelerator in the notebook settings.
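
    Once the runtime is switched to GPU, a quick check from a notebook cell confirms that PyTorch can see the device (a minimal sanity check; the device name printed depends on whatever GPU Colab assigns to the session):

    import torch

    # True only when the Colab runtime has a CUDA GPU attached
    print(torch.cuda.is_available())
    if torch.cuda.is_available():
        # Name of the assigned GPU (varies by session)
        print(torch.cuda.get_device_name(0))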

    Noticeably faster!!!

    3、Model Validation

    import torch
    import torchvision
    from PIL import Image
    from torch import nn
    
    # Load the image to classify
    image_pth = "../images/dog.jpg"
    image = Image.open(image_pth)
    print(image)

    # Resize to the 32x32 input expected by the CIFAR10 model and convert to a tensor
    transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                                torchvision.transforms.ToTensor()])
    image = transform(image)
    print(image.shape)
    
    
    # The same network definition used during training; the class must be available
    # for torch.load to rebuild the saved module
    class Liang(nn.Module):
        def __init__(self):
            super(Liang, self).__init__()
            self.module = nn.Sequential(
                nn.Conv2d(3, 32, 5, 1, 2),
                nn.MaxPool2d(2),
                nn.Conv2d(32, 32, 5, 1, 2),
                nn.MaxPool2d(2),
                nn.Conv2d(32, 64, 5, 1, 2),
                nn.MaxPool2d(2),
                nn.Flatten(),
                nn.Linear(64 * 4 * 4, 64),
                nn.Linear(64, 10)
            )
    
        def forward(self, x):
            x = self.module(x)
            return x
    
    
    # Load the model saved after the fifth training epoch
    model = torch.load("../model/liang_4.pth")
    print(model)
    
    # Add the batch dimension expected by the network: (N, C, H, W)
    image = torch.reshape(image, (1, 3, 32, 32))
    print(image.shape)
    
    # The model was saved from a GPU run, so move the input to the GPU as well
    model.eval()
    image = image.cuda()
    
    with torch.no_grad():
        output = model(image)
    print(output)
    
    print(output.argmax(1))
    
    <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=300x200 at 0x19E66C68100>
    torch.Size([3, 32, 32])
    Liang(
      (module): Sequential(
        (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (6): Flatten(start_dim=1, end_dim=-1)
        (7): Linear(in_features=1024, out_features=64, bias=True)
        (8): Linear(in_features=64, out_features=10, bias=True)
      )
    )
    torch.Size([1, 3, 32, 32])
    tensor([[-0.8167, -2.1763,  1.3891,  0.7956,  1.2035,  1.8374, -0.7936,  1.7908,
             -2.0639, -1.4441]], device='cuda:0')
    tensor([5], device='cuda:0')
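
    The prediction tensor([5]) is the index of the predicted class. In torchvision's CIFAR10 the class order is airplane, automobile, bird, cat, deer, dog, frog, horse, ship, truck, so index 5 corresponds to dog, which matches the input image. A small sketch that maps the index to a readable label (the class list is the standard CIFAR10 ordering):

    # Standard CIFAR10 class order used by torchvision
    classes = ["airplane", "automobile", "bird", "cat", "deer",
               "dog", "frog", "horse", "ship", "truck"]
    print(classes[output.argmax(1).item()])  # -> "dog"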
    
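    Note that torch.load above restores a module saved from a GPU run, and the input is moved to the GPU with image.cuda(). On a CPU-only machine, the load can be redirected to the CPU instead (a minimal variation on the script above, not part of the original code):

    # CPU-only variant of the load-and-predict step
    model = torch.load("../model/liang_4.pth", map_location=torch.device("cpu"))
    model.eval()
    with torch.no_grad():
        output = model(image)  # image stays on the CPU; no .cuda() call
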
    Original article: https://blog.csdn.net/m0_70885101/article/details/128055640