PyTorch CV Primer 4: Solving Vision Problems with MobileNet


    Column link: https://blog.csdn.net/qq_33345365/category_12578430.html

    First edited: 2024/3/8; last edited: 2024/3/9

    Reference (Microsoft tutorial): https://learn.microsoft.com/en-us/training/modules/intro-computer-vision-pytorch

    For more content, see this author's other columns:

    PyTorch basics: https://blog.csdn.net/qq_33345365/category_12591348.html

    PyTorch NLP basics: https://blog.csdn.net/qq_33345365/category_12597850.html


    Solving Vision Problems with MobileNet


    Lightweight Models and MobileNet

    We have already seen that complex networks require significant computational resources, such as GPUs, both for training and for fast inference. It turns out, however, that in most cases a model with a significantly smaller number of parameters can still be trained to perform reasonably well. In other words, increasing model complexity usually buys only a small (far from proportional) gain in performance.

    We observed this at the beginning of the module when training the MNIST digit classifier. The accuracy of a simple fully connected model was not dramatically worse than that of a powerful CNN, and adding CNN layers and/or neurons to the classifier gained us at most a few percentage points of accuracy.

    This leads to the idea that we can experiment with lightweight network architectures to train faster models. This is especially important if we want to run our models on mobile devices.
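    To get a feel for the difference, here is a rough sketch (not a rigorous benchmark; the model constructors come from torchvision and run with random weights) that times one CPU forward pass of a full-scale ResNet against MobileNetV2:

    import time
    import torch
    import torchvision.models as models

    x = torch.randn(1, 3, 224, 224)  # one ImageNet-sized input
    for name, net in [('resnet50', models.resnet50()),
                      ('mobilenet_v2', models.mobilenet_v2())]:
        net.eval()
        with torch.no_grad():
            start = time.time()
            net(x)
        print(f"{name}: {time.time() - start:.3f}s per forward pass")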

    This module relies on the Cats and Dogs dataset downloaded in the previous unit. First we will make sure the dataset is available.

    The following Python helper script is used throughout this module; save it as pytorchcv.py (it is imported below):

    # Script file to hide implementation details for PyTorch computer vision module
    
    import builtins
    import torch
    import torch.nn as nn
    from torch.utils import data
    import torchvision
    from torchvision.transforms import ToTensor
    import matplotlib.pyplot as plt
    import numpy as np
    from PIL import Image
    import glob
    import os
    import zipfile
    
    default_device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    
    def load_mnist(batch_size=64):
        builtins.data_train = torchvision.datasets.MNIST('./data',
                                                         download=True, train=True, transform=ToTensor())
        builtins.data_test = torchvision.datasets.MNIST('./data',
                                                        download=True, train=False, transform=ToTensor())
        builtins.train_loader = torch.utils.data.DataLoader(data_train, batch_size=batch_size)
        builtins.test_loader = torch.utils.data.DataLoader(data_test, batch_size=batch_size)
    
    
    def train_epoch(net, dataloader, lr=0.01, optimizer=None, loss_fn=nn.NLLLoss()):
        optimizer = optimizer or torch.optim.Adam(net.parameters(), lr=lr)
        net.train()
        total_loss, acc, count = 0, 0, 0
        for features, labels in dataloader:
            optimizer.zero_grad()
            lbls = labels.to(default_device)
            out = net(features.to(default_device))
            loss = loss_fn(out, lbls)  # cross_entropy(out,labels)
            loss.backward()
            optimizer.step()
            total_loss += loss
            _, predicted = torch.max(out, 1)
            acc += (predicted == lbls).sum()
            count += len(labels)
        return total_loss.item() / count, acc.item() / count
    
    
    def validate(net, dataloader, loss_fn=nn.NLLLoss()):
        net.eval()
        count, acc, loss = 0, 0, 0
        with torch.no_grad():
            for features, labels in dataloader:
                lbls = labels.to(default_device)
                out = net(features.to(default_device))
                loss += loss_fn(out, lbls)
                pred = torch.max(out, 1)[1]
                acc += (pred == lbls).sum()
                count += len(labels)
        return loss.item() / count, acc.item() / count
    
    
    def train(net, train_loader, test_loader, optimizer=None, lr=0.01, epochs=10, loss_fn=nn.NLLLoss()):
        optimizer = optimizer or torch.optim.Adam(net.parameters(), lr=lr)
        res = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}
        for ep in range(epochs):
            tl, ta = train_epoch(net, train_loader, optimizer=optimizer, lr=lr, loss_fn=loss_fn)
            vl, va = validate(net, test_loader, loss_fn=loss_fn)
            print(f"Epoch {ep:2}, Train acc={ta:.3f}, Val acc={va:.3f}, Train loss={tl:.3f}, Val loss={vl:.3f}")
            res['train_loss'].append(tl)
            res['train_acc'].append(ta)
            res['val_loss'].append(vl)
            res['val_acc'].append(va)
        return res
    
    
    def train_long(net, train_loader, test_loader, epochs=5, lr=0.01, optimizer=None, loss_fn=nn.NLLLoss(), print_freq=10):
        optimizer = optimizer or torch.optim.Adam(net.parameters(), lr=lr)
        for epoch in range(epochs):
            net.train()
            total_loss, acc, count = 0, 0, 0
            for i, (features, labels) in enumerate(train_loader):
                lbls = labels.to(default_device)
                optimizer.zero_grad()
                out = net(features.to(default_device))
                loss = loss_fn(out, lbls)
                loss.backward()
                optimizer.step()
                total_loss += loss
                _, predicted = torch.max(out, 1)
                acc += (predicted == lbls).sum()
                count += len(labels)
                if i % print_freq == 0:
                    print("Epoch {}, minibatch {}: train acc = {}, train loss = {}".format(epoch, i, acc.item() / count,
                                                                                           total_loss.item() / count))
            vl, va = validate(net, test_loader, loss_fn)
            print("Epoch {} done, validation acc = {}, validation loss = {}".format(epoch, va, vl))
    
    
    def plot_results(hist):
        plt.figure(figsize=(15, 5))
        plt.subplot(121)
        plt.plot(hist['train_acc'], label='Training acc')
        plt.plot(hist['val_acc'], label='Validation acc')
        plt.legend()
        plt.subplot(122)
        plt.plot(hist['train_loss'], label='Training loss')
        plt.plot(hist['val_loss'], label='Validation loss')
        plt.legend()
    
    
    def plot_convolution(t, title=''):
        with torch.no_grad():
            c = nn.Conv2d(kernel_size=(3, 3), out_channels=1, in_channels=1)
            c.weight.copy_(t)
            fig, ax = plt.subplots(2, 6, figsize=(8, 3))
            fig.suptitle(title, fontsize=16)
            for i in range(5):
                im = data_train[i][0]
                ax[0][i].imshow(im[0])
                ax[1][i].imshow(c(im.unsqueeze(0))[0][0])
                ax[0][i].axis('off')
                ax[1][i].axis('off')
            ax[0, 5].imshow(t)
            ax[0, 5].axis('off')
            ax[1, 5].axis('off')
            # plt.tight_layout()
            plt.show()
    
    
    def display_dataset(dataset, n=10, classes=None):
        fig, ax = plt.subplots(1, n, figsize=(15, 3))
        mn = min([dataset[i][0].min() for i in range(n)])
        mx = max([dataset[i][0].max() for i in range(n)])
        for i in range(n):
            ax[i].imshow(np.transpose((dataset[i][0] - mn) / (mx - mn), (1, 2, 0)))
            ax[i].axis('off')
            if classes:
                ax[i].set_title(classes[dataset[i][1]])
    
    
    def check_image(fn):
        # Returns False for files that PIL cannot parse (the Kaggle dataset
        # contains a few corrupt images).
        try:
            im = Image.open(fn)
            im.verify()
            return True
        except Exception:
            return False
    
    
    def check_image_dir(path):
        for fn in glob.glob(path):
            if not check_image(fn):
                print("Corrupt image: {}".format(fn))
                os.remove(fn)
    
    
    def common_transform():
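        # These are the standard ImageNet channel means/stds; pretrained
        # torchvision models expect inputs normalized with these statistics.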
        std_normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                         std=[0.229, 0.224, 0.225])
        trans = torchvision.transforms.Compose([
            torchvision.transforms.Resize(256),
            torchvision.transforms.CenterCrop(224),
            torchvision.transforms.ToTensor(),
            std_normalize])
        return trans
    
    
    def load_cats_dogs_dataset():
        if not os.path.exists('data/PetImages'):
            with zipfile.ZipFile('data/kagglecatsanddogs_5340.zip', 'r') as zip_ref:
                zip_ref.extractall('data')
    
        check_image_dir('data/PetImages/Cat/*.jpg')
        check_image_dir('data/PetImages/Dog/*.jpg')
    
        dataset = torchvision.datasets.ImageFolder('data/PetImages', transform=common_transform())
        trainset, testset = torch.utils.data.random_split(dataset, [20000, len(dataset) - 20000])
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=32)
        testloader = torch.utils.data.DataLoader(testset, batch_size=32)  # fixed: was built from trainset
        return dataset, trainloader, testloader
    

    Version requirements (the torchinfo package used below for summary() is also needed):

    torchvision==0.13.0
    torch==1.12.0
    

    Create a new file and import the helpers:

    import torch
    import torch.nn as nn
    import torchvision
    import torchvision.transforms as transforms
    import matplotlib.pyplot as plt
    from torchinfo import summary
    import numpy as np
    import os
    
    from pytorchcv import train, plot_results, display_dataset, train_long, check_image_dir, load_cats_dogs_dataset
    
    if not os.path.exists('data/kagglecatsanddogs_5340.zip'):
        # originally a notebook shell command (!wget ...); from a plain
        # Python script, shell out explicitly instead:
        os.makedirs('data', exist_ok=True)
        os.system('wget -P data -q https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip')
    
    dataset, train_loader, test_loader = load_cats_dogs_dataset()
    

    MobileNet

    In a previous unit we covered the ResNet architecture for image classification. A more lightweight analog of ResNet is MobileNet, which is built from so-called Inverted Residual Blocks. Let's load a pre-trained MobileNet and look at how it works:

    model = torch.hub.load('pytorch/vision:v0.13.0', 'mobilenet_v2', weights='MobileNet_V2_Weights.DEFAULT')
    model.eval()
    print(model)
    
    • 1
    • 2
    • 3

    The output is:

    Using cache found in /home/vmuser/.cache/torch/hub/pytorch_vision_v0.6.0
    MobileNetV2(
      (features): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (2): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96, bias=False)
              (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (3): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(144, 144, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=144, bias=False)
              (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (4): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=144, bias=False)
              (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (5): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (6): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (7): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=192, bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (8): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (9): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (10): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (11): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (12): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=576, bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (13): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=576, bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (14): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(576, 576, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=576, bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(576, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (15): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (16): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (17): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (18): ConvBNReLU(
          (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(1280, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
      )
      (classifier): Sequential(
        (0): Dropout(p=0.2, inplace=False)
        (1): Linear(in_features=1280, out_features=1000, bias=True)
      )
    )
    
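    The printed structure shows the repeating pattern inside each InvertedResidual block: a 1x1 expansion convolution that widens the channel dimension, a 3x3 depthwise convolution (groups equal to the channel count, so each channel gets its own filter), and a 1x1 projection back to a narrow representation. Below is a minimal sketch of a stride-1 block with this shape (an illustration under those assumptions, not the torchvision implementation; the stride-2 variants in the printout have no skip connection):

    import torch.nn as nn

    class InvertedResidualSketch(nn.Module):
        def __init__(self, in_ch, out_ch, expand=6):
            super().__init__()
            hidden = in_ch * expand
            # the skip connection is only used when input/output shapes match
            self.use_residual = in_ch == out_ch
            self.block = nn.Sequential(
                # 1x1 "expansion" conv: widen the channel dimension
                nn.Conv2d(in_ch, hidden, 1, bias=False),
                nn.BatchNorm2d(hidden),
                nn.ReLU6(inplace=True),
                # 3x3 depthwise conv: one filter per channel (groups=hidden)
                nn.Conv2d(hidden, hidden, 3, padding=1, groups=hidden, bias=False),
                nn.BatchNorm2d(hidden),
                nn.ReLU6(inplace=True),
                # 1x1 "projection" conv, with no activation afterwards
                nn.Conv2d(hidden, out_ch, 1, bias=False),
                nn.BatchNorm2d(out_ch),
            )

        def forward(self, x):
            out = self.block(x)
            return x + out if self.use_residual else out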

    Let's apply the model to a sample from our dataset and make sure it works:

    sample_image = dataset[0][0].unsqueeze(0)
    res = model(sample_image)
    print(res[0].argmax())
    

    The output is:

    tensor(281)
    

    The result (281) is an ImageNet class number, which we discussed in a previous unit; class 281 corresponds to 'tabby cat', so the pretrained model does recognize the image as a cat.
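    To see the label behind the index, you can look it up in the category list shipped with the weights; a small sketch, assuming torchvision >= 0.13 (where the weights enum exposes this metadata):

    from torchvision.models import MobileNet_V2_Weights

    # the weights enum carries metadata, including the 1000 ImageNet labels
    categories = MobileNet_V2_Weights.DEFAULT.meta["categories"]
    print(categories[res[0].argmax().item()])  # expected: 'tabby'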

    Note the significant difference in the number of parameters between MobileNet and a full-scale ResNet. In some ways MobileNet is even more compact than the VGG model family, which is itself less accurate. Still, the reduction in parameter count naturally comes with some drop in model accuracy.
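    The gap can be made concrete with a quick parameter count (a sketch; exact totals may vary slightly across torchvision versions):

    import torchvision.models as models

    def count_params(m):
        return sum(p.numel() for p in m.parameters())

    print(f"MobileNetV2: {count_params(models.mobilenet_v2()):,}")  # ~3.5M
    print(f"ResNet-50:   {count_params(models.resnet50()):,}")      # ~25.6M
    print(f"VGG-16:      {count_params(models.vgg16()):,}")         # ~138M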

    Transfer Learning with MobileNet

    Now let's perform the same transfer-learning process as in the previous unit, but using MobileNet. First, freeze all parameters of the model:

    for x in model.parameters():
        x.requires_grad = False
    
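    As an optional sanity check (a minimal sketch), you can confirm the backbone is fully frozen before attaching a new head:

    # no parameter should require gradients at this point
    assert all(not p.requires_grad for p in model.parameters())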

    Then we replace the final classifier. We also move the model to our default training device (GPU or CPU):

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.classifier = nn.Linear(1280, 2)
    model = model.to(device)
    summary(model, input_size=(1, 3, 244, 244))
    

    The output is:

    ==========================================================================================
    Layer (type:depth-idx)                   Output Shape              Param #
    ==========================================================================================
    ├─Sequential: 1-1                        [1, 1280, 8, 8]           --
    |    └─ConvBNReLU: 2-1                   [1, 32, 122, 122]         --
    |    |    └─Conv2d: 3-1                  [1, 32, 122, 122]         (864)
    |    |    └─BatchNorm2d: 3-2             [1, 32, 122, 122]         (64)
    |    |    └─ReLU6: 3-3                   [1, 32, 122, 122]         --
    |    └─InvertedResidual: 2-2             [1, 16, 122, 122]         --
    |    |    └─Sequential: 3-4              [1, 16, 122, 122]         (896)
    |    └─InvertedResidual: 2-3             [1, 24, 61, 61]           --
    |    |    └─Sequential: 3-5              [1, 24, 61, 61]           (5,136)
    |    └─InvertedResidual: 2-4             [1, 24, 61, 61]           --
    |    |    └─Sequential: 3-6              [1, 24, 61, 61]           (8,832)
    |    └─InvertedResidual: 2-5             [1, 32, 31, 31]           --
    |    |    └─Sequential: 3-7              [1, 32, 31, 31]           (10,000)
    |    └─InvertedResidual: 2-6             [1, 32, 31, 31]           --
    |    |    └─Sequential: 3-8              [1, 32, 31, 31]           (14,848)
    |    └─InvertedResidual: 2-7             [1, 32, 31, 31]           --
    |    |    └─Sequential: 3-9              [1, 32, 31, 31]           (14,848)
    |    └─InvertedResidual: 2-8             [1, 64, 16, 16]           --
    |    |    └─Sequential: 3-10             [1, 64, 16, 16]           (21,056)
    |    └─InvertedResidual: 2-9             [1, 64, 16, 16]           --
    |    |    └─Sequential: 3-11             [1, 64, 16, 16]           (54,272)
    |    └─InvertedResidual: 2-10            [1, 64, 16, 16]           --
    |    |    └─Sequential: 3-12             [1, 64, 16, 16]           (54,272)
    |    └─InvertedResidual: 2-11            [1, 64, 16, 16]           --
    |    |    └─Sequential: 3-13             [1, 64, 16, 16]           (54,272)
    |    └─InvertedResidual: 2-12            [1, 96, 16, 16]           --
    |    |    └─Sequential: 3-14             [1, 96, 16, 16]           (66,624)
    |    └─InvertedResidual: 2-13            [1, 96, 16, 16]           --
    |    |    └─Sequential: 3-15             [1, 96, 16, 16]           (118,272)
    |    └─InvertedResidual: 2-14            [1, 96, 16, 16]           --
    |    |    └─Sequential: 3-16             [1, 96, 16, 16]           (118,272)
    |    └─InvertedResidual: 2-15            [1, 160, 8, 8]            --
    |    |    └─Sequential: 3-17             [1, 160, 8, 8]            (155,264)
    |    └─InvertedResidual: 2-16            [1, 160, 8, 8]            --
    |    |    └─Sequential: 3-18             [1, 160, 8, 8]            (320,000)
    |    └─InvertedResidual: 2-17            [1, 160, 8, 8]            --
    |    |    └─Sequential: 3-19             [1, 160, 8, 8]            (320,000)
    |    └─InvertedResidual: 2-18            [1, 320, 8, 8]            --
    |    |    └─Sequential: 3-20             [1, 320, 8, 8]            (473,920)
    |    └─ConvBNReLU: 2-19                  [1, 1280, 8, 8]           --
    |    |    └─Conv2d: 3-21                 [1, 1280, 8, 8]           (409,600)
    |    |    └─BatchNorm2d: 3-22            [1, 1280, 8, 8]           (2,560)
    |    |    └─ReLU6: 3-23                  [1, 1280, 8, 8]           --
    ├─Linear: 1-2                            [1, 2]                    2,562
    ==========================================================================================
    Total params: 2,226,434
    Trainable params: 2,562
    Non-trainable params: 2,223,872
    Total mult-adds (M): 196.40
    ==========================================================================================
    Input size (MB): 0.71
    Forward/backward pass size (MB): 20.12
    Params size (MB): 8.91
    Estimated Total Size (MB): 29.74
    ==========================================================================================
    

    Now let's do the actual training:

    train_long(model, train_loader, test_loader, loss_fn=torch.nn.CrossEntropyLoss(), epochs=1, print_freq=90)
    

    The output is:

    Epoch 0, minibatch 0: train acc = 0.5, train loss = 0.02309325896203518
    Epoch 0, minibatch 90: train acc = 0.9443681318681318, train loss = 0.006317565729329874
    Epoch 0, minibatch 180: train acc = 0.9488950276243094, train loss = 0.00590015182178982
    Epoch 0, minibatch 270: train acc = 0.9492619926199262, train loss = 0.006072205810969167
    Epoch 0, minibatch 360: train acc = 0.9500519390581718, train loss = 0.00641324315374908
    Epoch 0, minibatch 450: train acc = 0.9494872505543237, train loss = 0.006945275943189397
    Epoch 0, minibatch 540: train acc = 0.9521141404805915, train loss = 0.0067323536617257896
    Epoch 0 done, validation acc = 0.98245, validation loss = 0.002347727584838867
    
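    With the new head trained, single-image inference looks like the following; a minimal sketch, noting that ImageFolder assigns class indices alphabetically (dataset.classes should be ['Cat', 'Dog']):

    model.eval()
    img, label = dataset[0]
    with torch.no_grad():
        pred = model(img.unsqueeze(0).to(device)).argmax(dim=1).item()
    print('predicted:', dataset.classes[pred], '| actual:', dataset.classes[label])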

    Takeaways

    Note that MobileNet reaches almost the same accuracy as VGG-16, only slightly below that of a full-scale ResNet.

    The main advantage of small models like MobileNet or ResNet-18 is that they can run on mobile devices. There is an official example of using ResNet-18 on an Android device, and a similar example using MobileNet.


    Tutorial Summary

    In this module you learned how convolutional neural networks work and how they capture patterns in two-dimensional images. In fact, CNNs can also be used to find patterns in one-dimensional signals (such as sound waves or time series) and in multi-dimensional structures, such as events in video, where some patterns repeat across frames.

    Moreover, CNNs are simple building blocks for solving more complex computer-vision tasks such as image generation. Generative adversarial networks can be used to generate images similar to those in a given dataset, for example to produce computer-generated artwork. CNNs are likewise used for object detection, instance segmentation, and more. Implementing neural networks for those problems is covered in a separate course, and we encourage you to continue your journey into computer vision!
