• 深度学习-AlexNet


    1. 网络结构
    1.1 简介

    AlexNet 是 2012 年 ImageNet 竞赛冠军获得者 Hinton 和他的学生 Alex Krizhevsky 设计的,该网络在 ImageNet LSVRC-2010 竞赛中错误率分别为 37.5%(top-1)和 17.0%(top-5)。

    论文地址

    在 AlexNet 中主要有以下几个特点:

    • 使用 GPU 进行训练;
    • 使用 ReLU 激活函数;
    • 使用 LRN 局部响应归一化(这种归一化方法在以后的 CNN 中使用的越来越少,被 BatchNorm 替代);
    • 使用 Dropout,防止过拟合。
    1.2 网络结构

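    | layer   | input      | kernel size | padding | stride | output     | activation / other |
    |---------|------------|-------------|---------|--------|------------|--------------------|
    | conv    | 3x224x224  | 11x11       | (1, 2)  | 4      | 96x55x55   | relu               |
    | maxpool | 96x55x55   | 3x3         | -       | 2      | 96x27x27   |                    |
    | conv    | 96x27x27   | 5x5         | 2       | 1      | 256x27x27  | relu               |
    | maxpool | 256x27x27  | 3x3         | -       | 2      | 256x13x13  |                    |
    | conv    | 256x13x13  | 3x3         | 1       | 1      | 384x13x13  | relu               |
    | conv    | 384x13x13  | 3x3         | 1       | 1      | 384x13x13  | relu               |
    | conv    | 384x13x13  | 3x3         | 1       | 1      | 256x13x13  | relu               |
    | maxpool | 256x13x13  | 3x3         | -       | 2      | 256x6x6    | flatten, dropout   |
    | fc      | 9216       | -           | -       | -      | 4096       | relu, dropout      |
    | fc      | 4096       | -           | -       | -      | 2048       | relu, dropout      |
    | fc      | 2048       | -           | -       | -      | 1000       |                    |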
    2. 代码实现(PyTorch)

    使用数据集CIFAR10进行图像分类

    import torch
    from torch import nn
    from torch.utils.data import Dataset, DataLoader
    from torchvision import transforms
    import torchvision
    from tqdm import tqdm
    import numpy as np
    from PIL import Image
    
    import matplotlib.pyplot as plt
    %matplotlib inline
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    class AlexNet(nn.Module):
        """AlexNet-style convolutional classifier for 3-channel images.

        The network is a stack of five convolutions (with ReLU and three
        max-pooling stages) followed by a three-layer fully connected head.
        The first linear layer expects 9216 = 256 * 6 * 6 features, which is
        what the feature extractor produces for 224x224 inputs.

        Args:
            init_weights: When true, re-initialize Conv2d and Linear layers
                via ``_initialize_weights`` instead of keeping the PyTorch
                defaults.
            num_classes: Number of output logits. Defaults to 10 (CIFAR10).
        """

        def __init__(self, init_weights=False, num_classes=10):
            super().__init__()
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2),

                nn.Conv2d(96, 256, kernel_size=5, padding=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2),

                nn.Conv2d(256, 384, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(384, 384, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(384, 256, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),

                nn.MaxPool2d(kernel_size=3, stride=2),
            )
            self.classifier = nn.Sequential(
                nn.Flatten(),
                nn.Dropout(p=0.5),
                nn.Linear(9216, 4096),
                nn.ReLU(inplace=True),
                nn.Dropout(p=0.5),
                nn.Linear(4096, 2048),
                nn.ReLU(inplace=True),
                # Output width is configurable; the default of 10 matches CIFAR10.
                nn.Linear(2048, num_classes),
            )
            if init_weights:
                self._initialize_weights()

        def forward(self, x):
            """Return class logits for a batch of images ``x``."""
            x = self.features(x)
            x = self.classifier(x)
            return x

        def _initialize_weights(self):
            """Re-initialize all Conv2d and Linear layers in place."""
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    # Kaiming-normal init; "relu" yields the same gain as the
                    # library default but states the intent explicitly.
                    nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.Linear):
                    # Normal distribution with mean 0 and standard deviation 0.01
                    # (the third argument of normal_ is std, not variance).
                    nn.init.normal_(m.weight, 0, 0.01)
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    # Build the network model, requesting the custom weight initialization
    net = AlexNet(init_weights=True)
    # Bare expression: presumably the last line of a notebook cell, where it
    # displays the module structure (see the "out:" listing that follows)
    net
    
    • 1
    • 2
    • 3
    out:
    AlexNet(
      (features): Sequential(
        (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
        (1): ReLU(inplace=True)
        (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
        (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        (4): ReLU(inplace=True)
        (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
        (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (7): ReLU(inplace=True)
        (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (9): ReLU(inplace=True)
        (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (11): ReLU(inplace=True)
        (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (classifier): Sequential(
        (0): Flatten(start_dim=1, end_dim=-1)
        (1): Dropout(p=0.5, inplace=False)
        (2): Linear(in_features=9216, out_features=4096, bias=True)
        (3): ReLU(inplace=True)
        (4): Dropout(p=0.5, inplace=False)
        (5): Linear(in_features=4096, out_features=2048, bias=True)
        (6): ReLU(inplace=True)
        (7): Linear(in_features=2048, out_features=10, bias=True)
      )
    )
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    # Load and preprocess the data
    resize = (224, 224)
    mean = (0.5, 0.5, 0.5)
    std = (0.5, 0.5, 0.5)

    # Both splits use the same pipeline: resize, convert to tensor, normalize.
    # Each split still gets its own Compose instance.
    data_transform = {
        split: transforms.Compose([
            transforms.Resize(resize),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
        for split in ("train", "test")
    }

    train_dataset = torchvision.datasets.CIFAR10(
        root="./data",
        train=True,
        download=True,
        transform=data_transform["train"],
    )
    test_dataset = torchvision.datasets.CIFAR10(
        root="./data",
        train=False,
        download=True,
        transform=data_transform["test"],
    )
    classes = ("airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")

    batch_size = 512
    # Only the training loader shuffles; the test loader keeps dataset order.
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
    )
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        num_workers=4,
    )

    # Prefer the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    # Training
    net.to(device)
    loss_fn.to(device)
    num_epochs = 10

    for epoch in range(num_epochs):
        # ---- one pass over the training set ----
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_dataloader, desc=f"Train Epoch: {epoch}/{num_epochs}")
        for inputs, labels in train_bar:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            loss = loss_fn(net(inputs), labels)
            loss.backward()
            optimizer.step()

            # Accumulate the per-batch mean loss for the epoch report.
            running_loss += loss.item()

        # ---- performance on the test set ----
        net.eval()
        accuracy_num = 0
        with torch.no_grad():
            test_bar = tqdm(test_dataloader, desc=f"Test  Epoch: {epoch}/{num_epochs}")
            for inputs, labels in test_bar:
                inputs, labels = inputs.to(device), labels.to(device)

                # torch.max over dim=1 returns (values, indices); the indices
                # are the predicted class ids.
                _, predict_y = torch.max(net(inputs), dim=1)
                accuracy_num += (predict_y == labels).sum().item()

        # Loss is averaged over batches, accuracy over test samples.
        epoch_loss = np.round(running_loss / len(train_dataloader), 3)
        epoch_acc = np.round(accuracy_num / len(test_dataset) * 100, 2)
        print(f"Epoch: {epoch}/{num_epochs}; loss: {epoch_loss}; Acc: {epoch_acc} %")
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37

    # torch.cuda.memory_summary only accepts CUDA devices, but `device` falls
    # back to the CPU when no GPU is available, so only report GPU memory
    # statistics when actually running on CUDA.
    if device.type == "cuda":
        print(torch.cuda.memory_summary(device, abbreviated=True))
    
    • 1

    # Run the trained model on a single image
    # Open inside a context manager so the file handle is released, and force
    # three channels: the Normalize step and the first conv layer both assume
    # RGB input, which a grayscale / RGBA / palette image would violate.
    with Image.open("/root/autodl-tmp/DogsVSCats/train/train/cat.100.jpg") as raw_img:
        img = raw_img.convert("RGB")
    plt.imshow(img)
    trans = data_transform["test"]
    img = trans(img)
    # Add a leading batch dimension, since the model is fed batches.
    img = torch.unsqueeze(img, dim=0)
    net.eval()
    with torch.no_grad():
        img = img.to(device)
        output = net(img)
        # Index of the largest logit is the predicted class id.
        predict = torch.max(output, dim=1)[1].item()
        print(f"label: {classes[predict]}")
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12

  • 相关阅读:
    SRv6----IS-IS扩展
    Vue3使用dataV报错问题解决
    Kubernetes(K8S) 配置管理-ConfigMap 介绍
    员工管理 封装组件全局注册 sortable数据升序处理 枚举取值三种方法
    大数据领域的常用开发语言详解
    通过termux tailscale huggingface 来手把手一步一步在手机上部署LLAMA2-7b和LLAMA2-70b大模型
    react源码分析:深度理解React.Context
    Service介绍
    2021-04-27 51单片机接通电源0~2秒内LED点阵显示状态为全亮...
    数据的内存分布笔记总结
  • 原文地址:https://blog.csdn.net/weixin_40330033/article/details/126687764