AlexNet was first published at NIPS 2012 and won the ILSVRC 2012 competition, marking the start of deep learning models in image classification. It was proposed by Alex Krizhevsky, Ilya Sutskever, and Geoffrey E. Hinton, hence the name AlexNet. Paper: ImageNet Classification with Deep Convolutional Neural Networks

Note that the input size given in the original paper is 224*224, but with the stated convolution parameters, producing a 55*55 feature map in the next layer would require rounding up. The reproduction code therefore uses a 227*227 input, which works out exactly and is more convenient to implement: (227 - 11) / 4 + 1 = 55.

Read the network structure diagram together with the code. In the first two convolutional layers, the input and output channels are controlled via in_channels/out_channels, and the kernel_size and stride change the spatial size of the feature map; the last three convolutional layers use kernel_size=3 with padding=1, so they leave the spatial size unchanged. For both Conv2d and MaxPool2d, the output size is: output = floor((input - kernel_size + 2 * padding) / stride) + 1
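To make this concrete, here is a small sketch (my addition, not from the original post) that applies this formula to trace the feature-map size through AlexNet's convolution and pooling layers:

import math

def out_size(size, kernel, stride=1, padding=0):
    """Output spatial size for Conv2d / MaxPool2d (floor division)."""
    return math.floor((size - kernel + 2 * padding) / stride) + 1

s = 227                    # input 227 x 227
s = out_size(s, 11, 4)     # conv1 -> 55
s = out_size(s, 3, 2)      # pool1 -> 27
s = out_size(s, 5, 1, 2)   # conv2 -> 27 (padding keeps the size)
s = out_size(s, 3, 2)      # pool2 -> 13
s = out_size(s, 3, 1, 1)   # conv3/4/5 keep 13
s = out_size(s, 3, 2)      # pool3 -> 6
print(s)                   # 6, matching the 256*6*6 classifier input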

- Platform: Kaggle, with GPU acceleration
- Dataset: medical-mnist, an open dataset on Kaggle with 6 classes, 47163 training images and 11791 test images
- The AlexNet architecture and basic settings are essentially preserved; only the dataset is different, and distributed training is not used

How to use the source code:
Step 1: Search for medical mnist on Kaggle, find the dataset, and click New Notebook
Step 2: Turn on GPU acceleration
Step 3: Copy the code below and run it!
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import os
import random
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
train_transforms = transforms.Compose([
    transforms.RandomRotation(10),       # random rotation in (-10, 10) degrees
    transforms.RandomHorizontalFlip(),   # horizontal flip with probability 0.5
    transforms.Resize(227),
    transforms.CenterCrop(227),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],   # per-channel (data - mean) / std
                         [0.229, 0.224, 0.225])
])
# ImageFolder's default loader converts images to RGB, so the grayscale
# Medical MNIST images come out as 3-channel tensors
dataset = datasets.ImageFolder(root="../input/medical-mnist", transform=train_transforms)
# stratify on the labels so both splits keep the same class proportions
train_indices, test_indices = train_test_split(list(range(len(dataset.targets))), test_size=0.2, stratify=dataset.targets)
train_data = torch.utils.data.Subset(dataset, train_indices)
test_data = torch.utils.data.Subset(dataset, test_indices)
print(len(train_data), len(test_data))
train_loader = DataLoader(train_data, batch_size=12, shuffle=True)
test_loader = DataLoader(test_data, batch_size=12)
print(len(test_loader), len(train_loader))
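# Optional sanity check (my addition, not in the original post): confirm
# that the stratified split keeps similar class proportions in both subsets.
print(np.bincount(np.array(dataset.targets)[train_indices]))
print(np.bincount(np.array(dataset.targets)[test_indices]))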
# AlexNet
class AlexNet(nn.Module):
    """Neural network model, following the original AlexNet architecture."""
    def __init__(self, num_classes):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),    # (b x 96 x 55 x 55)
            nn.ReLU(inplace=False),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),                                  # (b x 96 x 27 x 27)
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2),  # (b x 256 x 27 x 27)
            nn.ReLU(inplace=False),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),                                  # (b x 256 x 13 x 13)
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1), # (b x 384 x 13 x 13)
            nn.ReLU(inplace=False),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1), # (b x 384 x 13 x 13)
            nn.ReLU(inplace=False),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1), # (b x 256 x 13 x 13)
            nn.ReLU(inplace=False),
            nn.MaxPool2d(kernel_size=3, stride=2)                                   # (b x 256 x 6 x 6)
        )
        # classifier: the fully connected output layers
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(in_features=(256 * 6 * 6), out_features=4096),
            nn.ReLU(inplace=False),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=False),
            nn.Linear(in_features=4096, out_features=num_classes)
        )
        self.init_bias()  # initialize weights and biases
    def init_bias(self):
        for layer in self.net:
            if isinstance(layer, nn.Conv2d):
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                nn.init.constant_(layer.bias, 0)
        # per the original paper, bias = 1 for the 2nd, 4th, and 5th conv layers
        nn.init.constant_(self.net[4].bias, 1)
        nn.init.constant_(self.net[10].bias, 1)
        nn.init.constant_(self.net[12].bias, 1)
    def forward(self, x):
        x = self.net(x)
        x = x.view(-1, 256 * 6 * 6)  # flatten for the linear layer input
        return self.classifier(x)
# count model parameters
def count_parameters(model):
    params = [p.numel() for p in model.parameters() if p.requires_grad]
    for item in params:
        print(f'{item:>8}')
    print(f'________\n{sum(params):>8}')
count_parameters(AlexNet(6))
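# Optional sanity check (my addition, not in the original post): a dummy
# forward pass on CPU confirms that a 227 x 227 input yields 6 logits.
with torch.no_grad():
    print(AlexNet(num_classes=6)(torch.randn(1, 3, 227, 227)).shape)  # torch.Size([1, 6])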
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
alexnet = AlexNet(num_classes=6).to(device)
optimizer = torch.optim.Adam(alexnet.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss().to(device)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
import time
start_time = time.time()
train_losses = []
test_losses = []
train_acc = []
test_acc = []
epochs = 90
for i in range(epochs):
    # training phase (enables dropout)
    alexnet.train()
    total_train_loss = 0
    total_train_acc = 0
    for b, (X_train, y_train) in enumerate(train_loader):
        X_train, y_train = X_train.to(device), y_train.to(device)
        y_pred = alexnet(X_train)
        loss = criterion(y_pred, y_train)
        total_train_loss += loss.item()
        total_train_acc += (y_pred.argmax(1) == y_train).sum().item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    lr_scheduler.step()  # decay the learning rate every 30 epochs
    train_losses.append(total_train_loss)
    train_acc.append(total_train_acc / len(train_data))
    # evaluation phase (disables dropout)
    alexnet.eval()
    total_test_loss = 0
    total_test_acc = 0
    with torch.no_grad():
        for b, (X_test, y_test) in enumerate(test_loader):
            X_test, y_test = X_test.to(device), y_test.to(device)
            y_val = alexnet(X_test)
            loss = criterion(y_val, y_test)
            total_test_loss += loss.item()
            total_test_acc += (y_val.argmax(1) == y_test).sum().item()
    test_losses.append(total_test_loss)
    test_acc.append(total_test_acc / len(test_data))
    print(f"epoch:{i+1},\t train_loss:{total_train_loss} \t train_acc:{total_train_acc/len(train_data)} \t test_loss:{total_test_loss} \t test_acc:{total_test_acc/len(test_data)}")
print(f'\nDuration: {time.time() - start_time:.0f} seconds')
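The loss and accuracy lists collected above can be visualized with matplotlib (already imported). This plotting snippet is my addition, not part of the original post:

plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='train loss')
plt.plot(test_losses, label='test loss')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(train_acc, label='train acc')
plt.plot(test_acc, label='test acc')
plt.legend()
plt.show()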
- Copying and running the code block by block helps with understanding
- The total number of parameters is about 58305926, close to the figure given in the original paper
- In my opinion, the use of ImageFolder, train_test_split, and torch.utils.data.Subset for data loading (with the sanity check shown above) is also quite important and worth noting
Finally, here is a link to the source code for reference: AlexNet代码_清晰易懂
I have also anticipated the error you are most likely to hit: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [128, 4096]]......
Fix: change the True passed to the ReLU and Dropout layers in the source code to False.
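For intuition, here is a minimal self-contained example (my own illustration, separate from the AlexNet code above) of how an in-place operation can break gradient computation: sigmoid's backward pass needs its own output, so modifying that output in place triggers the same class of RuntimeError.

import torch

a = torch.randn(3, requires_grad=True)
b = torch.sigmoid(a)  # sigmoid's backward pass needs b itself
b.add_(1)             # in-place modification of b
b.sum().backward()    # RuntimeError: ... modified by an inplace operation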