在学习猫狗二分类问题时,我主要参考的是这篇博客:http://t.csdn.cn/J7L0n
然后数据集下载的是:Dogs vs. Cats | Kaggle
下载的数据集一共有25000张,这里采用CPU训练速度非常慢,25000张图片训练一次要4h,所以我们仅选取了200张dog,200张cat用来train,200张dog,200张cat作为test。(从原数据集的train中复制出自己的训练集)。
数据集结构如下:
需要注意的是在以下代码中,train 和 test 目录下必须按类别分子文件夹(例如 cat/、dog/):`ImageFolder` 正是根据子文件夹名生成分类标签的,不分类则无法读取标签!
文件:data1
文件:dogs-vs-cats-迁移学习vgg16-train-small
- import torch
- import torchvision
- from torchvision import datasets,transforms,models
- import os
- import numpy as np
- import matplotlib.pyplot as plt
- from torch.autograd import Variable
- import time
-
- path='data1'
-
- transform=transforms.Compose([
- transforms.CenterCrop(224),
- transforms.ToTensor(),
- transforms.Normalize([0.5, 0.5, 0.5],[0.5, 0.5, 0.5])
- ])
-
- data_image={
- x:datasets.ImageFolder(root=os.path.join(path,x),
- transform=transform)
- for x in ["train","test"]
- }
-
- data_loader_image={
- x:torch.utils.data.DataLoader(dataset=data_image[x],
- batch_size=4,
- shuffle=True)
- for x in ["train","test"]
- }
-
- use_gpu=torch.cuda.is_available()
- print(use_gpu)
-
- classes=data_image["train"].classes #按文件夹名字分类
- classes_index=data_image["train"].class_to_idx #文件夹类名所对应的链值
- print(classes)
- print(classes_index)
-
- print("train data set:",len(data_image["train"]))
- print("test data set:",len(data_image["test"]))
-
- x_train,y_train=next(iter(data_loader_image["train"]))
- mean=[0.5, 0.5, 0.5]
- std=[0.5, 0.5, 0.5]
- img=torchvision.utils.make_grid(x_train)
- img=img.numpy().transpose((1,2,0))
- img=img*std+mean
-
- print([classes[i] for i in y_train])
- plt.imshow(img)
- plt.show()
-
- #选择预训练好的模型vgg16
- model=models.vgg16(pretrained=True)
- print(model)
-
- for parma in model.parameters():
- parma.requires_grad=False #预训练的网络不进行梯度更新
-
- #改变模型的全连接层,从原模型的1000个类到本项目的2个类
- model.classifier=torch.nn.Sequential(
- torch.nn.Linear(25088,4096),
- torch.nn.ReLU(),
- torch.nn.Dropout(p=0.5),
- torch.nn.Linear(4096,4096),
- torch.nn.ReLU(),
- torch.nn.Dropout(p=0.5),
- torch.nn.Linear(4096,2)
- )
-
- for index,parma in enumerate(model.classifier.parameters()):
- if index ==6:
- parma.requires_grad=True
-
- if use_gpu:
- model=model.cuda()
- print(parma)
-
- #定义代价函数和优化器
- cost=torch.nn.CrossEntropyLoss()
- optimizer=torch.optim.Adam(model.classifier.parameters())
-
- print(model)
-
- #开始训练模型
- n_epochs=1
- for epoch in range(n_epochs):
- since=time.time()
- print("Epoch{}/{}".format(epoch,n_epochs))
- print("-"*10)
- for param in ["train","test"]:
- if param == "train":
- model.train=True
- else:
- model.train=False
-
- running_loss=0.0
- running_correct=0
- batch=0
- for data in data_loader_image[param]:
- batch+=1
- x,y=data
- if use_gpu:
- x,y=Variable(x.cuda()),Variable(y.cuda())
- else:
- x,y=Variable(x),Variable(y)
-
- optimizer.zero_grad()
- y_pred=model(x)
- _,pred=torch.max(y_pred.data,1)
-
- loss=cost(y_pred,y)
- if param=="train":
- loss.backward()
- optimizer.step()
- running_loss+=loss.item() #running_loss+=loss.data[0]
- running_correct+=torch.sum(pred==y.data)
- if batch%10==0 and param=="train":
- print("Batch{},Train Loss:{:.4f},Train Acc:{:.4f}%".format(
- batch,running_loss/(4*batch),100*running_correct/(4*batch)))
-
- epoch_loss=running_loss/len(data_image[param])
- epoch_correct=100*running_correct/len(data_image[param])
-
- print("{}Loss:{:.4f},Correct:{:.4f}%".format(param,epoch_loss,epoch_correct))
- now_time=time.time()-since
- print("Training time is:{:.0f}m {:.0f}s".format(now_time//60,now_time%60))
-
- torch.save(model,'model.pth')
- #保存模型,备测试使用
输出结果:
False ['cat', 'dog'] {'cat': 0, 'dog': 1} train data set: 400 test data set: 400 ['cat', 'cat', 'dog', 'cat']VGG( (features): Sequential( (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (1): ReLU(inplace=True) (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (3): ReLU(inplace=True) (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (6): ReLU(inplace=True) (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (8): ReLU(inplace=True) (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (11): ReLU(inplace=True) (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (13): ReLU(inplace=True) (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (15): ReLU(inplace=True) (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (18): ReLU(inplace=True) (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (20): ReLU(inplace=True) (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (22): ReLU(inplace=True) (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (25): ReLU(inplace=True) (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (27): ReLU(inplace=True) (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (29): ReLU(inplace=True) (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) ) (avgpool): AdaptiveAvgPool2d(output_size=(7, 7)) (classifier): Sequential( (0): Linear(in_features=25088, out_features=4096, bias=True) (1): ReLU(inplace=True) (2): Dropout(p=0.5, 
inplace=False) (3): Linear(in_features=4096, out_features=4096, bias=True) (4): ReLU(inplace=True) (5): Dropout(p=0.5, inplace=False) (6): Linear(in_features=4096, out_features=1000, bias=True) ) ) Parameter containing: tensor([-0.0110, -0.0124], requires_grad=True) VGG( (features): Sequential( (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (1): ReLU(inplace=True) (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (3): ReLU(inplace=True) (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (6): ReLU(inplace=True) (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (8): ReLU(inplace=True) (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (11): ReLU(inplace=True) (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (13): ReLU(inplace=True) (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (15): ReLU(inplace=True) (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (18): ReLU(inplace=True) (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (20): ReLU(inplace=True) (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (22): ReLU(inplace=True) (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (25): ReLU(inplace=True) (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (27): ReLU(inplace=True) (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (29): ReLU(inplace=True) (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) ) (avgpool): 
AdaptiveAvgPool2d(output_size=(7, 7)) (classifier): Sequential( (0): Linear(in_features=25088, out_features=4096, bias=True) (1): ReLU() (2): Dropout(p=0.5, inplace=False) (3): Linear(in_features=4096, out_features=4096, bias=True) (4): ReLU() (5): Dropout(p=0.5, inplace=False) (6): Linear(in_features=4096, out_features=2, bias=True) ) ) Epoch0/1 ---------- Batch10,Train Loss:1.3042,Train Acc:60.0000% Batch20,Train Loss:1.0106,Train Acc:68.7500% Batch30,Train Loss:1.1665,Train Acc:74.1667% Batch40,Train Loss:1.1059,Train Acc:78.1250% Batch50,Train Loss:0.9046,Train Acc:81.0000% Batch60,Train Loss:1.4522,Train Acc:79.5833% Batch70,Train Loss:1.8163,Train Acc:80.7143% Batch80,Train Loss:1.6358,Train Acc:82.1875% Batch90,Train Loss:1.5268,Train Acc:82.5000% Batch100,Train Loss:1.4596,Train Acc:83.2500% trainLoss:1.4596,Correct:83.2500% testLoss:0.4573,Correct:92.7500% Training time is:4m 33s
文件:dogs-vs-cats-迁移学习vgg16-test-small
- import os
- import torch
- import torchvision
- from torchvision import datasets,transforms,models
- import numpy as np
- import matplotlib.pyplot as plt
- from torch.autograd import Variable
- import time
- model=torch.load('model.pth')
- path='data1'
-
- transform=transforms.Compose([
- transforms.CenterCrop(224),
- transforms.ToTensor(),
- transforms.Normalize([0.5, 0.5, 0.5],[0.5, 0.5, 0.5])
- ])
-
- data_test_img=datasets.ImageFolder(
- root="data1/test/",
- transform=transform
- )
-
- data_loader_test_img=torch.utils.data.DataLoader(
- dataset=data_test_img,
- batch_size=16,
- shuffle=True
- )
-
- classes=data_test_img.classes
-
- image,label=next(iter(data_loader_test_img))
- images=Variable(image)
- y_pred=model(images)
- _,pred=torch.max(y_pred.data,1)
- print(pred)
-
- img=torchvision.utils.make_grid(image)
- img=img.numpy().transpose(1,2,0)
- mean=[0.5, 0.5, 0.5]
- std=[0.5, 0.5, 0.5]
- img=img*std+mean
- print("Pred Label:",[classes[i] for i in pred])
- plt.imshow(img)
- plt.show()
输出:
tensor([1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]) Pred Label: ['dog', 'cat', 'cat', 'cat', 'cat', 'cat', 'cat', 'cat', 'cat', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog']
200张cat和200张dog的训练效果已经很可观,有GPU条件下,可以采用更多的数据,效果会非常理想!
2022/7/30