• 0 简单的图像分类


    本文主要针对交通标识图片进行分类,包含62类,这个就是当前科大讯飞比赛,目前准确率在0.94左右,难点如下:

    1 类别不均衡,有的种类图片有2百多张,有的只有不到10张;

    2 像素大小不同,导致有的图片很清晰,有的很模糊;

    直接上代码:

    1. import os
    2. import torch
    3. import torchvision
    4. import torch.nn as nn
    5. import torch.nn.functional as F
    6. import torch.optim as optim
    7. from torch.utils.data import random_split
    8. from torchvision import models, datasets, transforms
    9. import torch.utils.data as tud
    10. import numpy as np
    11. from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
    12. from PIL import Image
    13. import matplotlib.pyplot as plt
    14. import warnings
    15. import pandas as pd
    16. from torch.utils.data import random_split
    17. warnings.filterwarnings("ignore")
    # Detect whether a CUDA GPU is available; fall back to CPU otherwise.
    # (Was computed twice — once inside print(), once for the variable.)
    device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
    print(device)
    n_classes = 62   # number of target classes (62 traffic-sign categories)
    preteain = False # load pretrained weights: True needs network access, False works offline
    epoches = 10     # number of training epochs
    # Training images are organised one-subfolder-per-class (ImageFolder
    # layout).  Resize everything to the 224x224 input expected by
    # ImageNet-style backbones and normalise with ImageNet statistics.
    train_tfms = transforms.Compose([
        transforms.Resize((224,224)),
        #transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    traindataset = datasets.ImageFolder(root='../all/data/train_set/', transform=train_tfms)
    # Hold out 20% of the labelled images for validation.
    train_val_ratio = 0.8
    train_size = int(len(traindataset) * train_val_ratio)
    val_size = len(traindataset) - train_size
    train_dataset, val_dataset = random_split(traindataset, [train_size, val_size])
    # Class names in the order ImageFolder assigned the integer labels.
    classes = traindataset.classes
    print(classes)
    # ResNeXt-50 backbone; swap in a fresh 62-way classification head.
    model = models.resnext50_32x4d(pretrained=preteain)
    #model = models.resnet34(pretrained=preteain)
    if preteain:
        # Freeze the pretrained backbone so only the new head is trained.
        for param in model.parameters():
            param.requires_grad = False
    # Read in_features from the existing head instead of hard-coding 2048,
    # so the commented-in resnet34 alternative (512 features) also works.
    model.fc = nn.Linear(in_features=model.fc.in_features, out_features=n_classes, bias=True)
    model = model.to(device)
    def train_model(model, train_loader, loss_fn, optimizer, epoch):
        """Run one training epoch over `train_loader`.

        Returns (mean_loss, accuracy_percent) computed over all samples;
        `epoch` is only used (0-based) for the progress printout.
        """
        model.train()
        total_loss = 0.
        total_corrects = 0
        total = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()  # clear stale gradients before the new step
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            preds = outputs.argmax(dim=1)
            # .item() keeps the running counters as plain Python numbers
            # instead of silently accumulating device tensors.
            total_corrects += preds.eq(labels).sum().item()
            # loss is batch-averaged; multiply back by batch size so the
            # final division yields a true per-sample mean.
            total_loss += loss.item() * inputs.size(0)
            total += labels.size(0)
        total_loss = total_loss / total
        acc = 100 * total_corrects / total
        print("轮次:%4d|训练集损失:%.5f|训练集准确率:%6.2f%%" % (epoch + 1, total_loss, acc))
        return total_loss, acc
    def test_model(model, test_loader, loss_fn, optimizer, epoch):
        """Evaluate the model on `test_loader` without updating weights.

        `optimizer` is unused; it is kept so the signature mirrors
        train_model.  Returns (mean_loss, accuracy_percent).
        """
        # BUG FIX: was model.train(), which left dropout active and let
        # BatchNorm update its running stats on validation data.
        model.eval()
        total_loss = 0.
        total_corrects = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
                preds = outputs.argmax(dim=1)
                total += labels.size(0)
                # un-average the batch loss so the final division is a
                # true per-sample mean even with a ragged last batch
                total_loss += loss.item() * inputs.size(0)
                total_corrects += preds.eq(labels).sum().item()
        loss = total_loss / total
        accuracy = 100 * total_corrects / total
        print("轮次:%4d|测试集损失:%.5f|测试集准确率:%6.2f%%" % (epoch + 1, loss, accuracy))
        return loss, accuracy
    loss_fn = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    # Validation data must not be shuffled: deterministic order makes the
    # per-epoch metrics reproducible (shuffling only matters for training).
    test_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    for epoch in range(epoches):
        loss1, acc1 = train_model(model, train_loader, loss_fn, optimizer, epoch)
        loss2, acc2 = test_model(model, test_loader, loss_fn, optimizer, epoch)

    模型预测

    sub = pd.read_csv("../all/data/example.csv")
    # Same preprocessing as training (minus augmentation).
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    model.eval()
    softmax = nn.Softmax(dim=1)
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        for path in os.listdir("../all/data/test_set/"):
            try:
                # Force 3-channel RGB: a grayscale or RGBA test image would
                # otherwise crash the 3-channel Normalize transform.
                img = Image.open("../all/data/test_set/" + path).convert('RGB')
                img_p = transform(img).unsqueeze(0).to(device)
                output = model(img_p)
                pred = output.argmax(dim=1).item()
                if img.size[0] * img.size[1] < 2000:
                    # Display very small (low-resolution) images for manual inspection.
                    plt.imshow(img)
                    plt.show()
                p = 100 * softmax(output).cpu().numpy()[0]  # class probabilities (%)
                sub.loc[sub['ImageID'] == path, 'label'] = classes[pred]
                print(f'{path} size = {img.size}, 该图像预测类别为:', classes[pred])
            except Exception as e:
                # Narrowed from a bare `except:` so Ctrl-C/SystemExit still
                # propagate, and the actual failure reason is reported.
                print(f'error {path}: {e}')
    # Two images the model gets wrong, corrected by hand after inspection.
    sub.loc[sub['ImageID']=='e57471de-6527-4b9b-90a8-4f1d93909216.png','label'] = 'Under Construction'
    sub.loc[sub['ImageID']=='ff38d59e-9a11-41e4-901b-67097bb0e960.png','label'] = 'Keep Left'
    sub.columns = ['ImageID','Sign Name']
    # Map human-readable sign names to the submission label ids.
    label_map = pd.read_excel("../all/data/label_map.xlsx")
    sub_all = pd.merge(left=sub,right=label_map,on='Sign Name',how='left')
    #sub_all[['ImageID','label']].to_csv('./sub_resnet34_add_img_ratio_drop_dire.csv',index=False)

    个人的心得:

    1 如何进行图片增强,图片增强应该注意什么(方向问题);总结一些transforms,数据增强的方式 - 代码天地

    2 模型大小如何进行选择;

    更新:

    6.17: 昨天我尝试使用更多的增强技术,直接上到0.98;

  • 相关阅读:
    day09扩展:键盘录入笔记
    解决kkFileView4.4.0版本pdf、word不能预览问题
    Rainiverse VoxEdit 大赛
    C#进阶06——多线程,预处理器指令
    如何在Linux系统部署MeterSphere服务并配置固定公网访问地址
    SpringMVC与JavaConfig笔记整理
    SpringCloud ——@RefreshScope
    Remix 2.0 正式发布,现代化全栈Web框架!
    跨平台SIP 客户端-linphone下载、使用、开启视频H264
    第十九章·迭代器模式
  • 原文地址:https://blog.csdn.net/qq_28611929/article/details/139738105