• Pytorch R-CNN目标检测-汽车car


    概述

    目标检测(Object Detection)就是一种基于目标几何和统计特征的图像分割,它将目标的分割和识别合二为一,通俗点说就是给定一张图片要精确的定位到物体所在位置,并完成对物体类别的识别。其准确性和实时性是整个系统的一项重要能力。

    R-CNN的全称是Region-CNN(区域卷积神经网络),是第一个成功将深度学习应用到目标检测上的算法。R-CNN基于卷积神经网络(CNN),线性回归,和支持向量机(SVM)等算法,实现目标检测技术。

    以下的代码和项目工程是引用他人的,此文只对其做一个简单的流程梳理。

    这里先贴出工具脚本util.py的代码如下

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/2/29 下午7:31
    4. @file: util.py
    5. @author: zj
    6. @description:
    7. """
    8. import os
    9. import numpy as np
    10. import xmltodict
    11. import torch
    12. import matplotlib.pyplot as plt
    13. def check_dir(data_dir):
    14. if not os.path.exists(data_dir):
    15. os.mkdir(data_dir)
    16. def parse_car_csv(csv_dir):
    17. csv_path = os.path.join(csv_dir, 'car.csv')
    18. samples = np.loadtxt(csv_path, dtype='str')
    19. return samples
    20. def parse_xml(xml_path):
    21. """
    22. 解析xml文件,返回标注边界框坐标
    23. """
    24. # print(xml_path)
    25. with open(xml_path, 'rb') as f:
    26. xml_dict = xmltodict.parse(f)
    27. # print(xml_dict)
    28. bndboxs = list()
    29. objects = xml_dict['annotation']['object']
    30. if isinstance(objects, list):
    31. for obj in objects:
    32. obj_name = obj['name']
    33. difficult = int(obj['difficult'])
    34. if 'car'.__eq__(obj_name) and difficult != 1:
    35. bndbox = obj['bndbox']
    36. bndboxs.append((int(bndbox['xmin']), int(bndbox['ymin']), int(bndbox['xmax']), int(bndbox['ymax'])))
    37. elif isinstance(objects, dict):
    38. obj_name = objects['name']
    39. difficult = int(objects['difficult'])
    40. if 'car'.__eq__(obj_name) and difficult != 1:
    41. bndbox = objects['bndbox']
    42. bndboxs.append((int(bndbox['xmin']), int(bndbox['ymin']), int(bndbox['xmax']), int(bndbox['ymax'])))
    43. else:
    44. pass
    45. return np.array(bndboxs)
    46. def iou(pred_box, target_box):
    47. """
    48. 计算候选建议和标注边界框的IoU
    49. :param pred_box: 大小为[4]
    50. :param target_box: 大小为[N, 4]
    51. :return: [N]
    52. """
    53. if len(target_box.shape) == 1:
    54. target_box = target_box[np.newaxis, :]
    55. xA = np.maximum(pred_box[0], target_box[:, 0])
    56. yA = np.maximum(pred_box[1], target_box[:, 1])
    57. xB = np.minimum(pred_box[2], target_box[:, 2])
    58. yB = np.minimum(pred_box[3], target_box[:, 3])
    59. # 计算交集面积
    60. intersection = np.maximum(0.0, xB - xA) * np.maximum(0.0, yB - yA)
    61. # 计算两个边界框面积
    62. boxAArea = (pred_box[2] - pred_box[0]) * (pred_box[3] - pred_box[1])
    63. boxBArea = (target_box[:, 2] - target_box[:, 0]) * (target_box[:, 3] - target_box[:, 1])
    64. scores = intersection / (boxAArea + boxBArea - intersection)
    65. return scores
    66. def compute_ious(rects, bndboxs):
    67. iou_list = list()
    68. for rect in rects:
    69. scores = iou(rect, bndboxs)
    70. iou_list.append(max(scores))
    71. return iou_list
    72. def save_model(model, model_save_path):
    73. # 保存最好的模型参数
    74. check_dir('./models')
    75. torch.save(model.state_dict(), model_save_path)
    76. def plot_loss(loss_list):
    77. x = list(range(len(loss_list)))
    78. fg = plt.figure()
    79. plt.plot(x, loss_list)
    80. plt.title('loss')
    81. plt.savefig('./loss.png')

    数据集准备

    数据集下载

    运行项目中的pascal_voc.py脚本,这个脚本是下载数据集。

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/2/29 下午2:51
    4. @file: pascal_voc.py
    5. @author: zj
    6. @description: 加载PASCAL VOC 2007数据集
    7. """
    8. import cv2
    9. import numpy as np
    10. from torchvision.datasets import VOCDetection
    11. if __name__ == '__main__':
    12. """
    13. 下载PASCAL VOC数据集
    14. """
    15. dataset = VOCDetection('../../data', year='2007', image_set='trainval', download=True)
    16. # img, target = dataset.__getitem__(1000)
    17. # img = np.array(img)
    18. #
    19. # print(target)
    20. # print(img.shape)
    21. #
    22. # cv2.imshow('img', img)
    23. # cv2.waitKey(0)

    从数据集中提取出car相关的数据

    由于本文只针对汽车car进行目标检测,所以只需要car相关的数据。

    执行pascal_voc_car.py脚本,脚本依次做了以下事:

    ①读取'../../data/VOCdevkit/VOC2007/ImageSets/Main/car_train.txt'文件和'../../data/VOCdevkit/VOC2007/ImageSets/Main/car_val.txt'文件

    car_train.txt和car_val.txt文件的内容格式如下

    ②然后将以上文件内容分别保存到'../../data/voc_car/train/car.csv'和'../../data/voc_car/val/car.csv'中

    car.csv的内容格式如下

    ③最后根据筛选出来的car的相关数据,从'../../data/VOCdevkit/VOC2007/Annotations/'中复制相关.xml文件到'../../data/voc_car/train/Annotations/'和'../../data/voc_car/val/Annotations/',以及从'../../data/VOCdevkit/VOC2007/JPEGImages/'中复制相关.jpg文件到'../../data/voc_car/train/JPEGImages/'和'../../data/voc_car/val/JPEGImages/'

    以下是pascal_voc_car.py脚本代码

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/2/29 下午2:43
    4. @file: pascal_voc_car.py
    5. @author: zj
    6. @description: 从PASCAL VOC 2007数据集中抽取类别Car。保留1/10的数目
    7. """
    8. import os
    9. import shutil
    10. import random
    11. import numpy as np
    12. import xmltodict
    13. from utils.util import check_dir
    14. suffix_xml = '.xml'
    15. suffix_jpeg = '.jpg'
    16. car_train_path = '../../data/VOCdevkit/VOC2007/ImageSets/Main/car_train.txt'
    17. car_val_path = '../../data/VOCdevkit/VOC2007/ImageSets/Main/car_val.txt'
    18. voc_annotation_dir = '../../data/VOCdevkit/VOC2007/Annotations/'
    19. voc_jpeg_dir = '../../data/VOCdevkit/VOC2007/JPEGImages/'
    20. car_root_dir = '../../data/voc_car/'
    21. def parse_train_val(data_path):
    22. """
    23. 提取指定类别图像
    24. """
    25. samples = []
    26. with open(data_path, 'r') as file:
    27. lines = file.readlines()
    28. for line in lines:
    29. res = line.strip().split(' ')
    30. if len(res) == 3 and int(res[2]) == 1:
    31. samples.append(res[0])
    32. return np.array(samples)
    33. def sample_train_val(samples):
    34. """
    35. 随机采样样本,减少数据集个数(留下1/10)
    36. """
    37. for name in ['train', 'val']:
    38. dataset = samples[name]
    39. length = len(dataset)
    40. random_samples = random.sample(range(length), int(length / 10))
    41. # print(random_samples)
    42. new_dataset = dataset[random_samples]
    43. samples[name] = new_dataset
    44. return samples
    45. def save_car(car_samples, data_root_dir, data_annotation_dir, data_jpeg_dir):
    46. """
    47. 保存类别Car的样本图片和标注文件
    48. """
    49. for sample_name in car_samples:
    50. src_annotation_path = os.path.join(voc_annotation_dir, sample_name + suffix_xml)
    51. dst_annotation_path = os.path.join(data_annotation_dir, sample_name + suffix_xml)
    52. shutil.copyfile(src_annotation_path, dst_annotation_path)
    53. src_jpeg_path = os.path.join(voc_jpeg_dir, sample_name + suffix_jpeg)
    54. dst_jpeg_path = os.path.join(data_jpeg_dir, sample_name + suffix_jpeg)
    55. shutil.copyfile(src_jpeg_path, dst_jpeg_path)
    56. csv_path = os.path.join(data_root_dir, 'car.csv')
    57. np.savetxt(csv_path, np.array(car_samples), fmt='%s')
    58. if __name__ == '__main__':
    59. samples = {'train': parse_train_val(car_train_path), 'val': parse_train_val(car_val_path)}
    60. print(samples)
    61. # samples = sample_train_val(samples)
    62. # print(samples)
    63. check_dir(car_root_dir)
    64. for name in ['train', 'val']:
    65. data_root_dir = os.path.join(car_root_dir, name)
    66. data_annotation_dir = os.path.join(data_root_dir, 'Annotations')
    67. data_jpeg_dir = os.path.join(data_root_dir, 'JPEGImages')
    68. check_dir(data_root_dir)
    69. check_dir(data_annotation_dir)
    70. check_dir(data_jpeg_dir)
    71. save_car(samples[name], data_root_dir, data_annotation_dir, data_jpeg_dir)
    72. print('done')

    卷积神经网络微调模型

    准备微调数据正负样本集

    执行create_finetune_data.py脚本,这个脚本主要做了以下事

    ①把'../../data/voc_car/train/JPEGImages/'和'../../data/voc_car/val/JPEGImages/'中的.jpg文件复制到'../../data/finetune_car/train/JPEGImages/'和'../../data/finetune_car/val/JPEGImages/',然后又把'../../data/voc_car/train/car.csv'和'../../data/voc_car/val/car.csv'分别复制到'../../data/finetune_car/train/car.csv'和'../../data/finetune_car/val/car.csv'

    ②根据'../../data/finetune_car/train/car.csv'和'../../data/finetune_car/val/car.csv'文件内容分别读取'../../data/finetune_car/train/JPEGImages/'和'../../data/finetune_car/val/JPEGImages/'中的图片,并传入parse_annotation_jpeg方法

    ③parse_annotation_jpeg方法中,先获取候选框rects,然后从.xml文件中获取标注框bndboxs,接着计算候选框和标注框的IoU得到iou_list,遍历iou_list,选出IoU≥0.5的作为正样本,0<IoU<0.5且面积大于最大标注框面积1/5的作为负样本,返回后由主流程把正负样本标注分别保存为sample_name_1.csv和sample_name_0.csv文件

    其文件内容格式如下

    以下是create_finetune_data.py脚本代码

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/2/29 下午7:22
    4. @file: create_finetune_data.py
    5. @author: zj
    6. @description: 创建微调数据集
    7. """
    8. import time
    9. import shutil
    10. import numpy as np
    11. import cv2
    12. import os
    13. import selectivesearch
    14. from utils.util import check_dir
    15. from utils.util import parse_car_csv
    16. from utils.util import parse_xml
    17. from utils.util import compute_ious
    18. # train
    19. # positive num: 66517
    20. # negatie num: 464340
    21. # val
    22. # positive num: 64712
    23. # negative num: 415134
    24. def parse_annotation_jpeg(annotation_path, jpeg_path, gs):
    25. """
    26. 获取正负样本(注:忽略属性difficult为True的标注边界框)
    27. 正样本:候选建议与标注边界框IoU大于等于0.5
    28. 负样本:IoU大于0,小于0.5。为了进一步限制负样本数目,其大小必须大于标注框的1/5
    29. """
    30. img = cv2.imread(jpeg_path)
    31. selectivesearch.config(gs, img, strategy='q')
    32. # 计算候选建议
    33. rects = selectivesearch.get_rects(gs)
    34. # 获取标注边界框
    35. bndboxs = parse_xml(annotation_path)
    36. # 标注框大小
    37. maximum_bndbox_size = 0
    38. for bndbox in bndboxs:
    39. xmin, ymin, xmax, ymax = bndbox
    40. bndbox_size = (ymax - ymin) * (xmax - xmin)
    41. if bndbox_size > maximum_bndbox_size:
    42. maximum_bndbox_size = bndbox_size
    43. # 获取候选建议和标注边界框的IoU
    44. iou_list = compute_ious(rects, bndboxs)
    45. positive_list = list()
    46. negative_list = list()
    47. for i in range(len(iou_list)):
    48. xmin, ymin, xmax, ymax = rects[i]
    49. rect_size = (ymax - ymin) * (xmax - xmin)
    50. iou_score = iou_list[i]
    51. if iou_list[i] >= 0.5:
    52. # 正样本
    53. positive_list.append(rects[i])
    54. if 0 < iou_list[i] < 0.5 and rect_size > maximum_bndbox_size / 5.0:
    55. # 负样本
    56. negative_list.append(rects[i])
    57. else:
    58. pass
    59. return positive_list, negative_list
    60. if __name__ == '__main__':
    61. car_root_dir = '../../data/voc_car/'
    62. finetune_root_dir = '../../data/finetune_car/'
    63. check_dir(finetune_root_dir)
    64. gs = selectivesearch.get_selective_search()
    65. for name in ['train', 'val']:
    66. src_root_dir = os.path.join(car_root_dir, name)
    67. src_annotation_dir = os.path.join(src_root_dir, 'Annotations')
    68. src_jpeg_dir = os.path.join(src_root_dir, 'JPEGImages')
    69. dst_root_dir = os.path.join(finetune_root_dir, name)
    70. dst_annotation_dir = os.path.join(dst_root_dir, 'Annotations')
    71. dst_jpeg_dir = os.path.join(dst_root_dir, 'JPEGImages')
    72. check_dir(dst_root_dir)
    73. check_dir(dst_annotation_dir)
    74. check_dir(dst_jpeg_dir)
    75. total_num_positive = 0
    76. total_num_negative = 0
    77. samples = parse_car_csv(src_root_dir)
    78. # 复制csv文件
    79. src_csv_path = os.path.join(src_root_dir, 'car.csv')
    80. dst_csv_path = os.path.join(dst_root_dir, 'car.csv')
    81. shutil.copyfile(src_csv_path, dst_csv_path)
    82. for sample_name in samples:
    83. since = time.time()
    84. src_annotation_path = os.path.join(src_annotation_dir, sample_name + '.xml')
    85. src_jpeg_path = os.path.join(src_jpeg_dir, sample_name + '.jpg')
    86. # 获取正负样本
    87. positive_list, negative_list = parse_annotation_jpeg(src_annotation_path, src_jpeg_path, gs)
    88. total_num_positive += len(positive_list)
    89. total_num_negative += len(negative_list)
    90. dst_annotation_positive_path = os.path.join(dst_annotation_dir, sample_name + '_1' + '.csv')
    91. dst_annotation_negative_path = os.path.join(dst_annotation_dir, sample_name + '_0' + '.csv')
    92. dst_jpeg_path = os.path.join(dst_jpeg_dir, sample_name + '.jpg')
    93. # 保存图片
    94. shutil.copyfile(src_jpeg_path, dst_jpeg_path)
    95. # 保存正负样本标注
    96. np.savetxt(dst_annotation_positive_path, np.array(positive_list), fmt='%d', delimiter=' ')
    97. np.savetxt(dst_annotation_negative_path, np.array(negative_list), fmt='%d', delimiter=' ')
    98. time_elapsed = time.time() - since
    99. print('parse {}.png in {:.0f}m {:.0f}s'.format(sample_name, time_elapsed // 60, time_elapsed % 60))
    100. print('%s positive num: %d' % (name, total_num_positive))
    101. print('%s negative num: %d' % (name, total_num_negative))
    102. print('done')

    自定义微调数据集类

    custom_finetune_dataset.py,该脚本不用主动执行,在训练微调模型的时候,自然会调用到,以下只说这个脚本做了什么事

    ①CustomFinetuneDataset类继承自Dataset

    ②__init__时读取'../../data/finetune_car/train/JPEGImages/'或'../../data/finetune_car/val/JPEGImages/'文件夹中的图片,读取'../../data/finetune_car/train/Annotations/'或'../../data/finetune_car/val/Annotations/'中的正负样本集,记录正样本总数self.total_positive_num,负样本总数self.total_negative_num,正样本候选框positive_rects,负样本候选框negative_rects

    ③__getitem__方法传入index,如果index小于正样本总数self.total_positive_num,那么返回对应正样本的图片和标签(1),否则返回对应负样本的图片和标签(0)。

    以下是custom_finetune_dataset.py脚本代码

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/3/3 下午7:06
    4. @file: custom_finetune_dataset.py
    5. @author: zj
    6. @description: 自定义微调数据类
    7. """
    8. import numpy as np
    9. import os
    10. import cv2
    11. from PIL import Image
    12. from torch.utils.data import Dataset
    13. from torch.utils.data import DataLoader
    14. import torchvision.transforms as transforms
    15. from utils.util import parse_car_csv
    16. class CustomFinetuneDataset(Dataset):
    17. def __init__(self, root_dir, transform=None):
    18. samples = parse_car_csv(root_dir)
    19. jpeg_images = [cv2.imread(os.path.join(root_dir, 'JPEGImages', sample_name + ".jpg"))
    20. for sample_name in samples]
    21. positive_annotations = [os.path.join(root_dir, 'Annotations', sample_name + '_1.csv')
    22. for sample_name in samples]
    23. negative_annotations = [os.path.join(root_dir, 'Annotations', sample_name + '_0.csv')
    24. for sample_name in samples]
    25. # 边界框大小
    26. positive_sizes = list()
    27. negative_sizes = list()
    28. # 边界框坐标
    29. positive_rects = list()
    30. negative_rects = list()
    31. for annotation_path in positive_annotations:
    32. rects = np.loadtxt(annotation_path, dtype=int, delimiter=' ')
    33. # 存在文件为空或者文件中仅有单行数据
    34. if len(rects.shape) == 1:
    35. # 是否为单行
    36. if rects.shape[0] == 4:
    37. positive_rects.append(rects)
    38. positive_sizes.append(1)
    39. else:
    40. positive_sizes.append(0)
    41. else:
    42. positive_rects.extend(rects)
    43. positive_sizes.append(len(rects))
    44. for annotation_path in negative_annotations:
    45. rects = np.loadtxt(annotation_path, dtype=int, delimiter=' ')
    46. # 和正样本规则一样
    47. if len(rects.shape) == 1:
    48. if rects.shape[0] == 4:
    49. negative_rects.append(rects)
    50. negative_sizes.append(1)
    51. else:
    52. positive_sizes.append(0)
    53. else:
    54. negative_rects.extend(rects)
    55. negative_sizes.append(len(rects))
    56. print(positive_rects)
    57. self.transform = transform
    58. self.jpeg_images = jpeg_images
    59. self.positive_sizes = positive_sizes
    60. self.negative_sizes = negative_sizes
    61. self.positive_rects = positive_rects
    62. self.negative_rects = negative_rects
    63. self.total_positive_num = int(np.sum(positive_sizes))
    64. self.total_negative_num = int(np.sum(negative_sizes))
    65. def __getitem__(self, index: int):
    66. # 定位下标所属图像
    67. image_id = len(self.jpeg_images) - 1
    68. if index < self.total_positive_num:
    69. # 正样本
    70. target = 1
    71. xmin, ymin, xmax, ymax = self.positive_rects[index]
    72. # 寻找所属图像
    73. for i in range(len(self.positive_sizes) - 1):
    74. if np.sum(self.positive_sizes[:i]) <= index < np.sum(self.positive_sizes[:(i + 1)]):
    75. image_id = i
    76. break
    77. image = self.jpeg_images[image_id][ymin:ymax, xmin:xmax]
    78. else:
    79. # 负样本
    80. target = 0
    81. idx = index - self.total_positive_num
    82. xmin, ymin, xmax, ymax = self.negative_rects[idx]
    83. # 寻找所属图像
    84. for i in range(len(self.negative_sizes) - 1):
    85. if np.sum(self.negative_sizes[:i]) <= idx < np.sum(self.negative_sizes[:(i + 1)]):
    86. image_id = i
    87. break
    88. image = self.jpeg_images[image_id][ymin:ymax, xmin:xmax]
    89. # print('index: %d image_id: %d target: %d image.shape: %s [xmin, ymin, xmax, ymax]: [%d, %d, %d, %d]' %
    90. # (index, image_id, target, str(image.shape), xmin, ymin, xmax, ymax))
    91. if self.transform:
    92. image = self.transform(image)
    93. return image, target
    94. def __len__(self) -> int:
    95. return self.total_positive_num + self.total_negative_num
    96. def get_positive_num(self) -> int:
    97. return self.total_positive_num
    98. def get_negative_num(self) -> int:
    99. return self.total_negative_num
    100. def test(idx):
    101. root_dir = '../../data/finetune_car/train'
    102. train_data_set = CustomFinetuneDataset(root_dir)
    103. print('positive num: %d' % train_data_set.get_positive_num())
    104. print('negative num: %d' % train_data_set.get_negative_num())
    105. print('total num: %d' % train_data_set.__len__())
    106. # 测试id=3/66516/66517/530856
    107. image, target = train_data_set.__getitem__(idx)
    108. print('target: %d' % target)
    109. image = Image.fromarray(image)
    110. print(image)
    111. print(type(image))
    112. # cv2.imshow('image', image)
    113. # cv2.waitKey(0)
    114. def test2():
    115. root_dir = '../../data/finetune_car/train'
    116. transform = transforms.Compose([
    117. transforms.ToPILImage(),
    118. transforms.Resize((227, 227)),
    119. transforms.ToTensor(),
    120. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    121. ])
    122. train_data_set = CustomFinetuneDataset(root_dir, transform=transform)
    123. image, target = train_data_set.__getitem__(530856)
    124. print('target: %d' % target)
    125. print('image.shape: ' + str(image.shape))
    126. def test3():
    127. root_dir = '../../data/finetune_car/train'
    128. transform = transforms.Compose([
    129. transforms.ToPILImage(),
    130. transforms.Resize((227, 227)),
    131. transforms.ToTensor(),
    132. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    133. ])
    134. train_data_set = CustomFinetuneDataset(root_dir, transform=transform)
    135. data_loader = DataLoader(train_data_set, batch_size=128, num_workers=8, drop_last=True)
    136. inputs, targets = next(data_loader.__iter__())
    137. print(targets)
    138. print(inputs.shape)
    139. if __name__ == '__main__':
    140. # test(159622)
    141. # test(4051)
    142. test3()

    自定义批量采样器类

    custom_batch_sampler.py,该脚本也不用主动执行,在训练微调模型的时候,自然会调用到,以下只说这个脚本做了什么事

    ①CustomBatchSampler类继承自Sampler

    ②__init__时通过传入的正样本总数num_positive和负样本总数num_negative得出一个列表self.idx_list,并结合传入的单次正样本数batch_positive和单次负样本数batch_negative算出可迭代次数self.num_iter

    ③__iter__方法中循环self.num_iter次,每次循环中会对正样本随机采集self.batch_positive次index,以及对负样本随机采集self.batch_negative次index,然后打乱存入sampler_list,最后返回一个迭代器iter(sampler)

    以下是custom_batch_sampler.py脚本代码

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/3/3 下午7:38
    4. @file: custom_batch_sampler.py
    5. @author: zj
    6. @description: 自定义采样器
    7. """
    8. import numpy as np
    9. import random
    10. from torch.utils.data import Sampler
    11. from torch.utils.data import DataLoader
    12. import torchvision.transforms as transforms
    13. from utils.data.custom_finetune_dataset import CustomFinetuneDataset
    14. class CustomBatchSampler(Sampler):
    15. def __init__(self, num_positive, num_negative, batch_positive, batch_negative) -> None:
    16. """
    17. 2分类数据集
    18. 每次批量处理,其中batch_positive个正样本,batch_negative个负样本
    19. @param num_positive: 正样本数目
    20. @param num_negative: 负样本数目
    21. @param batch_positive: 单次正样本数
    22. @param batch_negative: 单次负样本数
    23. """
    24. self.num_positive = num_positive
    25. self.num_negative = num_negative
    26. self.batch_positive = batch_positive
    27. self.batch_negative = batch_negative
    28. length = num_positive + num_negative
    29. self.idx_list = list(range(length))
    30. self.batch = batch_negative + batch_positive
    31. self.num_iter = length // self.batch
    32. def __iter__(self):
    33. sampler_list = list()
    34. for i in range(self.num_iter):
    35. tmp = np.concatenate(
    36. (random.sample(self.idx_list[:self.num_positive], self.batch_positive),
    37. random.sample(self.idx_list[self.num_positive:], self.batch_negative))
    38. )
    39. random.shuffle(tmp)
    40. sampler_list.extend(tmp)
    41. return iter(sampler_list)
    42. def __len__(self) -> int:
    43. return self.num_iter * self.batch
    44. def get_num_batch(self) -> int:
    45. return self.num_iter
    46. def test():
    47. root_dir = '../../data/finetune_car/train'
    48. train_data_set = CustomFinetuneDataset(root_dir)
    49. train_sampler = CustomBatchSampler(train_data_set.get_positive_num(), train_data_set.get_negative_num(), 32, 96)
    50. print('sampler len: %d' % train_sampler.__len__())
    51. print('sampler batch num: %d' % train_sampler.get_num_batch())
    52. first_idx_list = list(train_sampler.__iter__())[:128]
    53. print(first_idx_list)
    54. # 单次批量中正样本个数
    55. print('positive batch: %d' % np.sum(np.array(first_idx_list) < 66517))
    56. def test2():
    57. root_dir = '../../data/finetune_car/train'
    58. transform = transforms.Compose([
    59. transforms.ToPILImage(),
    60. transforms.Resize((227, 227)),
    61. transforms.ToTensor(),
    62. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    63. ])
    64. train_data_set = CustomFinetuneDataset(root_dir, transform=transform)
    65. train_sampler = CustomBatchSampler(train_data_set.get_positive_num(), train_data_set.get_negative_num(), 32, 96)
    66. data_loader = DataLoader(train_data_set, batch_size=128, sampler=train_sampler, num_workers=8, drop_last=True)
    67. inputs, targets = next(data_loader.__iter__())
    68. print(targets)
    69. print(inputs.shape)
    70. if __name__ == '__main__':
    71. test()

    训练微调模型

    执行finetune.py脚本

    ①调用custom_finetune_dataset.py脚本和custom_batch_sampler.py脚本,得到训练数据data_loaders

    ②使用预训练模型AlexNet网络模型,修改分类器对象classifier的输出为2类(1类是car,一类是背景),然后定义损失函数为交叉熵损失函数,优化函数为SGD,学习率更新器为StepLR,然后开始训练,保存准确率最高的权重到'models/alexnet_car.pth'

    以下是finetune.py脚本代码

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/3/1 上午9:54
    4. @file: finetune.py
    5. @author: zj
    6. @description:
    7. """
    8. import os
    9. import copy
    10. import time
    11. import torch
    12. import torch.nn as nn
    13. import torch.optim as optim
    14. from torch.utils.data import DataLoader
    15. import torchvision.transforms as transforms
    16. import torchvision.models as models
    17. from torchvision.models import AlexNet_Weights
    18. from utils.data.custom_finetune_dataset import CustomFinetuneDataset
    19. from utils.data.custom_batch_sampler import CustomBatchSampler
    20. from utils.util import check_dir
    21. def load_data(data_root_dir):
    22. transform = transforms.Compose([
    23. transforms.ToPILImage(),
    24. transforms.Resize((227, 227)),
    25. transforms.RandomHorizontalFlip(),
    26. transforms.ToTensor(),
    27. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    28. ])
    29. data_loaders = {}
    30. data_sizes = {}
    31. for name in ['train', 'val']:
    32. data_dir = os.path.join(data_root_dir, name)
    33. data_set = CustomFinetuneDataset(data_dir, transform=transform)
    34. data_sampler = CustomBatchSampler(data_set.get_positive_num(), data_set.get_negative_num(), 32, 96)
    35. data_loader = DataLoader(data_set, batch_size=128, sampler=data_sampler, num_workers=8, drop_last=True)
    36. data_loaders[name] = data_loader
    37. data_sizes[name] = data_sampler.__len__()
    38. return data_loaders, data_sizes
    39. def train_model(data_loaders, model, criterion, optimizer, lr_scheduler, num_epochs=25, device=None):
    40. since = time.time()
    41. best_model_weights = copy.deepcopy(model.state_dict())
    42. best_acc = 0.0
    43. for epoch in range(num_epochs):
    44. print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    45. print('-' * 10)
    46. # Each epoch has a training and validation phase
    47. for phase in ['train', 'val']:
    48. if phase == 'train':
    49. model.train() # Set model to training mode
    50. else:
    51. model.eval() # Set model to evaluate mode
    52. running_loss = 0.0
    53. running_corrects = 0
    54. # Iterate over data.
    55. for inputs, labels in data_loaders[phase]:
    56. inputs = inputs.to(device)
    57. labels = labels.to(device)
    58. # zero the parameter gradients
    59. optimizer.zero_grad()
    60. # forward
    61. # track history if only in train
    62. with torch.set_grad_enabled(phase == 'train'):
    63. outputs = model(inputs)
    64. _, preds = torch.max(outputs, 1)
    65. loss = criterion(outputs, labels)
    66. # backward + optimize only if in training phase
    67. if phase == 'train':
    68. loss.backward()
    69. optimizer.step()
    70. # statistics
    71. running_loss += loss.item() * inputs.size(0)
    72. running_corrects += torch.sum(preds == labels.data)
    73. if phase == 'train':
    74. lr_scheduler.step()
    75. epoch_loss = running_loss / data_sizes[phase]
    76. epoch_acc = running_corrects.double() / data_sizes[phase]
    77. print('{} Loss: {:.4f} Acc: {:.4f}'.format(
    78. phase, epoch_loss, epoch_acc))
    79. # deep copy the model
    80. if phase == 'val' and epoch_acc > best_acc:
    81. best_acc = epoch_acc
    82. best_model_weights = copy.deepcopy(model.state_dict())
    83. print()
    84. time_elapsed = time.time() - since
    85. print('Training complete in {:.0f}m {:.0f}s'.format(
    86. time_elapsed // 60, time_elapsed % 60))
    87. print('Best val Acc: {:4f}'.format(best_acc))
    88. # load best model weights
    89. model.load_state_dict(best_model_weights)
    90. return model
    91. if __name__ == '__main__':
    92. device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    93. data_loaders, data_sizes = load_data('./data/finetune_car')
    94. model = models.alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)
    95. # print(model)
    96. num_features = model.classifier[6].in_features
    97. model.classifier[6] = nn.Linear(num_features, 2)
    98. # print(model)
    99. model = model.to(device)
    100. criterion = nn.CrossEntropyLoss()
    101. optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
    102. lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    103. best_model = train_model(data_loaders, model, criterion, optimizer, lr_scheduler, device=device, num_epochs=25)
    104. # 保存最好的模型参数
    105. check_dir('./models')
    106. torch.save(best_model.state_dict(), 'models/alexnet_car.pth')

    分类器训练

    准备分类器数据集

    执行create_classifier_data.py脚本

    ①把'../../data/finetune_car/train/JPEGImages/'和'../../data/finetune_car/val/JPEGImages/'中的.jpg文件复制到'../../data/classifier_car/train/JPEGImages/'和'../../data/classifier_car/val/JPEGImages/',然后又把'../../data/finetune_car/train/car.csv'和'../../data/finetune_car/val/car.csv'分别复制到'../../data/classifier_car/train/car.csv'和'../../data/classifier_car/val/car.csv'

    ②根据'../../data/classifier_car/train/car.csv'和'../../data/classifier_car/val/car.csv'文件内容分别读取'../../data/classifier_car/train/JPEGImages/'和'../../data/classifier_car/val/JPEGImages/'中的图片,并传入parse_annotation_jpeg方法

    ③parse_annotation_jpeg方法中,先获取候选框rects,然后从.xml文件中获取标注框bndboxs,接着计算候选框和标注框的IoU得到iou_list,遍历iou_list,选出0<IoU≤0.3且面积大于最大标注框面积1/5的作为负样本,正样本则直接使用标注边界框bndboxs,返回后由主流程把正负样本标注分别保存为sample_name_1.csv和sample_name_0.csv文件

    以下是create_classifier_data.py脚本代码

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/3/1 下午7:17
    4. @file: create_classifier_data.py
    5. @author: zj
    6. @description: 创建分类器数据集
    7. """
    8. import random
    9. import numpy as np
    10. import shutil
    11. import time
    12. import cv2
    13. import os
    14. import xmltodict
    15. import selectivesearch
    16. from utils.util import check_dir
    17. from utils.util import parse_car_csv
    18. from utils.util import parse_xml
    19. from utils.util import iou
    20. from utils.util import compute_ious
    21. # train
    22. # positive num: 625
    23. # negative num: 366028
    24. # val
    25. # positive num: 625
    26. # negative num: 321474
    27. def parse_annotation_jpeg(annotation_path, jpeg_path, gs):
    28. """
    29. 获取正负样本(注:忽略属性difficult为True的标注边界框)
    30. 正样本:标注边界框
    31. 负样本:IoU大于0,小于等于0.3。为了进一步限制负样本数目,其大小必须大于标注框的1/5
    32. """
    33. img = cv2.imread(jpeg_path)
    34. selectivesearch.config(gs, img, strategy='q')
    35. # 计算候选建议
    36. rects = selectivesearch.get_rects(gs)
    37. # 获取标注边界框
    38. bndboxs = parse_xml(annotation_path)
    39. # 标注框大小
    40. maximum_bndbox_size = 0
    41. for bndbox in bndboxs:
    42. xmin, ymin, xmax, ymax = bndbox
    43. bndbox_size = (ymax - ymin) * (xmax - xmin)
    44. if bndbox_size > maximum_bndbox_size:
    45. maximum_bndbox_size = bndbox_size
    46. # 获取候选建议和标注边界框的IoU
    47. iou_list = compute_ious(rects, bndboxs)
    48. positive_list = list()
    49. negative_list = list()
    50. for i in range(len(iou_list)):
    51. xmin, ymin, xmax, ymax = rects[i]
    52. rect_size = (ymax - ymin) * (xmax - xmin)
    53. iou_score = iou_list[i]
    54. if 0 < iou_score <= 0.3 and rect_size > maximum_bndbox_size / 5.0:
    55. # 负样本
    56. negative_list.append(rects[i])
    57. else:
    58. pass
    59. return bndboxs, negative_list
    60. if __name__ == '__main__':
    61. car_root_dir = '../../data/voc_car/'
    62. classifier_root_dir = '../../data/classifier_car/'
    63. check_dir(classifier_root_dir)
    64. gs = selectivesearch.get_selective_search()
    65. for name in ['train', 'val']:
    66. src_root_dir = os.path.join(car_root_dir, name)
    67. src_annotation_dir = os.path.join(src_root_dir, 'Annotations')
    68. src_jpeg_dir = os.path.join(src_root_dir, 'JPEGImages')
    69. dst_root_dir = os.path.join(classifier_root_dir, name)
    70. dst_annotation_dir = os.path.join(dst_root_dir, 'Annotations')
    71. dst_jpeg_dir = os.path.join(dst_root_dir, 'JPEGImages')
    72. check_dir(dst_root_dir)
    73. check_dir(dst_annotation_dir)
    74. check_dir(dst_jpeg_dir)
    75. total_num_positive = 0
    76. total_num_negative = 0
    77. samples = parse_car_csv(src_root_dir)
    78. # 复制csv文件
    79. src_csv_path = os.path.join(src_root_dir, 'car.csv')
    80. dst_csv_path = os.path.join(dst_root_dir, 'car.csv')
    81. shutil.copyfile(src_csv_path, dst_csv_path)
    82. for sample_name in samples:
    83. since = time.time()
    84. src_annotation_path = os.path.join(src_annotation_dir, sample_name + '.xml')
    85. src_jpeg_path = os.path.join(src_jpeg_dir, sample_name + '.jpg')
    86. # 获取正负样本
    87. positive_list, negative_list = parse_annotation_jpeg(src_annotation_path, src_jpeg_path, gs)
    88. total_num_positive += len(positive_list)
    89. total_num_negative += len(negative_list)
    90. dst_annotation_positive_path = os.path.join(dst_annotation_dir, sample_name + '_1' + '.csv')
    91. dst_annotation_negative_path = os.path.join(dst_annotation_dir, sample_name + '_0' + '.csv')
    92. dst_jpeg_path = os.path.join(dst_jpeg_dir, sample_name + '.jpg')
    93. # 保存图片
    94. shutil.copyfile(src_jpeg_path, dst_jpeg_path)
    95. # 保存正负样本标注
    96. np.savetxt(dst_annotation_positive_path, np.array(positive_list), fmt='%d', delimiter=' ')
    97. np.savetxt(dst_annotation_negative_path, np.array(negative_list), fmt='%d', delimiter=' ')
    98. time_elapsed = time.time() - since
    99. print('parse {}.png in {:.0f}m {:.0f}s'.format(sample_name, time_elapsed // 60, time_elapsed % 60))
    100. print('%s positive num: %d' % (name, total_num_positive))
    101. print('%s negative num: %d' % (name, total_num_negative))
    102. print('done')

    自定义分类器数据集类

    custom_classifier_dataset.py,该脚本不用主动执行,在训练分类器模型的时候,自然会调用到,以下只说这个脚本做了什么事

    ①CustomClassifierDataset类继承自Dataset

    ②__init__时读取'../../data/classifier_car/train/JPEGImages/'或'../../data/classifier_car/val/JPEGImages/'文件夹中的图片,读取'../../data/classifier_car/train/Annotations/'或'../../data/classifier_car/val/Annotations/'中的正负样本集,记录正样本列表self.positive_list,负样本列表self.negative_list,正样本候选框positive_rects,负样本候选框negative_rects

    ③__getitem__方法传入index,如果index小于正样本总数len(self.positive_list),那么返回对应正样本的图片和标签(1),否则返回对应负样本的图片和标签(0)。

    以下是custom_classifier_dataset.py脚本代码

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/3/4 下午4:00
    4. @file: custom_classifier_dataset.py
    5. @author: zj
    6. @description: 分类器数据集类,可进行正负样本集替换,适用于hard negative mining操作
    7. """
    8. import numpy as np
    9. import os
    10. import cv2
    11. from PIL import Image
    12. from torch.utils.data import Dataset
    13. from torch.utils.data import DataLoader
    14. import torchvision.transforms as transforms
    15. from utils.util import parse_car_csv
    16. class CustomClassifierDataset(Dataset):
    17. def __init__(self, root_dir, transform=None):
    18. samples = parse_car_csv(root_dir)
    19. jpeg_images = list()
    20. positive_list = list()
    21. negative_list = list()
    22. for idx in range(len(samples)):
    23. sample_name = samples[idx]
    24. jpeg_images.append(cv2.imread(os.path.join(root_dir, 'JPEGImages', sample_name + ".jpg")))
    25. positive_annotation_path = os.path.join(root_dir, 'Annotations', sample_name + '_1.csv')
    26. positive_annotations = np.loadtxt(positive_annotation_path, dtype=int, delimiter=' ')
    27. # 考虑csv文件为空或者仅包含单个标注框
    28. if len(positive_annotations.shape) == 1:
    29. # 单个标注框坐标
    30. if positive_annotations.shape[0] == 4:
    31. positive_dict = dict()
    32. positive_dict['rect'] = positive_annotations
    33. positive_dict['image_id'] = idx
    34. # positive_dict['image_name'] = sample_name
    35. positive_list.append(positive_dict)
    36. else:
    37. for positive_annotation in positive_annotations:
    38. positive_dict = dict()
    39. positive_dict['rect'] = positive_annotation
    40. positive_dict['image_id'] = idx
    41. # positive_dict['image_name'] = sample_name
    42. positive_list.append(positive_dict)
    43. negative_annotation_path = os.path.join(root_dir, 'Annotations', sample_name + '_0.csv')
    44. negative_annotations = np.loadtxt(negative_annotation_path, dtype=int, delimiter=' ')
    45. # 考虑csv文件为空或者仅包含单个标注框
    46. if len(negative_annotations.shape) == 1:
    47. # 单个标注框坐标
    48. if negative_annotations.shape[0] == 4:
    49. negative_dict = dict()
    50. negative_dict['rect'] = negative_annotations
    51. negative_dict['image_id'] = idx
    52. # negative_dict['image_name'] = sample_name
    53. negative_list.append(negative_dict)
    54. else:
    55. for negative_annotation in negative_annotations:
    56. negative_dict = dict()
    57. negative_dict['rect'] = negative_annotation
    58. negative_dict['image_id'] = idx
    59. # negative_dict['image_name'] = sample_name
    60. negative_list.append(negative_dict)
    61. self.transform = transform
    62. self.jpeg_images = jpeg_images
    63. self.positive_list = positive_list
    64. self.negative_list = negative_list
    65. def __getitem__(self, index: int):
    66. # 定位下标所属图像
    67. if index < len(self.positive_list):
    68. # 正样本
    69. target = 1
    70. positive_dict = self.positive_list[index]
    71. xmin, ymin, xmax, ymax = positive_dict['rect']
    72. image_id = positive_dict['image_id']
    73. image = self.jpeg_images[image_id][ymin:ymax, xmin:xmax]
    74. cache_dict = positive_dict
    75. else:
    76. # 负样本
    77. target = 0
    78. idx = index - len(self.positive_list)
    79. negative_dict = self.negative_list[idx]
    80. xmin, ymin, xmax, ymax = negative_dict['rect']
    81. image_id = negative_dict['image_id']
    82. image = self.jpeg_images[image_id][ymin:ymax, xmin:xmax]
    83. cache_dict = negative_dict
    84. # print('index: %d image_id: %d target: %d image.shape: %s [xmin, ymin, xmax, ymax]: [%d, %d, %d, %d]' %
    85. # (index, image_id, target, str(image.shape), xmin, ymin, xmax, ymax))
    86. if self.transform:
    87. image = self.transform(image)
    88. return image, target, cache_dict
    89. def __len__(self) -> int:
    90. return len(self.positive_list) + len(self.negative_list)
    91. def get_transform(self):
    92. return self.transform
    93. def get_jpeg_images(self) -> list:
    94. return self.jpeg_images
    95. def get_positive_num(self) -> int:
    96. return len(self.positive_list)
    97. def get_negative_num(self) -> int:
    98. return len(self.negative_list)
    99. def get_positives(self) -> list:
    100. return self.positive_list
    101. def get_negatives(self) -> list:
    102. return self.negative_list
    103. # 用于hard negative mining
    104. # 替换负样本
    105. def set_negative_list(self, negative_list):
    106. self.negative_list = negative_list
    107. def test(idx):
    108. root_dir = '../../data/classifier_car/val'
    109. train_data_set = CustomClassifierDataset(root_dir)
    110. print('positive num: %d' % train_data_set.get_positive_num())
    111. print('negative num: %d' % train_data_set.get_negative_num())
    112. print('total num: %d' % train_data_set.__len__())
    113. # 测试id=3/66516/66517/530856
    114. image, target, cache_dict = train_data_set.__getitem__(idx)
    115. print('target: %d' % target)
    116. print('dict: ' + str(cache_dict))
    117. image = Image.fromarray(image)
    118. print(image)
    119. print(type(image))
    120. # cv2.imshow('image', image)
    121. # cv2.waitKey(0)
    122. def test2():
    123. root_dir = '../../data/classifier_car/train'
    124. transform = transforms.Compose([
    125. transforms.ToPILImage(),
    126. transforms.Resize((227, 227)),
    127. transforms.ToTensor(),
    128. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    129. ])
    130. train_data_set = CustomClassifierDataset(root_dir, transform=transform)
    131. image, target, cache_dict = train_data_set.__getitem__(230856)
    132. print('target: %d' % target)
    133. print('dict: ' + str(cache_dict))
    134. print('image.shape: ' + str(image.shape))
    135. def test3():
    136. root_dir = '../../data/classifier_car/train'
    137. transform = transforms.Compose([
    138. transforms.ToPILImage(),
    139. transforms.Resize((227, 227)),
    140. transforms.ToTensor(),
    141. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    142. ])
    143. train_data_set = CustomClassifierDataset(root_dir, transform=transform)
    144. data_loader = DataLoader(train_data_set, batch_size=128, num_workers=8, drop_last=True)
    145. inputs, targets, cache_dicts = next(data_loader.__iter__())
    146. print(targets)
    147. print(inputs.shape)
    148. if __name__ == '__main__':
    149. # test(159622)
    150. # test(4051)
    151. test(24768)
    152. # test2()
    153. # test3()

    自定义批量采样器类

    同"卷积神经网络微调模型"中的"自定义批量采样器类",在训练分类器模型的时候,自然会调用到

    训练分类器

    执行linear_svm.py脚本

    ①调用custom_classifier_dataset.py脚本和custom_batch_sampler.py脚本,得到训练数据data_loaders

    ②使用AlexNet网络模型,修改分类器对象classifier的输出为2类(1类是car,一类是背景),加载之前微调训练的权重alexnet_car.pth,并设置参数冻结,然后再添加一个全连接层作为svm分类器,定义损失函数为折页损失函数,优化函数为SGD,学习率更新器为StepLR,然后开始训练,保存准确率最高的权重到'models/best_linear_svm_alexnet_car.pth'

    以下是linear_svm.py脚本代码

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/3/1 下午2:38
    4. @file: linear_svm.py
    5. @author: zj
    6. @description:
    7. """
    8. import time
    9. import copy
    10. import os
    11. import random
    12. import numpy as np
    13. import torch
    14. import torch.nn as nn
    15. import torch.optim as optim
    16. from torch.utils.data import DataLoader
    17. import torchvision.transforms as transforms
    18. from torchvision.models import alexnet
    19. from utils.data.custom_classifier_dataset import CustomClassifierDataset
    20. from utils.data.custom_hard_negative_mining_dataset import CustomHardNegativeMiningDataset
    21. from utils.data.custom_batch_sampler import CustomBatchSampler
    22. from utils.util import check_dir
    23. from utils.util import save_model
    24. batch_positive = 32
    25. batch_negative = 96
    26. batch_total = 128
    27. def load_data(data_root_dir):
    28. transform = transforms.Compose([
    29. transforms.ToPILImage(),
    30. transforms.Resize((227, 227)),
    31. transforms.RandomHorizontalFlip(),
    32. transforms.ToTensor(),
    33. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    34. ])
    35. data_loaders = {}
    36. data_sizes = {}
    37. remain_negative_list = list()
    38. for name in ['train', 'val']:
    39. data_dir = os.path.join(data_root_dir, name)
    40. data_set = CustomClassifierDataset(data_dir, transform=transform)
    41. if name == 'train':
    42. """
    43. 使用hard negative mining方式
    44. 初始正负样本比例为1:1。由于正样本数远小于负样本,所以以正样本数为基准,在负样本集中随机提取同样数目负样本作为初始负样本集
    45. """
    46. positive_list = data_set.get_positives()
    47. negative_list = data_set.get_negatives()
    48. init_negative_idxs = random.sample(range(len(negative_list)), len(positive_list))
    49. init_negative_list = [negative_list[idx] for idx in range(len(negative_list)) if idx in init_negative_idxs]
    50. remain_negative_list = [negative_list[idx] for idx in range(len(negative_list))
    51. if idx not in init_negative_idxs]
    52. data_set.set_negative_list(init_negative_list)
    53. data_loaders['remain'] = remain_negative_list
    54. sampler = CustomBatchSampler(data_set.get_positive_num(), data_set.get_negative_num(),
    55. batch_positive, batch_negative)
    56. data_loader = DataLoader(data_set, batch_size=batch_total, sampler=sampler, num_workers=8, drop_last=True)
    57. data_loaders[name] = data_loader
    58. data_sizes[name] = len(sampler)
    59. return data_loaders, data_sizes
    60. def hinge_loss(outputs, labels):
    61. """
    62. 折页损失计算
    63. :param outputs: 大小为(N, num_classes)
    64. :param labels: 大小为(N)
    65. :return: 损失值
    66. """
    67. num_labels = len(labels)
    68. corrects = outputs[range(num_labels), labels].unsqueeze(0).T
    69. # 最大间隔
    70. margin = 1.0
    71. margins = outputs - corrects + margin
    72. loss = torch.sum(torch.max(margins, 1)[0]) / len(labels)
    73. # # 正则化强度
    74. # reg = 1e-3
    75. # loss += reg * torch.sum(weight ** 2)
    76. return loss
    77. def add_hard_negatives(hard_negative_list, negative_list, add_negative_list):
    78. for item in hard_negative_list:
    79. if len(add_negative_list) == 0:
    80. # 第一次添加负样本
    81. negative_list.append(item)
    82. add_negative_list.append(list(item['rect']))
    83. if list(item['rect']) not in add_negative_list:
    84. negative_list.append(item)
    85. add_negative_list.append(list(item['rect']))
    86. def get_hard_negatives(preds, cache_dicts):
    87. fp_mask = preds == 1
    88. tn_mask = preds == 0
    89. fp_rects = cache_dicts['rect'][fp_mask].numpy()
    90. fp_image_ids = cache_dicts['image_id'][fp_mask].numpy()
    91. tn_rects = cache_dicts['rect'][tn_mask].numpy()
    92. tn_image_ids = cache_dicts['image_id'][tn_mask].numpy()
    93. hard_negative_list = [{'rect': fp_rects[idx], 'image_id': fp_image_ids[idx]} for idx in range(len(fp_rects))]
    94. easy_negatie_list = [{'rect': tn_rects[idx], 'image_id': tn_image_ids[idx]} for idx in range(len(tn_rects))]
    95. return hard_negative_list, easy_negatie_list
    96. def train_model(data_loaders, model, criterion, optimizer, lr_scheduler, num_epochs=25, device=None):
    97. since = time.time()
    98. best_model_weights = copy.deepcopy(model.state_dict())
    99. best_acc = 0.0
    100. for epoch in range(num_epochs):
    101. print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    102. print('-' * 10)
    103. # Each epoch has a training and validation phase
    104. for phase in ['train', 'val']:
    105. if phase == 'train':
    106. model.train() # Set model to training mode
    107. else:
    108. model.eval() # Set model to evaluate mode
    109. running_loss = 0.0
    110. running_corrects = 0
    111. # 输出正负样本数
    112. data_set = data_loaders[phase].dataset
    113. print('{} - positive_num: {} - negative_num: {} - data size: {}'.format(
    114. phase, data_set.get_positive_num(), data_set.get_negative_num(), data_sizes[phase]))
    115. # Iterate over data.
    116. for inputs, labels, cache_dicts in data_loaders[phase]:
    117. inputs = inputs.to(device)
    118. labels = labels.to(device)
    119. # zero the parameter gradients
    120. optimizer.zero_grad()
    121. # forward
    122. # track history if only in train
    123. with torch.set_grad_enabled(phase == 'train'):
    124. outputs = model(inputs)
    125. # print(outputs.shape)
    126. _, preds = torch.max(outputs, 1)
    127. loss = criterion(outputs, labels)
    128. # backward + optimize only if in training phase
    129. if phase == 'train':
    130. loss.backward()
    131. optimizer.step()
    132. # statistics
    133. running_loss += loss.item() * inputs.size(0)
    134. running_corrects += torch.sum(preds == labels.data)
    135. if phase == 'train':
    136. lr_scheduler.step()
    137. epoch_loss = running_loss / data_sizes[phase]
    138. epoch_acc = running_corrects.double() / data_sizes[phase]
    139. print('{} Loss: {:.4f} Acc: {:.4f}'.format(
    140. phase, epoch_loss, epoch_acc))
    141. # deep copy the model
    142. if phase == 'val' and epoch_acc > best_acc:
    143. best_acc = epoch_acc
    144. best_model_weights = copy.deepcopy(model.state_dict())
    145. # 每一轮训练完成后,测试剩余负样本集,进行hard negative mining
    146. train_dataset = data_loaders['train'].dataset
    147. remain_negative_list = data_loaders['remain']
    148. jpeg_images = train_dataset.get_jpeg_images()
    149. transform = train_dataset.get_transform()
    150. with torch.set_grad_enabled(False):
    151. remain_dataset = CustomHardNegativeMiningDataset(remain_negative_list, jpeg_images, transform=transform)
    152. remain_data_loader = DataLoader(remain_dataset, batch_size=batch_total, num_workers=8, drop_last=True)
    153. # 获取训练数据集的负样本集
    154. negative_list = train_dataset.get_negatives()
    155. # 记录后续增加的负样本
    156. add_negative_list = data_loaders.get('add_negative', [])
    157. running_corrects = 0
    158. # Iterate over data.
    159. for inputs, labels, cache_dicts in remain_data_loader:
    160. inputs = inputs.to(device)
    161. labels = labels.to(device)
    162. # zero the parameter gradients
    163. optimizer.zero_grad()
    164. outputs = model(inputs)
    165. # print(outputs.shape)
    166. _, preds = torch.max(outputs, 1)
    167. running_corrects += torch.sum(preds == labels.data)
    168. hard_negative_list, easy_neagtive_list = get_hard_negatives(preds.cpu().numpy(), cache_dicts)
    169. add_hard_negatives(hard_negative_list, negative_list, add_negative_list)
    170. remain_acc = running_corrects.double() / len(remain_negative_list)
    171. print('remain negative size: {}, acc: {:.4f}'.format(len(remain_negative_list), remain_acc))
    172. # 训练完成后,重置负样本,进行hard negatives mining
    173. train_dataset.set_negative_list(negative_list)
    174. tmp_sampler = CustomBatchSampler(train_dataset.get_positive_num(), train_dataset.get_negative_num(),
    175. batch_positive, batch_negative)
    176. data_loaders['train'] = DataLoader(train_dataset, batch_size=batch_total, sampler=tmp_sampler,
    177. num_workers=8, drop_last=True)
    178. data_loaders['add_negative'] = add_negative_list
    179. # 重置数据集大小
    180. data_sizes['train'] = len(tmp_sampler)
    181. # 每训练一轮就保存
    182. save_model(model, 'models/linear_svm_alexnet_car_%d.pth' % epoch)
    183. time_elapsed = time.time() - since
    184. print('Training complete in {:.0f}m {:.0f}s'.format(
    185. time_elapsed // 60, time_elapsed % 60))
    186. print('Best val Acc: {:4f}'.format(best_acc))
    187. # load best model weights
    188. model.load_state_dict(best_model_weights)
    189. return model
    190. if __name__ == '__main__':
    191. device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    192. # device = 'cpu'
    193. data_loaders, data_sizes = load_data('./data/classifier_car')
    194. # 加载CNN模型
    195. model_path = './models/alexnet_car.pth'
    196. model = alexnet()
    197. num_classes = 2
    198. num_features = model.classifier[6].in_features
    199. model.classifier[6] = nn.Linear(num_features, num_classes)
    200. model.load_state_dict(torch.load(model_path))
    201. model.eval()
    202. # 固定特征提取
    203. for param in model.parameters():
    204. param.requires_grad = False
    205. # 创建SVM分类器
    206. model.classifier[6] = nn.Linear(num_features, num_classes)
    207. # print(model)
    208. model = model.to(device)
    209. criterion = hinge_loss
    210. # 由于初始训练集数量很少,所以降低学习率
    211. optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
    212. # 共训练10轮,每隔4论减少一次学习率
    213. lr_schduler = optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)
    214. best_model = train_model(data_loaders, model, criterion, optimizer, lr_schduler, num_epochs=10, device=device)
    215. # 保存最好的模型参数
    216. save_model(best_model, 'models/best_linear_svm_alexnet_car.pth')

    边界框回归训练

    准备边界框回归数据集

    执行create_bbox_regression_data.py脚本

    ①读取'../../data/voc_car/train/Annotations/'中的标注框信息存入bndboxs和'../../data/finetune_car/train/Annotations/'中的正样本数据存入positive_bndboxes,计算标注框和正样本数据的IoU,针对IoU>0.6的正样本数据,保存其到'../../data/bbox_regression/positive/'中,并保存对应的图片到'../../data/bbox_regression/JPEGImages/'中,保存对应的标注框信息到'../../data/bbox_regression/bndboxs/'中,保存对应的图片名到'../../data/bbox_regression/car.csv'中

    以下是create_bbox_regression_data.py脚本代码

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/4/3 下午7:19
    4. @file: create_bbox_regression_data.py
    5. @author: zj
    6. @description: 创建边界框回归数据集
    7. """
    8. import os
    9. import shutil
    10. import numpy as np
    11. import utils.util as util
    12. # 正样本边界框数目:37222
    13. if __name__ == '__main__':
    14. """
    15. 从voc_car/train目录中提取标注边界框坐标
    16. 从finetune_car/train目录中提取训练集正样本坐标(IoU>=0.5),进一步提取IoU>0.6的边界框
    17. 数据集保存在bbox_car目录下
    18. """
    19. voc_car_train_dir = '../../data/voc_car/train'
    20. # ground truth
    21. gt_annotation_dir = os.path.join(voc_car_train_dir, 'Annotations')
    22. jpeg_dir = os.path.join(voc_car_train_dir, 'JPEGImages')
    23. classifier_car_train_dir = '../../data/finetune_car/train'
    24. # positive
    25. positive_annotation_dir = os.path.join(classifier_car_train_dir, 'Annotations')
    26. dst_root_dir = '../../data/bbox_regression/'
    27. dst_jpeg_dir = os.path.join(dst_root_dir, 'JPEGImages')
    28. dst_bndbox_dir = os.path.join(dst_root_dir, 'bndboxs')
    29. dst_positive_dir = os.path.join(dst_root_dir, 'positive')
    30. util.check_dir(dst_root_dir)
    31. util.check_dir(dst_jpeg_dir)
    32. util.check_dir(dst_bndbox_dir)
    33. util.check_dir(dst_positive_dir)
    34. samples = util.parse_car_csv(voc_car_train_dir)
    35. res_samples = list()
    36. total_positive_num = 0
    37. for sample_name in samples:
    38. # 提取正样本边界框坐标(IoU>=0.5)
    39. positive_annotation_path = os.path.join(positive_annotation_dir, sample_name + '_1.csv')
    40. positive_bndboxes = np.loadtxt(positive_annotation_path, dtype=int, delimiter=' ')
    41. # 提取标注边界框
    42. gt_annotation_path = os.path.join(gt_annotation_dir, sample_name + '.xml')
    43. bndboxs = util.parse_xml(gt_annotation_path)
    44. # 计算符合条件(IoU>0.6)的候选建议
    45. positive_list = list()
    46. if len(positive_bndboxes.shape) == 1 and len(positive_bndboxes) != 0:
    47. scores = util.iou(positive_bndboxes, bndboxs)
    48. if np.max(scores) > 0.6:
    49. positive_list.append(positive_bndboxes)
    50. elif len(positive_bndboxes.shape) == 2:
    51. for positive_bndboxe in positive_bndboxes:
    52. scores = util.iou(positive_bndboxe, bndboxs)
    53. if np.max(scores) > 0.6:
    54. positive_list.append(positive_bndboxe)
    55. else:
    56. pass
    57. # 如果存在正样本边界框(IoU>0.6),那么保存相应的图片以及标注边界框
    58. if len(positive_list) > 0:
    59. # 保存图片
    60. jpeg_path = os.path.join(jpeg_dir, sample_name + ".jpg")
    61. dst_jpeg_path = os.path.join(dst_jpeg_dir, sample_name + ".jpg")
    62. shutil.copyfile(jpeg_path, dst_jpeg_path)
    63. # 保存标注边界框
    64. dst_bndbox_path = os.path.join(dst_bndbox_dir, sample_name + ".csv")
    65. np.savetxt(dst_bndbox_path, bndboxs, fmt='%s', delimiter=' ')
    66. # 保存正样本边界框
    67. dst_positive_path = os.path.join(dst_positive_dir, sample_name + ".csv")
    68. np.savetxt(dst_positive_path, np.array(positive_list), fmt='%s', delimiter=' ')
    69. total_positive_num += len(positive_list)
    70. res_samples.append(sample_name)
    71. print('save {} done'.format(sample_name))
    72. else:
    73. print('-------- {} 不符合条件'.format(sample_name))
    74. dst_csv_path = os.path.join(dst_root_dir, 'car.csv')
    75. np.savetxt(dst_csv_path, res_samples, fmt='%s', delimiter=' ')
    76. print('total positive num: {}'.format(total_positive_num))
    77. print('done')

    自定义边界框回归训练数据集类

    custom_bbox_regression_dataset.py,该脚本不用主动执行,在训练边界框回归模型的时候,自然会调用到,以下只说这个脚本做了什么事

    ①BBoxRegressionDataset类继承自Dataset

    ②__init__时读取'../../data/bbox_regression/JPEGImages/'文件夹中的图片,存入self.jpeg_list,又读取'../../data/bbox_regression/bndboxs/'中的标注框信息和'../../data/bbox_regression/positive/'中的正样本数据并都存入self.box_list

    ③__getitem__方法计算并返回图片和相对坐标差

    以下是custom_bbox_regression_dataset.py脚本代码

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/4/3 下午8:07
    4. @file: custom_bbox_regression_dataset.py
    5. @author: zj
    6. @description:
    7. """
    8. import os
    9. import cv2
    10. import numpy as np
    11. import torch
    12. import torchvision.transforms as transforms
    13. from torch.utils.data import Dataset
    14. from torch.utils.data import DataLoader
    15. import utils.util as util
    16. class BBoxRegressionDataset(Dataset):
    17. def __init__(self, root_dir, transform=None):
    18. super(BBoxRegressionDataset, self).__init__()
    19. self.transform = transform
    20. samples = util.parse_car_csv(root_dir)
    21. jpeg_list = list()
    22. # 保存{'image_id': ?, 'positive': ?, 'bndbox': ?}
    23. box_list = list()
    24. for i in range(len(samples)):
    25. sample_name = samples[i]
    26. jpeg_path = os.path.join(root_dir, 'JPEGImages', sample_name + '.jpg')
    27. bndbox_path = os.path.join(root_dir, 'bndboxs', sample_name + '.csv')
    28. positive_path = os.path.join(root_dir, 'positive', sample_name + '.csv')
    29. jpeg_list.append(cv2.imread(jpeg_path))
    30. bndboxes = np.loadtxt(bndbox_path, dtype=int, delimiter=' ')
    31. positives = np.loadtxt(positive_path, dtype=int, delimiter=' ')
    32. if len(positives.shape) == 1:
    33. bndbox = self.get_bndbox(bndboxes, positives)
    34. box_list.append({'image_id': i, 'positive': positives, 'bndbox': bndbox})
    35. else:
    36. for positive in positives:
    37. bndbox = self.get_bndbox(bndboxes, positive)
    38. box_list.append({'image_id': i, 'positive': positive, 'bndbox': bndbox})
    39. self.jpeg_list = jpeg_list
    40. self.box_list = box_list
    41. def __getitem__(self, index: int):
    42. assert index < self.__len__(), '数据集大小为%d,当前输入下标为%d' % (self.__len__(), index)
    43. box_dict = self.box_list[index]
    44. image_id = box_dict['image_id']
    45. positive = box_dict['positive']
    46. bndbox = box_dict['bndbox']
    47. # 获取预测图像
    48. jpeg_img = self.jpeg_list[image_id]
    49. xmin, ymin, xmax, ymax = positive
    50. image = jpeg_img[ymin:ymax, xmin:xmax]
    51. if self.transform:
    52. image = self.transform(image)
    53. # 计算P/G的x/y/w/h
    54. target = dict()
    55. p_w = xmax - xmin
    56. p_h = ymax - ymin
    57. p_x = xmin + p_w / 2
    58. p_y = ymin + p_h / 2
    59. xmin, ymin, xmax, ymax = bndbox
    60. g_w = xmax - xmin
    61. g_h = ymax - ymin
    62. g_x = xmin + g_w / 2
    63. g_y = ymin + g_h / 2
    64. # 计算t
    65. t_x = (g_x - p_x) / p_w
    66. t_y = (g_y - p_y) / p_h
    67. t_w = np.log(g_w / p_w)
    68. t_h = np.log(g_h / p_h)
    69. return image, np.array((t_x, t_y, t_w, t_h))
    70. def __len__(self):
    71. return len(self.box_list)
    72. def get_bndbox(self, bndboxes, positive):
    73. """
    74. 返回和positive的IoU最大的标注边界框
    75. :param bndboxes: 大小为[N, 4]或者[4]
    76. :param positive: 大小为[4]
    77. :return: [4]
    78. """
    79. if len(bndboxes.shape) == 1:
    80. # 只有一个标注边界框,直接返回即可
    81. return bndboxes
    82. else:
    83. scores = util.iou(positive, bndboxes)
    84. return bndboxes[np.argmax(scores)]
    85. def test():
    86. """
    87. 创建数据集类实例
    88. """
    89. transform = transforms.Compose([
    90. transforms.ToPILImage(),
    91. transforms.Resize((227, 227)),
    92. transforms.RandomHorizontalFlip(),
    93. transforms.ToTensor(),
    94. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    95. ])
    96. data_root_dir = '../../data/bbox_regression'
    97. data_set = BBoxRegressionDataset(data_root_dir, transform=transform)
    98. print(data_set.__len__())
    99. image, target = data_set.__getitem__(10)
    100. print(image.shape)
    101. print(target)
    102. print(target.dtype)
    103. def test2():
    104. """
    105. 测试DataLoader使用
    106. """
    107. transform = transforms.Compose([
    108. transforms.ToPILImage(),
    109. transforms.Resize((227, 227)),
    110. transforms.RandomHorizontalFlip(),
    111. transforms.ToTensor(),
    112. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    113. ])
    114. data_root_dir = '../../data/bbox_regression'
    115. data_set = BBoxRegressionDataset(data_root_dir, transform=transform)
    116. data_loader = DataLoader(data_set, batch_size=128, shuffle=True, num_workers=8)
    117. items = next(data_loader.__iter__())
    118. datas, targets = items
    119. print(datas.shape)
    120. print(targets.shape)
    121. print(targets.dtype)
    122. if __name__ == '__main__':
    123. test()
    124. # test2()

    训练边界框回归

    执行bbox_regression.py脚本

    ①调用custom_bbox_regression_dataset.py脚本,得到训练数据data_loader

    ②使用AlexNet网络模型,修改分类器对象classifier的输出为2类(1类是car,一类是背景),加载权重best_linear_svm_alexnet_car.pth,并设置参数冻结,然后再添加一个线性层作为全连接层,定义损失函数为均方误差损失函数,优化函数为SGD,学习率更新器为StepLR,然后开始训练,保存模型到'models/bbox_regression_%d.pth'

    以下是bbox_regression.py脚本代码

    1. # -*- coding: utf-8 -*-
    2. """
    3. @date: 2020/4/3 下午6:55
    4. @file: bbox_regression.py
    5. @author: zj
    6. @description: 边界框回归训练
    7. """
    8. import os
    9. import copy
    10. import time
    11. import torch
    12. import torch.nn as nn
    13. import torch.optim as optim
    14. from torch.utils.data import DataLoader
    15. import torchvision.transforms as transforms
    16. from torchvision.models import AlexNet
    17. from utils.data.custom_bbox_regression_dataset import BBoxRegressionDataset
    18. import utils.util as util
    19. def load_data(data_root_dir):
    20. transform = transforms.Compose([
    21. transforms.ToPILImage(),
    22. transforms.Resize((227, 227)),
    23. transforms.RandomHorizontalFlip(),
    24. transforms.ToTensor(),
    25. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    26. ])
    27. data_set = BBoxRegressionDataset(data_root_dir, transform=transform)
    28. data_loader = DataLoader(data_set, batch_size=128, shuffle=True, num_workers=8)
    29. return data_loader
    30. def train_model(data_loader, feature_model, model, criterion, optimizer, lr_scheduler, num_epochs=25, device=None):
    31. since = time.time()
    32. model.train() # Set model to training mode
    33. loss_list = list()
    34. for epoch in range(num_epochs):
    35. print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    36. print('-' * 10)
    37. running_loss = 0.0
    38. # Iterate over data.
    39. for inputs, targets in data_loader:
    40. inputs = inputs.to(device)
    41. targets = targets.float().to(device)
    42. features = feature_model.features(inputs)
    43. features = torch.flatten(features, 1)
    44. # zero the parameter gradients
    45. optimizer.zero_grad()
    46. # forward
    47. outputs = model(features)
    48. loss = criterion(outputs, targets)
    49. loss.backward()
    50. optimizer.step()
    51. # statistics
    52. running_loss += loss.item() * inputs.size(0)
    53. lr_scheduler.step()
    54. epoch_loss = running_loss / data_loader.dataset.__len__()
    55. loss_list.append(epoch_loss)
    56. print('{} Loss: {:.4f}'.format(epoch, epoch_loss))
    57. # 每训练一轮就保存
    58. util.save_model(model, './models/bbox_regression_%d.pth' % epoch)
    59. print()
    60. time_elapsed = time.time() - since
    61. print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    62. return loss_list
    63. def get_model(device=None):
    64. # 加载CNN模型
    65. model = AlexNet(num_classes=2)
    66. model.load_state_dict(torch.load('./models/best_linear_svm_alexnet_car.pth'))
    67. model.eval()
    68. # 取消梯度追踪
    69. for param in model.parameters():
    70. param.requires_grad = False
    71. if device:
    72. model = model.to(device)
    73. return model
    74. if __name__ == '__main__':
    75. data_loader = load_data('./data/bbox_regression')
    76. device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    77. feature_model = get_model(device)
    78. # AlexNet最后一个池化层计算得到256*6*6输出
    79. in_features = 256 * 6 * 6
    80. out_features = 4
    81. model = nn.Linear(in_features, out_features)
    82. model.to(device)
    83. criterion = nn.MSELoss()
    84. optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
    85. lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    86. loss_list = train_model(data_loader, feature_model, model, criterion, optimizer, lr_scheduler, device=device,
    87. num_epochs=12)
    88. util.plot_loss(loss_list)

    汽车car目标检测器实现

    读取图片,先检测图片中是否有汽车,然后使用非极大值抑制(NMS)算法消除冗余边界框,最后输出目标检测结果,如下图

    工程下载

    pytorch-r-cnn工程文件

  • 相关阅读:
    国内的软件测试真的这么不受待见吗?
    C#Lambda让代码变得更加简洁而优雅
    【Linux API 揭秘】container_of函数详解
    怎样在Windows10系统中安装配置PL/SQL
    数据分析常见的业务面试题
    阿里巴巴API接口解析,实现获得商品详情
    上6休3上3休2……这烧脑的调休安排,国内外AI都算不明白,集体大“翻车”!...
    光电柴微电网日前调度报告
    【电梯控制系统】基于VHDL语言和状态机实现的电梯控制系统的设计,使用了状态机
    Java的垃圾回收机制详解——从入门到出土,学不会接着来砍我!
  • 原文地址:https://blog.csdn.net/Victor_Li_/article/details/134274482