• YOLO V5源码详解


    1.数据读取

            首先读取图片以及标签路径,并将标签存入缓存,对单标签情况、特定类别、以及是否保持长方形等情况分别进行处理。

            如果需要进行mosaic数据增强,首先随机确定拼接中心点,将四张图片分别放置于中心点的左上、右上、左下、右下四个位置,通过裁剪或填充使其适应画布,并对labels做相应的坐标调整。同时,对经过mosaic数据增强的图像,再进行copy_paste数据增强以及旋转、平移、缩放等数据增强。

             同时,还可以进行其他数据增强方式,比如mix up,hsv等

     代码如下:

    1. class LoadImagesAndLabels(Dataset):
    2. # YOLOv5 train_loader/val_loader, loads images and labels for training and validation
    3. cache_version = 0.6 # dataset labels *.cache version
    4. rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]
    def __init__(self,
                 path,
                 img_size=640,
                 batch_size=16,
                 augment=False,
                 hyp=None,
                 rect=False,
                 image_weights=False,
                 cache_images=False,
                 single_cls=False,
                 stride=32,
                 pad=0.0,
                 prefix=''):
        """Collect image files, load (or rebuild) the label cache and prepare batching.

        Args:
            path: A directory, a text file listing image paths, or a list of either.
            img_size: Target size read by load_image() and the mosaic loaders.
            batch_size: Used to assign every image a batch index.
            augment: Enables augmentation; mosaic is used when augmenting without rect.
            hyp: Hyper-parameter dict, read later by __getitem__() and the mosaic loaders.
            rect: Request rectangular batches; forced off when image_weights is truthy.
            image_weights: Stored on the instance; a truthy value disables rect.
            cache_images: Falsy (no caching), 'disk' (write *.npy files) or any other
                truthy value (keep decoded images in RAM).
            single_cls: Rewrite the class column of every label to 0.
            stride: Batch shapes are rounded up to a multiple of this value.
            pad: Extra padding, in stride units, added when computing batch shapes.
            prefix: Text prepended to log and error messages.

        Raises:
            Exception: If the image list cannot be built (the original error is chained).
            AssertionError: If augment is set but the cache contains no labels.
        """
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        self.mosaic_border = [-img_size // 2, -img_size // 2]  # range offset for the random mosaic centre
        self.stride = stride
        self.path = path
        self.albumentations = Albumentations() if augment else None

        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                elif p.is_file():  # file
                    with open(p) as t:
                        t = t.read().strip().splitlines()  # one image path per line
                    parent = str(p.parent) + os.sep
                    # Local to global path: only the leading './' is a relative marker, so replace it once.
                    f += [x.replace('./', parent, 1) if x.startswith('./') else x for x in t]
                else:
                    raise FileNotFoundError(f'{prefix}{p} does not exist')
            self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
            assert self.im_files, f'{prefix}No images found'
        except Exception as e:
            # Same exception type and message as before; chaining keeps the original traceback.
            raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}') from e

        # Check cache
        self.label_files = img2label_paths(self.im_files)  # labels
        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
        try:
            # NOTE(review): allow_pickle=True unpickles the cache file; only load caches from trusted locations.
            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
            assert cache['version'] == self.cache_version  # matches current version
            assert cache['hash'] == get_hash(self.label_files + self.im_files)  # identical hash
        except Exception:
            # Missing, unreadable or stale cache: rebuild it (deliberate best effort).
            cache, exists = self.cache_labels(cache_path, prefix), False  # run cache ops

        # Display cache
        nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupt, total
        if exists and LOCAL_RANK in {-1, 0}:
            d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupt"
            tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=BAR_FORMAT)  # display cache results
            if cache['msgs']:
                LOGGER.info('\n'.join(cache['msgs']))  # display warnings
        assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'

        # Read cache: drop the bookkeeping entries so that only per-image records remain
        for k in ('hash', 'version', 'msgs'):
            cache.pop(k)
        labels, shapes, segments = zip(*cache.values())
        self.labels = list(labels)  # label arrays, one per image
        self.segments = list(segments)  # a list (not the tuple zip() returns) so entries can be replaced below
        self.shapes = np.array(shapes, dtype=np.float64)  # image sizes
        self.im_files = list(cache.keys())  # update
        self.label_files = img2label_paths(cache.keys())  # update
        n = len(shapes)  # number of images
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index (np.int was removed in NumPy 1.24)
        nb = bi[-1] + 1  # number of batches
        self.batch = bi  # batch index of image
        self.n = n
        self.indices = range(n)

        # Update labels
        include_class = []  # filter labels to include only these classes (optional)
        include_class_array = np.array(include_class).reshape(1, -1)
        for i, (label, segment) in enumerate(zip(self.labels, self.segments)):
            if include_class:
                j = (label[:, 0:1] == include_class_array).any(1)
                self.labels[i] = label[j]
                if segment:
                    # segment is used as a plain list here (truth-tested, iterated in load_mosaic),
                    # so it cannot be indexed with the boolean mask directly.
                    self.segments[i] = [seg for seg, keep in zip(segment, j) if keep]
            if single_cls:  # single-class training, merge all classes into 0
                # Only the label array is rewritten; the former `segments[i][:, 0] = 0` tuple-indexed a list.
                self.labels[i][:, 0] = 0

        # Rectangular Training
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            irect = ar.argsort()
            self.im_files = [self.im_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.labels = [self.labels[i] for i in irect]
            self.segments = [self.segments[i] for i in irect]  # keep segments aligned with the reordered labels
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

        # Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources)
        self.ims = [None] * n
        self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
        if cache_images:
            gb = 0  # bytes of cached images so far (reported in GB)
            self.im_hw0, self.im_hw = [None] * n, [None] * n
            fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image
            results = ThreadPool(NUM_THREADS).imap(fcn, range(n))
            pbar = tqdm(enumerate(results), total=n, bar_format=BAR_FORMAT, disable=LOCAL_RANK > 0)
            for i, x in pbar:
                if cache_images == 'disk':
                    gb += self.npy_files[i].stat().st_size
                else:  # 'ram'
                    self.ims[i], self.im_hw0[i], self.im_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
                    gb += self.ims[i].nbytes
                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
            pbar.close()
    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
        """Verify every image/label pair, build the cache dict and try to save it to `path`.

        Returns the dict: one `[labels, shape, segments]` entry per accepted image file,
        plus the 'hash', 'results', 'msgs' and 'version' bookkeeping entries. A failure
        to write the cache file is logged as a warning and does not raise.
        """
        records = {}
        msgs = []  # warnings reported by the verifier
        nm = nf = ne = nc = 0  # number missing, found, empty, corrupt
        desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."

        with Pool(NUM_THREADS) as pool:
            jobs = zip(self.im_files, self.label_files, repeat(prefix))
            pbar = tqdm(pool.imap(verify_image_label, jobs),
                        desc=desc,
                        total=len(self.im_files),
                        bar_format=BAR_FORMAT)
            for result in pbar:
                im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg = result
                nm, nf, ne, nc = nm + nm_f, nf + nf_f, ne + ne_f, nc + nc_f
                if im_file:
                    records[im_file] = [lb, shape, segments]
                if msg:
                    msgs.append(msg)
                pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupt"
        pbar.close()

        if msgs:
            LOGGER.info('\n'.join(msgs))
        if nf == 0:
            LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')

        records['hash'] = get_hash(self.label_files + self.im_files)
        records['results'] = nf, nm, ne, nc, len(self.im_files)
        records['msgs'] = msgs  # warnings
        records['version'] = self.cache_version  # cache version

        try:
            np.save(path, records)  # np.save appends '.npy' to the file name
            path.with_suffix('.cache.npy').rename(path)  # so strip that suffix again
            LOGGER.info(f'{prefix}New cache created: {path}')
        except Exception as e:
            LOGGER.warning(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # not writeable
        return records
    168. def __len__(self):
    169. return len(self.im_files)
    170. # def __iter__(self):
    171. # self.count = -1
    172. # print('ran dataset iter')
    173. # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
    174. # return self
    def __getitem__(self, index):
        """Load one sample and return `(image_tensor, labels_out, image_path, shapes)`.

        `labels_out` has six columns; column 0 is left at zero here and is filled with
        the image index by the collate functions. `shapes` is None for mosaic samples.
        """
        index = self.indices[index]  # linear, shuffled, or image_weights
        hyp = self.hyp
        use_mosaic = self.mosaic and random.random() < hyp['mosaic']

        if use_mosaic:
            # Mosaic augmentation, optionally blended with a second mosaic (MixUp)
            img, labels = self.load_mosaic(index)
            shapes = None
            if random.random() < hyp['mixup']:
                partner = random.randint(0, self.n - 1)
                img, labels = mixup(img, labels, *self.load_mosaic(partner))
        else:
            # Single image path: load, letterbox, then convert the labels to pixel xyxy
            img, (h0, w0), (h, w) = self.load_image(index)

            if self.rect:
                shape = self.batch_shapes[self.batch[index]]  # final letterboxed shape
            else:
                shape = self.img_size
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

            if self.augment:
                img, labels = random_perspective(img,
                                                 labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

        nl = len(labels)  # number of labels
        if nl:
            # pixel x1, y1, x2, y2 back to normalized x, y, w, h
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)

        if self.augment:
            # Albumentations
            img, labels = self.albumentations(img, labels)
            nl = len(labels)  # update after albumentations

            # HSV color-space (return value unused; presumably modifies img in place)
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Flip up-down: mirror the image and the normalized y centre
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    labels[:, 2] = 1 - labels[:, 2]

            # Flip left-right: mirror the image and the normalized x centre
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1] = 1 - labels[:, 1]

        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # HWC to CHW, BGR to RGB; the reversed view is non-contiguous, so copy it into contiguous memory
        chw = img.transpose((2, 0, 1))[::-1]
        chw = np.ascontiguousarray(chw)

        return torch.from_numpy(chw), labels_out, self.im_files[index], shapes
    def load_image(self, i):
        """Load image `i` and return `(im, (h0, w0), resized_hw)`.

        A RAM-cached image is returned together with its stored sizes. Otherwise the
        image is read from its *.npy file if that exists, else decoded with OpenCV, and
        resized so that its longer side equals `self.img_size`.
        """
        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]

        if im is not None:  # cached in RAM
            return self.ims[i], self.im_hw0[i], self.im_hw[i]  # im, hw_original, hw_resized

        if fn.exists():  # load npy
            im = np.load(fn)
        else:  # read image
            im = cv2.imread(f)  # BGR
            assert im is not None, f'Image Not Found {f}'

        h0, w0 = im.shape[:2]  # orig hw
        r = self.img_size / max(h0, w0)  # ratio
        if r != 1:  # if sizes are not equal
            if self.augment or r > 1:
                interp = cv2.INTER_LINEAR
            else:
                interp = cv2.INTER_AREA
            new_wh = (int(w0 * r), int(h0 * r))
            im = cv2.resize(im, new_wh, interpolation=interp)
        return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
    def cache_images_to_disk(self, i):
        """Save image `i` as an *.npy file for faster loading, unless that file already exists."""
        npy_path = self.npy_files[i]
        if npy_path.exists():
            return
        decoded = cv2.imread(self.im_files[i])
        np.save(npy_path.as_posix(), decoded)
    def load_mosaic(self, index):
        """YOLOv5 4-mosaic loader: combine image `index` with 3 random images.

        The four tiles are pasted around a random centre on a (2s, 2s) canvas filled with
        114, labels are shifted into canvas coordinates and clipped to [0, 2s], then
        copy_paste and random_perspective are applied. Returns `(img4, labels4)`.
        """
        s = self.img_size
        canvas = 2 * s
        yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border)  # mosaic center x, y
        indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
        random.shuffle(indices)

        labels4, segments4 = [], []
        for i, idx in enumerate(indices):
            # Load image (load_image resizes it according to self.img_size)
            img, _, (h, w) = self.load_image(idx)

            if i == 0:
                img4 = np.full((canvas, canvas, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles

            # Tiles 0 and 1 sit above the centre, tiles 0 and 2 sit left of it.
            # "a" coordinates index the big canvas, "b" coordinates index the tile itself.
            if i in (0, 2):  # left column: the tile's right edge touches xc
                x1a, x2a = max(xc - w, 0), xc
                x1b, x2b = w - (x2a - x1a), w
            else:  # right column: the tile's left edge touches xc
                x1a, x2a = xc, min(xc + w, canvas)
                x1b, x2b = 0, min(w, x2a - x1a)
            if i in (0, 1):  # top row: the tile's bottom edge touches yc
                y1a, y2a = max(yc - h, 0), yc
                y1b, y2b = h - (y2a - y1a), h
            else:  # bottom row: the tile's top edge touches yc
                y1a, y2a = yc, min(canvas, yc + h)
                y1b, y2b = 0, min(y2a - y1a, h)

            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
            padw = x1a - x1b
            padh = y1a - y1b

            # Labels: normalized xywh to pixel xyxy, shifted by the tile offset
            labels, segments = self.labels[idx].copy(), self.segments[idx].copy()
            if labels.size:
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)
                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
            labels4.append(labels)
            segments4.extend(segments)

        # Concat/clip labels to the canvas range [0, 2s]
        labels4 = np.concatenate(labels4, 0)
        for coords in (labels4[:, 1:], *segments4):
            np.clip(coords, 0, canvas, out=coords)  # clip when using random_perspective()
        # img4, labels4 = replicate(img4, labels4)  # replicate

        # Augment: copy-paste, then rotation / translation / scaling / shear / perspective
        img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
        img4, labels4 = random_perspective(img4,
                                           labels4,
                                           segments4,
                                           degrees=self.hyp['degrees'],
                                           translate=self.hyp['translate'],
                                           scale=self.hyp['scale'],
                                           shear=self.hyp['shear'],
                                           perspective=self.hyp['perspective'],
                                           border=self.mosaic_border)  # border to remove
        return img4, labels4
    def load_mosaic9(self, index):
        """YOLOv5 9-mosaic loader: combine image `index` with 8 random images.

        Tiles are placed around a centre tile on a (3s, 3s) canvas filled with 114, a
        random (2s, 2s) window is cropped out, labels are shifted and clipped to that
        window, and random_perspective is applied. Returns `(img9, labels9)`.
        """
        s = self.img_size
        labels9, segments9 = [], []
        indices = [index] + random.choices(self.indices, k=8)  # 8 additional image indices
        random.shuffle(indices)
        hp, wp = -1, -1  # height, width of the previous tile

        for i, idx in enumerate(indices):
            # Load image
            img, _, (h, w) = self.load_image(idx)

            # Tile box on the canvas as (xmin, ymin, xmax, ymax); h0/w0 are the centre tile's size
            if i == 0:  # center
                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
                h0, w0 = h, w
                box = s, s, s + w, s + h
            elif i == 1:  # top
                box = s, s - h, s + w, s
            elif i == 2:  # top right
                box = s + wp, s - h, s + wp + w, s
            elif i == 3:  # right
                box = s + w0, s, s + w0 + w, s + h
            elif i == 4:  # bottom right
                box = s + w0, s + hp, s + w0 + w, s + hp + h
            elif i == 5:  # bottom
                box = s + w0 - w, s + h0, s + w0, s + h0 + h
            elif i == 6:  # bottom left
                box = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
            elif i == 7:  # left
                box = s - w, s + h0 - h, s, s + h0
            elif i == 8:  # top left
                box = s - w, s + h0 - hp - h, s, s + h0 - hp

            padx, pady = box[:2]
            x1, y1, x2, y2 = (max(v, 0) for v in box)  # allocate coords (negative values clamped to 0)

            # Labels: normalized xywh to pixel xyxy, shifted by the tile offset
            labels, segments = self.labels[idx].copy(), self.segments[idx].copy()
            if labels.size:
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady)
                segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
            labels9.append(labels)
            segments9.extend(segments)

            # Image
            img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
            hp, wp = h, w  # remember this tile's size for the next placement

        # Offset: crop a random (2s, 2s) window out of the canvas
        yc, xc = (int(random.uniform(0, s)) for _ in self.mosaic_border)  # mosaic center x, y
        img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]

        # Concat/clip labels, expressed relative to the cropped window
        labels9 = np.concatenate(labels9, 0)
        labels9[:, [1, 3]] -= xc
        labels9[:, [2, 4]] -= yc
        offset = np.array([xc, yc])  # centers
        segments9 = [x - offset for x in segments9]

        for coords in (labels9[:, 1:], *segments9):
            np.clip(coords, 0, 2 * s, out=coords)  # clip when using random_perspective()
        # img9, labels9 = replicate(img9, labels9)  # replicate

        # Augment
        img9, labels9 = random_perspective(img9,
                                           labels9,
                                           segments9,
                                           degrees=self.hyp['degrees'],
                                           translate=self.hyp['translate'],
                                           scale=self.hyp['scale'],
                                           shear=self.hyp['shear'],
                                           perspective=self.hyp['perspective'],
                                           border=self.mosaic_border)  # border to remove
        return img9, labels9
    378. @staticmethod
    379. def collate_fn(batch):
    380. im, label, path, shapes = zip(*batch) # transposed
    381. for i, lb in enumerate(label):
    382. lb[:, 0] = i # add target image index for build_targets()
    383. return torch.stack(im, 0), torch.cat(label, 0), path, shapes
    384. @staticmethod
    385. def collate_fn4(batch):
    386. img, label, path, shapes = zip(*batch) # transposed
    387. n = len(shapes) // 4
    388. im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
    389. ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
    390. wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
    391. s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]]) # scale
    392. for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW
    393. i *= 4
    394. if random.random() < 0.5:
    395. im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear',
    396. align_corners=False)[0].type(img[i].type())
    397. lb = label[i]
    398. else:
    399. im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
    400. lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
    401. im4.append(im)
    402. label4.append(lb)
    403. for i, lb in enumerate(label4):
    404. lb[:, 0] = i # add target image index for build_targets()
    405. return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4

    2.模型配置文件读取 

            depth_multiple表示网络的深度系数:对于number大于1的层,其重复次数乘以该系数(四舍五入,且至少为1);width_multiple表示网络的宽度系数:配置文件中各层的输出通道数乘以该系数(并调整为8的倍数)即可得到该层实际的输出通道数。YOLO提供了不同版本的模型,不同版本之间最大的不同就在于以上两个系数。

            anchor表示网络的先验框

            对于网络参数,from表示输入来自于哪一层的输出(-1表示上一层),number表示该模块的重复次数,module表示网络层(模块)的名称,args表示构造该模块时传入的参数(如输出通道数、卷积核大小、步长等)。

    1. # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
    2. # Parameters
    3. nc: 80 # number of classes
    4. depth_multiple: 0.33 # model depth multiple
    5. width_multiple: 0.50 # layer channel multiple
    6. anchors:
    7. - [10,13, 16,30, 33,23] # P3/8
    8. - [30,61, 62,45, 59,119] # P4/16
    9. - [116,90, 156,198, 373,326] # P5/32
    10. # YOLOv5 v6.0 backbone
    11. backbone:
    12. # [from, number, module, args]
    13. [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    14. [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    15. [-1, 3, C3, [128]],
    16. [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    17. [-1, 6, C3, [256]],
    18. [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    19. [-1, 9, C3, [512]],
    20. [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    21. [-1, 3, C3, [1024]],
    22. [-1, 1, SPPF, [1024, 5]], # 9
    23. ]
    24. # YOLOv5 v6.0 head
    25. head:
    26. [[-1, 1, Conv, [512, 1, 1]],
    27. [-1, 1, nn.Upsample, [None, 2, 'nearest']],
    28. [[-1, 6], 1, Concat, [1]], # cat backbone P4
    29. [-1, 3, C3, [512, False]], # 13
    30. [-1, 1, Conv, [256, 1, 1]],
    31. [-1, 1, nn.Upsample, [None, 2, 'nearest']],
    32. [[-1, 4], 1, Concat, [1]], # cat backbone P3
    33. [-1, 3, C3, [256, False]], # 17 (P3/8-small)
    34. [-1, 1, Conv, [256, 3, 2]],
    35. [[-1, 14], 1, Concat, [1]], # cat head P4
    36. [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
    37. [-1, 1, Conv, [512, 3, 2]],
    38. [[-1, 10], 1, Concat, [1]], # cat head P5
    39. [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
    40. [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
    41. ]

    读取代码:

    def parse_model(d, ch):  # model_dict, input_channels(3)
        """Build the layer stack described by the model dict `d`.

        Args:
            d: Dict with 'anchors', 'nc', 'depth_multiple', 'width_multiple', 'backbone'
                and 'head' entries; layer rows are `[from, number, module, args]`.
            ch: List of channel counts; its last item is the input channel count. After
                the first layer it is replaced by a list of per-layer output channels.

        Returns:
            `(nn.Sequential of all layers, sorted list of layer indices to keep)`.
        """
        # NOTE: local names are kept as they are on purpose. eval() below resolves config
        # strings such as 'nc' and 'anchors' against this function's local variables.
        LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")  # table header
        anchors = d['anchors']
        nc = d['nc']
        gd = d['depth_multiple']  # depth gain
        gw = d['width_multiple']  # width gain
        if isinstance(anchors, list):
            na = len(anchors[0]) // 2  # number of anchors
        else:
            na = anchors
        no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

        layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
        for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
            # NOTE(review): eval() runs text from the model config; only use trusted config files.
            if isinstance(m, str):
                m = eval(m)  # eval strings
            for j, a in enumerate(args):
                if not isinstance(a, str):
                    continue
                try:
                    args[j] = eval(a)  # eval strings
                except NameError:
                    pass  # not a known name (e.g. 'nearest'): keep the plain string

            if n > 1:
                n = max(round(n * gd), 1)  # depth gain
            n_ = n  # repeat count shown in the log, kept before n may be reset below

            if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
                     BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x):
                c1, c2 = ch[f], args[0]
                if c2 != no:  # if not output
                    c2 = make_divisible(c2 * gw, 8)  # scaled output channels

                args = [c1, c2, *args[1:]]
                if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]:
                    # These modules take the repeat count as an argument, so build them only once
                    args.insert(2, n)  # number of repeats
                    n = 1
            elif m is nn.BatchNorm2d:
                args = [ch[f]]  # channel count of the source layer
            elif m is Concat:
                c2 = sum(ch[x] for x in f)  # sum of the input channel counts
            elif m is Detect:
                args.append([ch[x] for x in f])  # list of input channel counts
                if isinstance(args[1], int):  # an int means "number of anchors"
                    args[1] = [list(range(args[1] * 2))] * len(f)  # placeholder anchors
            elif m is Contract:
                c2 = ch[f] * args[0] ** 2
            elif m is Expand:
                c2 = ch[f] // args[0] ** 2
            else:
                c2 = ch[f]

            # Build the module for this row
            if n > 1:
                m_ = nn.Sequential(*(m(*args) for _ in range(n)))
            else:
                m_ = m(*args)
            t = str(m)[8:-2].replace('__main__.', '')  # module type
            np = sum(x.numel() for x in m_.parameters())  # number params
            m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
            LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}')  # print

            # Remember every source index other than -1 (taken modulo i) in the savelist
            for x in ([f] if isinstance(f, int) else f):
                if x != -1:
                    save.append(x % i)
            layers.append(m_)
            if i == 0:
                ch = []
            ch.append(c2)
        return nn.Sequential(*layers), sorted(save)

     3.网络结构(yolov5x.pt)

    (1)前两层

            网络前两层均为卷积层:第一层为6*6的卷积,stride为2,padding为2;第二层为3*3的卷积,stride为2,padding为1。特征维度变换为3*640*640-->80*320*320-->160*160*160

    (2) C3模块

            C3层有两个路径:第一条是短路径,只有一个1*1的卷积,将特征图通道数从c1降维成c1*e;第二条路径是n个bottleneck模块,每个bottleneck包含两个卷积,第一个卷积为1*1的卷积,将通道数降维(c1-->c1*e),第二个为3*3的卷积,将通道数由c1*e-->c2。两条路径的结果拼接后,再做一层1*1的卷积,调整通道数。其中,backbone中的bottleneck都采用残差连接(head中的C3参数为False,不使用残差连接)。经过C3模块,特征图大小不变,通道数由c1变成c2;最后,再经过后面一层3*3、stride为2的卷积,将特征图大小减半。

            

     (3)SPPF模块

            SPPF是SPP的快速版本,有四条路径:第一条路径,经过1*1的卷积,将特征图通道数由c1变成c1//2【1】;第二条路径,对【1】的结果做5*5的最大池化,padding为2【2】;第三条路径,对【2】的结果做5*5的最大池化,padding为2【3】;第四条路径,对【3】的结果做5*5的最大池化,padding为2【4】。将四条路径的结果在通道维度上拼接,再做一层1*1的卷积。

     4.PAN流程

            PAN实现了双向通信,将高维特征与低维特征进行融合,三个特征图大小分别下采样8倍,16倍和32倍。首先,将高维特征进行上采样,与低维特征融合,然后再通过卷积实现从低维特征到高维特征的融合 

            假设网络输入为256*256,网络各层输出如下:对于上采样部分,

            对于缩小32倍的特征图:将第10层输出进行上采样后,与第6层输出concat,输出大小为1,1280,16,16【1】

            对于缩小8倍的特征图,将【1】的结果,经过一层C3层,此时维度变为:将通道数由1280变为640,再经过一层卷积层,将通道数变为320,再经过上采样,与第四层的结果相连接,输出维度为1,640,32,32【2】

            下采样部分:

            对于【2】,首先,经过一层C3层,将维度变为1,320,32,32,然后经过一层3*3的卷积,与第14层的结果相连,即【1】经过C3和1*1的卷积后的结果相连,维度为1,640,16,16。【3】

            对于【3】,先经过一层C3层,再经过一层3*3、stride为2的卷积(通道数保持不变,特征图大小减半),然后与第10层的结果相连,即缩小32倍的特征图在上采样之前的结果。此时特征图为1,1280,8,8,再经过一层C3模块。【4】

            对于【2】【3】【4】层结果,分别做预测  

    1. models.common.Conv 网络层数 0 输出: torch.Size([1, 80, 128, 128])
    2. models.common.Conv 网络层数 1 输出: torch.Size([1, 160, 64, 64])
    3. models.common.C3 网络层数 2 输出: torch.Size([1, 160, 64, 64])
    4. models.common.Conv 网络层数 3 输出: torch.Size([1, 320, 32, 32])
    5. models.common.C3 网络层数 4 输出: torch.Size([1, 320, 32, 32])
    6. models.common.Conv 网络层数 5 输出: torch.Size([1, 640, 16, 16])
    7. models.common.C3 网络层数 6 输出: torch.Size([1, 640, 16, 16])
    8. models.common.Conv 网络层数 7 输出: torch.Size([1, 1280, 8, 8])
    9. models.common.C3 网络层数 8 输出: torch.Size([1, 1280, 8, 8])
    10. models.common.SPPF 网络层数 9 输出: torch.Size([1, 1280, 8, 8])
    11. models.common.Conv 网络层数 10 输出: torch.Size([1, 640, 8, 8])
    12. torch.nn.modules.upsampling.Upsample 网络层数 11 输出: torch.Size([1, 640, 16, 16])
    13. models.common.Concat 网络层数 12 输出: torch.Size([1, 1280, 16, 16])
    14. models.common.C3 网络层数 13 输出: torch.Size([1, 640, 16, 16])
    15. models.common.Conv 网络层数 14 输出: torch.Size([1, 320, 16, 16])
    16. torch.nn.modules.upsampling.Upsample 网络层数 15 输出: torch.Size([1, 320, 32, 32])
    17. models.common.Concat 网络层数 16 输出: torch.Size([1, 640, 32, 32])
    18. models.common.C3 网络层数 17 输出: torch.Size([1, 320, 32, 32])
    19. models.common.Conv 网络层数 18 输出: torch.Size([1, 320, 16, 16])
    20. models.common.Concat 网络层数 19 输出: torch.Size([1, 640, 16, 16])
    21. models.common.C3 网络层数 20 输出: torch.Size([1, 640, 16, 16])
    22. models.common.Conv 网络层数 21 输出: torch.Size([1, 640, 8, 8])
    23. models.common.Concat 网络层数 22 输出: torch.Size([1, 1280, 8, 8])
    24. models.common.C3 网络层数 23 输出: torch.Size([1, 1280, 8, 8])

    代码如下:

    1. class Model(nn.Module):
    2. # YOLOv5 model
    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
        """Build the model from a config dict or a *.yaml path.

        Args:
            cfg: Model dict, or path to a YAML file that is loaded into one.
            ch: Input channels, used when the config has no 'ch' entry.
            nc: If truthy and different from the config value, overrides 'nc'.
            anchors: If truthy, overrides the config 'anchors' (after round()).
        """
        super().__init__()
        if not isinstance(cfg, dict):  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg, encoding='ascii', errors='ignore') as f:
                self.yaml = yaml.safe_load(f)  # model dict
        else:
            self.yaml = cfg  # model dict

        # Define model
        ch = self.yaml.get('ch', ch)  # input channels
        self.yaml['ch'] = ch
        if nc and nc != self.yaml['nc']:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        if anchors:
            LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
            self.yaml['anchors'] = round(anchors)  # override yaml value

        # Build the layers from a copy of the config; self.save lists the layer outputs to keep
        built = parse_model(deepcopy(self.yaml), ch=[ch])
        self.model, self.save = built  # model, savelist
        self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
        self.inplace = self.yaml.get('inplace', True)

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            s = 256  # 2x min stride
            m.inplace = self.inplace
            # A dummy forward pass gives each output's height; stride = input size / output height
            probe = torch.zeros(1, ch, s, s)
            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(probe)])  # forward
            check_anchor_order(m)  # must be in pixel-space (not grid-space)
            m.anchors /= m.stride.view(-1, 1, 1)  # anchors divided by the stride of their output
            self.stride = m.stride
            self._initialize_biases()  # only run once

        # Init weights, biases
        initialize_weights(self)
        self.info()
        LOGGER.info('')
    def forward(self, x, augment=False, profile=False, visualize=False):
        """Run augmented inference when `augment` is truthy, otherwise a single forward pass."""
        if not augment:
            return self._forward_once(x, profile, visualize)  # single-scale inference, train
        return self._forward_augment(x)  # augmented inference, None
    def _forward_augment(self, x):
        """Run the model on three scaled (and one flipped) variants of `x` and merge the predictions.

        Returns `(predictions concatenated along dim 1, None)`.
        """
        img_size = x.shape[-2:]  # height, width
        variants = ((1, None), (0.83, 3), (0.67, None))  # (scale, flip dim); flips: 2-ud, 3-lr
        y = []  # outputs
        for si, fi in variants:
            source = x.flip(fi) if fi else x
            xi = scale_img(source, si, gs=int(self.stride.max()))
            yi = self._forward_once(xi)[0]  # forward
            # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
            y.append(self._descale_pred(yi, fi, si, img_size))
        y = self._clip_augmented(y)  # clip augmented tails
        return torch.cat(y, 1), None  # augmented inference, train
    56. def _forward_once(self, x, profile=False, visualize=False):
    57. y, dt = [], [] # outputs
    58. for m in self.model:
    59. if m.f != -1: # if not from previous layer
    60. x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
    61. if profile:
    62. self._profile_one_layer(m, x, dt)
    63. x = m(x) # run
    64. print(m.type,"网络层数 ",m.i,"输出:",x.shape)
    65. y.append(x if m.i in self.save else None) # save output
    66. if visualize:
    67. feature_visualization(x, m.type, m.i, save_dir=visualize)
    68. return x
    69. def _descale_pred(self, p, flips, scale, img_size):
    70. # de-scale predictions following augmented inference (inverse operation)
    71. if self.inplace:
    72. p[..., :4] /= scale # de-scale
    73. if flips == 2:
    74. p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
    75. elif flips == 3:
    76. p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
    77. else:
    78. x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
    79. if flips == 2:
    80. y = img_size[0] - y # de-flip ud
    81. elif flips == 3:
    82. x = img_size[1] - x # de-flip lr
    83. p = torch.cat((x, y, wh, p[..., 4:]), -1)
    84. return p
    85. def _clip_augmented(self, y):
    86. # Clip YOLOv5 augmented inference tails
    87. nl = self.model[-1].nl # number of detection layers (P3-P5)
    88. g = sum(4 ** x for x in range(nl)) # grid points
    89. e = 1 # exclude layer count
    90. i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices
    91. y[0] = y[0][:, :-i] # large
    92. i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
    93. y[-1] = y[-1][:, i:] # small
    94. return y
    def _profile_one_layer(self, m, x, dt):
        """Time layer `m` on input `x` over 10 runs, append the result to `dt` and log one table row.

        The header row is logged for the first layer and a total row after the Detect layer.
        """
        c = isinstance(m, Detect)  # is final layer, copy input as inplace fix
        if thop:
            profiled_input = x.copy() if c else x
            o = thop.profile(m, inputs=(profiled_input,), verbose=False)[0] / 1E9 * 2  # FLOPs
        else:
            o = 0
        t = time_sync()
        for _ in range(10):
            m(x.copy() if c else x)
        elapsed = time_sync() - t
        dt.append(elapsed * 100)  # seconds for 10 runs, converted to ms per run
        if m == self.model[0]:
            LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
        LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
        if c:
            LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
    107. def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
    108. # https://arxiv.org/abs/1708.02002 section 3.3
    109. # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
    110. m = self.model[-1] # Detect() module
    111. for mi, s in zip(m.m, m.stride): # from
    112. b = mi.bias.view(m.na, -1).detach() # conv.bias(255) to (3,85)
    113. b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
    114. b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # cls
    115. mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
    def _print_biases(self):
        """Log a summary of the bias values of each Detect() convolution.

        For every conv in ``self.model[-1].m`` the bias is reshaped to
        ``(na, -1)`` and transposed. One row is logged containing the conv's
        ``weight.shape[1]``, the per-row means of the first five rows, and the
        overall mean of the remaining rows.
        """
        detect = self.model[-1]  # Detect() module
        row_format = '%6g Conv2d.bias:' + '%10.3g' * 6
        for conv in detect.m:  # from
            bias = conv.bias.detach().view(detect.na, -1).T  # conv.bias(255) to (3,85)
            in_channels = conv.weight.shape[1]
            leading_means = bias[:5].mean(1).tolist()
            remaining_mean = bias[5:].mean()
            LOGGER.info(row_format % (in_channels, *leading_means, remaining_mean))
    122. # def _print_weights(self):
    123. # for m in self.model.modules():
    124. # if type(m) is Bottleneck:
    125. # LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
    def fuse(self):
        """Fuse Conv2d() + BatchNorm2d() layers of the model.

        For every ``Conv`` or ``DWConv`` submodule that still has a ``bn``
        attribute, the conv is replaced by ``fuse_conv_and_bn(conv, bn)``, the
        ``bn`` attribute is deleted, and ``forward`` is switched to
        ``forward_fuse``. Model information is printed afterwards.

        Returns:
            ``self``, so the call can be chained.
        """
        LOGGER.info('Fusing layers... ')
        for module in self.model.modules():
            fusable = isinstance(module, (Conv, DWConv)) and hasattr(module, 'bn')
            if not fusable:
                continue
            module.conv = fuse_conv_and_bn(module.conv, module.bn)  # update conv
            del module.bn  # remove batchnorm
            module.forward = module.forward_fuse  # update forward
        self.info()
        return self
    def info(self, verbose=False, img_size=640):
        """Print model information by delegating to ``model_info``.

        Args:
            verbose: Forwarded unchanged as the second argument of ``model_info``.
            img_size: Forwarded unchanged as the third argument of ``model_info``.
        """
        model_info(self, verbose, img_size)
    def _apply(self, fn):
        """Apply ``fn`` to model tensors that are not parameters or buffers.

        Used for to(), cpu(), cuda(), half(). After the parent class's
        ``_apply`` has run, ``fn`` is also applied to the ``stride``, ``grid``
        and (when it is a list) ``anchor_grid`` attributes of the last layer,
        provided that layer is a ``Detect`` instance.

        Returns:
            The object returned by the parent ``_apply``.
        """
        self = super()._apply(fn)
        head = self.model[-1]  # Detect()
        if not isinstance(head, Detect):
            return self

        head.stride = fn(head.stride)
        head.grid = [fn(g) for g in head.grid]
        if isinstance(head.anchor_grid, list):
            head.anchor_grid = [fn(a) for a in head.anchor_grid]
        return self

    4.训练参数解释 

     

    --weights:初始权重
    --cfg:模型配置文件
    --data:数据配置文件
    --hyp:学习率等超参数文件
    --epochs:迭代次数
    --imgsz:图像大小
    --rect:长方形训练策略,不resize成正方形
    --resume:恢复最近一次训练,从last.pt继续
    --nosave:只保存最后的检查点
    --noval:仅在最后一次epochs进行验证
    --noautoanchor:禁用AutoAnchor
    --noplots:不保存绘图文件
    --evolve:进化超参数,共迭代x代(generations)
    --bucket:gsutil存储桶(用于上传/下载结果)
    --cache:在ram或硬盘中缓存数据
    --image-weights:使用加权图像选择进行训练(类别加权)
    --single-cls:单类别标签置0 
    --device:gpu设置  
    --multi-scale:改变img大小+/-50%,能够被32整除
    --optimizer:优化器(如SGD、Adam、AdamW)
    --sync-bn:使用SyncBatchNorm,仅在DDP模式中支持,跨gpu时使用
    --workers:最大 dataloader 的线程数 (per RANK in DDP mode)
    --project:保存文件的地址
    --name:保存日志文件的名称
    --exist-ok:允许使用已存在的项目/名称,不递增编号
    --quad:使用quad dataloader
    --cos-lr:余弦学习率调度
    --label-smoothing:标签平滑系数(epsilon)
    --patience:早停(EarlyStopping)的耐心值,连续多少个epoch没有提升就停止训练
    --freeze:迁移学习,冻结训练
    --save-period:每x个周期保存一个检查点(如果<1,则禁用)
    --seed:全局训练随机种子
    --local_rank:DDP多GPU参数,由分布式启动器自动设置,请勿手动修改

     

           

  • 相关阅读:
    iOS代码混淆工具推荐:IPA Guard详细介绍
    CSRF防范介绍之一
    无代码开发打印模板入门教程
    java数据结构与算法刷题-----LeetCode572. 另一棵树的子树(经典题,树字符串化KMP)
    【历史上的今天】8 月 15 日:苹果推出初代 iMac;谷歌收购摩托罗拉移动;Fuchsia 首次发布
    【Java-框架-SpringMVC】(01) SpringMVC框架的简单创建与使用,快速上手 - 简易版
    动态内存开辟(上)
    算法随想录算法训练营第四十三天|300.最长递增子序列 674. 最长连续递增序列 718. 最长重复子数组
    【毕业设计】远程智能浇花灌溉系统 - stm32 单片机 嵌入式 物联网
    Git的使用
  • 原文地址:https://blog.csdn.net/qq_52053775/article/details/126425760