• YOLOv5 with Grad-CAM Heatmap Visualization


    I. Modify the forward function of the Detect class in models/yolo.py as follows:

    logits_ = []  # modification 1
    logits = x[i][..., 5:]  # modification 2
    logits_.append(logits.view(bs, -1, self.no - 5))  # modification 3
    return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)  # modification 4
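    For orientation, the four lines above slot into Detect.forward roughly as sketched below. This is a sketch assuming the v5.x-era layout of models/yolo.py (grid handling differs slightly across YOLOv5 releases), so check it against your copy; only the lines marked "modification" are new:

    def forward(self, x):
        z = []  # inference output
        logits_ = []  # modification 1
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
            if not self.training:  # inference
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
                logits = x[i][..., 5:]  # modification 2: raw (pre-sigmoid) class scores
                y = x[i].sigmoid()
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))
                logits_.append(logits.view(bs, -1, self.no - 5))  # modification 3
        return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)  # modification 4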

    II. Add a yolov5_object_detector.py file in the models folder:

    import time

    import cv2
    import numpy as np
    import torch
    import torch.nn as nn
    import torchvision

    from models.experimental import attempt_load
    from utils.datasets import letterbox
    from utils.general import xywh2xyxy
    from utils.metrics import box_iou


    class YOLOV5TorchObjectDetector(nn.Module):
        def __init__(self,
                     model_weight,
                     device,
                     img_size,
                     names=None,
                     mode='eval',
                     confidence=0.45,
                     iou_thresh=0.45,
                     agnostic_nms=False):
            super(YOLOV5TorchObjectDetector, self).__init__()
            self.device = device
            self.model = None
            self.img_size = img_size
            self.mode = mode
            self.confidence = confidence
            self.iou_thresh = iou_thresh
            self.agnostic = agnostic_nms
            self.model = attempt_load(model_weight, map_location=device, inplace=False, fuse=False)
            self.model.requires_grad_(True)
            self.model.to(device)
            if self.mode == 'train':
                self.model.train()
            else:
                self.model.eval()
            # fetch the names
            if names is None:
                self.names = ['your dataset classname']
            else:
                self.names = names
            # preventing cold start
            img = torch.zeros((1, 3, *self.img_size), device=device)
            self.model(img)

        @staticmethod
        def non_max_suppression(prediction, logits, conf_thres=0.3, iou_thres=0.45, classes=None, agnostic=False,
                                multi_label=False, labels=(), max_det=300):
            """Runs Non-Maximum Suppression (NMS) on inference and logits results

            Returns:
                list of detections, an (n, 6) tensor per image [xyxy, conf, cls],
                and the pruned input logits, (n, number-of-classes) per image
            """
            nc = prediction.shape[2] - 5  # number of classes
            xc = prediction[..., 4] > conf_thres  # candidates

            # Checks
            assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
            assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

            # Settings
            min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
            max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
            time_limit = 10.0  # seconds to quit after
            redundant = True  # require redundant detections
            multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
            merge = False  # use merge-NMS

            t = time.time()
            output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
            logits_output = [torch.zeros((0, nc), device=logits.device)] * logits.shape[0]
            # logits_output = [torch.zeros((0, 80), device=logits.device)] * logits.shape[0]
            for xi, (x, log_) in enumerate(zip(prediction, logits)):  # image index, image inference
                # Apply constraints
                # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
                x = x[xc[xi]]  # confidence
                log_ = log_[xc[xi]]

                # Cat apriori labels if autolabelling
                if labels and len(labels[xi]):
                    l = labels[xi]
                    v = torch.zeros((len(l), nc + 5), device=x.device)
                    v[:, :4] = l[:, 1:5]  # box
                    v[:, 4] = 1.0  # conf
                    v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
                    x = torch.cat((x, v), 0)

                # If none remain process next image
                if not x.shape[0]:
                    continue

                # Compute conf
                x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

                # Box (center x, center y, width, height) to (x1, y1, x2, y2)
                box = xywh2xyxy(x[:, :4])

                # Detections matrix nx6 (xyxy, conf, cls)
                if multi_label:
                    i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
                    x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
                else:  # best class only
                    conf, j = x[:, 5:].max(1, keepdim=True)
                    x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
                    log_ = log_[conf.view(-1) > conf_thres]

                # Filter by class
                if classes is not None:
                    x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

                # Check shape
                n = x.shape[0]  # number of boxes
                if not n:  # no boxes
                    continue
                elif n > max_nms:  # excess boxes
                    x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

                # Batched NMS
                c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
                boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
                i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
                if i.shape[0] > max_det:  # limit detections
                    i = i[:max_det]
                if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
                    # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                    iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                    weights = iou * scores[None]  # box weights
                    x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                    if redundant:
                        i = i[iou.sum(1) > 1]  # require redundancy

                output[xi] = x[i]
                logits_output[xi] = log_[i]
                assert log_[i].shape[0] == x[i].shape[0]
                if (time.time() - t) > time_limit:
                    print(f'WARNING: NMS time limit {time_limit}s exceeded')
                    break  # time limit exceeded

            return output, logits_output

        @staticmethod
        def yolo_resize(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
            return letterbox(img, new_shape=new_shape, color=color, auto=auto, scaleFill=scaleFill, scaleup=scaleup)

        def forward(self, img):
            prediction, logits, _ = self.model(img, augment=False)
            prediction, logits = self.non_max_suppression(prediction, logits, self.confidence, self.iou_thresh,
                                                          classes=None,
                                                          agnostic=self.agnostic)
            self.boxes, self.class_names, self.classes, self.confidences = [[[] for _ in range(img.shape[0])]
                                                                            for _ in range(4)]
            for i, det in enumerate(prediction):  # detections per image
                if len(det):
                    for *xyxy, conf, cls in det:
                        # cast box coordinates to int
                        bbox = [int(b) for b in xyxy]
                        self.boxes[i].append(bbox)
                        self.confidences[i].append(round(conf.item(), 2))
                        cls = int(cls.item())
                        self.classes[i].append(cls)
                        if self.names is not None:
                            self.class_names[i].append(self.names[cls])
                        else:
                            self.class_names[i].append(cls)
            return [self.boxes, self.classes, self.class_names, self.confidences], logits

        def preprocessing(self, img):
            if len(img.shape) != 4:
                img = np.expand_dims(img, axis=0)
            im0 = img.astype(np.uint8)
            img = np.array([self.yolo_resize(im, new_shape=self.img_size)[0] for im in im0])
            img = img.transpose((0, 3, 1, 2))
            img = np.ascontiguousarray(img)
            img = torch.from_numpy(img).to(self.device)
            img = img / 255.0
            return img
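    As a quick smoke test, the wrapper can be exercised on its own. The weight and image paths below are placeholders rather than part of the original post, and the stand-in class list simply numbers the 80 COCO classes of the stock yolov5s.pt:

    import cv2
    from models.yolov5_object_detector import YOLOV5TorchObjectDetector

    # Placeholder paths -- substitute your own weights and image.
    model = YOLOV5TorchObjectDetector('weights/yolov5s.pt', 'cpu', img_size=(640, 640),
                                      names=[str(i) for i in range(80)])  # stand-in class names
    img = cv2.imread('data/images/bus.jpg')[..., ::-1]  # BGR -> RGB
    torch_img = model.preprocessing(img)
    [boxes, classes, class_names, confidences], logits = model(torch_img)
    print(boxes[0], class_names[0], confidences[0])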

    III. Add a gradcam.py file in the models folder:

    import time

    import torch
    import torch.nn.functional as F


    def find_yolo_layer(model, layer_name):
        """Find yolov5 layer to calculate GradCAM and GradCAM++

        Args:
            model: yolov5 model.
            layer_name (str): the name of layer with its hierarchical information.

        Return:
            target_layer: found layer
        """
        hierarchy = layer_name.split('_')
        target_layer = model.model._modules[hierarchy[0]]
        for h in hierarchy[1:]:
            target_layer = target_layer._modules[h]
        return target_layer


    class YOLOV5GradCAM:
        # initialisation: locate the target layer and register the hooks on it
        def __init__(self, model, layer_name, img_size=(640, 640)):
            self.model = model
            self.gradients = dict()
            self.activations = dict()

            def backward_hook(module, grad_input, grad_output):
                self.gradients['value'] = grad_output[0]
                return None

            def forward_hook(module, input, output):
                self.activations['value'] = output
                return None

            target_layer = find_yolo_layer(self.model, layer_name)
            # capture the layer's activations on the forward pass and its gradients
            # on the backward pass, so we can verify the hooks record correctly
            target_layer.register_forward_hook(forward_hook)
            target_layer.register_full_backward_hook(backward_hook)
            device = 'cuda' if next(self.model.model.parameters()).is_cuda else 'cpu'
            self.model(torch.zeros(1, 3, *img_size, device=device))

        def forward(self, input_img, class_idx=True):
            """
            Args:
                input_img: input image with shape of (1, 3, H, W)

            Return:
                mask: saliency map of the same spatial dimension as the input
                logit: model output
                preds: the object predictions
            """
            saliency_maps = []
            b, c, h, w = input_img.size()
            preds, logits = self.model(input_img)
            for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
                if class_idx:
                    score = logit[cls]
                else:
                    score = logit.max()
                self.model.zero_grad()
                tic = time.time()
                # backpropagate to obtain the gradients
                score.backward(retain_graph=True)
                print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
                gradients = self.gradients['value']
                activations = self.activations['value']
                b, k, u, v = gradients.size()
                alpha = gradients.view(b, k, -1).mean(2)
                weights = alpha.view(b, k, 1, 1)
                saliency_map = (weights * activations).sum(1, keepdim=True)
                saliency_map = F.relu(saliency_map)
                saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
                saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
                saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
                saliency_maps.append(saliency_map)
            return saliency_maps, logits, preds

        def __call__(self, input_img):
            return self.forward(input_img)


    class YOLOV5GradCAMPP(YOLOV5GradCAM):
        def __init__(self, model, layer_name, img_size=(640, 640)):
            super(YOLOV5GradCAMPP, self).__init__(model, layer_name, img_size)

        def forward(self, input_img, class_idx=True):
            saliency_maps = []
            b, c, h, w = input_img.size()
            tic = time.time()
            preds, logits = self.model(input_img)
            print("[INFO] model-forward took: ", round(time.time() - tic, 4), 'seconds')
            for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
                if class_idx:
                    score = logit[cls]
                else:
                    score = logit.max()
                self.model.zero_grad()
                tic = time.time()
                # backpropagate to obtain the gradients
                score.backward(retain_graph=True)
                print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
                gradients = self.gradients['value']  # dS/dA
                activations = self.activations['value']  # A
                b, k, u, v = gradients.size()
                alpha_num = gradients.pow(2)
                alpha_denom = gradients.pow(2).mul(2) + \
                    activations.mul(gradients.pow(3)).view(b, k, u * v).sum(-1, keepdim=True).view(b, k, 1, 1)
                # torch.where(condition, x, y): returns x where the condition holds, y elsewhere;
                # here it guards against division by a zero denominator
                alpha_denom = torch.where(alpha_denom != 0.0, alpha_denom, torch.ones_like(alpha_denom))
                alpha = alpha_num.div(alpha_denom + 1e-7)
                positive_gradients = F.relu(score.exp() * gradients)  # ReLU(dY/dA) == ReLU(exp(S)*dS/dA)
                weights = (alpha * positive_gradients).view(b, k, u * v).sum(-1).view(b, k, 1, 1)
                saliency_map = (weights * activations).sum(1, keepdim=True)
                saliency_map = F.relu(saliency_map)
                saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
                saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
                saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
                saliency_maps.append(saliency_map)
            return saliency_maps, logits, preds
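    For reference, the saliency map computed by YOLOV5GradCAM.forward is the standard Grad-CAM quantity, where $y^c$ is the score backpropagated above (the logit of the detected class), $A^k$ is the k-th channel of the hooked layer's activations, and $Z = u \cdot v$ is its spatial size:

    $$\alpha_k^c = \frac{1}{Z}\sum_{i}\sum_{j}\frac{\partial y^c}{\partial A_{ij}^k}, \qquad L_{\mathrm{Grad\text{-}CAM}}^c = \mathrm{ReLU}\Big(\sum_k \alpha_k^c A^k\Big)$$

    YOLOV5GradCAMPP changes only how the weights are formed, replacing the plain spatial average with the Grad-CAM++ coefficients built from the second- and third-order gradient terms seen in the code.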

    IV. Create a main_gradcam.py file in the repository root:

    import argparse
    import os
    import random
    import time

    import cv2
    import numpy as np

    from models.gradcam import YOLOV5GradCAM, YOLOV5GradCAMPP
    from models.yolov5_object_detector import YOLOV5TorchObjectDetector

    # dataset class names
    names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
             'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
             'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
             'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
             'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
             'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
             'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
             'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
             'hair drier', 'toothbrush']  # class names

    # the three feature layers (17, 20, 23) feeding the Detect head in yolov5s
    target_layers = ['model_17_cv3_act', 'model_20_cv3_act', 'model_23_cv3_act']

    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-path', type=str, default="weights/yolov5s.pt", help='Path to the model')
    parser.add_argument('--img-path', type=str, default='data/images', help='input image path')
    parser.add_argument('--output-dir', type=str, default='outputs/', help='output dir')
    parser.add_argument('--img-size', type=int, default=640, help="input image size")
    parser.add_argument('--target-layer', type=str, default='model_17_cv3_act',
                        help='The hierarchical address of the layer to which Grad-CAM will be applied;'
                             ' the names should be separated by underscores')
    parser.add_argument('--method', type=str, default='gradcam', help="'gradcam' or 'gradcampp'")
    parser.add_argument('--device', type=str, default='cpu', help='cuda or cpu')
    parser.add_argument('--no_text_box', action='store_true',
                        help='do not show label and box on the heatmap')
    args = parser.parse_args()


    def get_res_img(bbox, mask, res_img):
        mask = mask.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy().astype(
            np.uint8)
        heatmap = cv2.applyColorMap(mask, cv2.COLORMAP_JET)
        # n_heatmat = (Box.fill_outer_box(heatmap, bbox) / 255).astype(np.float32)
        n_heatmat = (heatmap / 255).astype(np.float32)
        res_img = res_img / 255
        res_img = cv2.add(res_img, n_heatmat)
        res_img = (res_img / res_img.max())
        return res_img, n_heatmat


    def plot_one_box(x, img, color=None, label=None, line_thickness=3):
        # workaround for a cv2 quirk: it will not draw on an image converted from torch
        # unless the image is buffered to disk and read back first
        cv2.imwrite('temp.jpg', (img * 255).astype(np.uint8))
        img = cv2.imread('temp.jpg')

        # Plots one bounding box on image img
        tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
        color = color or [random.randint(0, 255) for _ in range(3)]
        c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
        cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
        if label:
            tf = max(tl - 1, 1)  # font thickness
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            outside = c1[1] - t_size[1] - 3 >= 0  # label fits outside box up
            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 if outside else c1[1] + t_size[1] + 3
            outsize_right = c2[0] - img.shape[:2][1] > 0  # label fits outside box right
            c1 = c1[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c1[0], c1[1]
            c2 = c2[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c2[0], c2[1]
            cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
            cv2.putText(img, label, (c1[0], c1[1] - 2 if outside else c2[1] - 2), 0, tl / 3, [225, 255, 255],
                        thickness=tf, lineType=cv2.LINE_AA)
        return img


    # run detection and Grad-CAM on a single image
    def main(img_path):
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
        device = args.device
        input_size = (args.img_size, args.img_size)
        # load the image (cv2.imread returns BGR)
        img = cv2.imread(img_path)
        print('[INFO] Loading the model')
        # instantiate the YOLOv5 wrapper that returns detections and logits
        model = YOLOV5TorchObjectDetector(args.model_path, device, img_size=input_size, names=names)
        # img[..., ::-1]: BGR --> RGB
        # (480, 640, 3) --> (1, 3, 480, 640)
        torch_img = model.preprocessing(img[..., ::-1])
        tic = time.time()
        # iterate over the three detection layers
        for target_layer in target_layers:
            # choose the Grad-CAM variant
            if args.method == 'gradcam':
                saliency_method = YOLOV5GradCAM(model=model, layer_name=target_layer, img_size=input_size)
            elif args.method == 'gradcampp':
                saliency_method = YOLOV5GradCAMPP(model=model, layer_name=target_layer, img_size=input_size)
            masks, logits, [boxes, _, class_names, conf] = saliency_method(torch_img)  # run prediction
            result = torch_img.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy()
            result = result[..., ::-1]  # convert to BGR

            # output settings
            image_name = os.path.basename(img_path)  # image file name
            save_path = f'{args.output_dir}{image_name[:-4]}/{args.method}'
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            print(f'[INFO] Saving the final image at {save_path}')

            # iterate over the detected objects in the image
            for i, mask in enumerate(masks):
                res_img = result.copy()
                # position and class of the current object
                bbox, cls_name = boxes[0][i], class_names[0][i]
                label = f'{cls_name} {conf[0][i]}'  # class name + confidence score
                # overlay the object's heatmap
                res_img, heat_map = get_res_img(bbox, mask, res_img)
                res_img = plot_one_box(bbox, res_img, label=label, color=colors[int(names.index(cls_name))],
                                       line_thickness=3)
                # resize back to the original image size
                res_img = cv2.resize(res_img, dsize=img.shape[:-1][::-1])
                output_path = f'{save_path}/{target_layer[6:8]}_{i}.jpg'
                cv2.imwrite(output_path, res_img)
                print(f'{target_layer[6:8]}_{i}.jpg done!!')
        print(f'Total time : {round(time.time() - tic, 4)} s')


    if __name__ == '__main__':
        # if the image path is a folder, process every image in it
        if os.path.isdir(args.img_path):
            img_list = os.listdir(args.img_path)
            print(img_list)
            for item in img_list:
                # build the full path for each image in the folder
                main(os.path.join(args.img_path, item))
        # single image
        else:
            main(args.img_path)

    V. Usage

    1. Change the class names in main_gradcam.py to match your dataset.

    2. Update the --model-path and --img-path arguments (or their defaults) to point to your weights and images.

    3. Run main_gradcam.py. The heatmaps are written under the output directory, one image per detected object and target layer, e.g. outputs/<image-name>/gradcam/17_0.jpg.
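    With the argparse defaults shown in section IV (adjust the paths to your setup), a typical invocation is:

    python main_gradcam.py --model-path weights/yolov5s.pt --img-path data/images --method gradcam --device cpu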


    4. If you do not want the bounding boxes drawn on the heatmap, comment out the call to plot_one_box in main_gradcam.py. (The script also defines a --no_text_box flag, but as listed here it is parsed without being used, so commenting out the call is the simplest route.)


    Appendix

    If the code above errors at the backward hook, your torch version is too old: register_full_backward_hook was only added in PyTorch 1.8.0, so upgrading to 1.8.0 or newer fixes it.

    Alternatively, replace register_full_backward_hook with register_backward_hook in gradcam.py.
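    A version-agnostic variant of the hook registration in gradcam.py (my own sketch, not from the original post) would be:

    # Use the full-backward-hook API where it exists (PyTorch >= 1.8.0),
    # otherwise fall back to the legacy register_backward_hook.
    if hasattr(target_layer, 'register_full_backward_hook'):
        target_layer.register_full_backward_hook(backward_hook)
    else:
        target_layer.register_backward_hook(backward_hook)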

    Reference

    【YOLOv5】结合GradCAM热力图可视化, 嗜睡的篠龙's CSDN blog: https://blog.csdn.net/weixin_43799388/article/details/126207632?spm=1001.2014.3001.5502


  • Original post: https://blog.csdn.net/m0_56247038/article/details/126677066