• YOLOv7 inference with ONNX


    Official code:

    GitHub - WongKinYiu/yolov7: Implementation of paper - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors (https://github.com/WongKinYiu/yolov7)

            Last month, the official repo posted an ipynb showing ONNX inference; when I checked a few days later it had been taken down again, maybe ahead of a bigger update. Luckily I was quick enough to save a copy, so this post doubles as a backup and I don't have to rewrite it from scratch (although, how lazy is that, typing all this here instead of just writing the code...).

    Without NMS

            Here is the code first:

    import cv2
    import time
    import requests
    import random
    import numpy as np
    import onnxruntime as ort
    from PIL import Image
    from pathlib import Path
    from collections import OrderedDict, namedtuple

    cuda = False
    w = "yolov7.onnx"
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
    session = ort.InferenceSession(w, providers=providers)

    def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
        # Resize and pad image while meeting stride-multiple constraints
        shape = im.shape[:2]  # current shape [height, width]
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:  # only scale down, do not scale up (for better val mAP)
            r = min(r, 1.0)

        # Compute padding
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

        if auto:  # minimum rectangle
            dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding

        dw /= 2  # divide padding into 2 sides
        dh /= 2

        if shape[::-1] != new_unpad:  # resize
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
        return im, r, (dw, dh)

    names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
             'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
             'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
             'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
             'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
             'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
             'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
             'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
             'hair drier', 'toothbrush']
    colors = {name: [random.randint(0, 255) for _ in range(3)] for name in names}

    # url = 'https://oneflow-static.oss-cn-beijing.aliyuncs.com/tripleMu/image1.jpg'
    # file = requests.get(url)
    img_path = r'\inference\images\image3.jpg'
    # img = cv2.imdecode(np.frombuffer(file.content, np.uint8), 1)
    img = cv2.imread(img_path)
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    image = img.copy()
    image, ratio, dwdh = letterbox(image, auto=False)  # pad to the fixed 640x640 the model expects
    image = image.transpose((2, 0, 1))  # HWC -> CHW
    image = np.expand_dims(image, 0)  # add batch dimension
    image = np.ascontiguousarray(image)

    im = image.astype(np.float32)
    im /= 255

    outname = [i.name for i in session.get_outputs()]
    inname = [i.name for i in session.get_inputs()]
    inp = {inname[0]: im}

    t1 = time.time()
    outputs = session.run(outname, inp)[0]
    print('inference time :%.4f' % (time.time() - t1))
    # print(outputs)

    # Each detection row is (batch_id, x0, y0, x1, y1, cls_id, score); NMS already ran inside the model
    ori_images = [img.copy()]
    for i, (batch_id, x0, y0, x1, y1, cls_id, score) in enumerate(outputs):
        image = ori_images[int(batch_id)]
        box = np.array([x0, y0, x1, y1])
        box -= np.array(dwdh * 2)  # undo letterbox padding
        box /= ratio  # undo letterbox scaling
        box = box.round().astype(np.int32).tolist()
        cls_id = int(cls_id)
        score = round(float(score), 3)
        name = names[cls_id]
        color = colors[name]
        name += ' ' + str(score)
        cv2.rectangle(image, box[:2], box[2:], color, 2)
        cv2.putText(image, name, (box[0], box[1] - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.75, [225, 255, 255], thickness=2)

    cv2.imshow('dddd', ori_images[0])
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # Image.fromarray(ori_images[0])

            Anyone familiar with the YOLO series will have spotted the issue above: there is no NMS step. That's because the official export script simplifies the model and makes it end-to-end when converting to ONNX, so NMS is already baked into the graph.

            Export command:

    python export.py --weights yolov7.pt --grid --end2end --simplify \
            --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640

            An ONNX file exported by plainly running export.py (without the flags above) will not work with the code above: the unpacking in the for loop will raise an error.
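            If you are not sure which kind of ONNX file you have, a quick check (a minimal sketch, assuming yolov7.onnx is in the current directory) is to print the session outputs:

    import onnxruntime as ort

    session = ort.InferenceSession('yolov7.onnx', providers=['CPUExecutionProvider'])
    for out in session.get_outputs():
        print(out.name, out.shape)
    # An --end2end export gives one detection tensor whose rows are
    # (batch_id, x0, y0, x1, y1, cls_id, score), matching the loop above;
    # a plain --grid export gives raw predictions such as [1, 25200, 85]
    # that still need NMS and will break the unpacking in the for loop.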

    Run the command above in a console:

            You can see that the model does export successfully in the end; some warnings show up along the way and can be ignored. The simplification step failed for me, though. That was mainly an onnx package version problem: I had 1.9, and after downgrading to 1.8.1 it worked.
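            If you hit the same simplification failure, the downgrade is a one-liner (assuming a pip-managed environment):

    pip install onnx==1.8.1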

            Now just run the ONNX inference code from the beginning of this post; note that the image path probably needs to be changed:

    img_path = r'path\to\your\image.jpg'  # change this to a local image path

            Inference result: (detection screenshot omitted)

    With NMS

            This is the version that keeps NMS outside the model.

            Create detect_onnx.py in the YOLOv7 root directory and paste in the following code:

    import argparse
    import time
    from pathlib import Path

    import cv2
    import torch
    import torch.backends.cudnn as cudnn
    from numpy import random

    from utils.datasets import LoadStreams, LoadImages
    from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
        scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
    from utils.plots import plot_one_box
    from utils.torch_utils import select_device, load_classifier, time_synchronized
    import onnxruntime

    def detect(save_img=False):
        source, weights, view_img, save_txt, imgsz, trace = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, not opt.no_trace
        save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
        webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

        # Directories
        save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Initialize
        set_logging()
        device = select_device(opt.device)
        half = False  # the exported ONNX model takes float32 input, so keep half precision off even on CUDA
        cuda = torch.cuda.is_available()
        check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
        session = onnxruntime.InferenceSession(weights, providers=providers)

        # Load model
        model = session
        # stride = int(model.stride.max())  # model stride
        stride = 32
        imgsz = check_img_size(imgsz, s=stride)  # check img_size

        # Second-stage classifier
        classify = False
        if classify:
            modelc = load_classifier(name='resnet101', n=2)  # initialize
            modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

        # Set Dataloader
        vid_path, vid_writer = None, None
        if webcam:
            view_img = check_imshow()
            cudnn.benchmark = True  # set True to speed up constant image size inference
            dataset = LoadStreams(source, img_size=imgsz, stride=stride)
        else:
            dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=False)

        # Get names and colors
        names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
                 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
                 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
                 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
                 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
                 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
                 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
                 'hair drier', 'toothbrush']
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

        # Run inference
        # (the usual PyTorch warm-up pass is dropped: an ONNX Runtime session has no .parameters())
        t0 = time.time()
        for path, img, im0s, vid_cap in dataset:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            ts = time.time()
            im = img.cpu().numpy()  # torch to numpy
            pred = model.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: im})[0]
            # pred = model.run([i.name for i in session.get_outputs()], {session.get_inputs()[0].name: im})[0]
            te = time.time()
            print('inference time : %.4f s' % (te - ts))

            # Apply NMS
            pred = torch.from_numpy(pred).reshape(1, -1, 85)  # raw predictions: 4 box + 1 obj + 80 classes
            pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
            t2 = time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
                else:
                    p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

                p = Path(p)  # to Path
                save_path = str(save_dir / p.name)  # img.jpg
                txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    # Write results
                    for *xyxy, conf, cls in reversed(det):
                        if save_txt:  # Write to file
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                            line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * len(line)).rstrip() % line + '\n')

                        if save_img or view_img:  # Add bbox to image
                            label = f'{names[int(cls)]} {conf:.2f}'
                            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                # Print time (inference + NMS)
                print(f'{s}Done. ({t2 - t1:.3f}s)')

                # Stream results
                if view_img:
                    cv2.imshow(str(p), im0)
                    cv2.waitKey(0)  # wait for a key press

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'image':
                        cv2.imwrite(save_path, im0)
                        print(f" The image with the result is saved in: {save_path}")
                    else:  # 'video' or 'stream'
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release()  # release previous video writer
                            if vid_cap:  # video
                                fps = vid_cap.get(cv2.CAP_PROP_FPS)
                                w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                                h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            else:  # stream
                                fps, w, h = 30, im0.shape[1], im0.shape[0]
                                save_path += '.mp4'
                            vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                        vid_writer.write(im0)

        if save_txt or save_img:
            s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
            # print(f"Results saved to {save_dir}{s}")

        print(f'Done. ({time.time() - t0:.3f}s)')

    if __name__ == '__main__':
        parser = argparse.ArgumentParser()
        parser.add_argument('--weights', nargs='+', type=str, default='yolov7.onnx', help='model.onnx path(s)')
        parser.add_argument('--source', type=str, default='inference/images', help='source')  # file/folder, 0 for webcam
        parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
        parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
        parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
        parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
        parser.add_argument('--view-img', action='store_true', help='display results')
        parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
        parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
        parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
        parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
        parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
        parser.add_argument('--augment', action='store_true', help='augmented inference')
        parser.add_argument('--update', action='store_true', help='update all models')
        parser.add_argument('--project', default='runs/detect', help='save results to project/name')
        parser.add_argument('--name', default='exp', help='save results to project/name')
        parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
        parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
        opt = parser.parse_args()
        print(opt)
        # check_requirements(exclude=('pycocotools', 'thop'))

        with torch.no_grad():
            if opt.update:  # update all models (to fix SourceChangeWarning)
                for opt.weights in ['yolov7.pt']:
                    detect()
                    strip_optimizer(opt.weights)
            else:
                detect()
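            With the file saved, a typical run looks like this (a sketch; it assumes yolov7.onnx sits in the repo root, exported as described in note 2 below):

    python detect_onnx.py --weights yolov7.onnx --source inference/images --view-img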

    There are two things to note here:

    1. dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=False)
    You need to add the auto parameter and set it to False, mainly so that it is passed through to the call below inside LoadImages; otherwise the automatic minimum-rectangle padding can produce an image whose size differs from the fixed ONNX input and the run fails (a sketch of the difference follows this note):
    img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]
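    A minimal sketch of the difference, reusing the letterbox() defined in the first script on a hypothetical 480x640 image:

    im = np.zeros((480, 640, 3), dtype=np.uint8)     # dummy 480x640 image
    print(letterbox(im, 640, auto=True)[0].shape)    # (480, 640, 3): padded only to a stride multiple
    print(letterbox(im, 640, auto=False)[0].shape)   # (640, 640, 3): the fixed size the ONNX input expects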

    2. The export command for this version: python export.py --weights yolov7.pt --grid --img-size 640 640 (no --end2end this time, so the model outputs raw predictions and NMS runs in Python).

    Inference result: (detection screenshot omitted)

  • Original article: https://blog.csdn.net/athrunsunny/article/details/126306363