• RK1126: YOLOv5 6.2 inference



    Implementing YOLOv5 6.2 inference on the RK1126 (RV1126) platform.


    Convert to ONNX

    1. python export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 --include onnx --simplify
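
    A quick sanity check on the export (optional): the command above writes ./weights/yolov5s.onnx, which the conversion script below expects renamed to yolov5s_v6.2.onnx. A minimal sketch, assuming onnxruntime is installed:

      # Sketch: run the exported ONNX once with onnxruntime to confirm it loads
      # and to print the output tensor names and shapes. onnxruntime and the
      # model path are assumptions matching the export command above.
      import numpy as np
      import onnxruntime as ort

      sess = ort.InferenceSession('./weights/yolov5s.onnx')
      dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)
      outs = sess.run(None, {sess.get_inputs()[0].name: dummy})
      for meta, o in zip(sess.get_outputs(), outs):
          print(meta.name, o.shape)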

    Install the RK environment

    1. Installation is widely documented online; see: https://github.com/rockchip-linux/rknpu

    Convert the RKNN model and verify

    1. yolov562_to_rknn_3_4.py (the s/m/l/x variants have different output node names; inspect the graph with Netron, or see the sketch after the script)

      # -*- coding: utf-8 -*-
      """
      Created on Wed Oct 12 18:24:38 2022
      
      @author: bobod
      """
      
      
      import os
      import numpy as np
      import cv2
      from rknn.api import RKNN
      
      
      ONNX_MODEL = './weights/yolov5s_v6.2.onnx'
      RKNN_MODEL = './weights/yolov5s_v6.2.rknn'
      IMG_PATH = './000000102411.jpg'
      DATASET = './dataset.txt'
      
      QUANTIZE_ON = True
      
      BOX_THRESH = 0.5
      NMS_THRESH = 0.6
      IMG_SIZE = (640, 640) # (width, height), such as (1280, 736)
      
      SHAPES = ((0.0, 0.0), (0.0, 0.0))  #1 (ratio, pad) from letterbox, consumed by scale_coords
      SHAPE = (0, 0)                     # original image (height, width), set in main
      
      CLASSES = ("person", "bicycle", "car","motorbike ","aeroplane ","bus ","train","truck ","boat","traffic light",
                 "fire hydrant","stop sign ","parking meter","bench","bird","cat","dog ","horse ","sheep","cow","elephant",
                 "bear","zebra ","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite",
                 "baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife ",
                 "spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza ","donut","cake","chair","sofa",
                 "pottedplant","bed","diningtable","toilet ","tvmonitor","laptop	","mouse	","remote ","keyboard ","cell phone","microwave ",
                 "oven ","toaster","sink","refrigerator ","book","clock","vase","scissors ","teddy bear ","hair drier", "toothbrush ")
      
      def sigmoid(x):
          return 1 / (1 + np.exp(-x))
      
      def xywh2xyxy(x):
          # Convert [x, y, w, h] to [x1, y1, x2, y2]
          y = np.copy(x)
          y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
          y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
          y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
          y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
          return y
      
      def process(input, mask, anchors):
      
          anchors = [anchors[i] for i in mask]
          grid_h, grid_w = map(int, input.shape[0:2])
      
          box_confidence = sigmoid(input[..., 4])
          box_confidence = np.expand_dims(box_confidence, axis=-1)
      
          box_class_probs = sigmoid(input[..., 5:])
      
          box_xy = sigmoid(input[..., :2])*2 - 0.5
      
          col = np.tile(np.arange(0, grid_w), grid_h).reshape(-1, grid_w)
          row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_w)
          col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
          row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
          grid = np.concatenate((col, row), axis=-1)
          box_xy += grid
          box_xy *= (int(IMG_SIZE[1]/grid_h), int(IMG_SIZE[0]/grid_w))
      
          box_wh = pow(sigmoid(input[..., 2:4])*2, 2)
          box_wh = box_wh * anchors
      
          box = np.concatenate((box_xy, box_wh), axis=-1)
      
          return box, box_confidence, box_class_probs
      
      def filter_boxes(boxes, box_confidences, box_class_probs):
          """Filter boxes with box threshold. It's a bit different with origin yolov5 post process!
          # Arguments
              boxes: ndarray, boxes of objects.
              box_confidences: ndarray, confidences of objects.
              box_class_probs: ndarray, class_probs of objects.
          # Returns
              boxes: ndarray, filtered boxes.
              classes: ndarray, classes for boxes.
              scores: ndarray, scores for boxes.
          """
          boxes = boxes.reshape(-1, 4)
          box_confidences = box_confidences.reshape(-1)
          box_class_probs = box_class_probs.reshape(-1, box_class_probs.shape[-1])
      
          _box_pos = np.where(box_confidences >= BOX_THRESH)
          boxes = boxes[_box_pos]
          box_confidences = box_confidences[_box_pos]
          box_class_probs = box_class_probs[_box_pos]
      
          class_max_score = np.max(box_class_probs, axis=-1)
          classes = np.argmax(box_class_probs, axis=-1)
          _class_pos = np.where(class_max_score* box_confidences >= BOX_THRESH)
      
          boxes = boxes[_class_pos]
          classes = classes[_class_pos]
          scores = (class_max_score* box_confidences)[_class_pos]
      
          return boxes, classes, scores
      
      def nms_boxes(boxes, scores):
          """Suppress non-maximal boxes.
          # Arguments
              boxes: ndarray, boxes of objects.
              scores: ndarray, scores of objects.
          # Returns
              keep: ndarray, index of effective boxes.
          """
          x = boxes[:, 0]
          y = boxes[:, 1]
          w = boxes[:, 2] - boxes[:, 0]
          h = boxes[:, 3] - boxes[:, 1]
      
          areas = w * h
          order = scores.argsort()[::-1]
      
          keep = []
          while order.size > 0:
              i = order[0]
              keep.append(i)
      
              xx1 = np.maximum(x[i], x[order[1:]])
              yy1 = np.maximum(y[i], y[order[1:]])
              xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
              yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])
      
              w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
              h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
              inter = w1 * h1
      
              ovr = inter / (areas[i] + areas[order[1:]] - inter)
              inds = np.where(ovr <= NMS_THRESH)[0]
              order = order[inds + 1]
          keep = np.array(keep)
          return keep
      
      
      def yolov5_post_process(input_data):
          masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
          anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                    [59, 119], [116, 90], [156, 198], [373, 326]]
      
          boxes, classes, scores = [], [], []
          for input,mask in zip(input_data, masks):
              b, c, s = process(input, mask, anchors)
              b, c, s = filter_boxes(b, c, s)
              boxes.append(b)
              classes.append(c)
              scores.append(s)
      
          boxes = np.concatenate(boxes)
          boxes = xywh2xyxy(boxes)
          classes = np.concatenate(classes)
          scores = np.concatenate(scores)
      
          nboxes, nclasses, nscores = [], [], []
          for c in set(classes):
              inds = np.where(classes == c)
              b = boxes[inds]
              c = classes[inds]
              s = scores[inds]
      
              keep = nms_boxes(b, s)
      
              nboxes.append(b[keep])
              nclasses.append(c[keep])
              nscores.append(s[keep])
      
          if not nclasses and not nscores:
              return None, None, None
      
          boxes = np.concatenate(nboxes)
          scale_coords(IMG_SIZE, boxes, SHAPE, SHAPES) #2
          classes = np.concatenate(nclasses)
          scores = np.concatenate(nscores)
      
          return boxes, classes, scores
      
      def draw(image, boxes, scores, classes):
          """Draw the boxes on the image.
          # Argument:
              image: original image.
              boxes: ndarray, boxes of objects.
              classes: ndarray, classes of objects.
              scores: ndarray, scores of objects.
              all_classes: all classes name.
          """
          for box, score, cl in zip(boxes, scores, classes):
              left, top, right, bottom = box
              print('class: {}, score: {}'.format(CLASSES[cl], score))
              print('box coordinate left,top,right,bottom: [{}, {}, {}, {}]'.format(left, top, right, bottom))
              left = int(left)
              top = int(top)
              right = int(right)
              bottom = int(bottom)
      
              cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)
              cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                          (left, top - 6),
                          cv2.FONT_HERSHEY_SIMPLEX,
                          0.6, (0, 0, 255), 2)
      
      
      def letterbox(im, new_shape=(640, 640), color=(0, 0, 0)):
          # Resize and pad image while meeting stride-multiple constraints
          shape = im.shape[:2]  # current shape [height, width]
          if isinstance(new_shape, int):
              new_shape = (new_shape, new_shape)
      
          # Scale ratio (new / old)
          r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
      
          # Compute padding
          ratio = r, r  # width, height ratios
          new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
          dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
      
          dw /= 2  # divide padding into 2 sides
          dh /= 2
      
          if shape[::-1] != new_unpad:  # resize
              im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
          top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
          left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
          im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
          return im, ratio, (dw, dh)
      
      #3
      def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
          # Rescale coords (xyxy) from img1_shape to img0_shape
          if ratio_pad is None:  # calculate from img0_shape
              gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
              pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
          else:
              gain = ratio_pad[0][0]
              pad = ratio_pad[1]
      
          coords[:, [0, 2]] -= pad[0]  # x padding
          coords[:, [1, 3]] -= pad[1]  # y padding
          coords[:, :4] /= gain
          clip_coords(coords, img0_shape)
          return coords
      
      
      def clip_coords(boxes, shape):
          # Clip bounding xyxy bounding boxes to image shape (height, width)
          boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
          boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2
          
      if __name__ == '__main__':
      
          # Create RKNN object
          rknn = RKNN(verbose=False)
      
          if not os.path.exists(ONNX_MODEL):
              print('model not exist')
              exit(-1)
      
          _force_builtin_perm = False
          # pre-process config
          print('--> Config model')
          rknn.config(
                      reorder_channel='0 1 2',
                      mean_values=[[0, 0, 0]],
                      std_values=[[255, 255, 255]],
                      optimization_level=3,
                      #target_platform = 'rk1808',
                      # target_platform='rv1109',
                      target_platform = 'rv1126',
                      quantize_input_node= QUANTIZE_ON,
                      output_optimize=1,
                      force_builtin_perm=_force_builtin_perm)
          print('done')
      
          # Load ONNX model
          print('--> Loading model')
          #ret = rknn.load_pytorch(model=PT_MODEL, input_size_list=[[3,IMG_SIZE[1], IMG_SIZE[0]]])
          ret = rknn.load_onnx(model=ONNX_MODEL, outputs=['output', '391', '402'])  # output node names differ per model; check with Netron
          if ret != 0:
              print('Load yolov5 failed!')
              exit(ret)
          print('done')
      
          # Build model
          print('--> Building model')
          ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET, pre_compile=False)
          if ret != 0:
              print('Build yolov5 failed!')
              exit(ret)
          print('done')
      
          # Export RKNN model
          print('--> Export RKNN model')
          ret = rknn.export_rknn(RKNN_MODEL)
          if ret != 0:
              print('Export yolov5rknn failed!')
              exit(ret)
          print('done')
      
          # init runtime environment
          print('--> Init runtime environment')
          ret = rknn.init_runtime() 
          #ret = rknn.init_runtime('rv1126', device_id='bab4d7a824f04867')
          # ret = rknn.init_runtime('rv1109', device_id='1109')
          # ret = rknn.init_runtime('rk1808', device_id='1808')
          if ret != 0:
              print('Init runtime environment failed')
              exit(ret)
          print('done')
      
          # Set inputs
          original_img = cv2.imread(IMG_PATH)  #4 keep the original for drawing and box rescaling
          img, ratio, pad = letterbox(original_img, new_shape=(IMG_SIZE[1], IMG_SIZE[0]))
          SHAPES = (ratio, pad)  # rebind the module-level globals consumed by scale_coords
          SHAPE = (original_img.shape[0], original_img.shape[1])
          img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
      
          # Inference
          print('--> Running model')
          outputs = rknn.inference(inputs=[img], inputs_pass_through=[0 if not _force_builtin_perm else 1])
      
          # post process
          input0_data = outputs[0]
          input1_data = outputs[1]
          input2_data = outputs[2]
      
          input0_data = input0_data.reshape([3,-1]+list(input0_data.shape[-2:]))
          input1_data = input1_data.reshape([3,-1]+list(input1_data.shape[-2:]))
          input2_data = input2_data.reshape([3,-1]+list(input2_data.shape[-2:]))
      
          input_data = list()
          input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
          input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
          input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))
      
      
          boxes, classes, scores = yolov5_post_process(input_data)
      
      
          if boxes is not None:
              draw(original_img, boxes, scores, classes)
          cv2.imwrite("result.jpg", original_img)
      
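    The output node names passed to rknn.load_onnx (outputs=['output', '391', '402']) are specific to this yolov5s export; m/l/x exports produce different tensor names. Besides Netron, a minimal sketch with the onnx package can help locate them. Treating the last three Conv outputs as the detection heads is a heuristic assumption; confirm against Netron:

      # Sketch: list candidate output tensor names from the ONNX graph.
      # Assumes the three per-scale head tensors are the outputs of the last
      # three Conv nodes before the Detect post-processing (heuristic).
      import onnx

      model = onnx.load('./weights/yolov5s_v6.2.onnx')
      for out in model.graph.output:
          print('graph output:', out.name)
      conv_outs = [n.output[0] for n in model.graph.node if n.op_type == 'Conv']
      print('last Conv outputs:', conv_outs[-3:])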
      


    Hybrid quantization

    1. First run the accuracy analysis: build with quantization, then call the accuracy-analysis function. I also tried calling it after hybrid_quantization_step2, but that always failed for reasons unknown; if anyone knows why, please share.
    2. Accuracy-analysis output: entire_qnt (fully quantized results), fp32 (FP32 results), individual_qnt (per-layer quantized results, i.e. each layer is fed float inputs, which excludes accumulated error), plus entire_qnt_error_analysis.txt and individual_qnt_error_analysis.txt (the full and per-layer analyses, reported as Euclidean and cosine distances).
    3. Note that the dataset file used here may contain only a single line (a minimal way to create one is sketched after the snippet below).
      ...
          # Build model
          print('--> Building model')
          ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET, pre_compile=False)
          if ret != 0:
              print('Build yolov5 failed!')
              exit(ret)
          print('done')
      
          print('--> Accuracy analysis')
          ret = rknn.accuracy_analysis(inputs=DATASET1, output_dir="./output_dir")
          if ret != 0:
              print('accuracy_analysis failed!')
              exit(ret)
          print('done')
      
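    DATASET1 in the snippet above is the author's one-line dataset file (see note 3). A minimal, assumed way to create one (the filename is hypothetical):

      # Sketch: write a dataset txt containing exactly one image path, as the
      # accuracy analysis requires. './dataset_one.txt' is a hypothetical name.
      DATASET1 = './dataset_one.txt'
      with open(DATASET1, 'w') as f:
          f.write('./000000102411.jpg\n')
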
    4. Generate the hybrid-quantization config files: calling rknn.hybrid_quantization_step1 produces three files: torchjitexport.data, torchjitexport.json and torchjitexport.quantization.cfg (a minimal call is sketched below).
    5. Based on the accuracy-analysis results, add the layers you do not want quantized to the customized layer list in torchjitexport.quantization.cfg.
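      A minimal sketch, assuming the same rknn object configured and loaded as in the script above (step 1 is called in place of rknn.build):

      # Sketch: generate the hybrid-quantization config files. DATASET is the
      # same dataset txt used for the normal quantized build above.
      print('--> hybrid_quantization_step1')
      ret = rknn.hybrid_quantization_step1(dataset=DATASET)
      if ret != 0:
          print('hybrid_quantization_step1 failed!')
          exit(ret)
      print('done')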
      # 1. put a space after the colon  2. a model can use at most two quantization dtypes at the same time
      # add layer name and corresponding quantized_dtype to customized_quantize_layers, e.g conv2_3: float32
      customized_quantize_layers: {
          "Conv_Conv_0_187": float32,
          "Sigmoid_Sigmoid_1_188_Mul_Mul_2_172": float32,
          "Conv_Conv_3_171": float32,
          ...
      }
      ...
      
    6. Call rknn.hybrid_quantization_step2 to build, then export the RKNN model:
      ...
      ret = rknn.hybrid_quantization_step2(model_input='./torchjitexport.json',
                                           data_input='./torchjitexport.data',
                                           model_quantization_cfg='./torchjitexport.quantization.cfg',
                                           dataset=DATASET, pre_compile=False)
      if ret != 0:
          print("hybrid_quantization_step2 failed.")
          exit(ret)

      # Export RKNN model
      print('--> Export RKNN model')
      ret = rknn.export_rknn(RKNN_MODEL)
      if ret != 0:
          print('Export yolov5rknn failed!')
          exit(ret)
      print('done')
      
    7. Test iteratively to find a model that balances speed and accuracy.
    8. Once that verifies cleanly, connect the board and try it there; setting pre_compile=True speeds up model initialization, but a pre-compiled model cannot be tested in the simulator (see the sketch below).
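
    A sketch of the on-device variant under those notes; the device_id below is just the one from the commented-out line in the script above and is board-specific:

      # Sketch: build a pre-compiled model and initialize the runtime on an
      # attached RV1126 board. A pre-compiled model cannot run in the simulator.
      ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET, pre_compile=True)
      if ret != 0:
          exit(ret)
      ret = rknn.export_rknn(RKNN_MODEL)
      if ret != 0:
          exit(ret)
      ret = rknn.init_runtime(target='rv1126', device_id='bab4d7a824f04867')  # board-specific id
      if ret != 0:
          print('Init runtime environment failed')
          exit(ret)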

    References

    1. https://github.com/shaoshengsong/rockchip_rknn_yolov5
    2. https://github.com/rockchip-linux/rknpu
  • Original article: https://blog.csdn.net/haiyangyunbao813/article/details/130909775