OpenCV DNN C++ 使用 YOLO 模型推理

引言

YOLO（You Only Look Once）是一种流行的目标检测算法，因其速度快和准确度高而被广泛应用。OpenCV 的 DNN（Deep Neural Networks）模块为我们提供了一个简单易用的 API，用于加载和运行预先训练的深度学习模型。本文将详细介绍如何使用 OpenCV 的 DNN 模块来进行 YOLOv5 的目标检测。

准备工作

确保您已经安装了包含 DNN 模块的 OpenCV，并准备好一个导出为 ONNX 格式的 YOLOv5 模型文件（下文通过 cv::dnn::readNetFromONNX 加载）。如果您还没有安装 OpenCV，可以参照 OpenCV 官方文档来进行安装。

核心代码解析

结构体和类定义

/// One detection produced by YOLOv5Detector::detect.
struct DetectResult
{
	int classId = -1;   ///< Index of the best-scoring class; -1 until a detector assigns it.
	float score = 0.0f; ///< Score of that class as stored by the detector.
	cv::Rect box;       ///< Bounding box, in pixel coordinates of the frame passed to detect().
};

/// Thin wrapper around an OpenCV DNN network that runs a YOLOv5 ONNX model.
class YOLOv5Detector
{
public:
	/// Loads the ONNX model at `onnxpath` and stores the network input size
	/// (`iw` x `ih`) and the class-score threshold.
	void initConfig(std::string onnxpath, int iw, int ih, float threshold);

	/// Runs the network on `frame`, appends the detections to `result`, and
	/// draws the detections onto `frame`.
	void detect(cv::Mat& frame, std::vector<DetectResult>& result);

private:
	int input_w = 640; // network input width in pixels
	int input_h = 640; // network input height in pixels
	cv::dnn::Net net;
	// Must be a floating-point type: declared as `int`, 0.25 truncates to 0
	// and the score filter in detect() would accept every positive score.
	float threshold_score = 0.25f;
};

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

我们定义了一个名为 DetectResult 的结构体，用于存储检测结果，其中包括目标的类别 ID、得分和边界框。

YOLOv5Detector 类提供了两个主要的公共方法：

initConfig：用于初始化网络模型和一些参数。
detect：用于进行目标检测。

初始化配置

void YOLOv5Detector::initConfig(std::string onnxpath, int iw, int ih, float threshold)
{
    this->input_w = iw;
    this->input_h = ih;
    this->threshold_score = threshold;
    this->net = cv::dnn::readNetFromONNX(onnxpath);
}
1
2
3
4
5
6
7

在 initConfig 方法中，我们主要进行了以下操作：

设置输入图像的宽度和高度（input_w 和 input_h）。
设置目标检测的置信度阈值（threshold_score）。
通过 cv::dnn::readNetFromONNX 方法加载预训练的 ONNX 模型。

目标检测

/// Runs the network on `frame`, appends the detections that survive NMS to
/// `results`, and draws the boxes plus a timing overlay onto `frame`.
///
/// @param frame   Input image, modified in place (boxes, label bars and an
///                FPS/time string are drawn on it). Must not be empty. The
///                padded working copy is allocated as CV_8UC3, so an 8-bit
///                3-channel image is expected.
/// @param results Output vector; detections are appended, existing entries
///                are kept.
///
/// Each output row is read as (cx, cy, w, h, box confidence, class scores...).
/// NOTE(review): assumes the network output has shape [1, N, 5 + num_classes]
/// with float elements — confirm for the exported model.
void YOLOv5Detector::detect(cv::Mat& frame, std::vector<DetectResult>& results)
{
	constexpr double kBoxConfidenceThreshold = 0.45; // rows below this (column 4) are skipped
	constexpr float kNmsScoreThreshold = 0.25f;
	constexpr float kNmsIouThreshold = 0.45f;
	constexpr int kClassScoreOffset = 5; // columns 0-3: box, 4: box confidence, 5+: class scores

	// Fail early with a clear message instead of deep inside blobFromImage.
	CV_Assert(!frame.empty());

	// Pre-processing: copy the frame into the top-left corner of a black square
	// so that resizing to the network input keeps the aspect ratio.
	const int w = frame.cols;
	const int h = frame.rows;
	const int side = std::max(h, w);
	cv::Mat image = cv::Mat::zeros(cv::Size(side, side), CV_8UC3);
	frame.copyTo(image(cv::Rect(0, 0, w, h)));

	// Factors that map network-input coordinates back to the padded image.
	// They must follow the configured input size, not a fixed 640.
	const float x_factor = image.cols / static_cast<float>(this->input_w);
	const float y_factor = image.rows / static_cast<float>(this->input_h);

	cv::Mat blob = cv::dnn::blobFromImage(image, 1 / 255.0, cv::Size(this->input_w, this->input_h),
	                                      cv::Scalar(0, 0, 0), true, false);
	this->net.setInput(blob);
	cv::Mat preds = this->net.forward();

	// View the 3-D output as a 2-D matrix: one row per candidate box.
	CV_Assert(preds.dims == 3 && preds.size[2] > kClassScoreOffset);
	cv::Mat det_output(preds.size[1], preds.size[2], CV_32F, preds.ptr<float>());

	std::vector<cv::Rect> boxes;
	std::vector<int> classIds;
	std::vector<float> confidences;
	for (int i = 0; i < det_output.rows; i++)
	{
		const float confidence = det_output.at<float>(i, 4);
		if (confidence < kBoxConfidenceThreshold)
		{
			continue;
		}

		// Use every class column the model provides rather than a fixed count.
		cv::Mat classes_scores = det_output.row(i).colRange(kClassScoreOffset, det_output.cols);
		cv::Point classIdPoint;
		double score;
		cv::minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);

		// Scores are in the range 0..1.
		if (score > this->threshold_score)
		{
			const float cx = det_output.at<float>(i, 0);
			const float cy = det_output.at<float>(i, 1);
			const float ow = det_output.at<float>(i, 2);
			const float oh = det_output.at<float>(i, 3);

			// Convert centre/size to a top-left/size rectangle in frame pixels.
			cv::Rect box;
			box.x = static_cast<int>((cx - 0.5 * ow) * x_factor);
			box.y = static_cast<int>((cy - 0.5 * oh) * y_factor);
			box.width = static_cast<int>(ow * x_factor);
			box.height = static_cast<int>(oh * y_factor);

			boxes.push_back(box);
			classIds.push_back(classIdPoint.x);
			confidences.push_back(static_cast<float>(score));
		}
	}

	// Non-maximum suppression
	std::vector<int> indexes;
	cv::dnn::NMSBoxes(boxes, confidences, kNmsScoreThreshold, kNmsIouThreshold, indexes);
	for (size_t i = 0; i < indexes.size(); i++)
	{
		const int index = indexes[i];
		DetectResult dr;
		dr.box = boxes[index];
		dr.classId = classIds[index];
		dr.score = confidences[index];

		// Box outline, plus a filled bar above it that serves as a label background.
		cv::rectangle(frame, boxes[index], cv::Scalar(0, 0, 255), 2, 8);
		cv::rectangle(frame, cv::Point(boxes[index].tl().x, boxes[index].tl().y - 20),
		              cv::Point(boxes[index].br().x, boxes[index].tl().y), cv::Scalar(0, 255, 255), -1);
		results.push_back(dr);
	}

	// Timing overlay built from the network's performance profile.
	std::ostringstream ss;
	std::vector<double> layersTimings;
	const double freq = cv::getTickFrequency() / 1000.0;
	const double time = net.getPerfProfile(layersTimings) / freq;
	ss << "FPS: " << 1000 / time << " ; time : " << time << " ms";
	cv::putText(frame, ss.str(), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

在 detect 方法中，我们进行了以下几个关键步骤：

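对输入图像进行预处理：创建一个边长为 max(宽, 高) 的全黑正方形图像，并把原图复制到其左上角，使图像在缩放时保持宽高比。
使用 cv::dnn::blobFromImage 函数创建一个 4 维 blob：像素值乘以 1/255 归一化到 [0, 1]，尺寸缩放到 input_w × input_h，并交换 R、B 通道（BGR 转 RGB）。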
通过 setInput 和 forward 方法进行前向传播，得到预测结果。

然后，我们对预测结果进行解析，通过非极大值抑制（NMS）得到最终的目标检测结果。

参考资料

OpenCV 官方文档

完整代码

#include <algorithm>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/dnn.hpp>
#include <opencv2/opencv.hpp>


/// One detection produced by YOLOv5Detector::detect.
struct DetectResult
{
	int classId = -1;   ///< Index of the best-scoring class; -1 until a detector assigns it.
	float score = 0.0f; ///< Score of that class as stored by the detector.
	cv::Rect box;       ///< Bounding box, in pixel coordinates of the frame passed to detect().
};

/// Thin wrapper around an OpenCV DNN network that runs a YOLOv5 ONNX model.
class YOLOv5Detector
{
public:
	/// Loads the ONNX model at `onnxpath` and stores the network input size
	/// (`iw` x `ih`) and the class-score threshold.
	void initConfig(std::string onnxpath, int iw, int ih, float threshold);

	/// Runs the network on `frame`, appends the detections to `result`, and
	/// draws the detections onto `frame`.
	void detect(cv::Mat& frame, std::vector<DetectResult>& result);

private:
	int input_w = 640; // network input width in pixels
	int input_h = 640; // network input height in pixels
	cv::dnn::Net net;
	// Must be a floating-point type: declared as `int`, 0.25 truncates to 0
	// and the score filter in detect() would accept every positive score.
	float threshold_score = 0.25f;
};

void YOLOv5Detector::initConfig(std::string onnxpath, int iw, int ih, float threshold)
{
	this->input_w = iw;
	this->input_h = ih;
	this->threshold_score = threshold;
	this->net = cv::dnn::readNetFromONNX(onnxpath);
}

/// Runs the network on `frame`, appends the detections that survive NMS to
/// `results`, and draws the boxes plus a timing overlay onto `frame`.
///
/// @param frame   Input image, modified in place (boxes, label bars and an
///                FPS/time string are drawn on it). Must not be empty. The
///                padded working copy is allocated as CV_8UC3, so an 8-bit
///                3-channel image is expected.
/// @param results Output vector; detections are appended, existing entries
///                are kept.
///
/// Each output row is read as (cx, cy, w, h, box confidence, class scores...).
/// NOTE(review): assumes the network output has shape [1, N, 5 + num_classes]
/// with float elements — confirm for the exported model.
void YOLOv5Detector::detect(cv::Mat& frame, std::vector<DetectResult>& results)
{
	constexpr double kBoxConfidenceThreshold = 0.45; // rows below this (column 4) are skipped
	constexpr float kNmsScoreThreshold = 0.25f;
	constexpr float kNmsIouThreshold = 0.45f;
	constexpr int kClassScoreOffset = 5; // columns 0-3: box, 4: box confidence, 5+: class scores

	// Fail early with a clear message instead of deep inside blobFromImage.
	CV_Assert(!frame.empty());

	// Pre-processing: copy the frame into the top-left corner of a black square
	// so that resizing to the network input keeps the aspect ratio.
	const int w = frame.cols;
	const int h = frame.rows;
	const int side = std::max(h, w);
	cv::Mat image = cv::Mat::zeros(cv::Size(side, side), CV_8UC3);
	frame.copyTo(image(cv::Rect(0, 0, w, h)));

	// Factors that map network-input coordinates back to the padded image.
	// They must follow the configured input size, not a fixed 640.
	const float x_factor = image.cols / static_cast<float>(this->input_w);
	const float y_factor = image.rows / static_cast<float>(this->input_h);

	cv::Mat blob = cv::dnn::blobFromImage(image, 1 / 255.0, cv::Size(this->input_w, this->input_h),
	                                      cv::Scalar(0, 0, 0), true, false);
	this->net.setInput(blob);
	cv::Mat preds = this->net.forward();

	// View the 3-D output as a 2-D matrix: one row per candidate box.
	CV_Assert(preds.dims == 3 && preds.size[2] > kClassScoreOffset);
	cv::Mat det_output(preds.size[1], preds.size[2], CV_32F, preds.ptr<float>());

	std::vector<cv::Rect> boxes;
	std::vector<int> classIds;
	std::vector<float> confidences;
	for (int i = 0; i < det_output.rows; i++)
	{
		const float confidence = det_output.at<float>(i, 4);
		if (confidence < kBoxConfidenceThreshold)
		{
			continue;
		}

		// Use every class column the model provides rather than a fixed count.
		cv::Mat classes_scores = det_output.row(i).colRange(kClassScoreOffset, det_output.cols);
		cv::Point classIdPoint;
		double score;
		cv::minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);

		// Scores are in the range 0..1.
		if (score > this->threshold_score)
		{
			const float cx = det_output.at<float>(i, 0);
			const float cy = det_output.at<float>(i, 1);
			const float ow = det_output.at<float>(i, 2);
			const float oh = det_output.at<float>(i, 3);

			// Convert centre/size to a top-left/size rectangle in frame pixels.
			cv::Rect box;
			box.x = static_cast<int>((cx - 0.5 * ow) * x_factor);
			box.y = static_cast<int>((cy - 0.5 * oh) * y_factor);
			box.width = static_cast<int>(ow * x_factor);
			box.height = static_cast<int>(oh * y_factor);

			boxes.push_back(box);
			classIds.push_back(classIdPoint.x);
			confidences.push_back(static_cast<float>(score));
		}
	}

	// Non-maximum suppression
	std::vector<int> indexes;
	cv::dnn::NMSBoxes(boxes, confidences, kNmsScoreThreshold, kNmsIouThreshold, indexes);
	for (size_t i = 0; i < indexes.size(); i++)
	{
		const int index = indexes[i];
		DetectResult dr;
		dr.box = boxes[index];
		dr.classId = classIds[index];
		dr.score = confidences[index];

		// Box outline, plus a filled bar above it that serves as a label background.
		cv::rectangle(frame, boxes[index], cv::Scalar(0, 0, 255), 2, 8);
		cv::rectangle(frame, cv::Point(boxes[index].tl().x, boxes[index].tl().y - 20),
		              cv::Point(boxes[index].br().x, boxes[index].tl().y), cv::Scalar(0, 255, 255), -1);
		results.push_back(dr);
	}

	// Timing overlay built from the network's performance profile.
	std::ostringstream ss;
	std::vector<double> layersTimings;
	const double freq = cv::getTickFrequency() / 1000.0;
	const double time = net.getPerfProfile(layersTimings) / freq;
	ss << "FPS: " << 1000 / time << " ; time : " << time << " ms";
	cv::putText(frame, ss.str(), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
}

// Display label for each class id; main() draws these next to the boxes.
std::map<int, std::string> classNames{
	{0, "-1"},
	{1, "0"},
	{2, "1"},
};

int main(int argc, char* argv[])
{
	std::shared_ptr<YOLOv5Detector> detector = std::make_shared<YOLOv5Detector>();
	detector->initConfig(R"(D:\AllCodeProjects\best.onnx)", 640, 640, 0.25f);

	cv::Mat frame = cv::imread(R"(D:\0002.jpg)");

	std::vector<DetectResult> results;
	detector->detect(frame, results);
	for (DetectResult& dr : results)
	{
		cv::Rect box = dr.box;
		cv::putText(frame, classNames[dr.classId], cv::Point(box.tl().x, box.tl().y - 10), cv::FONT_HERSHEY_SIMPLEX,
		            .5, cv::Scalar(0, 0, 0));
	}
	cv::imshow("OpenCV DNN", frame);
	cv::waitKey();
	results.clear();
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

相关阅读:
link 和@improt的区别
 cmake中配置了工具链，命令行工具可以找到，但是clion中找不到
 软考中级(软件设计师)——程序设计语言与语言处理程序基础(3-5分，一般是3分)
【 java 常用类】StringBuffer 源码分析以及 StringBuffer 底层的数组扩容机制
 2022-08-11 学习日记（31st day）网络通信（网络编程）
接口测试--知识问答
 Springboot 配置使用 Kafka
什么是虚拟dom，说一下react和vue的diff算法
 记一次相同sql语句，java中执行不成功，数据库中能执行成功的问题
 5G-A 商用加速，赋能工业互联网
原文地址：https://blog.csdn.net/qq_42896106/article/details/133563503