[OpenCV] YOLO object detection with OpenCV/C++


    1. Background

    As we all know, deep learning detectors such as YOLO are trained in Python with frameworks like PyTorch or TensorFlow, and training produces .pt or .weights files. That part is the algorithm developer's job: making detection more accurate and faster.

    For deployment, however, the code usually has to be written in C++. OpenCV is not just a library for basic image processing (I used to underestimate it); it also ships modules for deep learning, such as the DNN module, which can load weight files trained under a variety of frameworks.
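
    For example, a Darknet-trained YOLO model can be loaded and made ready for inference in just a few lines (a minimal sketch; the file names are placeholders):

    #include <opencv2/dnn.hpp>
    
    // readNetFromDarknet parses the .cfg network definition together with the .weights file
    cv::dnn::Net net = cv::dnn::readNetFromDarknet("yolov3.cfg", "yolov3.weights");
    // the generic readNet() can also infer the framework from the file extensions
    // cv::dnn::Net net = cv::dnn::readNet("yolov3.weights", "yolov3.cfg");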

    The demo below runs under VS2017 + OpenCV 4.5.4. Four YOLO variants are available, and both images and video can be detected.
    (The code is adapted from another blogger's post; what follows is my integration and demo.)

    2. Image detection program

    Before running the code, set up the VS and OpenCV environment, and prepare the YOLO model files (cfg + weights).

    First, define the yolo.h header:

    #include <fstream>
    #include <sstream>
    #include <iostream>
    #include <opencv2/dnn.hpp>	// the DNN module
    #include <opencv2/imgproc.hpp>
    #include <opencv2/highgui.hpp>
    
    using namespace cv;
    using namespace dnn;
    using namespace std;
    
    // Struct: network configuration parameters
    struct Net_config
    {
    	float confThreshold; // confidence threshold
    	float nmsThreshold;  // non-maximum suppression (overlap) threshold
    	int inpWidth;	// network input width
    	int inpHeight;	// network input height
    	string classesFile;	// class-name file
    	string modelConfiguration;	// model configuration file (.cfg)
    	string modelWeights;	// model weights (.weights)
    	string netname;	// model name
    };
    
    // The YOLO detector class
    class YOLO
    {
    	public:
    		YOLO(Net_config config);
    		void detect(Mat& frame);	// detection entry point
    	private:
    		float confThreshold;	// class confidence threshold
    		float nmsThreshold;		// NMS overlap threshold
    		int inpWidth;	// network input width
    		int inpHeight;	// network input height
    		char netname[20];	// network name
    		vector<string> classes;	// class names
    		Net net;	// the loaded DNN model
    		void postprocess(Mat& frame, const vector<Mat>& outs);	// post-processing
    		void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);	// draw one detection box
    };
    
    // The four available network configurations
    Net_config yolo_nets[4] = {
    	{0.5, 0.4, 416, 416,"coco.names", "yolov3/yolov3.cfg", "yolov3/yolov3.weights", "yolov3"},
    	{0.5, 0.4, 608, 608,"coco.names", "yolov4/yolov4-tiny.cfg", "yolov4/yolov4-tiny.weights", "yolov4-tiny"},
    	{0.5, 0.4, 320, 320,"coco.names", "yolo-fastest/yolo-fastest-xl.cfg", "yolo-fastest/yolo-fastest-xl.weights", "yolo-fastest"},
    	{0.5, 0.4, 320, 320,"coco.names", "yolobile/csdarknet53s-panet-spp.cfg", "yolobile/yolobile.weights", "yolobile"}
    };
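
    With the yolo_nets array above, the class-name file and the cfg/weights pairs are referenced by relative path, so the working directory is expected to contain a layout like the following (inferred from the array; the files themselves must be downloaded separately):

    coco.names
    yolov3/yolov3.cfg
    yolov3/yolov3.weights
    yolov4/yolov4-tiny.cfg
    yolov4/yolov4-tiny.weights
    yolo-fastest/yolo-fastest-xl.cfg
    yolo-fastest/yolo-fastest-xl.weights
    yolobile/csdarknet53s-panet-spp.cfg
    yolobile/yolobile.weights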
    

    Then the main program:

    #include "yolo.h"
    
    // Constructor: read the configuration, class names, and model
    YOLO::YOLO(Net_config config)
    {
    	cout << "Net use " << config.netname << endl;
    	this->confThreshold = config.confThreshold;
    	this->nmsThreshold = config.nmsThreshold;
    	this->inpWidth = config.inpWidth;
    	this->inpHeight = config.inpHeight;
    	strcpy_s(this->netname, config.netname.c_str());
    
    	ifstream ifs(config.classesFile.c_str());
    	string line;
    	while (getline(ifs, line)) this->classes.push_back(line);
    
    	this->net = readNetFromDarknet(config.modelConfiguration, config.modelWeights);
    	this->net.setPreferableBackend(DNN_BACKEND_OPENCV);
    	this->net.setPreferableTarget(DNN_TARGET_CPU);
    }
    
    // Post-processing
    void YOLO::postprocess(Mat& frame, const vector<Mat>& outs)   // Remove the bounding boxes with low confidence using non-maxima suppression
    {
    	vector<int> classIds;	// class ids
    	vector<float> confidences;	// confidence scores
    	vector<Rect> boxes;	// bounding boxes
    
    	for (size_t i = 0; i < outs.size(); ++i)
    	{
    		// Scan through all the bounding boxes output from the network and keep only the
    		// ones with high confidence scores. Assign the box's class label as the class
    		// with the highest score for the box.
    		float* data = (float*)outs[i].data;
    		for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
    		{
    			Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
    			Point classIdPoint;
    			double confidence;
    			// Get the value and location of the maximum score
    			minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
    			// keep only detections above the confidence threshold
    			if (confidence > this->confThreshold)
    			{
    				int centerX = (int)(data[0] * frame.cols);
    				int centerY = (int)(data[1] * frame.rows);
    				int width = (int)(data[2] * frame.cols);
    				int height = (int)(data[3] * frame.rows);
    				int left = centerX - width / 2;
    				int top = centerY - height / 2;
    
    				classIds.push_back(classIdPoint.x);
    				confidences.push_back((float)confidence);
    				boxes.push_back(Rect(left, top, width, height));
    			}
    		}
    	}
    
    	// Perform non maximum suppression to eliminate redundant overlapping boxes with
    	// lower confidences
    	vector<int> indices;
    	NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
    	for (size_t i = 0; i < indices.size(); ++i)
    	{
    		int idx = indices[i];
    		Rect box = boxes[idx];
    		this->drawPred(classIds[idx], confidences[idx], box.x, box.y,
    			box.x + box.width, box.y + box.height, frame);
    	}
    }
    
    // Draw one predicted bounding box
    void YOLO::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)   // Draw the predicted bounding box
    {
    	//Draw a rectangle displaying the bounding box
    	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3);
    
    	//Get the label for the class name and its confidence
    	string label = format("%.2f", conf);
    	string label = format("%.2f", conf);
    	if (!this->classes.empty())
    	{
    		CV_Assert(classId < (int)this->classes.size());
    		label = this->classes[classId] + ":" + label;
    	}
    
    	//Display the label at the top of the bounding box
    	int baseLine;
    	Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    	top = max(top, labelSize.height);
    	//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
    	putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
    }
    
    // Run detection on one frame
    void YOLO::detect(Mat& frame)
    {
    	Mat blob;	// preprocessing: image -> blob
    	blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
    	this->net.setInput(blob);
    	vector<Mat> outs;
    	this->net.forward(outs, this->net.getUnconnectedOutLayersNames());	// forward pass
    	this->postprocess(frame, outs);	// post-processing
    
    	vector<double> layersTimes;
    	double freq = getTickFrequency() / 1000;
    	double t = net.getPerfProfile(layersTimes) / freq;
    	string label = format("%s Inference time : %.2f ms", this->netname, t);
    	putText(frame, label, Point(0, 30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 255), 2);
    	//imwrite(format("%s_out.jpg", this->netname), frame);
    }
    
    // main entry point
    int main()
    {
    	YOLO yolo_model(yolo_nets[2]);	// choose the network (index into yolo_nets)
    
    	//1. image detection
    	string imgpath = "dog.jpg";
    	Mat srcimg = imread(imgpath);	// read the image
    	yolo_model.detect(srcimg);	// run detection
    
    	// display window
    	static const string kWinName = "Deep learning object detection in OpenCV C++";
    	namedWindow(kWinName, WINDOW_NORMAL);
    	imshow(kWinName, srcimg);
    	waitKey(0);
    	destroyAllWindows();
    
    
    }
    

    The detection result is as follows:

    (detection result screenshot)

    3. Video detection program

    To process a video, simply add the following to the main() function:

    	//2. video detection / live camera
    	VideoCapture capture("test.avi");	// pass 0 instead of a file name for the default webcam
    	Mat frame;
    	while (true) {
    		bool ret = capture.read(frame);
    		if (!ret) {
    			break;
    		}
    		//imshow("input", frame);	// show the raw frame
    		yolo_model.detect(frame);	// run detection
    		static const string kWinName = "Deep learning object detection in OpenCV C++";
    		namedWindow(kWinName, WINDOW_NORMAL);
    		imshow(kWinName, frame);
    
    		char c = waitKey(5);
    		if (c == 27) {
    			break;
    		}
    	}
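
    To use a live camera instead of a video file, just pass the device index when constructing the capture (this is what the "0" hint in the snippet refers to):

    	VideoCapture capture(0);	// open the default webcam instead of test.avi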
    

    The result is as follows:

    (video detection screenshot)

    Other examples

    There is also an example of object detection with SSD MobileNet:

    Project on GitHub: https://github.com/ChiekoN/OpenCV_SSD_MobileNet

    # build
    mkdir build && cd build
    cmake ..
    make
    ./ssd_obj_detect
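
    Independent of that repository's exact code, running an SSD MobileNet detector through OpenCV's DNN module looks roughly like the sketch below (the model/config file names are placeholders for a TensorFlow export; the [1,1,N,7] output layout is the standard SSD detection format):

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgproc.hpp>
    #include <opencv2/highgui.hpp>
    using namespace cv;
    
    int main()
    {
    	// placeholder file names for a TensorFlow SSD MobileNet export
    	dnn::Net net = dnn::readNetFromTensorflow("frozen_inference_graph.pb", "ssd_mobilenet.pbtxt");
    	Mat img = imread("dog.jpg");
    	// SSD MobileNet is commonly fed 300x300 inputs
    	Mat blob = dnn::blobFromImage(img, 1.0, Size(300, 300), Scalar(), true, false);
    	net.setInput(blob);
    	Mat out = net.forward();	// shape [1, 1, N, 7]
    
    	// each row: [batchId, classId, confidence, left, top, right, bottom] (coordinates normalized to 0..1)
    	Mat det(out.size[2], out.size[3], CV_32F, out.ptr<float>());
    	for (int i = 0; i < det.rows; ++i)
    	{
    		float conf = det.at<float>(i, 2);
    		if (conf < 0.5f) continue;
    		int left   = int(det.at<float>(i, 3) * img.cols);
    		int top    = int(det.at<float>(i, 4) * img.rows);
    		int right  = int(det.at<float>(i, 5) * img.cols);
    		int bottom = int(det.at<float>(i, 6) * img.rows);
    		rectangle(img, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);
    	}
    	imshow("SSD MobileNet", img);
    	waitKey(0);
    	return 0;
    }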
    

    And an example of ROS-based face detection:

    Project on GitHub: https://github.com/1417265678/robot_vision

    # start the camera node first
    roslaunch robot_vision usb_cam.launch
    # then start the detection node
    roslaunch robot_vision face_detector.launch
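
    The ROS launch files aside, the face detection itself can be reduced to OpenCV's classic cascade classifier. Below is a minimal, non-ROS C++ sketch; whether the linked package uses exactly this method is an assumption on my part:

    #include <opencv2/objdetect.hpp>
    #include <opencv2/imgproc.hpp>
    #include <opencv2/highgui.hpp>
    #include <vector>
    using namespace cv;
    
    int main()
    {
    	// haarcascade_frontalface_default.xml ships with OpenCV (data/haarcascades)
    	CascadeClassifier face_cascade("haarcascade_frontalface_default.xml");
    	VideoCapture cap(0);	// default webcam
    	Mat frame, gray;
    	while (cap.read(frame))
    	{
    		cvtColor(frame, gray, COLOR_BGR2GRAY);
    		equalizeHist(gray, gray);
    		std::vector<Rect> faces;
    		face_cascade.detectMultiScale(gray, faces, 1.1, 3);
    		for (const Rect& r : faces)
    			rectangle(frame, r, Scalar(0, 255, 0), 2);
    		imshow("face detection", frame);
    		if (waitKey(5) == 27) break;	// Esc to quit
    	}
    	return 0;
    }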
    

    (face detection screenshot)

    That's all.

  • Original article: https://blog.csdn.net/qq_40344790/article/details/127650973