yolo数据集剪裁：切割目标框并将该框内的其他目标一并提取并转为可用数据集

在这里插入图片描述
如上图所示，我们的目标是切割出所有的盘子，每个盘子单独储存为一张图片，并且里面的水果也还在该盘子的对应位置。
类似于这样

因为都标注了，有位置信息了，通过大目标框和小目标框的相对位置完全可以切出来。若再标注那工作量就太大了。

处理步骤

获取大框和小框的位置信息
检查小框是否在大框内，并坐标转换，依据大小框的位置信息写入yolo格式数据集
切割大框
将yolo数据集转为voc数据集

获取大框和小框的位置信息

通过elementTree解析xml文件，没有elementTree基础的可以看下此文档：Python 解析 voc数据集的xml文件

提取test1.xml文件中的所有plate节点，并将相关位置信息存放到字典中

字典结构为:
{plate_1: [xmin, ymin, xmax, ymax, w, h], plate_2: [xmin, ymin, xmax, ymax, w, h]}

file = 'test1.xml'
tree = ET.parse(file)
root = tree.getroot()

# Maps "<image stem>_<n>" -> [xmin, ymin, xmax, ymax, w, h] for each big box.
all_C_sd_n = dict()
# Drop only the final extension: slicing off a fixed four characters breaks
# for extensions such as ".jpeg" or for names without an extension.
file_name = root.find('filename').text.rsplit('.', 1)[0]

i = 0
for obj in root.iter('object'):
    if obj.find('name').text != 'plate':
        continue
    i += 1
    box = obj.find('bndbox')
    # Parse each corner once and derive width/height from the parsed values.
    xmin, ymin, xmax, ymax = (
        int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )
    all_C_sd_n[file_name + '_' + str(i)] = [
        xmin, ymin, xmax, ymax, xmax - xmin, ymax - ymin,
    ]

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27

检查小框是否在大框内

获取小框位置坐标的方法与大框相同。判断依据是：如果小框在大框内，那么大框的 xmin、ymin 一定小于小框中心点的坐标，而大框的 xmax、ymax 一定大于小框中心点的坐标。

坐标转换为

# Turn a pixel box (xmin, xmax, ymin, ymax) into a normalized (x, y, w, h) tuple.
def convert(size, box):
    """Return the box centre and extent scaled by the image dimensions.

    Args:
        size: Indexable pair; size[0] is used as the width and size[1] as
            the height.
        box: Indexable (xmin, xmax, ymin, ymax) in pixels.

    Returns:
        Tuple (x, y, w, h). x and y are the box centre shifted by -1 pixel;
        w and h are the box width and height. x and w are multiplied by
        1/size[0], y and h by 1/size[1].
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])
    left, right = box[0], box[1]
    top, bottom = box[2], box[3]
    centre_x = (left + right) / 2.0 - 1
    centre_y = (top + bottom) / 2.0 - 1
    return (
        centre_x * scale_x,
        centre_y * scale_y,
        (right - left) * scale_x,
        (bottom - top) * scale_y,
    )

1
2
3
4
5
6
7
8
9
10
11
12
13
14

# Class id written to the label file for each small-box category.
class_ids = {'C_wirerope_n': 0, 'C_wirerope_notbinding': 1, 'C_sdh_n': 2}

for obj in root.iter('object'):
    names = obj.find('name')
    if names.text not in class_ids:
        continue
    box = obj.find('bndbox')
    # Parse each corner of the small box once.
    sx0, sy0, sx1, sy1 = (
        int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )
    # Centre of the small box in full-image pixels.
    xm = sx0 + (sx1 - sx0) / 2
    ym = sy0 + (sy1 - sy0) / 2

    for crop_name, big in all_C_sd_n.items():
        bx0, by0, bx1, by1, bw, bh = big
        # The small box belongs to a big box when its centre lies strictly inside.
        if not (bx0 < xm < bx1 and by0 < ym < by1):
            continue

        # Small-box corners relative to the big box's top-left corner,
        # in the (xmin, xmax, ymin, ymax) order convert() expects.
        b = (sx0 - bx0, sx1 - bx0, sy0 - by0, sy1 - by0)
        x, y, w, h = convert((bw, bh), b)
        print(x, y, w, h)
        # Append one label line per match; the with-block closes the handle
        # so the line is flushed and no descriptor is leaked.
        with open('{}.txt'.format(crop_name), 'a') as out_file:
            out_file.write(
                '{} {} {} {} {}\n'.format(class_ids[names.text], x, y, w, h)
            )
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

切割大框

OpenCV的基础操作

image_path = "1_001344.742_16_.jpg"
img = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError("cannot read image: " + image_path)
for crop_name, coords in all_C_sd_n.items():
    x0, y0, x1, y1 = coords[:4]
    # Array indexing is [rows, columns]: rows ymin..ymax, columns xmin..xmax.
    cut_img = img[y0:y1, x0:x1]
    cv2.imwrite(crop_name + '.png', cut_img)
1
2
3
4
5
6

import xml.etree.ElementTree as ET
import cv2


# Map a pixel box (xmin, xmax, ymin, ymax) to a normalized (x, y, w, h) tuple.
def convert(size, box):
    """Scale a box's centre and extent by the reciprocal image dimensions.

    Args:
        size: Indexable pair; size[0] is treated as width, size[1] as height.
        box: Indexable (xmin, xmax, ymin, ymax) in pixels.

    Returns:
        Tuple (x, y, w, h): the centre (shifted by -1 pixel on each axis)
        and the width/height, with x and w multiplied by 1/size[0] and
        y and h multiplied by 1/size[1].
    """
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]
    mid_x = (x_lo + x_hi) / 2.0 - 1
    mid_y = (y_lo + y_hi) / 2.0 - 1
    span_x = x_hi - x_lo
    span_y = y_hi - y_lo
    return mid_x * inv_w, mid_y * inv_h, span_x * inv_w, span_y * inv_h


file = 'test1.xml'
tree = ET.parse(file)
root = tree.getroot()

# Maps "<image stem>_<n>" -> [xmin, ymin, xmax, ymax, w, h] for each big box.
all_C_sd_n = dict()
# Drop only the final extension: slicing off a fixed four characters breaks
# for extensions such as ".jpeg" or for names without an extension.
file_name = root.find('filename').text.rsplit('.', 1)[0]

i = 0
for obj in root.iter('object'):
    if obj.find('name').text != 'C_sd_n':
        continue
    i += 1
    box = obj.find('bndbox')
    # Parse each corner once and derive width/height from the parsed values.
    xmin, ymin, xmax, ymax = (
        int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )
    all_C_sd_n[file_name + '_' + str(i)] = [
        xmin, ymin, xmax, ymax, xmax - xmin, ymax - ymin,
    ]

# 切割C_sd_n框
# img = cv2.imread("1_001344.742_16_.jpg")
# for box in all_C_sd_n.items():
#     # box[1][1]:box[1][3]  ymin——>ymax  box[1][0]:box[1][2] xmax-xmin
#     cut_img = img[box[1][1]:box[1][3],box[1][0]:box[1][2]]
#     # cv2.imshow(box[0], cut_img)
#     cv2.imwrite(box[0]+'.png', cut_img)


# Class id written to the label file for each small-box category.
class_ids = {'C_wirerope_n': 0, 'C_wirerope_notbinding': 1, 'C_sdh_n': 2}

for obj in root.iter('object'):
    names = obj.find('name')
    if names.text not in class_ids:
        continue
    box = obj.find('bndbox')
    # Parse each corner of the small box once.
    sx0, sy0, sx1, sy1 = (
        int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )
    # Centre of the small box in full-image pixels.
    xm = sx0 + (sx1 - sx0) / 2
    ym = sy0 + (sy1 - sy0) / 2

    for crop_name, big in all_C_sd_n.items():
        bx0, by0, bx1, by1, bw, bh = big
        # The small box belongs to a big box when its centre lies strictly inside.
        if not (bx0 < xm < bx1 and by0 < ym < by1):
            continue

        # Small-box corners relative to the big box's top-left corner,
        # in the (xmin, xmax, ymin, ymax) order convert() expects.
        b = (sx0 - bx0, sx1 - bx0, sy0 - by0, sy1 - by0)
        x, y, w, h = convert((bw, bh), b)
        print(x, y, w, h)
        # Append one label line per match; the with-block closes the handle
        # so the line is flushed and no descriptor is leaked.
        with open('{}.txt'.format(crop_name), 'a') as out_file:
            out_file.write(
                '{} {} {} {} {}\n'.format(class_ids[names.text], x, y, w, h)
            )


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98

yolo格式数据集转voc

from xml.dom.minidom import Document
import os
import cv2


def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>`` with *doc*, append it to *parent*, return it."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def makexml(picPath, txtPath, xmlPath, classes=None):
    """Convert YOLO-format txt label files into VOC-format xml annotation files.

    For every ``*.txt`` file in *txtPath* the image ``<stem>.png`` is read
    from *picPath* to obtain its size, and ``<stem>.xml`` is written to
    *xmlPath*.

    Args:
        picPath: Folder containing the images.
        txtPath: Folder containing the YOLO txt label files.
        xmlPath: Folder the xml files are written to (created if missing).
        classes: Optional mapping from the class-id string found in the txt
            files to the class name written to the xml. It must match the
            classes used when the labels were produced. Defaults to
            ``{'0': "apple", '1': "orange"}``.

    Raises:
        KeyError: If a label line uses a class id that is not in *classes*.
    """
    if classes is None:
        classes = {'0': "apple", '1': "orange"}
    if xmlPath:
        os.makedirs(xmlPath, exist_ok=True)

    for name in os.listdir(txtPath):
        stem, ext = os.path.splitext(name)
        # Ignore anything in the folder that is not a label file.
        if ext.lower() != ".txt":
            continue

        image_path = os.path.join(picPath, stem + ".png")
        img = cv2.imread(image_path)
        # cv2.imread returns None for unreadable files; this also skips
        # label-folder extras such as classes.txt that have no image.
        if img is None:
            print("skip {}: cannot read image {}".format(name, image_path))
            continue
        Pheight, Pwidth, Pdepth = img.shape

        with open(os.path.join(txtPath, name)) as txtFile:
            txtList = txtFile.readlines()

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        _append_text_element(xmlBuilder, annotation, "folder",
                             "driving_annotation_dataset")
        _append_text_element(xmlBuilder, annotation, "filename", stem + ".png")

        size = xmlBuilder.createElement("size")
        for tag, value in (("width", Pwidth), ("height", Pheight), ("depth", Pdepth)):
            _append_text_element(xmlBuilder, size, tag, str(value))
        annotation.appendChild(size)

        for line in txtList:
            oneline = line.split()
            if not oneline:
                # Blank lines carry no annotation.
                continue
            try:
                label = classes[oneline[0]]
            except KeyError:
                raise KeyError(
                    "unknown class id {!r} in {}".format(oneline[0], name)
                ) from None
            xc = float(oneline[1])
            yc = float(oneline[2])
            bw = float(oneline[3])
            bh = float(oneline[4])

            obj = xmlBuilder.createElement("object")
            _append_text_element(xmlBuilder, obj, "name", label)
            _append_text_element(xmlBuilder, obj, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj, "truncated", "0")
            _append_text_element(xmlBuilder, obj, "difficult", "0")

            # Centre +/- half extent, in pixels. The +1 presumably undoes the
            # -1 centre shift applied when the labels were produced.
            corners = (
                ("xmin", (xc * Pwidth + 1) - bw * 0.5 * Pwidth),
                ("ymin", (yc * Pheight + 1) - bh * 0.5 * Pheight),
                ("xmax", (xc * Pwidth + 1) + bw * 0.5 * Pwidth),
                ("ymax", (yc * Pheight + 1) + bh * 0.5 * Pheight),
            )
            bndbox = xmlBuilder.createElement("bndbox")
            for tag, value in corners:
                _append_text_element(xmlBuilder, bndbox, tag, str(int(value)))
            obj.appendChild(bndbox)

            annotation.appendChild(obj)

        # Open the file as UTF-8 so the bytes match the encoding declared in
        # the XML header regardless of the platform default.
        xml_file = os.path.join(xmlPath, stem + ".xml")
        with open(xml_file, 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')


if __name__ == "__main__":
    # Image folder, txt label folder and xml output folder, in that order.
    # Each path is written with a trailing "/".
    picPath, txtPath, xmlPath = "img/", "txt/", "Annotations/"
    makexml(picPath, txtPath, xmlPath)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113

相关阅读:
【DSP】时域中的离散时间信号 -- MATLAB实现序列的运算
 重载运算符
 分布式计算MapReduce | Spark实验
 利用图神经网络进行药物再利用的计算方法(下)
(Applied Intelligence-2022)TransGait: 基于多模态的步态识别与集合Transformer
SpringMVC(Rest+映射请求数据+模型数据+视图和视图解析器)
Web应用防火墙的性能优化技术
 Modbus CRC
3.9-Dockerfile实战
 这些专业配音软件你值得拥有
原文地址：https://blog.csdn.net/weixin_45755332/article/details/127733783