• Splitting a VOC dataset, and converting the splits to COCO format


    1. The VOC dataset

        LabelImg is a widely used open-source image annotation tool, built mainly for creating the datasets that object detection models are trained on. The Visual Object Classes (VOC) format is one of the most common object detection dataset formats: bounding boxes and class labels are drawn on each image with a tool such as LabelImg, producing the annotations a model needs for training. VOC originated with the PASCAL VOC challenge; it covers a range of object categories and has become one of the standard benchmarks in object detection, driving steady improvements in algorithm performance.

        After annotating with LabelImg (or any other VOC-style annotation tool), you end up with two folders:

    BirdNest    ------->>>  root directory containing the two folders below
        ----Annotations    ------->>>  holds the .xml annotation files
        ----JPEGImages     ------->>>  holds the image files
    

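    To make the format concrete, here is a sketch of what a single Annotations/*.xml file produced by LabelImg looks like, parsed with Python's standard ElementTree. The file name, class name, and coordinates are made-up examples:

    import xml.etree.ElementTree as ET

    # Hypothetical example of one LabelImg-produced VOC annotation file.
    SAMPLE_VOC_XML = """<annotation>
        <filename>000001.jpg</filename>
        <size>
            <width>1920</width>
            <height>1080</height>
            <depth>3</depth>
        </size>
        <object>
            <name>BirdNest</name>
            <bndbox>
                <xmin>100</xmin>
                <ymin>200</ymin>
                <xmax>300</xmax>
                <ymax>400</ymax>
            </bndbox>
        </object>
    </annotation>"""

    root = ET.fromstring(SAMPLE_VOC_XML)
    for obj in root.iter('object'):
        name = obj.find('name').text
        box = obj.find('bndbox')
        coords = [int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        print(name, coords)  # -> BirdNest [100, 200, 300, 400]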

    2. Splitting the VOC dataset

        The script below splits the data into train : val = 8 : 2. While writing the lists it also reports any annotation whose image file cannot be found, and it recognizes the following image formats inside JPEGImages:

    ['.jpg', '.png', '.gif', '.bmp', '.tiff', '.jpeg', '.webp', '.svg', '.psd', '.cr2', '.nef', '.dng']
    

    The complete script:

    import os
    import random
    
    from tqdm import tqdm
    
    image_extensions = ['.jpg', '.png', '.gif', '.bmp', '.tiff', '.jpeg', '.webp', '.svg', '.psd', '.cr2', '.nef', '.dng']
    
    
    def split_voc_dataset(dataset_dir, train_ratio, val_ratio):
        if not (0 < train_ratio + val_ratio <= 1):
            print("Invalid ratio values. They should sum up to 1.")
            return
    
        annotations_dir = os.path.join(dataset_dir, 'Annotations')
        images_dir = os.path.join(dataset_dir, 'JPEGImages')
        output_dir = os.path.join(dataset_dir, 'ImageSets/Main')
    
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
    
        dict_info = dict()
        # List all the image files in the JPEGImages directory
        for file in os.listdir(images_dir):
            stem, ext = os.path.splitext(file)
            # Match on the actual file extension (case-insensitive), not a substring
            if ext.lower() in image_extensions:
                dict_info[stem] = ext
    
        # List all the XML files in the Annotations directory
        xml_files = [file for file in os.listdir(annotations_dir) if file.endswith('.xml')]
        random.shuffle(xml_files)
    
        num_samples = len(xml_files)
        num_train = int(num_samples * train_ratio)
        num_val = int(num_samples * val_ratio)
    
        train_xml_files = xml_files[:num_train]
        val_xml_files = xml_files[num_train:num_train + num_val]
    
        with open(os.path.join(output_dir, 'train_list.txt'), 'w+') as train_file:
            for xml_file in train_xml_files:
                image_name = os.path.splitext(xml_file)[0]
                if image_name in dict_info:
                    image_path = os.path.join('JPEGImages', image_name + dict_info[image_name])
                    annotation_path = os.path.join('Annotations', xml_file)
                    train_file.write(f'{image_path} {annotation_path}\n')
                else:
                    print(f"没有找到图片 {os.path.join(images_dir, image_name)}")
    
        with open(os.path.join(output_dir, 'val_list.txt'), 'w+') as val_file:
            for xml_file in val_xml_files:
                image_name = os.path.splitext(xml_file)[0]
                if image_name in dict_info:
                    image_path = os.path.join('JPEGImages', image_name + dict_info[image_name])
                    annotation_path = os.path.join('Annotations', xml_file)
                    val_file.write(f'{image_path} {annotation_path}\n')
                else:
                    print(f"没有找到图片 {os.path.join(images_dir, image_name)}")
    
        labels = set()
        for xml_file in tqdm(xml_files):
            annotation_path = os.path.join(annotations_dir, xml_file)
            with open(annotation_path, 'r', encoding='utf-8') as f:
                for line in f:
                    # Crude but sufficient for LabelImg output: grab the text
                    # between the <name> tags to collect every class label
                    if '<name>' in line:
                        label = line.strip().replace('<name>', '').replace('</name>', '')
                        labels.add(label)
    
        with open(os.path.join(output_dir, 'labels.txt'), 'w+') as labels_file:
            for label in labels:
                labels_file.write(f'{label}\n')
                
    
    if __name__ == "__main__":
        dataset_dir = 'BirdNest/'
        train_ratio = 0.8  # Adjust the train-validation split ratio as needed
        val_ratio = 0.2
        split_voc_dataset(dataset_dir, train_ratio, val_ratio)
    
    
    
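    After running the script, a quick sanity check of the three generated files (a minimal sketch; it only assumes the script above has already been run on BirdNest/):

    import os

    output_dir = 'BirdNest/ImageSets/Main'
    for name in ('train_list.txt', 'val_list.txt', 'labels.txt'):
        with open(os.path.join(output_dir, name), encoding='utf-8') as f:
            lines = [line.strip() for line in f if line.strip()]
        print(f"{name}: {len(lines)} entries, e.g. {lines[0] if lines else '(empty)'}")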

    Screenshot after the split: [image]

    3. Converting VOC to COCO format

    Most frameworks nowadays expect the COCO format: instead of one XML file per image, all annotations are stored in a single JSON file, which makes the dataset more convenient to store and consume.
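    The main thing to get right is the box encoding: VOC stores corner coordinates (xmin, ymin, xmax, ymax), while COCO stores (x, y, width, height). A minimal sketch of the conversion the script below applies to every object:

    def voc_box_to_coco(xmin, ymin, xmax, ymax):
        """Convert a VOC corner-style box to COCO's [x, y, width, height]."""
        return [xmin, ymin, xmax - xmin, ymax - ymin]

    print(voc_box_to_coco(100, 200, 300, 400))  # -> [100, 200, 200, 200]

    The complete conversion script: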

    import os
    import json
    
    from xml.etree import ElementTree as ET
    
    
    def parse_xml(dataset_dir, xml_file):
        xml_path = os.path.join(dataset_dir, xml_file)
        tree = ET.parse(xml_path)
        root = tree.getroot()
    
        objects = root.findall('object')
        annotations = []
    
        for obj in objects:
            bbox = obj.find('bndbox')
            # VOC coordinates are occasionally written as floats, so parse defensively
            xmin = int(float(bbox.find('xmin').text))
            ymin = int(float(bbox.find('ymin').text))
            xmax = int(float(bbox.find('xmax').text))
            ymax = int(float(bbox.find('ymax').text))
    
            # Extract the label from the XML annotation; skip objects without a <name> tag
            name_tag = obj.find('name')
            label = name_tag.text if name_tag is not None else None
            if not label:
                print("Label not found in XML annotation. Skipping annotation.")
                continue
    
            annotations.append({
                'xmin': xmin,
                'ymin': ymin,
                'xmax': xmax,
                'ymax': ymax,
                'label': label
            })
    
        return annotations
    
    
    def convert_to_coco_format(image_list_file, output_json_file, dataset_dir):
        images = []
        annotations = []
        categories = []
    
        # Load labels (one category name per line, written by the split script)
        with open(os.path.join(os.path.dirname(image_list_file), 'labels.txt'), 'r', encoding='utf-8') as labels_file:
            label_lines = labels_file.readlines()
            categories = [{'id': i + 1, 'name': label.strip()} for i, label in enumerate(label_lines)]
    
        annotation_id = 1  # Initialize unique annotation ID
    
        # Load image list file
        with open(image_list_file, 'r', encoding='utf-8') as image_list:
            image_lines = image_list.readlines()
            for i, line in enumerate(image_lines):
                image_path, annotation_path = line.strip().split(' ')
                image_id = i + 1
                image_filename = os.path.basename(image_path)
    
                # Extract image size from XML file
                xml_path = os.path.join(dataset_dir, annotation_path)
                tree = ET.parse(xml_path)
                size = tree.find('size')
                image_height = int(size.find('height').text)
                image_width = int(size.find('width').text)
    
                images.append({
                    'id': image_id,
                    'file_name': image_filename,
                    'height': image_height,
                    'width': image_width,
                    'license': None,
                    'flickr_url': None,
                    'coco_url': None,
                    'date_captured': None
                })
    
                # Load annotations from XML files
                xml_annotations = parse_xml(dataset_dir, annotation_path)
                for xml_annotation in xml_annotations:
                    label = xml_annotation['label']
                    category_id = next((cat['id'] for cat in categories if cat['name'] == label), None)
                    if category_id is None:
                        print(f"Label '{label}' not found in categories. Skipping annotation.")
                        continue
    
                    bbox = {
                        'xmin': xml_annotation['xmin'],
                        'ymin': xml_annotation['ymin'],
                        'xmax': xml_annotation['xmax'],
                        'ymax': xml_annotation['ymax']
                    }
    
                    annotations.append({
                        'id': annotation_id,
                        'image_id': image_id,
                        'category_id': category_id,
                        'bbox': [bbox['xmin'], bbox['ymin'], bbox['xmax'] - bbox['xmin'], bbox['ymax'] - bbox['ymin']],
                        'area': (bbox['xmax'] - bbox['xmin']) * (bbox['ymax'] - bbox['ymin']),
                        'segmentation': [],
                        'iscrowd': 0
                    })
                    annotation_id += 1  # Increment annotation ID for uniqueness
    
        coco_data = {
            'images': images,
            'annotations': annotations,
            'categories': categories
        }
    
        with open(output_json_file, 'w', encoding='utf-8') as json_file:
            json.dump(coco_data, json_file, indent=4)
    
    
    if __name__ == "__main__":
        # Adjust paths as needed
        dataset_dir = 'BirdNest/'
        image_sets_dir = 'BirdNest/ImageSets/Main/'
        train_list_file = os.path.join(image_sets_dir, 'train_list.txt')
        val_list_file = os.path.join(image_sets_dir, 'val_list.txt')
        output_train_json_file = os.path.join(dataset_dir, 'train_coco.json')
        output_val_json_file = os.path.join(dataset_dir, 'val_coco.json')
    
        convert_to_coco_format(train_list_file, output_train_json_file, dataset_dir)
        convert_to_coco_format(val_list_file, output_val_json_file, dataset_dir)
        print("The COCO JSON files have been generated successfully!")
    
    
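    To double-check the output, the generated JSON can be loaded with the pycocotools COCO API (a hedged sketch; assumes pycocotools is installed via pip install pycocotools):

    from pycocotools.coco import COCO

    coco = COCO('BirdNest/train_coco.json')
    print('images:', len(coco.getImgIds()))
    print('annotations:', len(coco.getAnnIds()))
    print('categories:', [c['name'] for c in coco.loadCats(coco.getCatIds())])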

    Screenshots of a successful conversion to COCO format: [images]

  • Original article: https://blog.csdn.net/qq_37700257/article/details/134326355