• 【Dataset|COCO】Creating a COCO-Format Dataset and Computing Dataset Parameters


    1. Batch-Modifying a Field in the JSON Label Files

    1.1 Problem Background

    • The annotation was done on different machines, so the image path stored in each label file no longer matches its JSON file name; it needs to be corrected in batch.

    1.2 Implementation

    import os
    import json
    
    folder_path = "/home/zth/HardDisk/Datasets/nematode/datasets/before"
    for json_name in os.listdir(folder_path):
        if not json_name.endswith(".json"):
            continue  # skip anything that is not a label file
    
        # read and modify
        with open(os.path.join(folder_path, json_name), 'r') as f:
            data = json.load(f)
            print(json_name)
            # point imagePath at the image belonging to this label file
            # (assumes .jpg images; adjust the extension if yours differ)
            data['imagePath'] = os.path.splitext(json_name)[0] + ".jpg"
    
        # write back
        with open(os.path.join(folder_path, json_name), 'w') as r:
            json.dump(data, r, indent=4, ensure_ascii=False)  # pretty-print and write
    
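    As a quick sanity check (a minimal sketch; it assumes the same before folder and .jpg images as above), the following loop reports any file whose imagePath still disagrees with its JSON name:

    import os
    import json
    
    folder_path = "/home/zth/HardDisk/Datasets/nematode/datasets/before"
    for json_name in os.listdir(folder_path):
        if not json_name.endswith(".json"):
            continue
        with open(os.path.join(folder_path, json_name), 'r') as f:
            data = json.load(f)
        expected = os.path.splitext(json_name)[0] + ".jpg"  # assumed image extension
        if data.get('imagePath') != expected:
            print(f"mismatch: {json_name} -> {data.get('imagePath')}")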

    2. Splitting the Training and Test Sets

    2.1 Problem Background

    • Randomly split the data into a training set and a test set at an 8:2 ratio.

    2.2 Environment Setup

    shutil ships with the Python standard library, so only scikit-learn needs to be installed:

    pip install scikit-learn
    

    2.3 Implementation

    import os
    import shutil
    from sklearn.model_selection import train_test_split
    
    # create a folder if it does not already exist
    def mkdir(path):
        folder = os.path.exists(path)
        if not folder:
            os.makedirs(path)
            print(f'-- new folder "{path}" --')
        else:
            print(f'-- the folder "{path}" is already here --')
    
    # paths of the source images/labels and of the target folders
    image_format = ".jpg"
    image_path = "images"
    label_path = "labels"
    train_set_save_path = "coco/train"
    test_set_save_path = "coco/test"
    mkdir(train_set_save_path)
    mkdir(test_set_save_path)
    
    file_pathes = os.listdir(image_path)
    # collect the names (without extension) of all images of the given format
    img_names = []
    for file_path in file_pathes:
        if os.path.splitext(file_path)[1] == image_format:
            file_name = os.path.splitext(file_path)[0]
            img_names.append(file_name)
    
    # split into training and test sets (8:2)
    train_set, test_set = train_test_split(img_names, test_size=0.2, random_state=42)
    print(f"train_set size: {len(train_set)}, test_set size: {len(test_set)}")
    
    # training set: copy the images and label files into the target folder
    for file_name in train_set:
        img_src_path = os.path.join(image_path, file_name + image_format)
        img_dst_path = os.path.join(train_set_save_path, file_name + image_format)
        shutil.copyfile(img_src_path, img_dst_path)
    
        json_src_path = os.path.join(label_path, file_name + ".json")
        json_dst_path = os.path.join(train_set_save_path, file_name + ".json")
        shutil.copyfile(json_src_path, json_dst_path)
    
    # test set: copy the images and label files into the target folder
    for file_name in test_set:
        img_src_path = os.path.join(image_path, file_name + image_format)
        img_dst_path = os.path.join(test_set_save_path, file_name + image_format)
        shutil.copyfile(img_src_path, img_dst_path)
    
        json_src_path = os.path.join(label_path, file_name + ".json")
        json_dst_path = os.path.join(test_set_save_path, file_name + ".json")
        shutil.copyfile(json_src_path, json_dst_path)
    
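    A quick file count (a minimal sketch; it assumes the coco/train and coco/test folders produced above) confirms the 8:2 split and that every image has a matching JSON label:

    import glob
    import os
    
    for split in ("coco/train", "coco/test"):
        images = glob.glob(os.path.join(split, "*.jpg"))
        labels = glob.glob(os.path.join(split, "*.json"))
        print(f"{split}: {len(images)} images, {len(labels)} labels")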

    3. Generating the COCO JSON Annotation Files

    3.1 Environment Setup

    pip install scikit-image
    
    labelme must also be available (it already is if you used it for annotation), since the script below imports labelme.utils.
    

    3.2 Implementation

    # -*- coding:utf-8 -*-
    
    import os
    import argparse
    import json
    import matplotlib.pyplot as plt
    import skimage.io as io
    from labelme import utils
    import numpy as np
    import glob
    import PIL.Image
    import PIL.ImageDraw  # needed by polygons_to_mask below
    
    def mkdir(path):
        folder = os.path.exists(path)
        if not folder:
            os.makedirs(path)
            print(f'-- new folder "{path}" --')
        else:
            print(f'-- the folder "{path}" is already here --')
    
    class MyEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, np.integer):
                return int(obj)
            elif isinstance(obj, np.floating):
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            else:
                return super(MyEncoder, self).default(obj)
    
    
    class labelme2coco(object):
        def __init__(self, labelme_json=[], save_json_path='./train.json'):
            self.labelme_json = labelme_json
            self.save_json_path = save_json_path
            self.images = []
            self.categories = []
            self.annotations = []
            self.label = []
            self.annID = 1
            self.height = 0
            self.width = 0
    
            self.save_json()
    
        def data_transfer(self):
    
            for num, json_file in enumerate(self.labelme_json):
                print(json_file)
                with open(json_file, 'r', encoding="utf8", errors='ignore') as fp:
                    data = json.load(fp)  # load the labelme json file
                    self.images.append(self.image(data, num))
                    for shapes in data['shapes']:
                        label = shapes['label']
                        if label not in self.label:
                            self.categories.append(self.categorie(label))
                            self.label.append(label)
                        points = shapes['points']
                        if shapes.get('shape_type') == 'rectangle':
                            # rectangle annotations only store two corner points;
                            # expand them to the four corners of the box
                            points.append([points[0][0], points[1][1]])
                            points.append([points[1][0], points[0][1]])
                        # polygon annotations are used as-is
                        self.annotations.append(self.annotation(points, label, num))
                        self.annID += 1
    
        def image(self, data, num):
            image = {}
            img = utils.img_b64_to_arr(data['imageData'])  # decode the base64 image data embedded in the labelme file
            height, width = img.shape[:2]
            img = None
            image['height'] = height
            image['width'] = width
            image['id'] = num + 1
            image['file_name'] = data['imagePath'].split('/')[-1]
    
            self.height = height
            self.width = width
    
            return image
    
        def categorie(self, label):
            categorie = {}
            categorie['supercategory'] = 'Cancer'  # left over from the original script; change to your own supercategory if needed
            categorie['id'] = len(self.label) + 1  # id 0 is reserved for the background
            categorie['name'] = label
            return categorie
    
        def annotation(self, points, label, num):
            annotation = {}
            annotation['segmentation'] = [list(np.asarray(points).flatten())]
            annotation['iscrowd'] = 0
            annotation['image_id'] = num + 1
            annotation['bbox'] = list(map(float, self.getbbox(points)))  # [x, y, w, h]
            annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3]
            annotation['category_id'] = self.getcatid(label)  # note: the script this is based on hard-coded 1 here
            annotation['id'] = self.annID
            return annotation
    
        def getcatid(self, label):
            for categorie in self.categories:
                if label == categorie['name']:
                    return categorie['id']
            return 1
    
        def getbbox(self, points):
            polygons = points
    
            mask = self.polygons_to_mask([self.height, self.width], polygons)
            return self.mask2box(mask)
    
        def mask2box(self, mask):
            '''Derive the bounding box from a mask.
            mask: [h, w] array of 0s and 1s.
            1 marks the object; the box is given by the min/max row and column
            indices of the 1 entries (top-left and bottom-right corners).
            '''
            index = np.argwhere(mask == 1)
            rows = index[:, 0]
            cols = index[:, 1]
    
            # top-left corner
            left_top_r = np.min(rows)  # y
            left_top_c = np.min(cols)  # x
    
            # bottom-right corner
            right_bottom_r = np.max(rows)
            right_bottom_c = np.max(cols)
    
            return [left_top_c, left_top_r, right_bottom_c - left_top_c,
                    right_bottom_r - left_top_r]  # [x, y, w, h], COCO bbox format
    
        def polygons_to_mask(self, img_shape, polygons):
            mask = np.zeros(img_shape, dtype=np.uint8)
            mask = PIL.Image.fromarray(mask)
            xy = list(map(tuple, polygons))
            PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
            mask = np.array(mask, dtype=bool)
            return mask
    
        def data2coco(self):
            data_coco = {}
            data_coco['images'] = self.images
            data_coco['categories'] = self.categories
            data_coco['annotations'] = self.annotations
            return data_coco
    
        def save_json(self):
            self.data_transfer()
            self.data_coco = self.data2coco()
            # write the COCO-format json file (indent=4 for readability)
            with open(self.save_json_path, 'w') as f:
                json.dump(self.data_coco, f, indent=4, cls=MyEncoder)
    
    
    if __name__ == "__main__":
    
        mkdir("coco/annotations")
    
        train_labelme_json = glob.glob(r'coco/train/*.json')
        labelme2coco(train_labelme_json, 'coco/annotations/instances_train.json')
    
        test_labelme_json = glob.glob(r'coco/test/*.json')
        labelme2coco(test_labelme_json, 'coco/annotations/instances_test.json')
    
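    Before training, it is worth loading the generated files back with the COCO API as a sanity check (a minimal sketch; it assumes pycocotools is installed and the annotation paths produced above):

    from pycocotools.coco import COCO
    
    for ann_file in ("coco/annotations/instances_train.json",
                     "coco/annotations/instances_test.json"):
        coco = COCO(ann_file)  # also prints index-building info
        print(ann_file, ":", len(coco.imgs), "images,",
              len(coco.anns), "annotations,", len(coco.cats), "categories")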

    4. Computing the Per-Channel Mean and Standard Deviation of the Training Set

    4.1 Problem Background

    The configuration file contains the following parameters, which are the per-channel mean and standard deviation of the ImageNet dataset:

    MODEL:
      PIXEL_MEAN: [123.675, 116.280, 103.530]
      PIXEL_STD: [58.395, 57.120, 57.375]
    

    Dividing by 255.0 gives mean=(0.485, 0.456, 0.406) and std=(0.229, 0.224, 0.225); for example, 123.675 / 255.0 ≈ 0.485.

    The comments in the Detectron2 code describe them as follows:

    • pixel_mean : per-channel mean to normalize input image
    • pixel_std : per-channel stddev to normalize input image

    When training, it is best to replace these values with the mean and standard deviation of your own dataset.
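
    As a concrete sketch (assuming the usual Detectron2 get_cfg() workflow), the values can be overridden in code; the numbers shown are the ImageNet defaults from above and should be swapped for the values computed in 4.2:

    from detectron2.config import get_cfg
    
    cfg = get_cfg()
    # override the ImageNet statistics with the custom dataset's values (0-255 scale)
    cfg.MODEL.PIXEL_MEAN = [123.675, 116.280, 103.530]  # replace with your PIXEL_MEAN
    cfg.MODEL.PIXEL_STD = [58.395, 57.120, 57.375]      # replace with your PIXEL_STD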

    4.2 Implementation

    """
    计算训练集的三通道均值和标准差
    适用于训练集中存在不同尺寸的图像
    """
    
    from importlib.resources import path
    import os
    from PIL import Image
    import matplotlib.pyplot as plt
    import numpy as np
    import imageio.v2 as imageio
    from tqdm import trange
    
    def get_mean_std(pathDir: list):
    
        # 计算三通道的均值
        R_channel = 0
        G_channel = 0
        B_channel = 0
        all_num = 0 # 像素点总数量
        print("计算三通道均值:")
        for idx in trange(len(pathDir)):
            filename = pathDir[idx]
            img = imageio.imread(os.path.join(filepath, filename))# / 255.0
            R_channel = R_channel + np.sum(img[:, :, 0])
            G_channel = G_channel + np.sum(img[:, :, 1])
            B_channel = B_channel + np.sum(img[:, :, 2])
    
            all_num = img.shape[0] * img.shape[1] + all_num
    
        R_mean = R_channel / all_num
        G_mean = G_channel / all_num
        B_mean = B_channel / all_num
    
        # 计算三通道的标准差
        R_channel = 0
        G_channel = 0
        B_channel = 0
        print("计算三通道标准差:")
        for idx in trange(len(pathDir)):
            filename = pathDir[idx]
            img = imageio.imread(os.path.join(filepath, filename))# / 255.0
            R_channel = R_channel + np.sum((img[:, :, 0] - R_mean) ** 2)
            G_channel = G_channel + np.sum((img[:, :, 1] - G_mean) ** 2)
            B_channel = B_channel + np.sum((img[:, :, 2] - B_mean) ** 2)
    
        R_std = np.sqrt(R_channel / all_num)
        G_std = np.sqrt(G_channel / all_num)
        B_std = np.sqrt(B_channel / all_num)
        
        return [R_mean, G_mean, B_mean], [R_std, G_std, B_std]
    
    if __name__ == "__main__":
        filepath = 'coco/train'  # 数据集目录
    
        # 对目录下的 jpg 图像进行处理
        image_paths = []
        for filename in os.listdir(filepath):
            if os.path.splitext(filename)[1] == ".jpg":
                image_paths.append(filename)
    
        # 计算均值和标准差        
        mean, std = get_mean_std(image_paths)
    
        # 打印结果(保留三位小数)
        print("PIXEL_MEAN: ", [round(i,3) for i in mean])
        print("PIXEL_STD: ", [round(i,3) for i in std])
    

    5. Visualizing bbox and Segmentation Labels

    5.1 Problem Background

    Visualize the bounding boxes and segmentation annotations from the generated COCO labels and compare them against each image's labelme annotation, to check that images and labels correspond correctly; otherwise the loss can easily become NaN during training.

    5.2 Implementation

    '''
    Author: zth
    Date: 2022-08-16 00:22:45
    LastEditTime: 2022-08-16 00:38:41
    Description: visualize COCO bbox and segmentation annotations
    '''
    import cv2
    from pycocotools.coco import COCO
    from skimage import io
    from matplotlib import pyplot as plt
    
    train_json = 'coco/annotations/instances_train.json'
    train_path = 'coco/train/'
    
    
    def visualization_bbox_seg(num_image, json_path, img_path,
                               *cat_names):  # draw the num_image-th image from the given json/image paths
    
        coco = COCO(json_path)
    
        if len(cat_names) == 0:
            catIds = []
        else:
            catIds = coco.getCatIds(
                catNms=[cat_names[0]])  # ids of the given category (list with a single embedded dict [{}])
            catIds = coco.loadCats(catIds)[0]['id']  # the concrete id inside that dict
    
        list_imgIds = coco.getImgIds(catIds=catIds)  # ids of all images containing the given category
        img = coco.loadImgs(
            list_imgIds[num_image - 1])[0]  # dict of the num_image-th image satisfying the filter
        image = io.imread(img_path + img['file_name'])  # read the image
    
        img_annIds = coco.getAnnIds(
            imgIds=img['id'], catIds=catIds, iscrowd=None)  # all annotation ids of this image
        img_anns = coco.loadAnns(img_annIds)
    
        for ann in img_anns:
            x, y, w, h = ann['bbox']  # draw the bounding box
            image = cv2.rectangle(image, (int(x), int(y)),
                                  (int(x + w), int(y + h)), (0, 255, 255), 2)
    
        plt.rcParams['figure.figsize'] = (20.0, 20.0)
        plt.imshow(image)
        coco.showAnns(img_anns)  # overlay the segmentation masks
        plt.show()
    
    
    if __name__ == "__main__":
        visualization_bbox_seg(30, train_json, train_path,
                               '1')  # omit the last argument to draw all categories in the image
    
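    Beyond eyeballing individual images, a programmatic check helps too (a minimal sketch; it assumes the annotation file and image folder produced above): it verifies that every file_name referenced in the COCO annotations exists on disk and has at least one annotation:

    import os
    from pycocotools.coco import COCO
    
    coco = COCO('coco/annotations/instances_train.json')
    for img_id, img in coco.imgs.items():
        if not os.path.exists(os.path.join('coco/train', img['file_name'])):
            print("missing image:", img['file_name'])
        if len(coco.getAnnIds(imgIds=img_id)) == 0:
            print("image without annotations:", img['file_name'])
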
  • Original article: https://blog.csdn.net/qq_44324181/article/details/126365453