参考:链接
代码:
"""
将整个数据集分为train和test,相应的也分别分配整个json文件
"""
import os
import random
import json
# Folder holding the images that still have to be split, and the annotation
# file that describes them.
total_select_path = r"C:\Users\9ling\Desktop\YiLiuWuDataset\train\yuedongguan_select"
total_json_path = r"C:\Users\9ling\Desktop\YiLiuWuDataset\train\yuedongguan.json"
# Destination folder and annotation file for the test split.
test_path = r"C:\Users\9ling\Desktop\YiLiuWuDataset\test\has_yiliuwu\yuedongguan_test"
test_json_path = r"C:\Users\9ling\Desktop\YiLiuWuDataset\test\has_yiliuwu\yuedongguan_test\yuedongguan_test.json"
# Destination folder and annotation file for the train split.
train_path = r"C:\Users\9ling\Desktop\YiLiuWuDataset\train\yuedongguan"
train_json_path = r"C:\Users\9ling\Desktop\YiLiuWuDataset\train\yuedongguan\yuedongguan.json"

# Parse the annotation records inside a `with` block so the file handle is
# closed as soon as loading finishes instead of being left to the GC.
with open(total_json_path) as total_json_file:
    data = json.load(total_json_file)["labels"]

all_select_path = os.listdir(total_select_path)
# Full paths of the images to distribute, in directory-listing order.
all_file_path = [os.path.join(total_select_path, name) for name in all_select_path]

# Positions into all_file_path; shuffled in place to randomise the split.
idx = list(range(len(all_select_path)))
random.shuffle(idx)
def copy_dir(src_path, target_path):
    """Recursively copy the contents of one existing directory into another.

    Args:
        src_path: Directory whose entries are copied.
        target_path: Existing directory that receives the copies. Missing
            sub-directories are created and existing files are overwritten.

    Returns:
        True when both arguments are directories and every nested directory
        could be copied. False when either argument is not a directory, or
        when a nested directory could not be copied (for example because the
        target already holds a regular file with the sub-directory's name).
    """
    if not (os.path.isdir(src_path) and os.path.isdir(target_path)):
        return False

    copied_everything = True
    for entry in os.listdir(src_path):
        src_entry = os.path.join(os.path.abspath(src_path), entry)
        if os.path.isdir(src_entry):
            dst_entry = os.path.join(os.path.abspath(target_path), entry)
            if not os.path.exists(dst_entry):
                os.mkdir(dst_entry)
            # Keep copying the remaining entries, but remember the failure so
            # the caller is not told that an incomplete copy succeeded.
            if not copy_dir(src_entry, dst_entry):
                copied_everything = False
        else:
            # Read fully before opening the destination so that copying a
            # file onto itself cannot truncate it first.
            with open(src_entry, 'rb') as read_stream:
                contents = read_stream.read()
            dst_entry = os.path.join(target_path, entry)
            with open(dst_entry, 'wb') as write_stream:
                write_stream.write(contents)
    return copied_everything
# Number of shuffled images that go to the test split; the rest go to train.
TEST_IMAGE_COUNT = 41

# Group the annotation records by bare file name once, so each image is looked
# up directly instead of rescanning the whole annotation list per image.
# Annotation file names may carry a backslash-separated prefix, hence the split.
records_by_name = {}
for record in data:
    records_by_name.setdefault(record["filename"].split("\\")[-1], []).append(record)


def _copy_and_collect(indices, dst_dir):
    """Copy the images at *indices* into *dst_dir* and return their records.

    Args:
        indices: Positions into ``all_file_path`` selecting the images.
        dst_dir: Existing directory that receives the image copies.

    Returns:
        The annotation records whose file name matches a copied image, in
        the order the images were processed.
    """
    collected = []
    for position in indices:
        src_file = all_file_path[position]
        # basename() uses the platform's separator rules rather than a
        # hard-coded backslash.
        file_name = os.path.basename(src_file)
        with open(src_file, 'rb') as read_stream:
            contents = read_stream.read()
        with open(os.path.join(dst_dir, file_name), 'wb') as write_stream:
            write_stream.write(contents)
        collected.extend(records_by_name.get(file_name, []))
    return collected


# Test split: copy the images, then write and report their annotations.
test_data_dir = {"labels": _copy_and_collect(idx[:TEST_IMAGE_COUNT], test_path)}
with open(test_json_path, 'w') as json_file:
    json.dump(test_data_dir, json_file, indent=4)
print(test_data_dir)
print(len(test_data_dir["labels"]))
print("*" * 30)

# Train split: everything that was not selected for the test split.
train_data_dir = {"labels": _copy_and_collect(idx[TEST_IMAGE_COUNT:], train_path)}
with open(train_json_path, 'w') as json_file:
    json.dump(train_data_dir, json_file, indent=4)
print(train_data_dir)
print(len(train_data_dir["labels"]))
二次迭代代码:
"""
将整个数据集分为train和test,相应的也分别分配整个json文件
"""
import os
import random
import json
from copy import deepcopy
# Annotation JSON files of the three source batches; each one is passed to
# all_power() together with the image folder of the same number.
ydgjson_01 = r"C:\Users\9ling\Desktop\YiLiuWuDataset\original_dataset\yuedongguan_01.json"
ydgjson_02 = r"C:\Users\9ling\Desktop\YiLiuWuDataset\original_dataset\yuedongguan_02.json"
ydgjson_03 = r"C:\Users\9ling\Desktop\YiLiuWuDataset\original_dataset\yuedongguan_03.json"
# Image folders of the three source batches.
ydgimg_01 = r"C:\Users\9ling\Desktop\YiLiuWuDataset\original_dataset\yuedongguan_01"
ydgimg_02 = r"C:\Users\9ling\Desktop\YiLiuWuDataset\original_dataset\yuedongguan_02"
ydgimg_03 = r"C:\Users\9ling\Desktop\YiLiuWuDataset\original_dataset\yuedongguan_03"
# Destination folders for the copied train / test images.
train_path = r"C:\Users\9ling\Desktop\YiLiuWuDataset\YiliuwuDataset\train\yuedongguan"
test_path = r"C:\Users\9ling\Desktop\YiLiuWuDataset\YiliuwuDataset\test\has_yiliuwu\yuedongguan_test"
# Output annotation files written by the __main__ section.
trainjson_path = r"C:\Users\9ling\Desktop\YiLiuWuDataset\YiliuwuDataset\train\yuedongguan\yuedongguan.json"
testjson_path = r"C:\Users\9ling\Desktop\YiLiuWuDataset\YiliuwuDataset\test\has_yiliuwu\yuedongguan_test\yuedongguan_test.json"
# Module-level accumulators: every all_power() call appends the records it
# selects to these "labels" lists, so they grow across the three batches.
test_data_dir = {"labels": []}
train_data_dir = {"labels": []}
# Positional rename table used by swap_class(): a class found at index i of
# original_clses is replaced by new_clses[i].
# NOTE(review): spellings such as 'trunkk' and 'pedestrain' presumably match
# the labels stored in the annotation files - confirm before "correcting" them.
original_clses = ['trunkk', 'Deskphone', 'pedestrain', 'cashbundle', 'purse', 'smog', 'fire']
new_clses = ['key', 'mobilephone', 'hand', 'money', 'purse', 'identitycard', 'bankcard']
def _distribute_images(names, src_dir, dst_dir, records_by_name, labels):
    """Copy the named images from src_dir to dst_dir and collect their records."""
    for name in names:
        with open(os.path.join(src_dir, name), 'rb') as read_stream:
            contents = read_stream.read()
        with open(os.path.join(dst_dir, name), 'wb') as write_stream:
            write_stream.write(contents)
        for record in records_by_name.get(name, []):
            # Store the bare file name. Slicing off a fixed two characters
            # only worked for a two-character prefix and mangled any other.
            record["filename"] = name
            labels.append(record)


def all_power(treat_divide_json_path, treat_divide_img_path, test_img_path, train_img_path,
              test_ratio=0.05, *, test_labels=None, train_labels=None):
    """Randomly split one image folder into test and train folders.

    The images are shuffled, the first ``round(count * test_ratio)`` of them
    are copied to the test folder and the rest to the train folder. The
    annotation records that belong to each copied image are appended to the
    matching label list, with their "filename" reduced to the bare file name.

    Args:
        treat_divide_json_path: JSON file with a top-level "labels" list whose
            items carry a "filename" key.
        treat_divide_img_path: Folder containing the images to split.
        test_img_path: Existing folder that receives the test images.
        train_img_path: Existing folder that receives the train images.
        test_ratio: Fraction of the images sent to the test split.
        test_labels: List receiving the test records; defaults to the
            module-level ``test_data_dir["labels"]``.
        train_labels: List receiving the train records; defaults to the
            module-level ``train_data_dir["labels"]``.

    Returns:
        None. Results are the copied files and the extended label lists.
    """
    if test_labels is None:
        test_labels = test_data_dir["labels"]
    if train_labels is None:
        train_labels = train_data_dir["labels"]

    # `with` closes the annotation file as soon as it has been parsed.
    with open(treat_divide_json_path) as json_file:
        data = json.load(json_file)["labels"]

    # Index the records by bare file name once instead of rescanning the whole
    # list for every image. Annotation names may carry a backslash-separated
    # prefix, so only the last component is used as the key.
    records_by_name = {}
    for record in data:
        records_by_name.setdefault(record["filename"].split("\\")[-1], []).append(record)

    # os.listdir() already yields bare names, so no separator-specific string
    # splitting of the joined path is needed to recover them.
    image_names = os.listdir(treat_divide_img_path)
    random.shuffle(image_names)
    test_count = round(len(image_names) * test_ratio)

    _distribute_images(image_names[:test_count], treat_divide_img_path,
                       test_img_path, records_by_name, test_labels)
    _distribute_images(image_names[test_count:], treat_divide_img_path,
                       train_img_path, records_by_name, train_labels)
def swap_class(file, class_map=None):
    """Rename annotation classes in place according to a rename table.

    Args:
        file: Dict with a "labels" list; each item has an "annotations" list
            whose entries carry a "class" key.
        class_map: Optional dict mapping old class names to new ones. When
            omitted, the table is built from the module-level
            ``original_clses`` / ``new_clses`` lists paired by position.

    Returns:
        The same ``file`` object, after its annotations were updated.
    """
    if class_map is None:
        class_map = {}
        for old_name, new_name in zip(original_clses, new_clses):
            # setdefault keeps the first pairing for a repeated old name,
            # matching what list.index() would have selected.
            class_map.setdefault(old_name, new_name)

    for image_record in file["labels"]:
        for annotation in image_record["annotations"]:
            current = annotation["class"]
            # Classes missing from the table are left unchanged.
            annotation["class"] = class_map.get(current, current)
    return file
if __name__ == '__main__':
    # Split every source batch; all_power() accumulates the selected records
    # in the module-level test_data_dir / train_data_dir dictionaries.
    for batch_json, batch_images in (
        (ydgjson_01, ydgimg_01),
        (ydgjson_02, ydgimg_02),
        (ydgjson_03, ydgimg_03),
    ):
        all_power(batch_json, batch_images, test_path, train_path)

    # Rename the annotation classes before anything is written out.
    test_data_dir = swap_class(test_data_dir)
    train_data_dir = swap_class(train_data_dir)

    # Write and report the test annotations.
    with open(testjson_path, 'w') as json_file:
        json_file.write(json.dumps(test_data_dir, indent=4))
    print(test_data_dir)
    print(len(test_data_dir["labels"]))
    print("*" * 30)

    # Write and report the train annotations.
    with open(trainjson_path, 'w') as json_file:
        json_file.write(json.dumps(train_data_dir, indent=4))
    print(train_data_dir)
    print(len(train_data_dir["labels"]))
参考代码:
def copy_dir(src_path, target_path):
    """Recursively copy the contents of one existing directory into another.

    Args:
        src_path: Directory whose entries are copied.
        target_path: Existing directory that receives the copies. Missing
            sub-directories are created and existing files are overwritten.

    Returns:
        True when both arguments are directories and every nested directory
        could be copied. False when either argument is not a directory, or
        when a nested directory could not be copied (for example because the
        target already holds a regular file with the sub-directory's name).
    """
    if not (os.path.isdir(src_path) and os.path.isdir(target_path)):
        return False

    copied_everything = True
    for entry in os.listdir(src_path):
        src_entry = os.path.join(os.path.abspath(src_path), entry)
        if os.path.isdir(src_entry):
            dst_entry = os.path.join(os.path.abspath(target_path), entry)
            if not os.path.exists(dst_entry):
                os.mkdir(dst_entry)
            # Keep copying the remaining entries, but remember the failure so
            # the caller is not told that an incomplete copy succeeded.
            if not copy_dir(src_entry, dst_entry):
                copied_everything = False
        else:
            # Read fully before opening the destination so that copying a
            # file onto itself cannot truncate it first.
            with open(src_entry, 'rb') as read_stream:
                contents = read_stream.read()
            dst_entry = os.path.join(target_path, entry)
            with open(dst_entry, 'wb') as write_stream:
                write_stream.write(contents)
    return copied_everything