常见数据集格式转换

经常遇到一些需要数据转换的场景,如coco格式、yolo格式(归一化记录在txt中)、mask图片,这里记录一下常用的格式转换

本篇内容:

mask转换为json(coco数据集的话自己再改一下)

用法:python mask_to_json.py mask文件或文件夹 输出文件夹

代码如下:
import cv2
import os
import json
import sys


def _merge_collinear(contour) -> list:
    """Return the contour's vertices as ``[x, y]`` int pairs, merging collinear runs.

    A vertex replaces the previously kept one when it lies on the line through
    the last two kept vertices, so straight edges collapse to their end points.
    The merge is skipped whenever any coordinate involved is 0, which keeps the
    vertices that sit on the left/top image border.
    """
    points = []
    for point in contour:
        x, y = int(point[0][0]), int(point[0][1])
        if len(points) > 1:
            ax, ay = points[-2]
            bx, by = points[-1]
            # Cross product of (p - a) and (b - a) is zero when a, b, p are collinear.
            # The original compared against (b.x - b.x), which is always 0.
            if ax * ay * x * y != 0 and (x - ax) * (by - ay) == (y - ay) * (bx - ax):
                points[-1] = [x, y]
                continue
        points.append([x, y])
    return points


def func(file: str) -> dict:
    """Convert a mask image into a polygon annotation dictionary.

    The image is converted to grayscale, thresholded at 10 and its external
    contours are extracted; each contour becomes one "polygon" shape labelled
    "result". The key layout resembles a labelme JSON file.

    Args:
        file: Path of the mask image to read.

    Returns:
        A dict with "version", "flags", "shapes", "imagePath", "imageHeight"
        and "imageWidth" entries, ready to be dumped as JSON.

    Raises:
        ValueError: If the image cannot be read.
    """
    png = cv2.imread(file)
    if png is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"cannot read image: {file}")
    gray = cv2.cvtColor(png, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    # [-2] selects the contour list for both the 2-value (OpenCV 4) and
    # 3-value (OpenCV 3) return conventions of findContours.
    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    dic = {"version": "5.0.1", "flags": {}, "shapes": list(), "imagePath": os.path.basename(file),
           "imageHeight": png.shape[0], "imageWidth": png.shape[1]}
    for contour in contours:
        # Every vertex is used; the original skipped the first two of each
        # contour, which turned e.g. a rectangle into a 2-point "polygon".
        dic["shapes"].append({"label": "result", "points": _merge_collinear(contour), "group_id": None,
                              "shape_type": "polygon", "flags": {}})

    return dic


if __name__ == "__main__":
    # Command-line entry point: argv[1] is a mask file or a directory of masks,
    # argv[2] is the directory that receives one .json file per mask.
    if len(sys.argv) != 3:
        raise ValueError("mask文件或目录 输出路径")

    source, out_dir = sys.argv[1], sys.argv[2]
    # Create the output directory up front so open() below cannot fail on it.
    os.makedirs(out_dir, exist_ok=True)

    if os.path.isdir(source):
        # Only regular files are converted; sub-directories are skipped.
        masks = [os.path.join(source, name) for name in os.listdir(source)]
        masks = [path for path in masks if os.path.isfile(path)]
    else:
        masks = [source]

    for mask in masks:
        # Convert before opening the output so a failed conversion does not
        # leave an empty .json file behind.
        result = func(mask)
        stem = os.path.splitext(os.path.basename(mask))[0]
        with open(os.path.join(out_dir, stem + ".json"), mode='w', encoding="utf-8") as f:
            json.dump(result, f)

可视化json

用于检查生成的json是否正确:运行前请将代码中的图片路径和json路径改为你自己的原图及其对应的json文件。

代码如下:
import json
from PIL import Image, ImageDraw

# Load the original image. Converting to RGB lets the RGB tuple fills below
# work on grayscale/palette inputs and lets the result be saved as JPEG even
# when the source PNG carries an alpha channel.
image = Image.open('images.png').convert('RGB')

# Load the JSON file holding the instance polygons.
with open('m04.json', encoding='utf-8') as f:
    instances = json.load(f)

# Draw every polygon vertex as a small dot on top of the image.
draw = ImageDraw.Draw(image)
for i, instance in enumerate(instances['shapes']):
    coords = instance['points']
    # Cycle through three colours by instance index; each channel is capped
    # at 255 because the raw product reaches 510.
    color = tuple(min(255, 255 * ((i + shift) % 3)) for shift in range(3))
    for x, y in coords:
        draw.ellipse((x - 2, y - 2, x + 2, y + 2), fill=color)

image.show()
image.save('result.jpg')

json转化为YOLO格式

代码如下:
import os
import json
from PIL import Image, ImageDraw

json_folder = 'json/'
labels_folder = 'labels/'

# Make sure the output folder exists before any label file is written.
os.makedirs(labels_folder, exist_ok=True)

# Walk through the JSON folder and convert each annotation file.
for json_file in os.listdir(json_folder):
    stem, ext = os.path.splitext(json_file)
    if ext.lower() != '.json':
        # Ignore anything that is not a JSON annotation file.
        continue

    # Read the annotation file.
    with open(os.path.join(json_folder, json_file), encoding='utf-8') as f:
        instances = json.load(f)

    # Normalise every polygon vertex by the image size.
    width = instances['imageWidth']
    height = instances['imageHeight']
    all_coords = [
        [[x / width, y / height] for x, y in instance['points']]
        for instance in instances['shapes']
    ]

    # Write one line per polygon: class id 0 followed by the x/y pairs.
    # The label keeps the full stem of the JSON name; the original slice
    # json_file[1:-4] dropped the first character of every file name.
    label_file = os.path.join(labels_folder, stem + '.txt')
    with open(label_file, 'w') as f:
        for coords in all_coords:
            if not coords:
                continue
            f.write('0 ' + ''.join(f'{x} {y} ' for x, y in coords) + '\n')

参考:Mask图像与json标记文件的相互转换