PaddleDetection检测结果转VOC格式与抠图

双元魁

2023-12-01

接上一篇工作，最后在第七节会推理，并且可以保存推理结果，原始结果是coco json。那么本文是将目标检测推理获得的结果转换成xml，这样方便使用labelimg进行查看和调整模型输出结果。这个工作有一个应用场景，那就是我有少量数据，首先训练一个模型，然后对更多的无标签数据进行推理，获得推理结果，经转换后，人工对图片进行重新标注，相当于模型起到预标注的作用，这样可以加快标注速度，提升标注精度，降低标注的成本。

1 读取json

import json
def read_json(path):
    """Load and return the parsed contents of a JSON file.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The deserialized JSON document, exactly as ``json.load`` produces it.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, so non-ASCII file names in the results decode correctly.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

data 是一个列表，里边每个元素是一个字典，显示部分结果data[:3]：

[{'image_file': '/home/tl/PaddleDetection/dianti/scrapy_lcd/20220704_175803_1247.jpg',
  'bbox': [132.89439392089844,
   0.0909591093659401,
   8.795181274414062,
   8.854251347482204],
  'score': 0.2835327088832855,
  'category_id': 0},
 {'image_file': '/home/tl/PaddleDetection/dianti/scrapy_lcd/20220704_175803_1247.jpg',
  'bbox': [132.91290283203125,
   15.55500316619873,
   8.981765747070312,
   10.08714771270752],
  'score': 0.253669410943985,
  'category_id': 0},
 {'image_file': '/home/tl/PaddleDetection/dianti/scrapy_lcd/20220704_175803_1247.jpg',
  'bbox': [0.34634438157081604,
   -0.026488304138183594,
   343.17108115553856,
   501.06280422210693],
  'score': 0.2448078989982605,
  'category_id': 0}]

20230424更新：
paddledetection2.6 新的json:

[{'image_id':0,
  'category_id': 0,
  'file_name': '20220704_175803_1247.jpg',
  'bbox': [132.89439392089844,
   0.0909591093659401,
   8.795181274414062,
   8.854251347482204],
  'score': 0.2835327088832855,
 },
 {'image_id':0,  
  'category_id': 0,
  'file_name': '20220704_175803_1247.jpg',
  'bbox': [132.91290283203125,
   15.55500316619873,
   8.981765747070312,
   10.08714771270752],
  'score': 0.253669410943985
  }]

可以看到，绝对路径的image_file 变成了只有文件名的file_name.

2 试写一个xml文件

from lxml import etree,objectify
# Two sample detection boxes; each entry is
# [class name, left, top, right, bottom].
objs = [
    ['cat', 1, 2, 3, 4],
    ['dog', 5, 6, 7, 8],
]

# Element factory used for every node of the annotation tree.
E = objectify.ElementMaker(annotate=False)

# Image-level part of the annotation: folder, file name, source and size.
source_node = E.source(E.database("Unknow"))
size_node = E.size(E.width(640), E.height(640), E.depth(3))
anno_tree = E.annotation(
    E.folder("test"),
    E.filename('test.jpg'),
    source_node,
    size_node,
    E.segmented(0),
)

# One <object> element per detection box, appended to the root.
for obj in objs:
    label, left, top, right, bottom = obj
    E2 = objectify.ElementMaker(annotate=False)
    box_node = E.bndbox(
        E.xmin(left),
        E.ymin(top),
        E.xmax(right),
        E.ymax(bottom),
    )
    anno_tree2 = E2.object(
        E.name(label),
        E.pose("Unspecified"),
        E.truncated(0),
        E.difficult("0"),
        box_node,
    )
    anno_tree.append(anno_tree2)

# Serialize the finished tree to disk with indentation.
etree.ElementTree(anno_tree).write('test.xml', pretty_print=True)

结果保存在test.xml，内容如下：

<annotation>
  <folder>test</folder>
  <filename>test.jpg</filename>
  <source>
    <database>Unknow</database>
  </source>
  <size>
    <width>640</width>
    <height>640</height>
    <depth>3</depth>
  </size>
  <segmented>0</segmented>
  <object>
    <name>cat</name>
    <pose>Unspecified</pose>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>1</xmin>
      <ymin>2</ymin>
      <xmax>3</xmax>
      <ymax>4</ymax>
    </bndbox>
  </object>
  <object>
    <name>dog</name>
    <pose>Unspecified</pose>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>5</xmin>
      <ymin>6</ymin>
      <xmax>7</xmax>
      <ymax>8</ymax>
    </bndbox>
  </object>
</annotation>

从上边也可以看到，数值既可以直接以数字传入，也可以先转成字符串再传入，写出的xml内容是一样的。

3 获取要转换数据

从上边可以看到，要生成一个xml，需要的内容是filename、width、height、depth、object及里边的name、xmin、ymin、xmax、ymax。原始json是coco格式，所以我们可以把json里的内容整理成一个新的字典：

{'filename(就是实际的图名，作为字典的key)': {'size': [width, height, depth], 'objs': [['name', xmin, ymin, xmax, ymax], ...]}, ...}

具体代码，先获得原数据中id到类别名的映射：

label_file='/path/label_list.txt'
def get_id2label(label_file):
    """Build a mapping from category id (as a string) to label name.

    Args:
        label_file: Path of a text file with one label per line. The
            zero-based line index is used as the category id.

    Returns:
        dict mapping ``str(line_index)`` to the label on that line, with
        surrounding whitespace (including the line terminator) removed.
    """
    with open(label_file, encoding='utf-8') as f:
        # Lines read from the file still carry their terminator; strip it so
        # it does not end up inside every label name.
        return {str(idx): line.strip() for idx, line in enumerate(f)}

接着获取相关的数据：

from PIL import Image
import json
import numpy as np
def get_infos(json_file,thresh):
    """Group PaddleDetection results by image, keeping boxes above a score threshold.

    Each record of the JSON file is expected to provide ``'image_file'``
    (path of the image), ``'bbox'`` as ``[x, y, w, h]``, ``'score'`` and
    ``'category_id'``.

    Args:
        json_file: Path of the JSON file holding the inference results.
        thresh: A box is kept only if its score, rounded to two decimals,
            is strictly greater than this value.

    Returns:
        tuple ``(all_info, data)``:
            * ``all_info`` maps each image file name to a dict with
              ``'path'`` (image path), ``'size'`` (``[width, height, 3]``)
              and ``'objs'`` (a list of
              ``[name, xmin, ymin, xmax, ymax, score]``).
            * ``data`` is the raw list loaded from ``json_file``.
    """
    # Local import: this snippet needs os.path but nothing in the file
    # imports os.
    import os

    # Load the PaddleDetection inference results.
    with open(json_file, encoding='utf-8') as f:
        data = json.load(f)

    all_info = {}
    for d in data:
        # Plain float (not a numpy scalar) so the result stays serializable.
        score = float(np.round(d['score'], 2))
        if not score > thresh:
            continue

        filepath = d['image_file']
        filename = os.path.split(filepath)[-1]
        xmin, ymin, w, h = map(int, d['bbox'])
        xmax = xmin + w
        ymax = ymin + h
        # NOTE(review): id2label is read from module scope; presumably the
        # dict returned by get_id2label -- confirm it is assigned before use.
        name = id2label[str(d['category_id'])]

        info = all_info.get(filename)
        if info is None:
            # First box for this image: read its size once and release the
            # file handle right away.
            with Image.open(filepath) as img:
                width, height = img.size
            depth = 3  # channel count is fixed to 3, not read from the image
            info = {'path': filepath, 'size': [width, height, depth], 'objs': []}
            all_info[filename] = info
        else:
            width, height, _ = info['size']

        # Keep the box inside the image; tolist() yields plain Python ints.
        xmin, xmax = np.clip([xmin, xmax], 0, width).tolist()
        ymin, ymax = np.clip([ymin, ymax], 0, height).tolist()
        info['objs'].append([name, xmin, ymin, xmax, ymax, score])
    return all_info, data

all_info是一个以图片文件名为key的字典，结果是这样的：

{'20220704_174247_148.jpg': {'path': '/path/20220704_174247_148.jpg',
  'size': [556, 429, 3],
  'objs': [['screen', 48, 207, 62, 213, 0.61],
   ['screen', 48, 219, 62, 225, 0.59],
   ['screen', 48, 213, 62, 219, 0.56]]},
 '20220704_174155_2172.jpg': {'path': '/path/20220704_174155_2172.jpg',
  'size': [310, 310, 3],
  'objs': [['screen', 117, 76, 133, 89, 0.72],
   ['screen', 117, 153, 132, 166, 0.63],
   ['screen', 166, 153, 182, 166, 0.62],
   ['screen', 101, 101, 115, 114, 0.51]]}}

20230424更新：
json文件的格式发生了变化（只保留了文件名file_name），所以读取代码也要相应修改，需要额外传入图片所在的目录imgdir：

from PIL import Image
import json
import numpy as np
def get_infos(json_file,thresh,imgdir):
    """Group PaddleDetection 2.6 results by image, keeping boxes above a score threshold.

    Each record of the JSON file is expected to provide ``'file_name'``
    (bare image file name), ``'bbox'`` as ``[x, y, w, h]``, ``'score'`` and
    ``'category_id'``.

    Args:
        json_file: Path of the JSON file holding the inference results.
        thresh: A box is kept only if its score, rounded to two decimals,
            is strictly greater than this value.
        imgdir: Directory containing the images; joined with each record's
            ``'file_name'`` to locate the image on disk.

    Returns:
        tuple ``(all_info, data)``:
            * ``all_info`` maps each image file name to a dict with
              ``'path'`` (image path), ``'size'`` (``[width, height, 3]``)
              and ``'objs'`` (a list of
              ``[name, xmin, ymin, xmax, ymax, score]``).
            * ``data`` is the raw list loaded from ``json_file``.
    """
    # Local import: this snippet needs os.path but nothing in the file
    # imports os.
    import os

    # Load the PaddleDetection inference results.
    with open(json_file, encoding='utf-8') as f:
        data = json.load(f)

    all_info = {}
    for d in data:
        # Plain float (not a numpy scalar) so the result stays serializable.
        score = float(np.round(d['score'], 2))
        if not score > thresh:
            continue

        filename = d['file_name']
        filepath = os.path.join(imgdir, filename)
        xmin, ymin, w, h = map(int, d['bbox'])
        xmax = xmin + w
        ymax = ymin + h
        # NOTE(review): id2label is read from module scope; presumably the
        # dict returned by get_id2label -- confirm it is assigned before use.
        name = id2label[str(d['category_id'])]

        info = all_info.get(filename)
        if info is None:
            # First box for this image: read its size once and release the
            # file handle right away.
            with Image.open(filepath) as img:
                width, height = img.size
            depth = 3  # channel count is fixed to 3, not read from the image
            info = {'path': filepath, 'size': [width, height, depth], 'objs': []}
            all_info[filename] = info
        else:
            width, height, _ = info['size']

        # Keep the box inside the image; tolist() yields plain Python ints.
        xmin, xmax = np.clip([xmin, xmax], 0, width).tolist()
        ymin, ymax = np.clip([ymin, ymax], 0, height).tolist()
        info['objs'].append([name, xmin, ymin, xmax, ymax, score])
    return all_info, data

4 写入xml

def write_xml(save_path,all_info):
    """Write one VOC-style XML annotation file per image.

    Args:
        save_path: Output directory; created (with parents) if missing.
        all_info: dict mapping an image file name to a dict with ``'size'``
            (``[width, height, depth]``) and ``'objs'`` (a list whose
            entries start with ``name, xmin, ymin, xmax, ymax``).

    Each annotation is saved as ``<image name without extension>.xml``
    inside ``save_path``.
    """
    # Local import: this snippet needs os but nothing in the file imports it.
    import os

    # exist_ok=True already covers the "directory exists" case, so no
    # separate existence check is needed.
    os.makedirs(save_path, exist_ok=True)

    for key, value in all_info.items():
        E = objectify.ElementMaker(annotate=False)
        size = value['size']
        anno_tree = E.annotation(
            E.folder("test"),
            E.filename(key),
            E.source(
                E.database("Unknow"),
            ),
            E.size(
                E.width(size[0]),
                E.height(size[1]),
                E.depth(size[2]),
            ),
            E.segmented(0),
        )
        # One <object> element per detection; the same element factory is
        # reused instead of creating a second one for every box.
        for obj in value['objs']:
            anno_tree.append(
                E.object(
                    E.name(obj[0]),
                    E.pose("Unspecified"),
                    E.truncated(0),
                    E.difficult("0"),
                    E.bndbox(
                        E.xmin(obj[1]),
                        E.ymin(obj[2]),
                        E.xmax(obj[3]),
                        E.ymax(obj[4]),
                    ),
                )
            )
        save_file = os.path.join(save_path, os.path.splitext(key)[0] + '.xml')
        etree.ElementTree(anno_tree).write(save_file, pretty_print=True)

5 把检测结果裁剪出来

上文中all_info已经把所有结果按图片进行整理，可以把所有的检测结果提取出来。

from PIL import Image
import time
def crop_img(save_path,all_info,score_thresh,size_score):
    """Crop each confident, large-enough detection out of its image and save it.

    ``save_path`` is recreated from scratch: if it already exists it is
    deleted together with everything in it before any crop is written.

    Args:
        save_path (str): Directory the crops are written to.
        all_info (dict): Per-image records keyed by image file name. Each
            value provides ``'path'`` (image file to open) and ``'objs'``
            (a list whose entries start with ``name, xmin, ymin, xmax,
            ymax`` and end with the score).
        score_thresh (float): A box is cropped only if its score is strictly
            greater than this. ``all_info`` was already filtered by a
            threshold when it was built, so the two work together.
        size_score (list): ``[w_thresh, h_thresh]``; a box is cropped only
            if its width and height are strictly greater than these.

    Images whose ``'path'`` does not exist are reported with ``print`` and
    skipped. Each crop is saved as
    ``<image stem>_<timestamp>_<name>_<box index><image extension>``.
    """
    # Local imports: this snippet needs os and shutil but nothing in the
    # file imports them.
    import os
    import shutil

    # Start from an empty output directory.
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path, exist_ok=True)

    # Loop-invariant, so unpack once up front.
    w_thresh, h_thresh = size_score

    for filename, value in all_info.items():
        filepath = value['path']
        if not os.path.exists(filepath):
            print(f"file:{filepath} does not exist.")
            continue

        stem, ext = os.path.splitext(filename)
        # The context manager releases the image file once all of its boxes
        # have been cropped.
        with Image.open(filepath) as img:
            for i, obj in enumerate(value['objs']):
                score = obj[-1]
                xmin, ymin, xmax, ymax = obj[1:5]
                w = xmax - xmin
                h = ymax - ymin
                if not (score > score_thresh and w > w_thresh and h > h_thresh):
                    continue
                name = obj[0]
                crop = img.crop([xmin, ymin, xmax, ymax])
                # The timestamp in the file name makes overwriting an
                # earlier crop less likely.
                current_time = time.strftime("%Y%m%d%H%M%S", time.localtime())
                save_file = stem + '_' + current_time + "_" + name + "_" + str(i) + ext
                save_file = os.path.join(save_path, save_file)
                crop.save(save_file, quality=95)

引入time包是为了记录时间，同时也让保存图片的文件名带上时间戳，尽量避免因为重名而被覆盖。

PaddleDetection检测结果转VOC格式与抠图

1 读取json

2 试写一个xml文件

3 获取要转换数据

4 写入xml

5 把检测结果裁剪出来

相关阅读

相关文章

相关问答

相关文档