预处理后的训练:
Pysot训练自己的数据集_Vesper0412的博客-CSDN博客
因为我用的数据集是视频数据集,所以比照着Pysot中使用的VID数据集进行预处理。
VID数据集的标签是以.xml格式存储的,具体如下:
<?xml version="1.0"?>
<annotation>
<folder>ILSVRC2015_VID_train_0000/ILSVRC2015_train_00000000</folder>
<filename>000000</filename>
<source>
<database>ILSVRC_2015</database>
</source>
<size>
<width>1280</width>
<height>720</height>
</size>
<object>
<trackid>0</trackid>
<name>n01674464</name>
<bndbox>
<xmax>1050</xmax>
<xmin>323</xmin>
<ymax>428</ymax>
<ymin>216</ymin>
</bndbox>
<occluded>1</occluded>
<generated>0</generated>
</object>
</annotation>
但我使用数据集的ground_truth是以 ‘.txt’ 形式存储的,所以代码调整一下。
在par_crop阶段,只用到了.xml文件中的bndbox信息以及trackid信息。改进后将bndbox置换成自己的ground_truth就好。注意:代码中使用的bndbox顺序为[xmin,ymin,xmax,ymax](即左上角与右下角坐标,与.xml中各标签的书写顺序无关);如果自己的ground_truth格式与之不同(例如[x,y,w,h]),记得在代码里做出相应转换。
至于trackid,若一张图里有i个跟踪对象,则各对象的trackid依次取0到i-1,根据实际情况调整即可;我自己的数据集一张图片中只有一个跟踪对象,所以trackid = 0。
主要修改 crop_video 函数:
def crop_video(sub_set, video, crop_path, instanc_size):
    """Crop every frame of one video and save the resulting image pairs.

    For each ``*.jpg`` frame of the video, the matching ground-truth row is
    turned into a corner-format box and passed to ``crop_like_SiamFC``; the
    two returned crops are written as ``<frame>.<trackid>.z.jpg`` and
    ``<frame>.<trackid>.x.jpg``.

    Args:
        sub_set: Name of the sub-set folder (e.g. one of 'a'..'e').
        video: Name of the video folder.
        crop_path: Base output directory; crops are written to
            ``crop_path/sub_set/video``.
        instanc_size: Forwarded to ``crop_like_SiamFC`` (511 in the original
            setup).

    Raises:
        ValueError: If the video has more frames than ground-truth rows.
        IOError: If a frame cannot be read by OpenCV.
    """
    # Output directory for the crops of this video.
    video_crop_base_path = join(crop_path, sub_set, video)
    makedirs(video_crop_base_path, exist_ok=True)

    # One ground-truth row per frame. ndmin=2 keeps the array 2-D even when
    # the file holds a single row, so gts[i] is always a full row.
    gt_path = join(ann_base_path, video + '.txt')
    gts = np.loadtxt(gt_path, dtype=int, ndmin=2)

    # Sorted so that frame index i lines up with ground-truth row i.
    images = sorted(glob.glob(join(data_base_path, sub_set, video, '*.jpg')))
    if len(images) > len(gts):
        raise ValueError(
            '{}: {} frames but only {} ground-truth rows'.format(
                gt_path, len(images), len(gts)))

    # Only one tracked object per frame in this dataset.
    trackid = 0
    for i, im_path in enumerate(images):
        im = cv2.imread(im_path)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('cannot read image: {}'.format(im_path))
        # Per-channel mean colour, used as the padding value for the crop.
        avg_chans = np.mean(im, axis=(0, 1))

        # Each row is treated as [x, y, w, h]; convert to
        # [xmin, ymin, xmax, ymax].
        left, top, width, height = (int(v) for v in gts[i][:4])
        bbox = [left, top, left + width, top + height]

        z, x = crop_like_SiamFC(im, bbox, instanc_size=instanc_size, padding=avg_chans)

        cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(int(i), trackid)), z)
        cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(i), trackid)), x)
接下来生成标注信息的.json文件。参照vid.json,需要的信息如下:
路径(base_path与img_path), 图像大小,bbox,分类标签,是否遮挡,trackid。
"base_path": "a/ILSVRC2015_train_00000000",
"frame": [
{
"frame_sz": [
1280,
720
],
"img_path": "000000.JPEG",
"objs": [
{
"bbox": [
323,
216,
1050,
428
],
"c": "n01674464",
"occ": 1,
"trackid": 0
}
]
},
但是看VID后续的预处理过程,其实只需要路径,bbox,trackid信息,其他的信息可以省略掉,于是代码可以调整为:
from os.path import join
import os
from os import listdir
import json
import glob
import numpy as np
# Build my.json: for every sub-set folder, a list of videos; for every video,
# its relative path plus one entry per frame holding the image file name and
# the tracked object's corner-format bounding box.
gt_name = 'groundtruth_rect.txt'
img_base_path = 'E:/my_train_datasets/Data'
sub_sets = sorted({'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'})

vid = []
for sub_set in sub_sets:
    # Folder that holds the video folders of this sub-set.
    sub_set_base_path = join(img_base_path, sub_set)
    # Keep directories only, so a stray file in the sub-set folder is not
    # mistaken for a video.
    videos = sorted(
        name for name in listdir(sub_set_base_path)
        if os.path.isdir(join(sub_set_base_path, name))
    )
    s = []  # info for all videos of this sub-set
    for vi, video in enumerate(videos):
        print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos)))
        video_base_path = join(sub_set_base_path, video)
        # All frames of this video, sorted so index i matches ground-truth row i.
        images = sorted(glob.glob(join(video_base_path, '*.jpg')))
        gt_path = join(video_base_path, gt_name)
        # ndmin=2 keeps the array 2-D even when the file has a single row.
        gts = np.loadtxt(gt_path, dtype=int, ndmin=2)
        if len(images) > len(gts):
            raise ValueError(
                '{}: {} frames but only {} ground-truth rows'.format(
                    gt_path, len(images), len(gts)))

        v = dict()  # info for one video
        # Joined with a literal '/' so the stored path does not depend on
        # the OS path separator.
        v['base_path'] = sub_set + '/' + video
        v['frame'] = []
        for i, im_path in enumerate(images):
            bndbox = gts[i]
            o = dict()
            # Row is treated as [x, y, w, h]; stored as
            # [xmin, ymin, xmax, ymax]. int() turns numpy integers into
            # plain ints that json can serialise.
            o['bbox'] = [int(bndbox[0]), int(bndbox[1]),
                         int(bndbox[0] + bndbox[2]), int(bndbox[1] + bndbox[3])]
            # Only one tracked object per frame in this dataset.
            o['trackid'] = 0

            f = dict()
            f['img_path'] = os.path.basename(im_path)
            f['objs'] = [o]
            v['frame'].append(f)
        s.append(v)
    vid.append(s)  # info for all videos of all sub-sets

print('save json (raw vid info), please wait 1 min~')
# Context manager guarantees the file is flushed and closed.
with open('my.json', 'w') as json_file:
    json.dump(vid, json_file, indent=4, sort_keys=True)
print('done!')
my.json部分内容如下:
"base_path": "a/MY_train_00000000",
"frame": [
{
"img_path": "000000.jpg",
"objs": [
{
"bbox": [
0,
130,
250,
294
],
"trackid": 0
}
]
},
一定要注意的是,base_path中, "a/MY_train_00000000"不能是 "a//MY_train_00000000"形式,否则后续在Linux训练时会报错: can’t open/read file: check file path/integrity
生成train.json与val.json。
# Walk every frame of a video and every object annotated in that frame.
for f, frame in enumerate(frames):
    objs = frame['objs']
    # Disabled: the frames written to my.json carry no 'frame_sz' key.
    #frame_sz = frame['frame_sz']
    for obj in objs:
        trackid = obj['trackid']
        # Disabled: the objects written to my.json carry no 'occ' key.
        #occluded = obj['occ']
        bbox = obj['bbox']
把frame_sz与occluded直接注释掉就可以了。
生成的train.json部分内容如下:
{
"a/MY_train_00000000": {
"00": {
"000000": [
0,
130,
250,
294
],
"000001": [
1,
131,
259,
287
],
"000002": [
3,
132,
268,
281
],
生成的val.json部分内容如下:
{
"h/MY_val_00000000": {
"00": {
"000000": [
166,
190,
218,
217
],
"000001": [
164,
190,
215,
217
],
"000002": [
161,
191,
212,
217
],
至此,数据集预处理进行完毕 。