got-10k数据预处理

娄德运

2023-12-01

got数据文件夹格式

  |-- GOT-10k/
     |-- train/
     |  |-- GOT-10k_Train_000001/
     |  |   ......
     |  |-- GOT-10k_Train_009335/
     |  |-- list.txt
     |-- val/
     |  |-- GOT-10k_Val_000001/
     |  |   ......
     |  |-- GOT-10k_Val_000180/
     |  |-- list.txt
     |-- test/
     |  |-- GOT-10k_Test_000001/
     |  |   ......
     |  |-- GOT-10k_Test_000180/
     |  |-- list.txt

数据标注描述

Each sequence folder contains 4 annotation files and 1 meta file. A brief description of these files follows (let N denote the sequence length):

groundtruth.txt – An N×4 matrix with each line representing object location [xmin, ymin, width, height] in one frame.
cover.label – An N×1 array representing object visible ratios, with levels ranging from 0~8.
absence.label – A binary N×1 array indicating whether an object is absent or present in each frame.
cut_by_image.label – A binary N×1 array indicating whether an object is cut by image in each frame.
meta_info.ini – Meta information about the sequence, including object and motion classes, video URL and more.

Values 0~8 in file cover.label correspond to ranges of object visible ratios: 0%, (0%, 15%], (15%, 30%], (30%, 45%], (45%, 60%], (60%, 75%], (75%, 90%], (90%, 100%) and 100% respectively.

使用shutil移动got10k文件夹

# -*- coding: utf-8 -*- 
import shutil
import os 

# Destination folder that every collected folder is moved into by the loop at the end of this script.
dst = r'C:\Users\lpf\Desktop\dataset\Got10k\train'
# Alternative destination; in the visible script it is only referenced by commented-out code.
dst1 = r'd:/Learning_Data/BasicCode/move_file/c'
# Output list that Test1 fills in place with the matching directory paths.
path = []
def Test1(rootDir, path):
    """Collect directories whose name has '_' as its third-from-last character.

    Walks the whole tree below ``rootDir`` and appends the full path of every
    matching directory to ``path``.

    Args:
        rootDir: Directory to start the recursive walk from.
        path: List that is extended in place with the matching paths.

    Returns:
        None. Results are delivered through ``path``.
    """
    for root, dirs, _files in os.walk(rootDir):
        for d in dirs:
            # Names shorter than three characters cannot match, and indexing
            # them with d[-3] would raise IndexError.
            if len(d) >= 3 and d[-3] == '_':
                path.append(os.path.join(root, d))
# Scan the dataset root and fill `path` with the directories that Test1 selects.
Test1(r"C:\Users\lpf\Desktop\dataset\Got10k", path)
def Test2(rootDir, path):
    """Append every directory found anywhere below ``rootDir`` to ``path``.

    Unlike Test1, no name filter is applied: each sub-directory met during the
    recursive walk is recorded with its full path.

    Args:
        rootDir: Directory to start the recursive walk from.
        path: List that is extended in place with the directory paths.
    """
    for parent, subdirs, _ in os.walk(rootDir):
        path.extend(os.path.join(parent, name) for name in subdirs)
# Test1(r'd:\Learning_Data\BasicCode\move_file')
# print(path[0])

# shutil.move(path[0],dst)
# Gather every folder found inside the directories collected in `path`.
images_path = []
for images in path:
    Test2(images, images_path)

# Print a small sample of what was found; the indexing is guarded so that an
# empty scan reports 0 instead of raising IndexError.
if images_path:
    print(images_path[0])
print(len(images_path))
if images_path:
    print(images_path[-1])

if images_path:
    # shutil.move() renames the source to `dst` when `dst` does not exist yet,
    # so make sure the destination is a real directory before moving into it.
    os.makedirs(dst, exist_ok=True)

count = 0
for i in images_path:
    shutil.move(i, dst)
    count += 1
    # Report progress every 100 moved folders.
    if count % 100 == 0:
        print("已完成移动{}个文件......".format(count))
print("finish")

裁剪数据集，获取图像对

from os.path import join, isdir, exists
from os import listdir, mkdir, makedirs
import cv2
import numpy as np
import glob
from concurrent import futures
import sys
import time


got10k_base_path = '/data/share/GOT10K'
sub_sets = sorted({'train', 'val'})


# Print iterations progress (thanks StackOverflow)
def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        barLength   - Optional  : character length of bar (Int)

    The bar is written to stdout after a carriage return so that each call
    overwrites the previous one. When ``iteration == total`` the line is
    erased afterwards.
    """
    if total:
        percent_value = 100 * (iteration / float(total))
        filledLength = int(round(barLength * iteration / float(total)))
    else:
        # Nothing to do counts as finished; this also avoids dividing by zero.
        percent_value = 100.0
        filledLength = barLength

    formatStr = "{0:." + str(decimals) + "f}"
    percents = formatStr.format(percent_value)
    # The fill character must be non-empty, otherwise the bar shrinks as the
    # work progresses instead of filling up.
    bar = '#' * filledLength + '-' * (barLength - filledLength)
    sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix))
    if iteration == total:
        # ANSI "erase line" followed by a carriage return.
        sys.stdout.write('\x1b[2K\r')
    sys.stdout.flush()


def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)):
    """Crop ``bbox`` out of ``image`` and resize it to ``out_sz`` x ``out_sz``.

    An affine map is built that sends (bbox[0], bbox[1]) to output pixel (0, 0)
    and (bbox[2], bbox[3]) to (out_sz - 1, out_sz - 1); ``cv2.warpAffine`` then
    resamples the image, filling areas outside the source with ``padding``.

    Args:
        image: Source image passed to ``cv2.warpAffine``.
        bbox: Box as [xmin, ymin, xmax, ymax]; it may extend past the image.
        out_sz: Side length of the square output in pixels.
        padding: Constant border value used for out-of-image areas.

    Returns:
        The warped crop produced by ``cv2.warpAffine``.
    """
    a = (out_sz - 1) / (bbox[2] - bbox[0])
    b = (out_sz - 1) / (bbox[3] - bbox[1])
    c = -a * bbox[0]
    d = -b * bbox[1]
    # np.float was removed in NumPy 1.24; np.float64 is the type it aliased.
    mapping = np.array([[a, 0, c],
                        [0, b, d]], dtype=np.float64)
    crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding)
    return crop


def pos_s_2_bbox(pos, s):
    """Return the square box of side ``s`` centred on ``pos``.

    Args:
        pos: Centre as (x, y).
        s: Side length of the square.

    Returns:
        List [xmin, ymin, xmax, ymax].
    """
    half = s / 2
    cx, cy = pos[0], pos[1]
    return [cx - half, cy - half, cx + half, cy + half]


def crop_like_SiamFC(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)):
    """Produce the exemplar (z) and search (x) crops centred on ``bbox``.

    The box is enlarged by ``context_amount * (width + height)`` on each
    dimension and squared via the geometric mean to get the exemplar side
    ``s_z``. The search side ``s_x`` adds the margin that corresponds to
    ``(instanc_size - exemplar_size) / 2`` output pixels at the exemplar scale.

    Args:
        image: Source image handed to ``crop_hwc``.
        bbox: Target box as [xmin, ymin, xmax, ymax].
        context_amount: Fraction of (width + height) added as context.
        exemplar_size: Output side of the exemplar crop in pixels.
        instanc_size: Output side of the search crop in pixels.
        padding: Border value used for areas outside the image.

    Returns:
        Tuple ``(z, x)``: the exemplar crop and the search crop.
    """
    x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
    center = [(x2 + x1) / 2., (y2 + y1) / 2.]
    width, height = x2 - x1, y2 - y1

    # Context-padded extents, squared via the geometric mean.
    context = context_amount * (width + height)
    s_z = np.sqrt((height + context) * (width + context))

    # Margin around the exemplar, converted from output pixels to image pixels.
    scale_z = exemplar_size / s_z
    margin = ((instanc_size - exemplar_size) / 2) / scale_z
    s_x = s_z + 2 * margin

    exemplar = crop_hwc(image, pos_s_2_bbox(center, s_z), exemplar_size, padding)
    search = crop_hwc(image, pos_s_2_bbox(center, s_x), instanc_size, padding)
    return exemplar, search


def crop_video(sub_set, video, crop_path, instanc_size):
    """Write the exemplar/search crops for every frame of one sequence.

    Reads ``groundtruth.txt`` and the frames from
    ``<got10k_base_path>/<sub_set>/<video>`` (``got10k_base_path`` is the
    module-level setting) and writes ``NNNNNN.00.z.jpg`` / ``NNNNNN.00.x.jpg``
    into ``<crop_path>/<sub_set>/<video>``.

    Args:
        sub_set: Split name, e.g. 'train' or 'val'.
        video: Name of the sequence folder inside the split.
        crop_path: Root directory that receives the crops.
        instanc_size: Side length of the search crop in pixels.

    Returns:
        None.
    """
    video_crop_base_path = join(crop_path, sub_set, video)
    makedirs(video_crop_base_path, exist_ok=True)

    video_base_path = join(got10k_base_path, sub_set, video)
    gts_path = join(video_base_path, 'groundtruth.txt')
    # Passing the path lets NumPy open and close the file itself; ndmin=2 keeps
    # a single-frame sequence as a 1x4 matrix instead of a flat vector.
    gts = np.loadtxt(gts_path, delimiter=',', ndmin=2)

    jpgs = sorted(glob.glob(join(video_base_path, '*.jpg')))
    if not jpgs:
        print('no jpg files, try png files')
        jpgs = sorted(glob.glob(join(video_base_path, '*.png')))
        if not jpgs:
            print('no jpg and png files, check data please')
            return

    if len(jpgs) != len(gts):
        # zip() below stops at the shorter of the two, so nothing is indexed
        # out of range; the mismatch is still worth reporting.
        print('{}: {} images but {} annotations, extra entries are ignored'.format(
            video_base_path, len(jpgs), len(gts)))

    for idx, (img_path, gt) in enumerate(zip(jpgs, gts)):
        im = cv2.imread(img_path)
        if im is None:
            # cv2.imread returns None for unreadable files; skip the frame
            # rather than losing the remainder of the sequence.
            print('cannot read {}, frame skipped'.format(img_path))
            continue
        avg_chans = np.mean(im, axis=(0, 1))
        x_min, y_min, box_w, box_h = [int(g) for g in gt[:4]]  # (x, y, w, h)
        bbox = [x_min, y_min, x_min + box_w, y_min + box_h]   # (xmin, ymin, xmax, ymax)

        z, x = crop_like_SiamFC(im, bbox, instanc_size=instanc_size, padding=avg_chans)
        cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(int(idx), 0)), z)
        cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(idx), 0)), x)


def main(instanc_size=511, num_threads=24):
    """Crop every sequence of every split in ``sub_sets`` using a process pool.

    Crops are written below ``/data2/got10k/crop<instanc_size>``. A progress
    bar is printed per split, and sequences whose worker raised are reported
    instead of being dropped silently.

    Args:
        instanc_size: Side length of the search crop in pixels.
        num_threads: Number of worker processes.

    Returns:
        None.
    """
    crop_path = '/data2/got10k/crop{:d}'.format(instanc_size)
    if not exists(crop_path):
        makedirs(crop_path)

    for sub_set in sub_sets:
        sub_set_base_path = join(got10k_base_path, sub_set)
        # Only sequence folders are videos; plain files in the split directory
        # (such as list.txt) must not be submitted as sequences.
        videos = sorted(name for name in listdir(sub_set_base_path)
                        if isdir(join(sub_set_base_path, name)))
        n_videos = len(videos)
        with futures.ProcessPoolExecutor(max_workers=num_threads) as executor:
            fs = {executor.submit(crop_video, sub_set, video, crop_path, instanc_size): video
                  for video in videos}
            # Count from 1 so that the last completed job reports 100%.
            for done, f in enumerate(futures.as_completed(fs), start=1):
                error = f.exception()
                if error is not None:
                    # Without this check a failing worker would go unnoticed.
                    print('\n{}/{} failed: {!r}'.format(sub_set, fs[f], error))
                printProgress(done, n_videos, prefix=sub_set, suffix='Done ', barLength=40)


if __name__ == '__main__':
    since = time.time()
    # Optional CLI arguments: instanc_size, then num_threads. Arguments that
    # are not given fall back to main()'s defaults instead of raising IndexError.
    main(*[int(arg) for arg in sys.argv[1:3]])
    time_elapsed = time.time() - since
    print('Total complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

将got10k数据集信息写入json文件

# -*- coding:utf-8 -*-
# ! ./usr/bin/env python
# __author__ = 'zzp'

'''
    读取got10k的train，val下面的文件夹
    video+ground Truth
    将图像的size和bbox以及路径等信息层层嵌入到一个复合字典中
    将字典保存为json文件
'''



import cv2
import json
import glob
import numpy as np
from os.path import join
from os import listdir

import argparse

from os.path import basename, isdir

parser = argparse.ArgumentParser()
parser.add_argument('--dir', type=str, default='/data/share/GOT10K', help='your vid data dir')
args = parser.parse_args()

got10k_base_path = args.dir
sub_sets = sorted({'train', 'val'})

# One list per split, each holding one dict per sequence:
#   {'base_path': '<split>/<video>',
#    'frame': [{'frame_sz': [w, h], 'img_path': name, 'bbox': [x, y, w, h]}, ...]}
got10k = []
for sub_set in sub_sets:
    sub_set_base_path = join(got10k_base_path, sub_set)
    # Only folders are sequences; skip plain files such as list.txt.
    videos = sorted(name for name in listdir(sub_set_base_path)
                    if isdir(join(sub_set_base_path, name)))
    s = []
    # Each video is one sequence folder containing frames and groundtruth.txt.
    for vi, video in enumerate(videos):
        print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos)))
        video_base_path = join(sub_set_base_path, video)
        gts_path = join(video_base_path, 'groundtruth.txt')
        # Passing the path lets NumPy close the file; ndmin=2 keeps a
        # single-frame sequence as a 1x4 matrix.
        gts = np.loadtxt(gts_path, delimiter=',', ndmin=2)

        # All frame names, in frame order.
        jpgs = sorted(glob.glob(join(video_base_path, '*.jpg')))
        if not jpgs:
            print('no jpg files in {}, video skipped'.format(video_base_path))
            continue

        # Read the image size from the first frame instead of relying on a
        # fixed file name; cv2.imread returns None when the file is unreadable.
        im = cv2.imread(jpgs[0])
        if im is None:
            print('cannot read {}, video skipped'.format(jpgs[0]))
            continue
        height, width = im.shape[:2]
        frame_sz = [width, height]

        if len(jpgs) != len(gts):
            # zip() below stops at the shorter of the two.
            print('{}: {} images but {} annotations, extra entries are ignored'.format(
                video_base_path, len(jpgs), len(gts)))

        v = {'base_path': join(sub_set, video), 'frame': []}
        for img_path, gt in zip(jpgs, gts):
            v['frame'].append({
                'frame_sz': frame_sz,
                # basename() handles both '/' and the platform's own separator.
                'img_path': basename(img_path),
                'bbox': [int(g) for g in gt],   # (x, y, w, h)
            })
        s.append(v)
    got10k.append(s)
print('save json (raw got10k info), please wait 1 min~')
# The context manager guarantees that the file is flushed and closed.
with open('got10k.json', 'w') as json_file:
    json.dump(got10k, json_file, indent=4, sort_keys=True)
print('got10k.json has been saved in ./')

got-10k数据预处理

got数据文件夹格式

数据标注描述

使用shutil移动got10k文件夹

裁剪数据集，获取图像对

将got10k数据集信息写入json文件

相关阅读

相关文章

相关问答

相关文档