void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen)
{
    // Read the "names" entry from the data config to get the class-name list.
    list *options = read_data_cfg(datacfg);
    char *name_list = option_find_str(options, "names", "data/names.list");
    char **names = get_labels(name_list);

    // Load the 8 sizes of glyph images (ASCII 32-126) from data/labels/;
    // they are used later to render class labels on the output image.
    image **alphabet = load_alphabet();

    // Build the network from the cfg file and load the trained weights.
    network *net = load_network(cfgfile, weightfile, 0);
    // Inference runs with batch size 1 on every layer.
    set_batch_network(net, 1);

    // Fixed seed so any randomized behavior is reproducible between runs.
    srand(2222222);

    double start;
    char buff[256];
    char *input = buff;
    float nms = .45;  // IoU threshold for non-maximum suppression

    while(1){
        if(filename){
            // snprintf always NUL-terminates; strncpy would leave the buffer
            // unterminated when filename is 256 bytes or longer.
            snprintf(input, 256, "%s", filename);
        } else {
            // No file given on the command line: prompt interactively.
            printf("Enter Image Path: ");
            fflush(stdout);
            input = fgets(input, 256, stdin);
            if(!input) return;     // EOF / read error: quit
            strtok(input, "\n");   // strip the trailing newline
        }
        // Load the image as 3-channel color at its native size.
        image im = load_image_color(input, 0, 0);
        // Letterbox to the network's input resolution (preserves aspect ratio).
        image sized = letterbox_image(im, net->w, net->h);
        // The last layer carries the class count used below.
        layer l = net->layers[net->n-1];

        float *X = sized.data;
        start = what_time_is_it_now();
        network_predict(net, X);
        printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-start);

        // Collect every box whose objectness exceeds thresh.
        int nboxes = 0;
        detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
        // Suppress duplicates: for each class, any box overlapping a
        // higher-scoring box by more than nms IoU gets that class score zeroed.
        if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
        // Draw the surviving boxes and their labels on the original image.
        draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes);
        free_detections(dets, nboxes);

        // Save (and, with OpenCV, display) the annotated image.
        if(outfile){
            save_image(im, outfile);
        } else {
            save_image(im, "predictions");
#ifdef OPENCV
            make_window("predictions", 512, 512, 0);
            show_image(im, "predictions", 0);
#endif
        }

        free_image(im);
        free_image(sized);
        if (filename) break;  // single-image mode: one pass only
    }
}
detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num)
{
    /* Allocate one detection slot per above-threshold box, then fill each
     * slot with its box geometry, objectness and per-class scores. */
    detection *boxes = make_network_boxes(net, thresh, num);
    fill_network_boxes(net, w, h, thresh, hier, map, relative, boxes);
    return boxes;
}
detection *dets = make_network_boxes(net, thresh, num);
The detection struct:
// One candidate detection produced by the network.
typedef struct detection{
box bbox;         // predicted box geometry (coordinates adjusted by correct_yolo_boxes)
int classes;      // number of entries in prob
float *prob;      // per-class score (objectness * class probability); 0 when suppressed
float *mask;      // mask coefficients when l.coords > 4; NULL for yolov3
float objectness; // confidence that the box contains an object
int sort_class;   // class index nms_comparator sorts on during NMS
} detection;
// Allocate a detection array sized to the number of above-threshold boxes,
// plus each slot's per-class score buffer. If num is non-NULL it receives
// the box count.
detection *make_network_boxes(network *net, float thresh, int *num)
{
layer l = net->layers[net->n - 1];
int i;
// Total boxes, across all yolo layers, whose objectness exceeds thresh.
int nboxes = num_detections(net, thresh);
// Report the count back to the caller when requested.
if(num) *num = nboxes;
// One zero-initialized detection per surviving box.
detection *dets = calloc(nboxes, sizeof(detection));
for(i = 0; i < nboxes; ++i){
// Per-class score buffer (e.g. 20 classes for VOC, 80 for COCO).
dets[i].prob = calloc(l.classes, sizeof(float));
if(l.coords > 4){// yolov3 leaves coords at its default 0, so this branch never runs there
dets[i].mask = calloc(l.coords-4, sizeof(float));
}
}
return dets;
}
int nboxes = num_detections(net, thresh);
int num_detections(network *net, float thresh)
{
    /* Sum, over every layer, the number of candidate boxes it contributes:
     * YOLO layers contribute only boxes whose objectness beats thresh,
     * while DETECTION/REGION layers always contribute all w*h*n boxes. */
    int i;
    int total = 0;
    for(i = 0; i < net->n; ++i){
        layer cur = net->layers[i];
        if(cur.type == YOLO){
            total += yolo_num_detections(cur, thresh);
        } else if(cur.type == DETECTION || cur.type == REGION){
            total += cur.w * cur.h * cur.n;
        }
    }
    return total;
}
yolo_num_detections(l, thresh);
int yolo_num_detections(layer l, float thresh)
{
    /* Count the boxes in this YOLO layer whose objectness score exceeds
     * thresh. Iterates anchor-major; the count is order-independent. */
    int n, i;
    int hits = 0;
    for(n = 0; n < l.n; ++n){
        for(i = 0; i < l.w*l.h; ++i){
            /* Offset 4 in each box's channel group is the objectness value. */
            int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4);
            if(l.output[obj_index] > thresh){
                ++hits;
            }
        }
    }
    return hits;
}
fill_network_boxes(net, w, h, thresh, hier, map, relative, dets);
void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets)
{
    /* Walk every layer; each detection-producing layer writes its boxes
     * into dets, and the write cursor advances past what was written. */
    int i;
    for(i = 0; i < net->n; ++i){
        layer cur = net->layers[i];
        if(cur.type == YOLO){
            /* get_yolo_detections returns how many boxes it emitted. */
            dets += get_yolo_detections(cur, w, h, net->w, net->h, thresh, map, relative, dets);
        } else if(cur.type == REGION){
            get_region_detections(cur, w, h, net->w, net->h, thresh, map, hier, relative, dets);
            dets += cur.w * cur.h * cur.n;
        } else if(cur.type == DETECTION){
            get_detection_detections(cur, w, h, thresh, dets);
            dets += cur.w * cur.h * cur.n;
        }
    }
}
int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets);
// Extract this YOLO layer's above-threshold boxes into dets.
// (w,h) is the original image size; (netw,neth) the network input size.
// Returns the number of detections written.
int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets)
{
int i,j,n;
// Raw output activations of this layer.
float *predictions = l.output;
if (l.batch == 2) avg_flipped_yolo(l);// batch was set to 1 earlier, so this never runs here
int count = 0;
// Walk every grid cell of this yolo layer.
for (i = 0; i < l.w*l.h; ++i){
// Row/column of the cell, needed to decode box coordinates.
int row = i / l.w;
int col = i % l.w;
// Each cell predicts l.n boxes.
for(n = 0; n < l.n; ++n){
// Index of this box's objectness score (offset 4 after x,y,w,h).
int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4);
float objectness = predictions[obj_index];
// Skip boxes whose objectness does not beat the threshold.
if(objectness <= thresh) continue;
// Index of the box's 4 coordinate values.
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
// Decode x,y,w,h for this box.
dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
dets[count].objectness = objectness;
dets[count].classes = l.classes;
for(j = 0; j < l.classes; ++j){
// Index of class j's conditional probability for this box.
int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j);
// Class score = objectness * class probability.
float prob = objectness*predictions[class_index];
// Keep the score only if it beats the threshold; otherwise zero it.
dets[count].prob[j] = (prob > thresh) ? prob : 0;
}
++count;
}
}
// Map box coordinates from the letterboxed network frame back to the original image.
correct_yolo_boxes(dets, count, w, h, netw, neth, relative);
return count;
}
correct_yolo_boxes(dets, count, w, h, netw, neth, relative);
// Convert box coordinates from the letterboxed network frame back to the
// original image frame. w,h: original image size; netw,neth: network input
// size. If relative is 0, coordinates are additionally scaled to pixels.
void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative)
{
int i;
int new_w=0;
int new_h=0;
// Size the image actually occupies inside the network input after the
// aspect-preserving resize (the remainder was padding from letterbox_image).
if (((float)netw/w) < ((float)neth/h)) {
new_w = netw;
new_h = (h * netw)/w;
} else {
new_h = neth;
new_w = (w * neth)/h;
}
for (i = 0; i < n; ++i){
box b = dets[i].bbox;
// Remove the centering padding offset, then rescale to the unpadded extent.
b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw);
b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth);
b.w *= (float)netw/new_w;
b.h *= (float)neth/new_h;
if(!relative){
// Convert from normalized coordinates to pixels of the original image.
b.x *= w;
b.w *= w;
b.y *= h;
b.h *= h;
}
// Store the corrected geometry back.
dets[i].bbox = b;
}
}
do_nms_sort(dets, nboxes, l.classes, nms);
// Per-class non-maximum suppression. For each class the boxes are sorted
// by that class's score, and any box overlapping a higher-scoring box by
// more than thresh IoU has its score for that class zeroed.
void do_nms_sort(detection *dets, int total, int classes, float thresh)
{
int i, j, k;
k = total-1;
// Compact the array: swap zero-objectness boxes to the tail.
for(i = 0; i <= k; ++i){
if(dets[i].objectness == 0){
detection swap = dets[i];
dets[i] = dets[k];
dets[k] = swap;
--k;
--i;// re-examine the element just swapped into slot i
}
}
total = k+1;// number of boxes that survived the compaction
for(k = 0; k < classes; ++k){
// Tag every box so nms_comparator sorts on class k's score.
for(i = 0; i < total; ++i){
dets[i].sort_class = k;
}
// Sort descending by class-k score.
qsort(dets, total, sizeof(detection), nms_comparator);
for(i = 0; i < total; ++i){
// A zero score means this box is already out of the running for class k.
if(dets[i].prob[k] == 0) continue;
box a = dets[i].bbox;
// Every lower-scoring box that overlaps a by more than thresh IoU is a
// duplicate prediction of the same object: zero its class-k score.
for(j = i+1; j < total; ++j){
box b = dets[j].bbox;
if (box_iou(a, b) > thresh){
dets[j].prob[k] = 0;
}
}
}
}
}
draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes);
// Render every detection scoring above thresh onto im: a colored box plus
// a text label naming each class whose score beats the threshold.
void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes)
{
int i,j;
for(i = 0; i < num; ++i){
char labelstr[4096] = {0};
int class = -1;
// Build a comma-separated label from every class beating the threshold.
for(j = 0; j < classes; ++j){
if (dets[i].prob[j] > thresh){
if (class < 0) {
strcat(labelstr, names[j]);
class = j;
} else {
strcat(labelstr, ", ");
strcat(labelstr, names[j]);
}
printf("%s: %.0f%%\n", names[j], dets[i].prob[j]*100);
}
}
// class >= 0 means at least one class passed the threshold: draw the box.
if(class >= 0){
// Box line width, proportional to the image height.
int width = im.h * .006;
// Deterministic per-class color.
int offset = class*123457 % classes;
float red = get_color(2,offset,classes);
float green = get_color(1,offset,classes);
float blue = get_color(0,offset,classes);
float rgb[3];
rgb[0] = red;
rgb[1] = green;
rgb[2] = blue;
box b = dets[i].bbox;
// Convert the center/size box to pixel corner coordinates.
int left = (b.x-b.w/2.)*im.w;
int right = (b.x+b.w/2.)*im.w;
int top = (b.y-b.h/2.)*im.h;
int bot = (b.y+b.h/2.)*im.h;
// Clamp to the image bounds.
if(left < 0) left = 0;
if(right > im.w-1) right = im.w-1;
if(top < 0) top = 0;
if(bot > im.h-1) bot = im.h-1;
// Draw the rectangle.
draw_box_width(im, left, top, right, bot, width, red, green, blue);
// Render the text label with the preloaded glyph images.
if (alphabet) {
image label = get_label(alphabet, labelstr, (im.h*.03));
draw_label(im, top + width, left, label, rgb);
free_image(label);
}
// Mask overlay; dets[i].mask is NULL for yolov3, so this is skipped there.
if (dets[i].mask){
image mask = float_to_image(14, 14, 1, dets[i].mask);
image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h);
image tmask = threshold_image(resized_mask, .5);
embed_image(tmask, im, left, top);
free_image(mask);
free_image(resized_mask);
free_image(tmask);
}
}
}
}
draw_box_width(im, left, top, right, bot, width, red, green, blue);
void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b)
{
    /* Draw w nested 1-pixel rectangles, each shrunk inward by one pixel,
     * to produce a border w pixels thick. */
    int ring;
    for(ring = 0; ring < w; ++ring){
        draw_box(a, x1 + ring, y1 + ring, x2 - ring, y2 - ring, r, g, b);
    }
}
void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b)
{
    /* Draw a 1-pixel rectangle outline on a 3-channel image, overwriting
     * the pixel values with (r,g,b). Corners are clamped into bounds. */
    int i, k;
    float rgb[3];
    rgb[0] = r;
    rgb[1] = g;
    rgb[2] = b;
    /* Clamp all four corners so the rectangle stays inside the image. */
    if(x1 < 0) x1 = 0;
    if(x1 >= a.w) x1 = a.w-1;
    if(x2 < 0) x2 = 0;
    if(x2 >= a.w) x2 = a.w-1;
    if(y1 < 0) y1 = 0;
    if(y1 >= a.h) y1 = a.h-1;
    if(y2 < 0) y2 = 0;
    if(y2 >= a.h) y2 = a.h-1;
    /* Top and bottom edges: one pixel per column, for each channel plane. */
    for(i = x1; i <= x2; ++i){
        for(k = 0; k < 3; ++k){
            a.data[i + y1*a.w + k*a.w*a.h] = rgb[k];
            a.data[i + y2*a.w + k*a.w*a.h] = rgb[k];
        }
    }
    /* Left and right edges: one pixel per row, for each channel plane. */
    for(i = y1; i <= y2; ++i){
        for(k = 0; k < 3; ++k){
            a.data[x1 + i*a.w + k*a.w*a.h] = rgb[k];
            a.data[x2 + i*a.w + k*a.w*a.h] = rgb[k];
        }
    }
}
// Render the string into an image by tiling per-character glyph images at
// the requested size (clamped to the 8 preloaded sizes), then pad it with
// a border. Caller owns and must free the returned image.
image get_label(image **characters, char *string, int size)
{
size = size/10;
if(size > 7) size = 7;// only glyph sizes 0-7 were loaded
// Start from an empty image and grow it one character at a time.
image label = make_empty_image(0,0,0);
/*image make_empty_image(int w, int h, int c)
{
image out;
out.data = 0;
out.h = h;
out.w = w;
out.c = c;
return out;
}*/
while(*string){
// Glyph image for this character at the chosen size.
image l = characters[size][(int)*string];
// Append the glyph to the right of the accumulated label.
image n = tile_images(label, l, -size - 1 + (size+1)/2);
/*image tile_images(image a, image b, int dx)
{
if(a.w == 0) return copy_image(b);
image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? a.c : b.c);
fill_cpu(c.w*c.h*c.c, 1, c.data, 1);
embed_image(a, c, 0, 0);
composite_image(b, c, a.w + dx, 0);
return c;
}*/
free_image(label);
label = n;
++string;
}
// Pad the finished label with a border a quarter of its height.
image b = border_image(label, label.h*.25);
free_image(label);
return b;
}
写标签
void draw_label(image a, int r, int c, image label, const float *rgb)
{
    /* Blit the label image onto a at row r, column c, tinting every
     * channel by rgb[k]. When there is room above r, shift the label up
     * by its own height so it sits above the anchor instead of below. */
    int lw = label.w;
    int lh = label.h;
    if(r - lh >= 0) r = r - lh;
    int x, y, k;
    for(y = 0; y < lh && y + r < a.h; ++y){
        for(x = 0; x < lw && x + c < a.w; ++x){
            for(k = 0; k < label.c; ++k){
                float v = get_pixel(label, x, y, k);
                set_pixel(a, x + c, y + r, k, rgb[k] * v);
            }
        }
    }
}
测试过程
(1)通过加载训练好的模型对输入的图像进行测试,获取所有yolo层置信度大于阈值thresh(默认0.5)的预测框;
(2)将之前获取的符合条件的预测框信息存入detection * dets中,并对每一个预测框每类得分(置信度*该类类别概率)进行判断,如果该预测框的该类得分大于阈值thresh,则将其存入,否则将其得分置为0;
(3)进行图像坐标的转换;
(4)非极大值抑制,遍历所有类别,当一个预测框该类得分不为0时,选择该预测框遍历之后的所有该类不为0的预测框,如果之后的预测框与该预测框的交并比大于thresh,则说明这两个框重合度过高,且都预测同一类别,说明预测了同一目标,需要将此预测框该类得分置为0;
(5)画图