OS 名称及版本: Microsoft Windows 10 专业版 10.0.17763 暂缺 Build 17763
CUDA版本1: cuda_11.0.1_451.22_win10.exe
Python版本: Anaconda3-2020.02-Windows-x86_64.exe
Pytorch版本: torch-1.5.0-cp37-cp37m-win_amd64.whl
Torchvision版本: torchvision-0.6.0-cp37-cp37m-win_amd64.whl
IDE名称及版本: pycharm-community-2020.1.1.exe
文档制作软件: typora-setup-x64.exe
浏览器: QQBrowser_Setup_Qqpcmgr_10.5.4043.400.exe
代码管理工具: Git-2.27.0-64-bit.exe
图片标注工具: labelme Version: 4.4.0 (pip方式安装)
(1)阿里云 http://mirrors.aliyun.com/pypi/simple/
(3)清华大学 https://pypi.tuna.tsinghua.edu.cn/simple/
(4)中国科学技术大学 http://pypi.mirrors.ustc.edu.cn/simple/
主要用到其中的IOU计算的库来评价模型的性能。
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
python setup.py build_ext install
:本教程使用Penn-Fudan的行人检测和分割数据集来训练Mask R-CNN实例分割模型。Penn-Fudan数据集中有170张图像,包含345个行人的实例。图像中场景主要是校园和城市街景,每张图中至少有一个行人,具体的介绍和下载地址如下:# 下载Penn-Fudan dataset wget https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip # 解压到当前目录 unzip PennFudanPed.zip
# 网络模型 maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth #预训练的maskrcnn resnet50-19c8e357.pth #没训练的maskrcnn cd C:\Users\Administrator\.cache\torch\checkpoints # 将网络模型放入此文件夹 copy {网络模型}
在PyTorch官方的references/detection/中,有一些封装好的用于模型训练和测试的函数,其中references/detection/engine.py、references/detection/utils.py、references/detection/transforms.py是我们需要用到的。首先,将这些文件拷贝过来。
# Download TorchVision repo to use some files from references/detection
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.4.0
cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../
# coding:utf-8
__author__ = "Xuyc"
__date__ = "2020/6/19 11:11"
__filename__ = "jsontool.py"
import os
# Run labelme's json_to_dataset.py on every entry found in ..\json.
# NOTE(review): only the bare entry name is passed to the converter, so this
# presumably expects to be run from inside the json folder -- confirm.
for entry in os.listdir(r'..\json'):
    os.system(r'python ..\Anaconda3\Lib\site-packages\labelme\cli\json_to_dataset.py "{}"'.format(entry))

# Make sure the output folders for the collected images and masks exist.
os.makedirs('img', exist_ok=True)
os.makedirs('mask', exist_ok=True)

# Keep only the entries whose name contains "_json"; these are presumably the
# per-annotation folders produced by json_to_dataset.py -- confirm.
jsons = [entry for entry in os.listdir(r'..\json') if '_json' in entry]

# Copy the image and the label mask out of each converted folder.
# "copy" is a cmd.exe builtin, so it has to go through the shell.
for name in jsons:
    os.system(r'copy "{}\img.png" "img\{}.png"'.format(name, name))
    os.system(r'copy "{}\label.png" "mask\{}_mask.png"'.format(name, name))
cl: 命令行 error D8021 :无效的数值参数“/Wno-cpp”
from setuptools import setup, Extension
import numpy as np
# To compile and install locally run "python setup.py build_ext --inplace"
# To install library to Python site-packages run "python setup.py build_ext install"
ext_modules = [
sources=['../common/maskApi.c', 'pycocotools/_mask.pyx'],
include_dirs = [np.get_include(), '../common'],
#extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'],
extra_compile_args=['', '', ''],
package_dir = {'pycocotools': 'pycocotools'},
ext_modules= ext_modules
File "build/bdist.linux-x86_64/egg/pycocotools/mask.py", line 82, in encode
File "pycocotools/_mask.pyx", line 137, in pycocotools._mask.encode
ValueError: Does not understand character buffer dtype format string ('?')
In coco_eval.py:
rles = [
mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
for mask in masks
3 缺少C++ 14.0
# Sample code from the TorchVision 0.3 Object Detection Finetuning Tutorial
# http://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
__author__ = "torch"
__filename__ = "tv-training-code.py"
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from engine import train_one_epoch, evaluate
import utils
import transforms as T
class PennFudanDataset(object):
    """Image / instance-mask pairs read from a Penn-Fudan style folder.

    ``root`` must contain two sub-folders, ``PNGImages`` and ``PedMasks``.
    Their sorted listings are paired by position, so both folders are
    expected to hold the same number of files in matching order.

    Args:
        root: Path of the dataset folder.
        transforms: Callable taking ``(img, target)`` and returning the
            transformed ``(img, target)``, or ``None`` to skip transforms.
    """

    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # Sort both listings so that image i and mask i stay aligned.
        self.imgs = sorted(os.listdir(os.path.join(root, "PNGImages")))
        self.masks = sorted(os.listdir(os.path.join(root, "PedMasks")))

    @staticmethod
    def _split_instances(mask):
        """Split a color-encoded mask into one boolean mask per instance.

        Every distinct non-zero value in ``mask`` is one instance; 0 is the
        background. The background is removed by value rather than by
        position, so a mask without any background pixel keeps all of its
        instances.

        Returns:
            Boolean array with one leading entry per instance id.
        """
        obj_ids = np.unique(mask)
        obj_ids = obj_ids[obj_ids != 0]
        return mask == obj_ids[:, None, None]

    @staticmethod
    def _boxes_from_masks(masks):
        """Return the tight ``[xmin, ymin, xmax, ymax]`` box of every mask.

        Args:
            masks: Stack of 2-D boolean masks, one per instance.

        Returns:
            float32 tensor of shape ``(N, 4)``; ``(0, 4)`` when there are no
            instances, so column indexing keeps working on empty targets.
        """
        boxes = []
        for instance in masks:
            rows, cols = np.where(instance)
            boxes.append([int(cols.min()), int(rows.min()),
                          int(cols.max()), int(rows.max())])
        return torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

    def __getitem__(self, idx):
        """Load sample ``idx`` and build its detection target.

        Returns:
            ``(img, target)`` where ``target`` is a dict with the keys
            ``boxes``, ``labels``, ``masks``, ``image_id``, ``area`` and
            ``iscrowd``.
        """
        # load images and masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = np.array(Image.open(mask_path))

        # split the color-encoded mask into a set of binary masks
        masks = self._split_instances(mask)
        num_objs = len(masks)

        # get bounding box coordinates for each mask
        boxes = self._boxes_from_masks(masks)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "masks": masks,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of images listed in ``PNGImages``."""
        return len(self.imgs)
def get_model_instance_segmentation(num_classes):
    """Build a Mask R-CNN whose box and mask heads predict ``num_classes``.

    Args:
        num_classes: Number of output classes for both replaced heads
            (the caller in this file passes 2: background and person).

    Returns:
        The torchvision ``maskrcnn_resnet50_fpn`` model with both predictor
        heads replaced by freshly created ones.
    """
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model
def get_transform(train):
    """Compose the transforms applied to every ``(img, target)`` sample.

    Args:
        train: When true, a random horizontal flip is added as augmentation.

    Returns:
        A ``T.Compose`` over the selected transforms.
    """
    transforms = []
    # converts the PIL image into a tensor
    # (assumes T is references/detection/transforms.py -- confirm)
    transforms.append(T.ToTensor())
    if train:
        # during training, randomly flip the images and their targets
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
def main():
    """Fine-tune Mask R-CNN on the PennFudanPed folder and evaluate it.

    Trains for 10 epochs, stepping the learning-rate schedule and running the
    evaluation on the held-out samples after every epoch.
    """
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # our dataset has two classes only - background and person
    num_classes = 2
    # use our dataset and defined transformations
    dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
    dataset_test = PennFudanDataset('PennFudanPed', get_transform(train=False))

    # split the dataset in train and test set: the last 5 shuffled indices
    # are held out for evaluation, everything else is used for training
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-5])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-5:])

    # define training and validation data loaders
    # (utils.collate_fn presumably keeps each batch as tuples instead of
    # stacking, since images may differ in size -- confirm in utils.py)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)

    # get the model using our helper function
    model = get_model_instance_segmentation(num_classes)

    # move model to the right device
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler: multiply the learning rate by 0.1
    # every 3 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    # let's train it for 10 epochs
    num_epochs = 10

    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)

    print("That's it!")
# Run the training only when executed as a script; the guard also keeps
# DataLoader worker processes from re-running it when they import this module.
if __name__ == "__main__":
    main()
查看CUDA版本,cmd命令nvcc --version