The CUDA and TensorRT paths involved need to match the current system. Details:
Lines 40-44:
static const int INPUT_H = 960;
static const int INPUT_W = 960;
static const int cls_num = 15;
static const unsigned int maxBatchSize = 2;
static const int anchor_output_num = 56700;
Lines 49-50: paths of the ONNX input and the engine output in the code (note: the paths must be absolute; relative paths will cause problems):
const char* onnx_path = "/home/onnx2engine/yolov5_5.0/model_onnx/yolov5s.onnx";
const char* TrtSaveFileName = "/home/onnx2engine/yolov5_5.0/model_onnx/yolov5s.engine";
// Anchor counts for different input sizes: 640 --> 25200 | 960 --> 56700
The largest downsampling stride is 2^5 = 32.
640/32 = 20, so the three feature maps are 20, 40 and 80 cells on a side.
The 20, 40 and 80 grids correspond to the three detection scales, and each grid cell predicts 3 anchor boxes:
(20*20 + 40*40 + 80*80) * 3 = 25200
960/32 = 30, so the three feature maps are 30, 60 and 120 cells on a side.
The 30, 60 and 120 grids correspond to the three detection scales, and each grid cell predicts 3 anchor boxes:
(30*30 + 60*60 + 120*120) * 3 = 56700
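The same arithmetic can be checked with a few lines of C++ (a standalone sketch, not part of the project code; anchorOutputNum is a name made up for illustration):

#include <cstdio>
#include <initializer_list>

// Count the raw predictions for a square input, assuming YOLOv5's three
// detection heads (strides 8, 16, 32) with 3 anchor boxes per grid cell.
static int anchorOutputNum(int inputSize)
{
    int total = 0;
    for (int stride : {8, 16, 32})
    {
        int g = inputSize / stride; // feature-map edge length at this stride
        total += g * g * 3;         // 3 anchors per cell
    }
    return total;
}

int main()
{
    std::printf("640 -> %d\n", anchorOutputNum(640)); // prints 25200
    std::printf("960 -> %d\n", anchorOutputNum(960)); // prints 56700
    return 0;
}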
yolo_onnx2engine.cpp
Ref 1: https://blog.csdn.net/weixin_43863869/article/details/124614334
Ref 2: https://www.cnblogs.com/tangjunjun/p/16639361.html
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <vector>
#include <chrono>
#include <cmath>
#include <cassert>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/opencv.hpp>
// Header for the ONNX parser
#include "NvOnnxParser.h"
using namespace nvonnxparser;
using namespace nvinfer1;
using namespace std;
#define CHECK(status) \
do\
{\
auto ret = (status);\
if (ret != 0)\
{\
std::cerr << "Cuda failure: " << ret << std::endl;\
abort();\
}\
} while (0)
// stuff we know about the network and the input/output blobs
static const int INPUT_H = 960;
static const int INPUT_W = 960;
static const int cls_num = 15;
static const unsigned int maxBatchSize = 2;
static const int anchor_output_num = 56700; // anchor count per input size: 640 --> 25200 | 960 --> 56700
static const int OUTPUT_SIZE = 1 * anchor_output_num * (cls_num + 5); // batch(1) * anchors * (4 box + 1 obj + cls_num)
const char* INPUT_BLOB_NAME = "images";
const char* OUTPUT_BLOB_NAME = "output";
const char* onnx_path = "/home/mec/hlj/onnx2engine/yolov5_5.0/model_onnx/yolov5s.onnx";
const char* TrtSaveFileName = "/home/mec/hlj/onnx2engine/yolov5_5.0/model_onnx/yolov5s.engine";
// Build the logger (TensorRT requires an ILogger implementation)
class Logger : public ILogger
{
void log(Severity severity, const char* msg) noexcept override
{
// suppress info-level messages
if (severity <= Severity::kWARNING)
std::cout << msg << std::endl;
}
} gLogger;
void saveToTrtModel(const char* TrtSaveFileName, IHostMemory* trtModelStream)
{
std::ofstream out(TrtSaveFileName, std::ios::binary);
if (!out.is_open())
{
std::cout << "failed to open file!" << std::endl;
return;
}
out.write(reinterpret_cast<const char*>(trtModelStream->data()), trtModelStream->size());
out.close();
}
// Create the engine by parsing the ONNX model.
ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config)
{
/* createNetworkV2() creates an INetworkDefinition object and returns a pointer to it.
The 1U flag enables explicit batch, which ONNX models require (0U breaks the parse). */
INetworkDefinition* network = builder->createNetworkV2(1U);
// Create the ONNX parser
IParser* parser = createParser(*network, gLogger);
// Parse the ONNX file; on failure print the parser errors and bail out
if (!parser->parseFromFile(onnx_path, static_cast<int32_t>(ILogger::Severity::kWARNING)))
{
for (int32_t i = 0; i < parser->getNbErrors(); ++i)
{
std::cout << parser->getError(i)->desc() << std::endl;
}
return nullptr;
}
std::cout << "successfully parsed the onnx model" << std::endl;
// Build engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(1 << 20); // 1 MB workspace; raise it (e.g. 1 << 30) if the build fails for lack of memory
//config->setFlag(nvinfer1::BuilderFlag::kFP16); // enable FP16 precision
//config->setFlag(nvinfer1::BuilderFlag::kINT8); // enable INT8 precision (requires calibration)
// Build the ICudaEngine from the network definition and the config
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
std::cout << "successfully converted onnx to engine!" << std::endl;
// Clean up
network->destroy();
parser->destroy();
return engine;
}
void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
{
// Create the builder (a pointer to an IBuilder object)
IBuilder* builder = createInferBuilder(gLogger);
IBuilderConfig* config = builder->createBuilderConfig();
// Create model to populate the network, then set the outputs and create an engine
ICudaEngine* engine = createEngine(maxBatchSize, builder, config);
assert(engine != nullptr);
// Serialize the engine and save it to file
(*modelStream) = engine->serialize();
// Close everything down (destroy the config before the builder that created it)
engine->destroy();
config->destroy();
builder->destroy();
saveToTrtModel(TrtSaveFileName, *modelStream);
}
int get_trtengine() {
IHostMemory* modelStream{ nullptr };
APIToModel(maxBatchSize, &modelStream);
assert(modelStream != nullptr);
// APIToModel() already wrote the serialized engine to TrtSaveFileName via
// saveToTrtModel(), so the stream only needs to be released here.
modelStream->destroy();
return 0;
}
int main()
{
return get_trtengine();
}
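To check that the saved .engine file actually loads, you can deserialize it with the TensorRT runtime. The sketch below (checkEngine is a hypothetical helper, not part of the original code) uses the same TensorRT 8.2 API and the same includes as the file above, so it could be dropped into it:

bool checkEngine(const char* enginePath)
{
    // Read the serialized engine back into memory
    std::ifstream file(enginePath, std::ios::binary);
    if (!file.is_open()) return false;
    file.seekg(0, std::ios::end);
    size_t size = file.tellg();
    file.seekg(0, std::ios::beg);
    std::vector<char> blob(size);
    file.read(blob.data(), size);

    // Deserialize; a non-null engine means the file is a valid TensorRT plan
    IRuntime* runtime = createInferRuntime(gLogger);
    ICudaEngine* engine = runtime->deserializeCudaEngine(blob.data(), size);
    bool ok = (engine != nullptr);
    if (engine) engine->destroy();
    runtime->destroy();
    return ok;
}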
CMakeLists.txt
cmake_minimum_required(VERSION 2.6)
project(yolo)
add_definitions(-std=c++11)
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)
include_directories(${PROJECT_SOURCE_DIR}/include)
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda-11.6/include)
link_directories(/usr/local/cuda-11.6/lib64)
# tensorrt
include_directories(/home/package/TensorRT-8.2.5.1/include/)
link_directories(/home/package/TensorRT-8.2.5.1/lib/)
include_directories(/home/package/TensorRT-8.2.5.1/samples/common/)
#link_directories(/home/package/TensorRT-8.2.5.1/lib/stubs/)
# opencv
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
add_executable(yolo ${PROJECT_SOURCE_DIR}/yolo_onnx2engine.cpp)
target_link_libraries(yolo nvinfer)
target_link_libraries(yolo cudart)
target_link_libraries(yolo ${OpenCV_LIBS})
# onnx2engine additionally needs the ONNX parser library below
target_link_libraries(yolo /home/package/TensorRT-8.2.5.1/lib/stubs/libnvonnxparser.so)
add_definitions(-O2 -pthread)
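With both files in place, a typical out-of-source CMake build and run looks like this (adjust the CUDA/TensorRT paths in CMakeLists.txt first):

mkdir build && cd build
cmake ..
make
./yolo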