import tensorrt as trt
import os
# Network-creation flag bit mask for explicit-batch mode.
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
# Default-constructed TensorRT logger shared by the TensorRT objects in this file.
TRT_LOGGER = trt.Logger()
def get_engine(onnx_file_path, engine_file_path=""):
    """Load a serialized TensorRT engine if one exists, otherwise build it from ONNX.

    Args:
        onnx_file_path: Path of the ONNX model that is parsed when no
            serialized engine is found at ``engine_file_path``.
        engine_file_path: Path of the serialized engine. If the file exists it
            is deserialized and returned; otherwise the freshly built engine
            is written there. When empty, the built engine is not saved.

    Returns:
        The deserialized engine, or ``None`` if the ONNX model could not be
        parsed or the engine could not be built.

    Raises:
        FileNotFoundError: If an engine has to be built and ``onnx_file_path``
            does not exist.
    """

    def build_engine():
        """Parse the ONNX file, build a TensorRT engine and save its serialized plan."""
        # Fail early with a clear message instead of a bare open() error later.
        if not os.path.exists(onnx_file_path):
            raise FileNotFoundError(
                "ONNX file {} not found, please export the model to ONNX first.".format(onnx_file_path)
            )

        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            EXPLICIT_BATCH
        ) as network, builder.create_builder_config() as config, trt.OnnxParser(
            network, TRT_LOGGER
        ) as parser, trt.Runtime(TRT_LOGGER) as runtime:
            workspace_bytes = 1 << 32  # 4GB
            # max_workspace_size emits a DeprecationWarning pointing at
            # set_memory_pool_limit; fall back only when the newer API is absent.
            if hasattr(config, "set_memory_pool_limit"):
                config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
            else:
                config.max_workspace_size = workspace_bytes
            builder.max_batch_size = 1

            # Parse model file
            print("Loading ONNX file from path {}...".format(onnx_file_path))
            with open(onnx_file_path, "rb") as model:
                print("Beginning ONNX file parsing")
                if not parser.parse(model.read()):
                    print("ERROR: Failed to parse the ONNX file.")
                    # Report every parser error so the failure can be diagnosed.
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            # NOTE: if the ONNX model was exported with a batch size other than
            # the one wanted here, reshape the input before building, e.g.
            # network.get_input(0).shape = [1, 3, 608, 608]
            print("Completed parsing of ONNX file")

            print("Building an engine from file {}; this may take a while...".format(onnx_file_path))
            plan = builder.build_serialized_network(network, config)
            if plan is None:
                print("ERROR: Failed to build the TensorRT engine.")
                return None
            engine = runtime.deserialize_cuda_engine(plan)
            print("Completed creating Engine")

            # Persist the serialized plan so later runs can skip the build.
            # Skipped when no path was given (the default is an empty string).
            if engine_file_path:
                with open(engine_file_path, "wb") as f:
                    f.write(plan)
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    return build_engine()
def main():
    """Obtain the TensorRT engine for ``model.onnx``, cached as ``model.trt``.

    Delegates to ``get_engine`` and discards the engine it returns.
    """
    # Input ONNX model and the location of its serialized engine.
    get_engine(onnx_file_path="model.onnx", engine_file_path="model.trt")
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
(mypytorch) PS F:\DeepStereo\AppleShow2> python onnx2trt.py
onnx2trt.py:20: DeprecationWarning: Use set_memory_pool_limit instead.
config.max_workspace_size = 1 << 32 # 4GB
Loading ONNX file from path G:\jupyter\Model_Zoo\resources_iter10_modify\crestereo_combined_iter10_240x320.onnx...
Beginning ONNX file parsing
[06/16/2022-16:59:16] [TRT] [W] onnx2trt_utils.cpp:365: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
Completed parsing of ONNX file
Building an engine from file G:\jupyter\Model_Zoo\resources_iter10_modify\crestereo_combined_iter10_240x320.onnx; this may take a while...
[06/16/2022-17:03:52] [TRT] [W] TensorRT was linked against cuBLAS/cuBLAS LT 11.8.0 but loaded cuBLAS/cuBLAS LT 11.3.1
[06/16/2022-17:07:42] [TRT] [W] TensorRT was linked against cuBLAS/cuBLAS LT 11.8.0 but loaded cuBLAS/cuBLAS LT 11.3.1
[06/16/2022-17:07:43] [TRT] [W] TensorRT was linked against cuBLAS/cuBLAS LT 11.8.0 but loaded cuBLAS/cuBLAS LT 11.3.1
Completed creating Engine