# Convert a PaddlePaddle model to ONNX and compare ONNX Runtime inference with Paddle
# paddlepaddle==2.2.1
# paddlenlp==2.3.4
# onnx==1.12.0
# onnxruntime==1.12.1
import paddle
from paddlenlp.transformers import AutoModelForMaskedLM
import onnxruntime
import numpy as np
import time
# Load the pretrained ERNIE 3.0 masked-LM and put it in evaluation mode
# (eval() disables dropout etc.; it does not turn off autograd by itself).
model_for_mask_lm = AutoModelForMaskedLM.from_pretrained('ernie-3.0-base-zh')
model_for_mask_lm.eval()

# Input signature used for the ONNX export: batch of 1, variable sequence
# length, int32 token ids named 'input_ids'.
input_spec = paddle.static.InputSpec(shape=[1, None], dtype='int32', name='input_ids')

# One-off export step that produces ernie_mask_lm.onnx; uncomment to (re)generate
# the file before running the inference below.
# paddle.onnx.export(model_for_mask_lm, 'ernie_mask_lm', input_spec=[input_spec], opset_version=12, enable_onnx_checker=True)

# Inference: providers are listed in priority order.
session = onnxruntime.InferenceSession(
    'ernie_mask_lm.onnx',
    providers=[
        'TensorrtExecutionProvider',
        'CUDAExecutionProvider',
        'CPUExecutionProvider',
    ],
)

# The dtype is pinned to int32 to match the exported input spec above. Without
# it NumPy picks the platform default integer (int64 on Linux/macOS), which the
# ONNX graph rejects as an input type mismatch.
x = np.array([[1, 234, 2345, 234, 2345, 2]], dtype=np.int32)
x_pd = paddle.to_tensor(x)

# NOTE: each timing below covers a single, un-warmed call, so it includes any
# first-run initialisation cost of the respective runtime.
# perf_counter() is monotonic and high-resolution, unlike wall-clock time.time().
start = time.perf_counter()
inputs = {session.get_inputs()[0].name: x}
outs = session.run(None, inputs)[0]
print('onnx:', time.perf_counter() - start)
print(outs[0, 0, :5])

start2 = time.perf_counter()
# no_grad() avoids recording the autograd graph for a pure inference pass.
with paddle.no_grad():
    outs2 = model_for_mask_lm(x_pd)
print('pd:', time.perf_counter() - start2)
print(outs2[0, 0, :5].numpy())
"""
Sample output:
onnx: 0.022006750106811523
[-20.545616 -29.175095 -12.939949 -18.544882 5.578158]
pd: 0.20505905151367188
[-20.545612 -29.175083 -12.939948 -18.544868 5.5781507]
"""