bert tensorflow2 serving部署

卞轶

2023-12-01

使用tensorflow2 加载预训练的bert模型进行部署

1 模型准备

1.1 下载预训练的bert模型

本次使用的模型是谷歌提供的中文预训练模型 bert-base-chinese
（下载后保存在本地目录 ./bert-case-chinese/ 下，后文代码中的 model_path 即指向该目录）

1.2 安装transformers

安装transformers 包（pip install transformers），用于加载bert模型

2 模型训练及保存

模型训练：（完整代码见最后）
1）将模型修改为计算图格式，使用tf.function(model.call)

# Instantiate TFBertForSequenceClassification from the files at model_path,
# configured for num_classes labels.
model = TFBertForSequenceClassification.from_pretrained(
    model_path,
    num_labels=num_classes,
)
# Wrap the model's call method in tf.function so it can be traced as a graph.
callback = tf.function(model.call)

2）通过调用get_concrete_function设置模型输入参数

# Declare the model inputs: three int32 tensors of shape [None, 50], named
# input_ids, attention_mask and token_type_ids (in that order).
concrete_function = callback.get_concrete_function(
    [
        tf.TensorSpec([None, 50], tf.int32, name=input_name)
        for input_name in ("input_ids", "attention_mask", "token_type_ids")
    ]
)

模型保存，并设置signatures

# Export the model as a SavedModel and register the traced concrete function
# as its signature.
tf.saved_model.save(model, './tfsevingmodel/', signatures=concrete_function)

# summary() prints the layer table itself and returns None, so it must not be
# wrapped in print() (that would emit an extra "None" line).
model.summary()

查看模型保存情况

 python D:\pythonapp\anacondas\envs\torchenv\Lib\site-packages\tensorflow\python\tools\saved_model_cli.py show --dir tfsevingmodel --all

输出如下所示

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['attention_mask'] tensor_info:
        dtype: DT_INT32
        shape: (-1, 50)
        name: serving_default_attention_mask:0
    inputs['input_ids'] tensor_info:
        dtype: DT_INT32
        shape: (-1, 50)
        name: serving_default_input_ids:0
    inputs['token_type_ids'] tensor_info:
        dtype: DT_INT32
        shape: (-1, 50)
        name: serving_default_token_type_ids:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['logits'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 10)
        name: StatefulPartitionedCall:0
  Method name is: tensorflow/serving/predict

3 模型部署服务

1 docker 拉取tfserving 镜像。
docker pull tensorflow/serving

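2 将上面保存的模型放到某个目录下
我是在windows下训练的模型，将保存得到的SavedModel目录（./tfsevingmodel/ 下的内容）拷贝到了服务器的 /opt/tfserving/model/ 下。
注意：tfserving 要求模型目录下有一层数字版本号子目录，例如 /opt/tfserving/model/1/（其中包含 saved_model.pb 和 variables/）。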

3 将模型目录挂载到tfserving 容器中，启动服务。

docker run -p 8501:8501 --mount type=bind,source=/opt/tfserving/model/,target=/models/model -e MODEL_NAME=model -t tensorflow/serving
4 模型提供的服务请求默认为 http://localhost:8501/v1/models/model:predict

4 模型部署 http请求推理

1 curl 请求

curl -H "Content-Type: application/json" -X POST -d "{\"signature_name\": \"serving_default\",\"instances\":[{\"input_ids\": [1,1159,1100,914,7218,2564,704,1068,3333,4178,7305,4324,3227,5011,6381,3315,3017,5384,102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],\"attention_mask\": [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],\"token_type_ids\": [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}] }"   http://192.168.10.100:8501/v1/models/model:predict

返回结果如下
{
"predictions": [[-2.01432037, -1.96287441, -2.01508093, -0.15862219, 10.0372896, -0.712031305, -1.18103349, -1.21998453, -0.111421183, -1.34767079]
]
}

2 http 请求推理


# Tokenizer loaded from model_path, and the JSON content-type header that is
# sent with every prediction request.
tokenizer = BertTokenizer.from_pretrained(model_path)
headers = {
    "content-type": "application/json",
}
def predict(text, url='http://192.168.10.100:8501/v1/models/model:predict', timeout=10):
    """Classify one text through the TensorFlow Serving REST predict endpoint.

    Uses the module-level ``tokenizer``, ``max_length`` and ``headers``.

    Args:
        text: A single input string.
        url: Predict endpoint of the served model.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The ``predictions`` field of the JSON response; its first element
        holds the scores for ``text``.

    Raises:
        RuntimeError: If the server does not answer with HTTP 200.
    """
    # The request carries exactly max_length tokens per input: shorter texts
    # are padded and longer ones truncated, so the instance shape is constant.
    input_dict = tokenizer(
        text,
        return_tensors='tf',
        max_length=max_length,
        padding='max_length',
        truncation=True,
    )
    # One instance holding the three named inputs of the serving signature.
    instance = {
        name: input_dict[name].numpy().tolist()[0]
        for name in ('input_ids', 'attention_mask', 'token_type_ids')
    }

    data = json.dumps({"signature_name": "serving_default", "instances": [instance]})

    json_response = requests.post(url, data=data, headers=headers, timeout=timeout)
    if json_response.status_code != 200:
        # Surface the server's message instead of failing later with a KeyError.
        raise RuntimeError(
            f"prediction request failed with HTTP {json_response.status_code}: "
            f"{json_response.text}"
        )

    return json_response.json()['predictions']
# Send one sample sentence to the served model and look up the label whose
# score is highest.
text = "上海2010上半年四六级考试报名4月8日前完成"
predictions = predict(text)
# predictions[0] holds the scores of the single submitted instance.
label_to_index = tf.math.argmax(predictions[0]).numpy()
index_to_label = new_label[label_to_index]
# Bare expression: displays the label when run in a notebook/REPL.
index_to_label

输出 ： '教育'

5 完整代码

5.1 模型训练保存


import logging
logging.basicConfig(level=logging.ERROR)
# from transformers import TFBertPreTrainedModel,TFBertMainLayer,BertTokenizer
from transformers import TFBertForSequenceClassification,BertTokenizer
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

def convert_example_to_feature(review):
    """Tokenize one text into fixed-length BERT inputs.

    Tokenization, WordPiece id mapping, insertion of special tokens, padding
    and truncation are all done in a single ``encode_plus`` call on the
    module-level ``tokenizer``, using the module-level ``max_length``.

    Args:
        review: The raw text to encode.

    Returns:
        The tokenizer output for ``review``; callers in this file read its
        ``input_ids``, ``token_type_ids`` and ``attention_mask`` entries.
    """
    return tokenizer.encode_plus(
        review,
        add_special_tokens=True,  # add [CLS], [SEP]
        max_length=max_length,  # max length of the text that can go to BERT
        # `pad_to_max_length=True` is deprecated; this is its replacement.
        padding="max_length",  # add [PAD] tokens up to max_length
        return_attention_mask=True,  # mask so attention ignores pad tokens
        truncation=True,  # cut texts longer than max_length
    )
def map_example_to_dict(input_ids, attention_masks, token_type_ids, label):
    """Pack one encoded example into the ``(features, label)`` pair.

    The features dict uses the keyword names that are fed to
    TFBertForSequenceClassification: ``input_ids``, ``token_type_ids`` and
    ``attention_mask``. The label is passed through unchanged.
    """
    features = dict(
        input_ids=input_ids,
        token_type_ids=token_type_ids,
        attention_mask=attention_masks,
    )
    return features, label

def encode_examples(ds, limit=-1):
    """Encode a DataFrame of labelled texts into a ``tf.data.Dataset``.

    Args:
        ds: pandas DataFrame; each row's ``"text"`` and ``"y"`` entries are
            read.
        limit: If positive, only the first ``limit`` rows are encoded;
            otherwise every row is used.

    Returns:
        A dataset of ``(features, label)`` pairs as produced by
        ``map_example_to_dict``.
    """
    if limit > 0:
        # DataFrame.take() selects rows by positional indices; head() is the
        # call that keeps the first `limit` rows.
        ds = ds.head(limit)

    # Build plain lists first so the final dataset can be created from slices.
    input_ids_list = []
    token_type_ids_list = []
    attention_mask_list = []
    label_list = []

    for _, row in ds.iterrows():
        bert_input = convert_example_to_feature(row["text"])

        input_ids_list.append(bert_input['input_ids'])
        token_type_ids_list.append(bert_input['token_type_ids'])
        attention_mask_list.append(bert_input['attention_mask'])
        label_list.append([row["y"]])

    # Tuple order must match the positional parameters of map_example_to_dict.
    return tf.data.Dataset.from_tensor_slices(
        (input_ids_list, attention_mask_list, token_type_ids_list, label_list)
    ).map(map_example_to_dict)



def split_dataset(df):
    """Split ``df`` into train, validation and test parts, stratified by label.

    10% of the rows are held out first and that hold-out is then split in
    half, giving roughly a 90% / 5% / 5% split. Both splits use a fixed
    ``random_state`` so the result is reproducible.

    Args:
        df: DataFrame with a ``label`` column used for stratification.

    Returns:
        ``(train_set, val_set, test_set)``.
    """
    train_set, holdout = train_test_split(
        df,
        stratify=df['label'],
        test_size=0.1,
        random_state=42,
    )
    val_set, test_set = train_test_split(
        holdout,
        stratify=holdout['label'],
        test_size=0.5,
        random_state=43,
    )
    return train_set, val_set, test_set


# Top-level statement: must start at column 0 (a stray leading space here is
# an invalid dedent after the function body).
data_path = "data.txt"  # path to the data file
# Local directory of the pretrained model; downloading it in advance is
# recommended (https://huggingface.co/bert-base-chinese#).
model_path = "./bert-case-chinese/"

max_length = 50
batch_size = 30
learning_rate = 2e-5
number_of_epochs = 5
num_classes = 10  # number of classes

# read data: tab-separated file without header, columns are text and label
df_raw = pd.read_csv(data_path, sep="\t", header=None, names=["text", "label"])
df_label = pd.DataFrame({"label":["财经","房产","股票","教育","科技","社会","时政","体育","游戏","娱乐"],"y":list(range(10))})
# lookup from class index to label name
new_label = df_label.to_dict()['label']
# attach the numeric class id "y" to every row
df_raw = pd.merge(df_raw, df_label, on="label", how="left")
# split data
train_data, val_data, test_data = split_dataset(df_raw)

# tokenizer
tokenizer = BertTokenizer.from_pretrained(model_path)
# train dataset
ds_train_encoded = encode_examples(train_data).shuffle(10000).batch(batch_size)
# val dataset
ds_val_encoded = encode_examples(val_data).batch(batch_size)
# test dataset
ds_test_encoded = encode_examples(test_data).batch(batch_size)

# model initialization
model = TFBertForSequenceClassification.from_pretrained(model_path, num_labels=num_classes)
# wrap call() in tf.function so it can be traced into a graph function
callback = tf.function(model.call)
# NOTE(review): no compile()/fit() call appears in this listing; the weights
# are restored from an existing checkpoint instead.
model.load_weights("./ckpt/news.ckpt")
# Trace the model for three int32 inputs of shape [None, max_length]; using
# max_length keeps the signature in sync with the tokenizer settings above.
concrete_function = callback.get_concrete_function(
    [
        tf.TensorSpec([None, max_length], tf.int32, name="input_ids"),
        tf.TensorSpec([None, max_length], tf.int32, name="attention_mask"),
        tf.TensorSpec([None, max_length], tf.int32, name="token_type_ids"),
    ]
)
# Export the SavedModel that the deployment step mounts into the container.
tf.saved_model.save(model, './tfsevingmodel/', signatures=concrete_function)

# summary() prints the table itself and returns None; do not wrap it in print().
model.summary()

5.2 模型部署

docker run -p 8501:8501   --mount type=bind,source=/opt/tfserving/model/,target=/models/model   -e MODEL_NAME=model -t tensorflow/serving

5.3 模型推理

from transformers import BertTokenizer
import tensorflow as tf
import pandas as pd
import json
import requests
# Location of the tokenizer files and the sequence length used for requests.
model_path = "./bert-case-chinese/"
max_length = 50

# Table of label names with their class ids, and the id -> name lookup
# derived from it.
df_label = pd.DataFrame(
    {
        "label": ["财经", "房产", "股票", "教育", "科技", "社会", "时政", "体育", "游戏", "娱乐"],
        "y": list(range(10)),
    }
)
new_label = df_label.to_dict()['label']

# Tokenizer loaded from model_path, and the JSON content-type header that is
# sent with every prediction request.
tokenizer = BertTokenizer.from_pretrained(model_path)
headers = {
    "content-type": "application/json",
}
def predict(text, url='http://192.168.10.100:8501/v1/models/model:predict', timeout=10):
    """Classify one text through the TensorFlow Serving REST predict endpoint.

    Uses the module-level ``tokenizer``, ``max_length`` and ``headers``.

    Args:
        text: A single input string.
        url: Predict endpoint of the served model.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The ``predictions`` field of the JSON response; its first element
        holds the scores for ``text``.

    Raises:
        RuntimeError: If the server does not answer with HTTP 200.
    """
    # The request carries exactly max_length tokens per input: shorter texts
    # are padded and longer ones truncated, so the instance shape is constant.
    input_dict = tokenizer(
        text,
        return_tensors='tf',
        max_length=max_length,
        padding='max_length',
        truncation=True,
    )
    # One instance holding the three named inputs of the serving signature.
    instance = {
        name: input_dict[name].numpy().tolist()[0]
        for name in ('input_ids', 'attention_mask', 'token_type_ids')
    }

    data = json.dumps({"signature_name": "serving_default", "instances": [instance]})

    json_response = requests.post(url, data=data, headers=headers, timeout=timeout)
    if json_response.status_code != 200:
        # Surface the server's message instead of failing later with a KeyError.
        raise RuntimeError(
            f"prediction request failed with HTTP {json_response.status_code}: "
            f"{json_response.text}"
        )

    return json_response.json()['predictions']
# Send one sample sentence to the served model and look up the label whose
# score is highest.
text = "上海2010上半年四六级考试报名4月8日前完成"
predictions = predict(text)
# predictions[0] holds the scores of the single submitted instance.
label_to_index = tf.math.argmax(predictions[0]).numpy()
index_to_label = new_label[label_to_index]
# Bare expression: displays the label when run in a notebook/REPL.
index_to_label