对于需要反复调试的程序来说,每次运行都要重复执行读取数据、加载模型、导入外部数据或第三方库等耗时操作。可以用网络服务把这些操作封装成一个类似 API 的接口:一次加载,反复使用。
整体的思路如下:把耗时且固定的操作放进一个 Web 服务(这里使用 FastAPI,也可换别的),调试程序通过 HTTP 请求获取结果。
我们以 tsfresh 为例,这是一个可以提取时间序列特征的库,使用方法可以参考:《python 使用tsfresh进行时间序列特征提取》。不使用服务时,本地直接调用的代码如下:
import pandas as pd
import numpy as np
def convert_to_series(series: pd.Series):
    """Reshape a series into a two-column frame with ``id`` and ``value``.

    Every row gets the constant identifier ``'id'`` in the ``id`` column and
    the matching series value, cast to float, in the ``value`` column. The
    index of ``series`` is not carried over into the result.
    """
    row_count = series.shape[0]
    # Build two rows (identifiers, values) and transpose them into columns.
    stacked_rows = [['id'] * row_count, series.values]
    long_frame = pd.DataFrame(data=stacked_rows, index=['id', "value"]).T
    # Cast explicitly so the value column is float.
    long_frame['value'] = long_frame['value'].astype("float")
    return long_frame
def get_line_features(dataframe: pd.DataFrame):
    """Local variant: compute the features of a curve with tsfresh.

    ``dataframe`` is handed to ``tsfresh.extract_features`` with ``"id"`` as
    the id column. Feature columns that contain NaN are dropped before the
    result is returned.
    """
    # Imported inside the function; this import is the time-consuming step.
    from tsfresh import extract_features

    return extract_features(dataframe, column_id="id").dropna(axis=1)
def main():
    """Extract features locally from 100 random daily values and print the shape."""
    point_count = 100
    series = pd.Series(
        data=np.random.rand(point_count),
        index=pd.date_range(start="20190101", periods=point_count),
    )
    # Local path: reshape the series, then extract the features in-process.
    ext_feature = get_line_features(convert_to_series(series))
    print(ext_feature.shape)


if __name__ == '__main__':
    main()
首先写一个 FastAPI 服务:
from fastapi import FastAPI
import uvicorn
from tsfresh import extract_features
import pandas as pd
# Application object; the route handler below is registered on it.
app = FastAPI()
def get_line_features(data_list: list):
    """Compute the features of a curve given as a plain list of values.

    The values are placed in a frame whose ``id`` column is the constant
    ``'id_1'`` and whose ``value`` column is cast to float. tsfresh then
    extracts the features, and columns containing NaN are dropped.
    """
    # Turn the list into a DataFrame: two stacked rows, transposed to columns.
    sample_count = len(data_list)
    frame = pd.DataFrame(
        data=[['id_1'] * sample_count, data_list],
        index=['id', "value"],
    ).T
    frame['value'] = frame['value'].astype("float")
    # Extract the features with tsfresh.
    return extract_features(frame, column_id="id").dropna(axis=1)
@app.get("/data={data}")
async def data_service(data: str):
    """Serve the extracted features for the values encoded in the request path.

    ``data`` is the text following ``/data=`` in the URL. It must be a Python
    list (or tuple) literal of numbers, for example ``[0.1, 0.2, 0.3]``.

    Returns:
        The feature frame converted with ``DataFrame.to_dict()``.

    Raises:
        HTTPException: status 400 when ``data`` is not a list/tuple literal.
    """
    import ast

    from fastapi import HTTPException

    # SECURITY: this string is controlled by the client. The previous eval()
    # call let any caller execute arbitrary code in the server process.
    # ast.literal_eval only accepts literals, which is all the client sends.
    try:
        data_list = ast.literal_eval(data)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as err:
        raise HTTPException(
            status_code=400, detail="data must be a list literal of numbers"
        ) from err
    if not isinstance(data_list, (list, tuple)):
        raise HTTPException(
            status_code=400, detail="data must be a list literal of numbers"
        )
    feature = get_line_features(data_list)
    return feature.to_dict()
if __name__ == "__main__":
    # Serve on localhost only. The former ``debug=True`` keyword is omitted:
    # recent uvicorn releases removed that option, and passing it makes
    # uvicorn.run() fail with a TypeError before the server starts.
    uvicorn.run(app, host='127.0.0.1', port=8999)
在这个文件中,耗时的 from tsfresh import extract_features 在服务启动时就已经执行,
所以启动 web 服务可能比较慢,但之后的每次调用都不需要再重新加载,会快很多。
调试的程序如下:
import pandas as pd
import numpy as np
def convert_to_series(series: pd.Series):
    """Reshape a series into a two-column frame with ``id`` and ``value``.

    Every row gets the constant identifier ``'id'`` in the ``id`` column and
    the matching series value, cast to float, in the ``value`` column. The
    index of ``series`` is not carried over into the result.
    """
    row_count = series.shape[0]
    # Build two rows (identifiers, values) and transpose them into columns.
    stacked_rows = [['id'] * row_count, series.values]
    long_frame = pd.DataFrame(data=stacked_rows, index=['id', "value"]).T
    # Cast explicitly so the value column is float.
    long_frame['value'] = long_frame['value'].astype("float")
    return long_frame
def get_line_features_service(value_list):
    """Remote variant: fetch the features of a curve from the feature service.

    ``value_list`` is formatted into the request path of the service at
    ``127.0.0.1:8999`` and the JSON response body is parsed into a DataFrame.

    Returns:
        The parsed ``pandas.DataFrame`` for a 200 response, or ``None`` for
        any other non-error status.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status.
    """
    from io import StringIO

    import requests

    url = "http://127.0.0.1:8999/data={}".format(value_list)  # URL to request
    response = requests.get(url)
    # Fail loudly on 4xx/5xx instead of silently returning None, which the
    # caller would only notice later as an AttributeError on the result.
    response.raise_for_status()
    if response.status_code != 200:
        return None
    # read_json expects a path or file-like object; passing the raw JSON
    # string is deprecated in pandas, so wrap the body in StringIO.
    return pd.read_json(StringIO(response.text))
def main():
    """Request features for 100 random daily values from the service and print the shape."""
    point_count = 100
    series = pd.Series(
        data=np.random.rand(point_count),
        index=pd.date_range(start="20190101", periods=point_count),
    )
    # Remote path: send the raw values to the service as a plain list.
    raw_values = series.values.tolist()
    ext_feature = get_line_features_service(raw_values)
    print(ext_feature.shape)


if __name__ == '__main__':
    main()
由于数据转换、加载库等耗时或固定的操作都写在了 web 服务中,主程序看起来也简单了很多。完整代码如下(先是服务端,然后是同时包含本地和远程两种调用方式的调试程序):
from fastapi import FastAPI
import uvicorn
from tsfresh import extract_features
import pandas as pd
# Application object; the route handler below is registered on it.
app = FastAPI()
def get_line_features(data_list: list):
    """Compute the features of a curve given as a plain list of values.

    The values are placed in a frame whose ``id`` column is the constant
    ``'id_1'`` and whose ``value`` column is cast to float. tsfresh then
    extracts the features, and columns containing NaN are dropped.
    """
    # Turn the list into a DataFrame: two stacked rows, transposed to columns.
    sample_count = len(data_list)
    frame = pd.DataFrame(
        data=[['id_1'] * sample_count, data_list],
        index=['id', "value"],
    ).T
    frame['value'] = frame['value'].astype("float")
    # Extract the features with tsfresh.
    return extract_features(frame, column_id="id").dropna(axis=1)
@app.get("/data={data}")
async def data_service(data: str):
    """Serve the extracted features for the values encoded in the request path.

    ``data`` is the text following ``/data=`` in the URL. It must be a Python
    list (or tuple) literal of numbers, for example ``[0.1, 0.2, 0.3]``.

    Returns:
        The feature frame converted with ``DataFrame.to_dict()``.

    Raises:
        HTTPException: status 400 when ``data`` is not a list/tuple literal.
    """
    import ast

    from fastapi import HTTPException

    # SECURITY: this string is controlled by the client. The previous eval()
    # call let any caller execute arbitrary code in the server process.
    # ast.literal_eval only accepts literals, which is all the client sends.
    try:
        data_list = ast.literal_eval(data)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as err:
        raise HTTPException(
            status_code=400, detail="data must be a list literal of numbers"
        ) from err
    if not isinstance(data_list, (list, tuple)):
        raise HTTPException(
            status_code=400, detail="data must be a list literal of numbers"
        )
    feature = get_line_features(data_list)
    return feature.to_dict()
if __name__ == "__main__":
    # Serve on localhost only. The former ``debug=True`` keyword is omitted:
    # recent uvicorn releases removed that option, and passing it makes
    # uvicorn.run() fail with a TypeError before the server starts.
    uvicorn.run(app, host='127.0.0.1', port=8999)
import pandas as pd
import numpy as np
def convert_to_series(series: pd.Series):
    """Reshape a series into a two-column frame with ``id`` and ``value``.

    Every row gets the constant identifier ``'id'`` in the ``id`` column and
    the matching series value, cast to float, in the ``value`` column. The
    index of ``series`` is not carried over into the result.
    """
    row_count = series.shape[0]
    # Build two rows (identifiers, values) and transpose them into columns.
    stacked_rows = [['id'] * row_count, series.values]
    long_frame = pd.DataFrame(data=stacked_rows, index=['id', "value"]).T
    # Cast explicitly so the value column is float.
    long_frame['value'] = long_frame['value'].astype("float")
    return long_frame
def get_line_features(dataframe: pd.DataFrame):
    """Local variant: compute the features of a curve with tsfresh.

    ``dataframe`` is handed to ``tsfresh.extract_features`` with ``"id"`` as
    the id column. Feature columns that contain NaN are dropped before the
    result is returned.
    """
    # tsfresh is imported inside the function rather than at module level.
    from tsfresh import extract_features

    return extract_features(dataframe, column_id="id").dropna(axis=1)
def get_line_features_service(value_list):
    """Remote variant: fetch the features of a curve from the feature service.

    ``value_list`` is formatted into the request path of the service at
    ``127.0.0.1:8999`` and the JSON response body is parsed into a DataFrame.

    Returns:
        The parsed ``pandas.DataFrame`` for a 200 response, or ``None`` for
        any other non-error status.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status.
    """
    from io import StringIO

    import requests

    url = "http://127.0.0.1:8999/data={}".format(value_list)  # URL to request
    response = requests.get(url)
    # Fail loudly on 4xx/5xx instead of silently returning None, which the
    # caller would only notice later as an AttributeError on the result.
    response.raise_for_status()
    if response.status_code != 200:
        return None
    # read_json expects a path or file-like object; passing the raw JSON
    # string is deprecated in pandas, so wrap the body in StringIO.
    return pd.read_json(StringIO(response.text))
def main():
    """Run the feature extraction both locally and through the service.

    The local result is computed first and then replaced by the remote one,
    so only the shape of the remote result is printed.
    """
    point_count = 100
    series = pd.Series(
        data=np.random.rand(point_count),
        index=pd.date_range(start="20190101", periods=point_count),
    )
    # Local path: reshape the series, then extract the features in-process.
    ext_feature = get_line_features(convert_to_series(series))
    # Remote path: send the raw values to the service as a plain list.
    ext_feature = get_line_features_service(series.values.tolist())
    print(ext_feature.shape)


if __name__ == '__main__':
    main()