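# An ensemble model fuses several models into one: the models are stacked layer
# by layer, and their individual predictions are combined into a final prediction.
# Install the dependency first: pip install mlens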
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from mlens.ensemble import SuperLearner
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
def main():
    """Train a two-layer mlens SuperLearner on synthetic data and print its accuracy.

    The first layer holds a k-nearest-neighbours and a random-forest
    classifier; a logistic regression acts as the meta learner that turns
    the first-layer predictions into the final prediction. The script prints
    the data summary collected by the ensemble during fitting, followed by
    the accuracy on the held-out test split.
    """
    # ============== Prepare the data ==============
    # Seed the generator so repeated runs work on the same synthetic dataset
    # (the split and the ensemble below are already seeded).
    x, y = make_classification(
        n_samples=10000, n_classes=4, n_informative=5, random_state=100
    )
    xtrain, xtest, ytrain, ytest = train_test_split(
        x, y, test_size=0.3, random_state=100
    )
    # Fit the scaler on the training split only and reuse it for the test
    # split; fitting on the full dataset would leak test-set statistics
    # (min/max) into training.
    scaler = MinMaxScaler().fit(xtrain)
    xtrain = scaler.transform(xtrain)
    xtest = scaler.transform(xtest)

    # ============== Build the ensemble structure ==============
    ensemble = SuperLearner(scorer=accuracy_score, random_state=0, verbose=2)
    # First layer: the base learners.
    ensemble.add([
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=0),
    ])
    # Meta learner: produces the unified final prediction.
    ensemble.add_meta(LogisticRegression())
    ensemble.fit(xtrain, ytrain)

    # ============== Predict ==============
    preds = ensemble.predict(xtest)
    # Summary data gathered by the ensemble while fitting (uses the scorer).
    print(pd.DataFrame(ensemble.data))
    # accuracy_score expects (y_true, y_pred).
    print("acc:", accuracy_score(ytest, preds))


if __name__ == '__main__':
    main()