1. model pipeline 拆解
# -*- coding: utf-8 -*-
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn import metrics
# File locations referenced by the script (how they are read/written is not
# visible in this part of the file).
data_path = "data.txt"
model_file_path = "model.txt"

# Name of the target column -- presumably a binary "lost within the next
# 30 days" flag; confirm against the dataset.
LABEL = "is_later_30d_loss"

# Column names treated as model features.
feature_list = [
    "chat_7d_cnt",
    "chat_cnt_self_expression",
]

# Column names presumably excluded from the training matrix (identifier,
# random split key, short-term retention flag) -- verify against the loader.
drop_cols = [
    "uid",
    "random",
    "2d_retention",
]
# Hyper-parameters for the XGBoost booster (presumably handed to the training
# call further down the script).
params = {
    'booster': 'gbtree',              # tree-based booster
    'objective': 'binary:logistic',   # binary classification, outputs probabilities
    'eval_metric': 'auc',             # evaluate with area under the ROC curve
    'max_depth': 10,                  # maximum depth of each tree
    'lambda': 10,                     # L2 regularisation term on leaf weights
    'subsample': 0.85,                # fraction of rows sampled per boosting round
    'colsample_bytree': 0.85,         # fraction of columns sampled per tree
    'min_child_weight': 2,            # minimum sum of instance weight needed in a child
    'eta': 0.1,                       # learning rate (step-size shrinkage)
    'seed': 0,                        # fixed random seed for reproducibility
    'nthread': 8,                     # number of parallel threads
    # 'silent' was deprecated and later removed from XGBoost; current releases
    # ignore it and warn about an unused parameter. 'verbosity': 0 is the
    # supported way to request silent training.
    'verbosity': 0,
}
if __name__ == "__main__":
X_train, X_test, y_train, y_test = get_dataset(data_pa