以下demo重点说明:
n_estimators // 10 次
scale_pos_weight=3
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score, log_loss, roc_auc_score
from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBClassifier
# Toy data set: four integer feature columns (A-D) followed by the binary
# label column ``y``. Only the first row is a positive example.
data = pd.DataFrame(
    [
        [1, 2, 3, 10, 1],
        [11, 22, 33, 1, 0],
        [111, 222, 333, 1, 0],
        [111, 222, 333, 1, 0],
        [111, 21, 333, 1, 0],
        [111, 90, 333, 1, 0],
        [111, 64, 12, 1, 0],
        [111, 222, 6, 1, 0],
    ],
    columns=['A', 'B', 'C', 'D', 'y'],
)

# The label is the last column; every column before it is a feature.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Gradient-boosted tree classifier used by the rest of the demo.
model = XGBClassifier(
    # Learning task and tree construction.
    objective='binary:logistic',
    booster='gbtree',
    tree_method="hist",
    importance_type='gain',
    n_jobs=4,
    # Boosting schedule.
    n_estimators=100,
    learning_rate=0.01,
    max_depth=6,
    # Regularisation.
    gamma=0.01,
    reg_alpha=0.01,
    reg_lambda=0.01,
    # NOTE(review): with only 8 training rows, min_child_weight=20 may prevent
    # any split from being made - confirm this value is intended for the demo.
    min_child_weight=20,
    # Row / column subsampling per tree.
    subsample=0.9,
    colsample_bytree=0.9,
    # Class re-weighting is left disabled here:
    # scale_pos_weight=3,
)
def custom_eval(preds, dtrain):
    """Score predictions with accuracy, ROC AUC and F1.

    The incoming scores are squashed through a logistic sigmoid and then
    thresholded at 0.5 to obtain hard 0/1 predictions.

    NOTE(review): the sigmoid treats ``preds`` as raw margins. Whether XGBoost
    hands raw or already-transformed scores to a custom metric depends on the
    library version and objective - confirm against the installed version.

    Args:
        preds: Array of model scores, one per evaluated row.
        dtrain: Evaluation data; only its ``get_label()`` method is used.

    Returns:
        A list of ``(metric_name, value)`` pairs in the order
        ``accuracy``, ``auc``, ``f1``.
    """
    probabilities = 1.0 / (1.0 + np.exp(-preds))
    # Vectorised form of "1 if p > 0.5 else 0" for every score.
    hard_predictions = np.where(probabilities > 0.5, 1, 0)
    truth = dtrain.get_label()

    # AUC is computed on the continuous scores, the others on hard labels.
    auc_value = roc_auc_score(truth, probabilities)
    return [
        ('accuracy', accuracy_score(truth, hard_predictions)),
        ('auc', auc_value),
        ('f1', f1_score(truth, hard_predictions)),
    ]
# Fit on the toy data. The training set doubles as the evaluation set, so the
# reported metrics are in-sample.
fit_options = dict(
    eval_set=[(X, y)],
    early_stopping_rounds=10,
    # Built-in alternative to the custom metric:
    # eval_metric=['auc', 'logloss'],
    eval_metric=custom_eval,
    # Optional per-feature sampling weights:
    # feature_weights=[0.1, 0.2, 0.6, 0.1],
    verbose=10,
)
model.fit(X, y, **fit_options)

# Report class probabilities, per-feature importances and the metric history
# recorded during training.
class_probabilities = model.predict_proba(X)
print(class_probabilities)
importances = model.feature_importances_
print(importances)
metric_history = model.evals_result()
print(metric_history)
def random_search_param(train_X, train_y, *, n_iter=50, cv=5, random_state=None):
    """Run a randomized hyper-parameter search for an ``XGBClassifier``.

    A classifier is built from a set of fixed parameters, then
    ``RandomizedSearchCV`` samples ``n_iter`` combinations from the candidate
    lists below and cross-validates each one. The best parameter set and its
    score are printed.

    Args:
        train_X: Training features, passed straight to ``RandomizedSearchCV.fit``.
        train_y: Training labels, passed straight to ``RandomizedSearchCV.fit``.
        n_iter: Number of parameter combinations to sample (default 50).
        cv: Cross-validation setting forwarded to ``RandomizedSearchCV``
            (default 5).
        random_state: Seed forwarded to ``RandomizedSearchCV``; the default
            ``None`` keeps the search non-deterministic.

    Returns:
        The fitted ``RandomizedSearchCV`` object, so callers can read
        ``best_params_``, ``best_score_`` or ``best_estimator_``.
    """
    # Settings shared by every candidate model.
    fixed_params = {
        'objective': 'binary:logistic',
        'booster': 'gbtree',
        'n_jobs': 4,
        'scale_pos_weight': 3,
        'use_label_encoder': False,
    }
    # Candidate values sampled by the search.
    param_distributions = {
        'tree_method': ['hist', 'exact'],
        # NOTE(review): importance_type presumably only changes how
        # feature_importances_ is reported, not the fitted trees - consider
        # dropping it from the search space.
        'importance_type': ['gain', 'weight', 'cover', 'total_gain', 'total_cover'],
        'n_estimators': [100, 200, 300, 350, 400, 500],
        'learning_rate': [0.001, 0.01, 0.015, 0.025, 0.05, 0.1, 0.2],
        'max_depth': [3, 5, 6, 7, 9, 12, 15, 17, 25],
        # The last candidate is 0.2; it was previously written "0,2", which
        # produced the two values 0 and 2 instead.
        'gamma': [0, 0.01, 0.05, 0.1, 0.2],
        'reg_alpha': [0, 0.01, 0.05, 0.1, 0.2],
        'reg_lambda': [0, 0.01, 0.05, 0.1, 0.2],
        'min_child_weight': [3, 10, 15, 20, 30, 50, 60],
        'subsample': [0.8, 0.9, 1],
        'colsample_bytree': [0.8, 0.9, 1],
    }

    base_model = XGBClassifier(**fixed_params)
    search = RandomizedSearchCV(
        base_model,
        param_distributions,
        n_iter=n_iter,
        cv=cv,
        n_jobs=4,
        random_state=random_state,
    )
    search.fit(train_X, train_y)

    print(f'参数的最佳取值:{search.best_params_}')
    print(f'最佳模型得分:{search.best_score_}')
    return search