当前位置: 首页 > 工具软件 > minimal-light > 使用案例 >

lightgbm java_搭建基于 java + LightGBM 线上实时预测系统

高慈
2023-12-01

# Task type; supports train and predict.

task = train

# Boosting type; only gbdt is supported for now. Aliases: boosting, boost

boosting_type = gbdt

# Application (objective) type; the following applications are supported:

#   regression , regression task

#   binary , binary classification task

#   lambdarank , lambdarank task

# Aliases: application, app

objective = regression

# Evaluation metrics; multiple metrics are supported, delimited by ','. Supported metrics:

#   l1

#   l2 , default metric for regression

#   ndcg , default metric for lambdarank

#   auc

#   binary_logloss , default metric for binary

#   binary_error

metric = l2

# Frequency of metric output
# NOTE(review): presumably measured in iterations — confirm against the LightGBM docs.

metric_freq = 1

# Set to true to also output the metric for the training data. Aliases: training_metric, train_metric

is_training_metric = true

# Number of bins used to bucket feature values; 255 is the recommended setting: it saves memory and still gives good accuracy.

max_bin = 255

# Training data

# If a weight file exists, it should be named after the data file with a ".weight" suffix (e.g. "regression.train.weight")
# NOTE(review): TRAIN_FILE_PLACEHOLDER looks like a token to be replaced with the real path before use — confirm.

# Aliases: train_data, train

data = TRAIN_FILE_PLACEHOLDER

# Validation data; multiple validation files are supported, separated by ','

# If a weight file exists, it should be named after the data file with a ".weight" suffix (e.g. "regression.test.weight")
# NOTE(review): VALID_FILE_PLACEHOLDER looks like a token to be replaced with the real path before use — confirm.

# Aliases: valid, test, test_data

valid_data = VALID_FILE_PLACEHOLDER

# Number of trees (boosting iterations). Aliases: num_tree, num_iteration, num_iterations, num_round, num_rounds

num_trees = 100

# Shrinkage (learning) rate. Alias: shrinkage_rate

learning_rate = 0.05

# Number of leaves in one tree. Alias: num_leaf

num_leaves = 31

# Type of tree learner; the following types are supported:

#   serial , single-machine version

#   feature , use feature-parallel training

#   data , use data-parallel training

#   voting , use voting-based parallel training

# Alias: tree

tree_learner = serial

# Number of threads for multi-threading. One thread uses one CPU; defaults to the number of CPUs.
# Left commented out here, so the default applies.

# num_threads = 8

# Feature sub-sampling: randomly selects 90% of the features to train on at each iteration

# Alias: sub_feature

feature_fraction = 0.9

# Bagging (data sub-sampling): performs bagging every 5 iterations

bagging_freq = 5

# Bagging fraction: randomly selects 80% of the data when bagging

# Alias: sub_row

bagging_fraction = 0.8

# Minimum number of data points in one leaf; use this to deal with over-fitting

# Aliases: min_data_per_leaf, min_data

min_data_in_leaf = 100

# Minimum sum of hessians in one leaf; use this to deal with over-fitting

min_sum_hessian_in_leaf = 5.0

# Sparse-feature handling: saves memory and runs faster on sparse features. Alias: is_sparse

is_enable_sparse = true

# Set to true when the data is bigger than memory; otherwise leave it false for faster loading

# Aliases: two_round_loading, two_round

use_two_round_loading = false

# Set to true to save the data to a binary file; the application will then auto-load from that binary file next time

# Aliases: is_save_binary, save_binary

is_save_binary_file = false

# Output model file

output_model = LightGBM_model.txt

# Continued training from an already-trained gbdt model is supported (disabled here)

# input_model= trained_model.txt

# Output prediction file for the predict task (disabled here; task is train)

# output_result= prediction.txt

# Continued training from an initial score file is supported (disabled here)

# input_init_score= init_score.txt

# Number of machines used in parallel training. Alias: num_machine
# NOTE(review): with num_machines = 1 and tree_learner = serial, the port and machine-list
# settings below are presumably unused — confirm before relying on them.

num_machines = 1

# Local listening port used in parallel training. Alias: local_port

local_listen_port = 12400

# File listing the machines used in parallel training. Alias: mlist

machine_list_file = mlist.txt

 类似资料: