当前位置: 首页 > 工具软件 > Light 4J > 使用案例 >

如何在C++程序中调用LightGBM (How to use LightGBM in a C++ program)

仉运乾
2023-12-01

本文作者为tieying zhang,有任何问题请联系zhangtiey@gmail.com

Lightgbm以轻量著称,所以在实际的C++程序中,常常需要使用。但是官方文档并没有介绍如何在C++中调用lightgbm接口,也没有任何例子可供参考,网上的文档也基本没有。这篇文章中我介绍下如何在C++中调用lightgbm。有任何问题请联系zhangtiey@gmail.com

具体步骤如下:

首先需要下载lightgbm的源码包,从官网下载即可。官网也给出了如何编译,但是最后一定要sudo make install(这个官网没有给出)。

C++调用的代码片段如下。首先load已经train好的model(以txt的形式存在磁盘上),之后用该模型进行inference,需要预测的数据可以是文件形式直接指定目录,也可以直接多行数据塞给模型。

编译C++文件:g++ -g -Wall -std=c++11 test.cpp -l_lightgbm 。注意,这里链接了 -l_lightgbm,对应的共享库 lib_lightgbm.so 是上面 make install 安装到 /usr/local/lib 下的。如果运行时找不到该库,可以先用 whereis 查看它的位置,再把相应目录加入库搜索路径,例如:export LD_LIBRARY_PATH=/lib:/usr/lib:/usr/local/lib

#include <LightGBM/c_api.h>

#include <cstdint>

#include <iostream>

#include <string>

#include <vector>

std::string predict(std::string data)

{

std::string pred_result = "";

int temp;

int p = 1;

BoosterHandle handle;

// load model

temp = LGBM_BoosterCreateFromModelfile("test_model1.txt", &p, &handle);

std::cout <

// file data

const char* para = "None";

int res = LGBM_BoosterPredictForFile(handle, "test_data.csv", 0, C_API_PREDICT_NORMAL, 0, para, "result");

std::cout << "file predict result is " << res << std::endl;

// row data

std::vector row(40, 0);

void* in_p = static_cast(row.data());

std::vector out(1, 0);

double* out_result = static_cast(out.data());

int64_t out_len;

res = LGBM_BoosterPredictForMat(handle, in_p, C_API_DTYPE_FLOAT32, 1, 40, 1, C_API_PREDICT_NORMAL, 50, "None", &out_len, out_result);

std::cout << "row predict return is " << res << std::endl;

std::cout << "row predict result size is " << out.size() << " value is " << out[0] << std::endl;

return pred_result;

/*I know the above return statement is completely insignificant. But i wanted to use the loaded model to predict the data points further.*/

}

// Program entry point: runs the demo prediction once, then reports completion.
int main() {
    // predict() hands back a string, but nothing here consumes it.
    const std::string ignored_result = predict("hahaha");
    static_cast<void>(ignored_result);

    std::cout << "Ok complete!" << std::endl;
    return 0;
}

遇到的问题汇总:

问题:运行时报错 lib_lightgbm.so: cannot open shared object file: No such file or directory

解决办法:把 lib_lightgbm.so 所在目录加入库搜索路径,例如 export LD_LIBRARY_PATH=/lib:/usr/lib:/usr/local/lib

代码参照

data_size_t定义在include/LightGBM/meta.h:

typedef int32_t data_size_t;

用C++解析输入文件可以借鉴已有代码:在application/predictor.hpp中。注意比较重要的是 TextReader<data_size_t> predict_data_reader(data_filename, header),它使用了utils目录下的utils/text_reader.h

真正的predict函数也在application/predictor.hpp里:

/*!

brief predicting on data, then saving result to disk

param data_filename Filename of data

param result_filename Filename of output result

*/

void Predict(const char* data_filename, const char* result_filename, bool header) {

auto writer = VirtualFileWriter::Make(result_filename);

if (!writer->Init()) {

Log::Fatal("Prediction results file %s cannot be found", result_filename);

}

auto parser = std::unique_ptr(Parser::CreateParser(data_filename, header, boosting_->MaxFeatureIdx() + 1, boosting_->LabelIdx()));

if (parser == nullptr) {

Log::Fatal("Could not recognize the data format of data file %s", data_filename);

}

TextReader predict_data_reader(data_filename, header);

std::unordered_map feature_names_map_;

bool need_adjust = false;

if (header) {

std::string first_line = predict_data_reader.first_line();

std::vector<:string> header_words = Common::Split(first_line.c_str(), "\t,");

header_words.erase(header_words.begin() + boosting_->LabelIdx());

for (int i = 0; i < static_cast(header_words.size()); ++i) {

for (int j = 0; j < static_cast(boosting_->FeatureNames().size()); ++j) {

if (header_words[i] == boosting_->FeatureNames()[j]) {

feature_names_map_[i] = j;

break;

}

}

}

for (auto s : feature_names_map_) {

if (s.first != s.second) {

need_adjust = true;

break;

}

}

}

// function for parse data

std::function>*)> parser_fun;

double tmp_label;

parser_fun = [&]

(const char* buffer, std::vector<:pair double>>* feature) {

parser->ParseOneLine(buffer, feature, &tmp_label);

if (need_adjust) {

int i = 0, j = static_cast(feature->size());

while (i < j) {

if (feature_names_map_.find((*feature)[i].first) != feature_names_map_.end()) {

(*feature)[i].first = feature_names_map_[(*feature)[i].first];

++i;

} else {

//move the non-used features to the end of the feature vector

std::swap((*feature)[i], (*feature)[--j]);

}

}

feature->resize(i);

}

};

std::function&)> process_fun = [&]

(data_size_t, const std::vector<:string>& lines) {

std::vector<:pair double>> oneline_features;

std::vector<:string> result_to_write(lines.size());

OMP_INIT_EX();

#pragma omp parallel for schedule(static) firstprivate(oneline_features)

for (data_size_t i = 0; i < static_cast(lines.size()); ++i) {

OMP_LOOP_EX_BEGIN();

oneline_features.clear();

// parser

parser_fun(lines[i].c_str(), &oneline_features);

// predict

std::vector result(num_pred_one_row_);

predict_fun_(oneline_features, result.data());

auto str_result = Common::Join(result, "\t");

result_to_write[i] = str_result;

OMP_LOOP_EX_END();

}

OMP_THROW_EX();

for (data_size_t i = 0; i < static_cast(result_to_write.size()); ++i) {

writer->Write(result_to_write[i].c_str(), result_to_write[i].size());

writer->Write("\n", 1);

}

};

predict_data_reader.ReadAllAndProcessParallel(process_fun);

}

 类似资料: