mlpack 是一个C++的机器学习库,它重点在于其扩展性、高速性和易用性。它的目的是让新用户通过简单、一致的API使用机器学习,同时为专业用户提供C++的高性能和最大灵活性。他的性能超出大量类似的机器学习库,如WEKA、Shogun、MATLAB、mlpy及sklearn,适合机器学习算法的工程化
以下代码示例了mlpack 训练随机森林分类模型,并保存、加载网络,预测结果
#include "mlpack/core.hpp"
#include "mlpack/methods/random_forest/random_forest.hpp"
#include "mlpack/methods/decision_tree/random_dimension_select.hpp"
#include "mlpack/core/cv/k_fold_cv.hpp"
#include "mlpack/core/cv/metrics/accuracy.hpp"
#include "mlpack/core/cv/metrics/precision.hpp"
#include "mlpack/core/cv/metrics/recall.hpp"
#include "mlpack/core/cv/metrics/F1.hpp"
using namespace arma;
using namespace mlpack;
using namespace mlpack::tree;
using namespace mlpack::cv;
int main()
{
mat dataset;
bool loaded = mlpack::data::Load("D:/MLPackProject/data/german.csv", dataset);
if (!loaded)
return -1;
Row<size_t> labels;
labels = conv_to<Row<size_t>>::from(dataset.row(dataset.n_rows - 1));
dataset.shed_row(dataset.n_rows - 1);
const size_t numClasses = 2;
const size_t minimumLeafSize = 5;
const size_t numTrees = 10;
RandomForest<GiniGain, RandomDimensionSelect> rf;
rf = RandomForest<GiniGain, RandomDimensionSelect>(dataset, labels,
numClasses, numTrees, minimumLeafSize);
Row<size_t> predictions;
rf.Classify(dataset, predictions);
const size_t correct = arma::accu(predictions == labels);
cout << "\nTraining Accuracy: " << (double(correct) / double(labels.n_elem));
// Instead of training the Random Forest directly, we could also use K-fold cross-validation
//for training, which will give us a measure of performance on a held-out test set. This can give
//us a better estimate of how the model will perform when given new data. We also define which metric
//to use in order to assess the quality of the trained model.
const size_t k = 10;
KFoldCV<RandomForest<GiniGain, RandomDimensionSelect>, Accuracy> cv(k,
dataset, labels, numClasses);
double cvAcc = cv.Evaluate(numTrees, minimumLeafSize);
cout << "\nKFoldCV Accuracy: " << cvAcc;
//To compute other relevant metrics, such as Precision, Recall and F1:
double cvPrecision = Precision<Binary>::Evaluate(rf, dataset, labels);
cout << "\nPrecision: " << cvPrecision;
double cvRecall = Recall<Binary>::Evaluate(rf, dataset, labels);
cout << "\nRecall: " << cvRecall;
double cvF1 = F1<Binary>::Evaluate(rf, dataset, labels);
cout << "\nF1: " << cvF1;
//Saving the model
mlpack::data::Save("mymodel.xml", "model", rf, false);
//Loading the model
mlpack::data::Load("mymodel.xml", "model", rf);
// Create a test sample containing only one point. Because Armadillo is
// column-major, this matrix has one column (one point) and the number of rows
// is equal to the dimensionality of the point (23).
mat sample("2; 12; 2; 13; 1; 2; 2; 1; 3; 24; 3; 1; 1; 1; 1; 1; 0; 1; 0; 1;"
" 0; 0; 0");
mat probabilities;
rf.Classify(sample, predictions, probabilities);
u64 result = predictions.at(0);
cout << "\nClassification result: " << result << " , Probabilities: " <<
probabilities.at(0) << "/" << probabilities.at(1);
}