Main constructor declaration (header file):
template<typename WeakLearnerType = mlpack::perceptron::Perceptron<>,
         typename MatType = arma::mat>
class AdaBoost
{
 public:
  /**
   * Constructor. This runs the AdaBoost.MH algorithm to provide a trained
   * boosting model. This constructor takes an already-initialized weak
   * learner; all other weak learners will learn with the same parameters as
   * the given weak learner.
   *
   * @param data Input data.
   * @param labels Corresponding labels.
   * @param numClasses The number of classes.
   * @param iterations Number of boosting rounds.
   * @param tolerance The tolerance for change in values of rt.
   * @param other Weak learner that has already been initialized.
   */
  AdaBoost(const MatType& data,
           const arma::Row<size_t>& labels,
           const size_t numClasses,
           const WeakLearnerType& other,
           const size_t iterations = 100,
           const double tolerance = 1e-6);
Implementation:
/**
 * Constructor. Currently runs the AdaBoost.MH algorithm.
 *
 * @param data Input data.
 * @param labels Corresponding labels.
 * @param iterations Number of boosting rounds.
 * @param tol Tolerance for termination of AdaBoost.MH.
 * @param other Weak learner, which has been initialized already.
 */
template<typename WeakLearnerType, typename MatType>
AdaBoost<WeakLearnerType, MatType>::AdaBoost(
    const MatType& data,
    const arma::Row<size_t>& labels,
    const size_t numClasses,
    const WeakLearnerType& other,
    const size_t iterations,
    const double tol)
{
  Train(data, labels, numClasses, other, iterations, tol);
}
After the constructor has initialized the necessary members, it simply delegates to the Train function. Note that the weak learning algorithm is passed in as a template parameter, and the learner object given to the constructor serves as a prototype whose parameters are reused for every weak learner trained during boosting, as the sketch below shows.
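For example, capping the inner iterations of every weak learner only requires configuring the prototype. A minimal sketch, not from the source (hypothetical dataset variables; assumes the Perceptron training constructor that takes a maximum iteration count):

// A prototype perceptron capped at 400 inner iterations; each weak learner
// trained during boosting copies this setting.
mlpack::perceptron::Perceptron<> proto(train_dataset, train_labels,
    3 /* numClasses */, 400 /* maxIterations */);
// Boost for at most 50 rounds with a 1e-6 tolerance.
mlpack::adaboost::AdaBoost<> boost(train_dataset, train_labels, 3, proto,
    50, 1e-6);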
Train declaration (header file):
/**
 * Train AdaBoost on the given dataset. This method takes an initialized
 * WeakLearnerType; the parameters for this weak learner will be used to
 * train each of the weak learners during AdaBoost training. Note that this
 * will completely overwrite any model that has already been trained with
 * this object.
 *
 * @param data Dataset to train on.
 * @param labels Labels for each point in the dataset.
 * @param numClasses The number of classes.
 * @param learner Learner to use for training.
 * @param iterations Number of boosting rounds.
 * @param tolerance The tolerance for change in values of rt.
 * @return The upper bound for training error.
 */
double Train(const MatType& data,
             const arma::Row<size_t>& labels,
             const size_t numClasses,
             const WeakLearnerType& learner,
             const size_t iterations = 100,
             const double tolerance = 1e-6);
Implementation:
// Train AdaBoost.
template<typename WeakLearnerType, typename MatType>
double AdaBoost<WeakLearnerType, MatType>::Train(
    const MatType& data,
    const arma::Row<size_t>& labels,
    const size_t numClasses,
    const WeakLearnerType& other,
    const size_t iterations,
    const double tolerance)
{
  // Clear information from previous runs.
  wl.clear();
  alpha.clear();

  this->tolerance = tolerance;
  this->numClasses = numClasses;

  // crt is the cumulative rt value for terminating the optimization when rt
  // is changing by less than the tolerance.
  double rt, crt = 0.0, alphat = 0.0, zt;
  double ztProduct = 1.0;

  // To be used for prediction by the weak learner.
  arma::Row<size_t> predictedLabels(labels.n_cols);

  // Use tempData to modify input data for incorporating weights.
  MatType tempData(data);

  // This matrix is a helper matrix used to calculate the final hypothesis.
  arma::mat sumFinalH = arma::zeros<arma::mat>(numClasses,
      predictedLabels.n_cols);

  // Load the initial weights into a 2-D matrix.
  const double initWeight = 1.0 / double(data.n_cols * numClasses);
  arma::mat D(numClasses, data.n_cols);
  D.fill(initWeight);

  // Weights are stored in this row vector.
  arma::rowvec weights(predictedLabels.n_cols);

  // This is the final hypothesis.
  arma::Row<size_t> finalH(predictedLabels.n_cols);

  // Now, start the boosting rounds.
  for (size_t i = 0; i < iterations; ++i)
  {
    // Initialized to zero in every round. rt is used for calculation of
    // alphat; it is the weighted error.
    // rt = (sum) D(i) y(i) ht(xi)
    rt = 0.0;

    // zt is used for weight normalization.
    zt = 0.0;

    // Build the weight vectors.
    weights = arma::sum(D);

    // Use the existing weak learner to train a new one with new weights.
    WeakLearnerType w(other, tempData, labels, numClasses, weights);
    w.Classify(tempData, predictedLabels);

    // Now from predictedLabels, build ht, the weak hypothesis
    // buildClassificationMatrix(ht, predictedLabels);

    // Now, calculate alpha(t) using ht.
    for (size_t j = 0; j < D.n_cols; ++j) // instead of D, ht
    {
      if (predictedLabels(j) == labels(j))
        rt += arma::accu(D.col(j));
      else
        rt -= arma::accu(D.col(j));
    }

    if ((i > 0) && (std::abs(rt - crt) < tolerance))
      break;

    // Check if the model has converged.
    if (rt >= 1.0)
    {
      // Save the weak learner and terminate.
      alpha.push_back(1.0);
      wl.push_back(w);
      break;
    }

    crt = rt;

    // Our goal is to find alphat which minimizes or approximately minimizes
    // the value of Z as a function of alpha.
    alphat = 0.5 * log((1 + rt) / (1 - rt));

    alpha.push_back(alphat);
    wl.push_back(w);

    // Now start modifying the weights.
    for (size_t j = 0; j < D.n_cols; ++j)
    {
      const double expo = exp(alphat);
      if (predictedLabels(j) == labels(j))
      {
        for (size_t k = 0; k < D.n_rows; ++k)
        {
          // We calculate zt, the normalization constant.
          D(k, j) /= expo;
          zt += D(k, j); // * exp(-1 * alphat * yt(j,k) * ht(j,k));

          // Add to the final hypothesis matrix.
          // sumFinalH(k, j) += (alphat * ht(k, j));
          if (k == labels(j))
            sumFinalH(k, j) += (alphat); // * ht(k, j));
          else
            sumFinalH(k, j) -= (alphat);
        }
      }
      else
      {
        for (size_t k = 0; k < D.n_rows; ++k)
        {
          // We calculate zt, the normalization constant.
          D(k, j) *= expo;
          zt += D(k, j);

          // Add to the final hypothesis matrix.
          if (k == labels(j))
            sumFinalH(k, j) += alphat; // * ht(k, j));
          else
            sumFinalH(k, j) -= alphat;
        }
      }
    }

    // Normalize D.
    D /= zt;

    // Accumulate the value of zt for the Hamming loss bound.
    ztProduct *= zt;
  }

  return ztProduct;
}
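Since Train returns the accumulated product of the $z_t$ values, the training-error bound can be read off directly after training. A hedged usage sketch (assumes the default AdaBoost constructor and an already-loaded dataset):

mlpack::adaboost::AdaBoost<> boost;
const double bound = boost.Train(train_dataset, train_labels, 3,
    mlpack::perceptron::Perceptron<>(), 100, 1e-6);
std::cout << "upper bound on training error: " << bound << std::endl;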
Private member variables:
//! The number of classes in the model.
size_t numClasses;
//! The tolerance for change in rt and when to stop.
double tolerance;
//! The vector of weak learners.
std::vector<WeakLearnerType> wl;
//! The weights corresponding to each weak learner.
std::vector<double> alpha;
Suppose the training dataset is:
$$T = \{ (x_1, y_1), (x_2, y_2), \cdots, (x_N, y_N) \}$$
where the instances $x_i \in \mathcal{X} \subseteq \mathbb{R}^n$ and the labels $y_i \in \mathcal{Y} \subseteq \mathbb{R}^m$. Therefore:
| variable | shape / value |
| --- | --- |
| data | $(n \times N)$ |
| labels | $(1 \times N)$ |
| numClasses | $m$ |
| predictedLabels | $(1 \times N)$ |
| sumFinalH | $(m \times N)$ |
| initWeight | $\frac{1}{N \times m}$ |
| D | $(m \times N)$ |
| weights | $(1 \times N)$ |
| finalH | $(1 \times N)$ |
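To make these shapes concrete, here is a minimal sketch with hypothetical sizes (not from the source); note that mlpack stores datasets column-major, one point per column:

#include <iostream>
#include <mlpack/core.hpp>

int main()
{
  const size_t n = 4, N = 120, m = 3;       // features, points, classes
  arma::mat data(n, N, arma::fill::randu);  // data is (n x N)
  arma::mat D(m, N);
  D.fill(1.0 / double(N * m));              // initWeight = 1 / (N * m)
  arma::rowvec weights = arma::sum(D);      // (1 x N); each entry is 1/N
  std::cout << "weight of first point: " << weights(0) << std::endl;
}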
At initialization, the training data are assumed to have a uniform weight distribution; then iterations boosting rounds are performed:
The weak classifier w classifies the weighted (weights) data, and the coefficient of that classifier w ($\alpha_t$) is computed:
$$r_t = \sum_i^N D.col(i) \ I(w(x_i), y_i)$$

$$I(w(x_i), y_i) = \begin{cases} +1\,, & w(x_i) = y_i \\ -1\,, & w(x_i) \neq y_i \end{cases}$$
Note that the higher the accuracy of the classifier w, the larger $r_t$ becomes. (In the code, the weight of point $i$ is $\mathrm{accu}(D.col(i))$, i.e. the sum of the $m$ per-class entries of that column.)
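A quick check with invented numbers: with the initial uniform weights, a weak learner that classifies 80% of the total weight mass correctly gives

$$r_t = 0.8 - 0.2 = 0.6,$$

while one no better than chance would leave $r_t$ near 0.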
Then the termination condition is checked. If not terminating: crt is updated, and $\alpha_t$ is obtained as:
$$\alpha_t = \dfrac{1}{2} \ln \dfrac{1 + r_t}{1 - r_t}$$
The larger $r_t$ is, the larger $\alpha_t$ becomes; this shows that a classifier with higher accuracy gets a larger coefficient and thus more say in the final hypothesis.
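Continuing the invented numbers, $r_t = 0.6$ yields

$$\alpha_t = \frac{1}{2} \ln \frac{1 + 0.6}{1 - 0.6} = \frac{1}{2} \ln 4 = \ln 2 \approx 0.693,$$

and as $r_t \to 1$ the coefficient diverges, which is why the code handles $r_t \geq 1.0$ as a special case and terminates.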
Next come the weight updates:
$$D(k, j) = \begin{cases} \dfrac{D(k, j)}{\exp(\alpha_t)}\,, & w(x_j) = y_j \\[6pt] D(k, j) \cdot \exp(\alpha_t)\,, & w(x_j) \neq y_j \end{cases}$$
As can be seen, correctly classified points have their weights decreased, while misclassified ones have theirs increased. Meanwhile $z_t = \sum D(k, j)$. Finally, the matrix $D$ is normalized by $z_t$, and ztProduct, the product of the $z_t$ values across rounds, is the upper bound on AdaBoost's training error.
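Continuing the same invented round: $\alpha_t = \ln 2$ gives $\exp(\alpha_t) = 2$, so the entries of $D$ for correct points are halved and those for misclassified points are doubled:

$$z_t = 0.8 / 2 + 0.2 \cdot 2 = 0.8$$

Each round multiplies ztProduct by its $z_t$, and $z_t < 1$ whenever the weak learner does better than chance, so the training-error bound decays geometrically.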
Classify declaration (header file):
/**
 * Classify the given test points.
 *
 * @param test Testing data.
 * @param predictedLabels Vector in which the predicted labels of the test
 *     set will be stored.
 * @param probabilities Matrix to store the predicted class probabilities
 *     for each point in the test set.
 */
void Classify(const MatType& test,
              arma::Row<size_t>& predictedLabels,
              arma::mat& probabilities);
Implementation:
/**
 * Classify the given test points.
 */
template<typename WeakLearnerType, typename MatType>
void AdaBoost<WeakLearnerType, MatType>::Classify(
    const MatType& test,
    arma::Row<size_t>& predictedLabels,
    arma::mat& probabilities)
{
  arma::Row<size_t> tempPredictedLabels(test.n_cols);

  probabilities.zeros(numClasses, test.n_cols);
  predictedLabels.set_size(test.n_cols);

  // Accumulate each weak learner's vote, weighted by its coefficient.
  for (size_t i = 0; i < wl.size(); ++i)
  {
    wl[i].Classify(test, tempPredictedLabels);

    for (size_t j = 0; j < tempPredictedLabels.n_cols; ++j)
      probabilities(tempPredictedLabels(j), j) += alpha[i];
  }

  arma::colvec pRow;
  arma::uword maxIndex = 0;

  // Normalize the votes into probabilities and pick the highest-scoring
  // class for each point.
  for (size_t i = 0; i < predictedLabels.n_cols; ++i)
  {
    probabilities.col(i) /= arma::accu(probabilities.col(i));
    pRow = probabilities.unsafe_col(i);
    pRow.max(maxIndex);
    predictedLabels(i) = maxIndex;
  }
}
The classification function calls each weak classifier in the stored vector in turn to classify the data, accumulating each one's vote weighted by its corresponding coefficient; the class with the largest accumulated weight becomes the prediction.
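Besides the two-argument overload used in the test program below, the three-argument overload shown above also exposes the normalized vote mass per class. A minimal sketch (boost stands for an already-trained model, a hypothetical name):

arma::Row<size_t> labelsOut;
arma::mat probs;
boost.Classify(test_dataset, labelsOut, probs);
// probs is (numClasses x N); each column sums to 1 after normalization.
probs.col(0).print("class probabilities for the first test point:");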
#include <iostream>
#include <mlpack/core.hpp>
#include <mlpack/methods/adaboost/adaboost.hpp>
#include <mlpack/methods/perceptron/perceptron.hpp>

void adaboost_test()
{
  arma::mat train_dataset;
  mlpack::data::Load("../ml_test/data/iris_train.csv", train_dataset);
  arma::Row<size_t> train_labels;
  mlpack::data::Load("../ml_test/data/iris_train_labels.csv", train_labels);

  arma::mat test_dataset;
  mlpack::data::Load("../ml_test/data/iris_test.csv", test_dataset);
  arma::Row<size_t> test_labels;
  mlpack::data::Load("../ml_test/data/iris_test_labels.csv", test_labels);

  // Train on the iris data: 3 classes, with a default-constructed perceptron
  // as the prototype weak learner.
  mlpack::adaboost::AdaBoost<> adaboost(train_dataset, train_labels, 3,
      mlpack::perceptron::Perceptron<>());

  arma::Row<size_t> predictedLabels(test_labels.n_cols);
  adaboost.Classify(test_dataset, predictedLabels);

  std::cout << "Accuracy:\n"
            << static_cast<double>(arma::accu(predictedLabels == test_labels))
                / test_labels.n_cols << std::endl;
}

int main()
{
  adaboost_test();
}
Output:
Accuracy:
0.936508
References:
- AdaBoost
- 《统计学习方法》 (Statistical Learning Methods)