Main constructor declaration (header file):
template<typename WeakLearnerType = mlpack::perceptron::Perceptron<>,
         typename MatType = arma::mat>
class AdaBoost
{
 public:
  /**
   * Constructor. This runs the AdaBoost.MH algorithm to provide a trained
   * boosting model. This constructor takes an already-initialized weak
   * learner; all other weak learners will learn with the same parameters as
   * the given weak learner.
   *
   * @param data Input data.
   * @param labels Corresponding labels.
   * @param numClasses The number of classes.
   * @param iterations Number of boosting rounds.
   * @param tolerance The tolerance for change in values of rt.
   * @param other Weak learner that has already been initialized.
   */
  AdaBoost(const MatType& data,
           const arma::Row<size_t>& labels,
           const size_t numClasses,
           const WeakLearnerType& other,
           const size_t iterations = 100,
           const double tolerance = 1e-6);
Implementation:
/**
 * Constructor. Currently runs the AdaBoost.MH algorithm.
 *
 * @param data Input data.
 * @param labels Corresponding labels.
 * @param iterations Number of boosting rounds.
 * @param tol Tolerance for termination of AdaBoost.MH.
 * @param other Weak learner, which has been initialized already.
 */
template<typename WeakLearnerType, typename MatType>
AdaBoost<WeakLearnerType, MatType>::AdaBoost(
    const MatType& data,
    const arma::Row<size_t>& labels,
    const size_t numClasses,
    const WeakLearnerType& other,
    const size_t iterations,
    const double tol)
{
  Train(data, labels, numClasses, other, iterations, tol);
}
After the constructor has initialized the necessary members, it simply delegates to the Train function. Note that the weak learning algorithm is passed in as a template parameter, and the learner object given to the constructor serves as a prototype whose parameters are reused for every weak learner trained during boosting, as the sketch below shows.
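For example, capping the inner iterations of every weak learner only requires configuring the prototype. A minimal sketch, not from the source (hypothetical dataset variables; assumes the Perceptron training constructor that takes a maximum iteration count):

// A prototype perceptron capped at 400 inner iterations; each weak learner
// trained during boosting copies this setting.
mlpack::perceptron::Perceptron<> proto(train_dataset, train_labels,
    3 /* numClasses */, 400 /* maxIterations */);
// Boost for at most 50 rounds with a 1e-6 tolerance.
mlpack::adaboost::AdaBoost<> boost(train_dataset, train_labels, 3, proto,
    50, 1e-6);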
Train declaration (header file):
/**
 * Train AdaBoost on the given dataset. This method takes an initialized
 * WeakLearnerType; the parameters for this weak learner will be used to
 * train each of the weak learners during AdaBoost training. Note that this
 * will completely overwrite any model that has already been trained with
 * this object.
 *
 * @param data Dataset to train on.
 * @param labels Labels for each point in the dataset.
 * @param numClasses The number of classes.
 * @param learner Learner to use for training.
 * @param iterations Number of boosting rounds.
 * @param tolerance The tolerance for change in values of rt.
 * @return The upper bound for training error.
 */
double Train(const MatType& data,
             const arma::Row<size_t>& labels,
             const size_t numClasses,
             const WeakLearnerType& learner,
             const size_t iterations = 100,
             const double tolerance = 1e-6);
Implementation:
// Train AdaBoost.
template<typename WeakLearnerType, typename MatType>
double AdaBoost<WeakLearnerType, MatType>::Train(
    const MatType& data,
    const arma::Row<size_t>& labels,
    const size_t numClasses,
    const WeakLearnerType& other,
    const size_t iterations,
    const double tolerance)
{
  // Clear information from previous runs.
  wl.clear();
  alpha.clear();

  this->tolerance = tolerance;
  this->numClasses = numClasses;

  // crt is the cumulative rt value for terminating the optimization when rt
  // is changing by less than the tolerance.
  double rt, crt = 0.0, alphat = 0.0, zt;
  double ztProduct = 1.0;

  // To be used for prediction by the weak learner.
  arma::Row<size_t> predictedLabels(labels.n_cols);

  // Use tempData to modify input data for incorporating weights.
  MatType tempData(data);

  // This matrix is a helper matrix used to calculate the final hypothesis.
  arma::mat sumFinalH = arma::zeros<arma::mat>(numClasses,
      predictedLabels.n_cols);

  // Load the initial weights into a 2-D matrix.
  const double initWeight = 1.0 / double(data.n_cols * numClasses);
  arma::mat D(numClasses, data.n_cols);
  D.fill(initWeight);

  // Weights are stored in this row vector.
  arma::rowvec weights(predictedLabels.n_cols);

  // This is the final hypothesis.
  arma::Row<size_t> finalH(predictedLabels.n_cols);

  // Now, start the boosting rounds.
  for (size_t i = 0; i < iterations; ++i)
  {
    // Initialized to zero in every round. rt is used for calculation of
    // alphat; it is the weighted error.
    // rt = (sum) D(i) y(i) ht(xi)
    rt = 0.0;

    // zt is used for weight normalization.
    zt = 0.0;

    // Build the weight vectors.
    weights = arma::sum(D);

    // Use the existing weak learner to train a new one with new weights.
    WeakLearnerType w(other, tempData, labels, numClasses, weights);
    w.Classify(tempData, predictedLabels);

    // Now from predictedLabels, build ht, the weak hypothesis
    // buildClassificationMatrix(ht, predictedLabels);

    // Now, calculate alpha(t) using ht.
    for (size_t j = 0; j < D.n_cols; ++j) // instead of D, ht
    {
      if (predictedLabels(j) == labels(j))
        rt += arma::accu(D.col(j));
      else
        rt -= arma::accu(D.col(j));
    }

    if ((i > 0) && (std::abs(rt - crt) < tolerance))
      break;

    // Check if the model has converged.
    if (rt >= 1.0)
    {
      // Save the weak learner and terminate.
      alpha.push_back(1.0);
      wl.push_back(w);
      break;
    }

    crt = rt;

    // Our goal is to find alphat which minimizes or approximately minimizes
    // the value of Z as a function of alpha.
    alphat = 0.5 * log((1 + rt) / (1 - rt));

    alpha.push_back(alphat);
    wl.push_back(w);

    // Now start modifying the weights.
    for (size_t j = 0; j < D.n_cols; ++j)
    {
      const double expo = exp(alphat);
      if (predictedLabels(j) == labels(j))
      {
        for (size_t k = 0; k < D.n_rows; ++k)
        {
          // We calculate zt, the normalization constant.
          D(k, j) /= expo;
          zt += D(k, j); // * exp(-1 * alphat * yt(j,k) * ht(j,k));

          // Add to the final hypothesis matrix.
          // sumFinalH(k, j) += (alphat * ht(k, j));
          if (k == labels(j))
            sumFinalH(k, j) += (alphat); // * ht(k, j));
          else
            sumFinalH(k, j) -= (alphat);
        }
      }
      else
      {
        for (size_t k = 0; k < D.n_rows; ++k)
        {
          // We calculate zt, the normalization constant.
          D(k, j) *= expo;
          zt += D(k, j);

          // Add to the final hypothesis matrix.
          if (k == labels(j))
            sumFinalH(k, j) += alphat; // * ht(k, j));
          else
            sumFinalH(k, j) -= alphat;
        }
      }
    }

    // Normalize D.
    D /= zt;

    // Accumulate the value of zt for the Hamming loss bound.
    ztProduct *= zt;
  }

  return ztProduct;
}
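Since Train returns the accumulated product of the $z_t$ values, the training-error bound can be read off directly after training. A hedged usage sketch (assumes the default AdaBoost constructor and an already-loaded dataset):

mlpack::adaboost::AdaBoost<> boost;
const double bound = boost.Train(train_dataset, train_labels, 3,
    mlpack::perceptron::Perceptron<>(), 100, 1e-6);
std::cout << "upper bound on training error: " << bound << std::endl;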
Private member variables:
//! The number of classes in the model.
size_t numClasses;
//! The tolerance for change in rt and when to stop.
double tolerance;
//! The vector of weak learners.
std::vector<WeakLearnerType> wl;
//! The weights corresponding to each weak learner.
std::vector<double> alpha;
Suppose the training dataset is:
$$T = \{ (x_1, y_1), (x_2, y_2), \cdots, (x_N, y_N) \}$$
where the instances $x_i \in \mathcal{X} \subseteq \mathbb{R}^n$ and the labels $y_i \in \mathcal{Y} \subseteq \mathbb{R}^m$. Therefore:
| variable | shape / value |
| --- | --- |
| data | $(n \times N)$ |
| labels | $(1 \times N)$ |
| numClasses | $m$ |
| predictedLabels | $(1 \times N)$ |
| sumFinalH | $(m \times N)$ |
| initWeight | $\frac{1}{N \times m}$ |
| D | $(m \times N)$ |
| weights | $(1 \times N)$ |
| finalH | $(1 \times N)$ |
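To make these shapes concrete, here is a minimal sketch with hypothetical sizes (not from the source); note that mlpack stores datasets column-major, one point per column:

#include <iostream>
#include <mlpack/core.hpp>

int main()
{
  const size_t n = 4, N = 120, m = 3;       // features, points, classes
  arma::mat data(n, N, arma::fill::randu);  // data is (n x N)
  arma::mat D(m, N);
  D.fill(1.0 / double(N * m));              // initWeight = 1 / (N * m)
  arma::rowvec weights = arma::sum(D);      // (1 x N); each entry is 1/N
  std::cout << "weight of first point: " << weights(0) << std::endl;
}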
At initialization, the training data are assumed to have a uniform weight distribution; then iterations boosting rounds are performed:
The weak classifier w classifies the weighted (weights) data, and the coefficient of that classifier w ($\alpha_t$) is computed:
$$r_t = \sum_i^N D.col(i) \ I(w(x_i), y_i)$$

$$I(w(x_i), y_i) = \begin{cases} +1\,, & w(x_i) = y_i \\ -1\,, & w(x_i) \neq y_i \end{cases}$$
Note that the higher the accuracy of the classifier w, the larger $r_t$ becomes. (In the code, the weight of point $i$ is $\mathrm{accu}(D.col(i))$, i.e. the sum of the $m$ per-class entries of that column.)
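A quick check with invented numbers: with the initial uniform weights, a weak learner that classifies 80% of the total weight mass correctly gives

$$r_t = 0.8 - 0.2 = 0.6,$$

while one no better than chance would leave $r_t$ near 0.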
Then the termination condition is checked. If not terminating: crt is updated, and $\alpha_t$ is obtained as:
$$\alpha_t = \dfrac{1}{2} \ln \dfrac{1 + r_t}{1 - r_t}$$
The larger $r_t$ is, the larger $\alpha_t$ becomes; this shows that a classifier with higher accuracy gets a larger coefficient and thus more say in the final hypothesis.
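Continuing the invented numbers, $r_t = 0.6$ yields

$$\alpha_t = \frac{1}{2} \ln \frac{1 + 0.6}{1 - 0.6} = \frac{1}{2} \ln 4 = \ln 2 \approx 0.693,$$

and as $r_t \to 1$ the coefficient diverges, which is why the code handles $r_t \geq 1.0$ as a special case and terminates.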
Next come the weight updates:
$$D(k, j) = \begin{cases} \dfrac{D(k, j)}{\exp(\alpha_t)}\,, & w(x_j) = y_j \\[6pt] D(k, j) \cdot \exp(\alpha_t)\,, & w(x_j) \neq y_j \end{cases}$$
As can be seen, correctly classified points have their weights decreased, while misclassified ones have theirs increased. Meanwhile $z_t = \sum D(k, j)$. Finally, the matrix $D$ is normalized by $z_t$, and ztProduct, the product of the $z_t$ values across rounds, is the upper bound on AdaBoost's training error.
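Continuing the same invented round: $\alpha_t = \ln 2$ gives $\exp(\alpha_t) = 2$, so the entries of $D$ for correct points are halved and those for misclassified points are doubled:

$$z_t = 0.8 / 2 + 0.2 \cdot 2 = 0.8$$

Each round multiplies ztProduct by its $z_t$, and $z_t < 1$ whenever the weak learner does better than chance, so the training-error bound decays geometrically.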
Classify declaration (header file):
/**
 * Classify the given test points.
 *
 * @param test Testing data.
 * @param predictedLabels Vector in which the predicted labels of the test
 *     set will be stored.
 * @param probabilities Matrix to store the predicted class probabilities
 *     for each point in the test set.
 */
void Classify(const MatType& test,
              arma::Row<size_t>& predictedLabels,
              arma::mat& probabilities);
Implementation:
/**
 * Classify the given test points.
 */
template<typename WeakLearnerType, typename MatType>
void AdaBoost<WeakLearnerType, MatType>::Classify(
    const MatType& test,
    arma::Row<size_t>& predictedLabels,
    arma::mat& probabilities)
{
  arma::Row<size_t> tempPredictedLabels(test.n_cols);

  probabilities.zeros(numClasses, test.n_cols);
  predictedLabels.set_size(test.n_cols);

  // Accumulate each weak learner's vote, weighted by its coefficient.
  for (size_t i = 0; i < wl.size(); ++i)
  {
    wl[i].Classify(test, tempPredictedLabels);

    for (size_t j = 0; j < tempPredictedLabels.n_cols; ++j)
      probabilities(tempPredictedLabels(j), j) += alpha[i];
  }

  arma::colvec pRow;
  arma::uword maxIndex = 0;

  // Normalize the votes into probabilities and pick the highest-scoring
  // class for each point.
  for (size_t i = 0; i < predictedLabels.n_cols; ++i)
  {
    probabilities.col(i) /= arma::accu(probabilities.col(i));
    pRow = probabilities.unsafe_col(i);
    pRow.max(maxIndex);
    predictedLabels(i) = maxIndex;
  }
}
The classification function calls each weak classifier in the stored vector in turn to classify the data, accumulating each one's vote weighted by its corresponding coefficient; the class with the largest accumulated weight becomes the prediction.
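Besides the two-argument overload used in the test program below, the three-argument overload shown above also exposes the normalized vote mass per class. A minimal sketch (boost stands for an already-trained model, a hypothetical name):

arma::Row<size_t> labelsOut;
arma::mat probs;
boost.Classify(test_dataset, labelsOut, probs);
// probs is (numClasses x N); each column sums to 1 after normalization.
probs.col(0).print("class probabilities for the first test point:");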
#include <iostream>
#include <mlpack/core.hpp>
#include <mlpack/methods/adaboost/adaboost.hpp>
#include <mlpack/methods/perceptron/perceptron.hpp>

void adaboost_test()
{
  arma::mat train_dataset;
  mlpack::data::Load("../ml_test/data/iris_train.csv", train_dataset);
  arma::Row<size_t> train_labels;
  mlpack::data::Load("../ml_test/data/iris_train_labels.csv", train_labels);

  arma::mat test_dataset;
  mlpack::data::Load("../ml_test/data/iris_test.csv", test_dataset);
  arma::Row<size_t> test_labels;
  mlpack::data::Load("../ml_test/data/iris_test_labels.csv", test_labels);

  // Train on the iris data: 3 classes, with a default-constructed perceptron
  // as the prototype weak learner.
  mlpack::adaboost::AdaBoost<> adaboost(train_dataset, train_labels, 3,
      mlpack::perceptron::Perceptron<>());

  arma::Row<size_t> predictedLabels(test_labels.n_cols);
  adaboost.Classify(test_dataset, predictedLabels);

  std::cout << "Accuracy:\n"
            << static_cast<double>(arma::accu(predictedLabels == test_labels))
                / test_labels.n_cols << std::endl;
}

int main()
{
  adaboost_test();
}
Output:
Accuracy:
0.936508
References:
- AdaBoost
- 《统计学习方法》 (Statistical Learning Methods)