More than 3 years have passed since last update.

LightGBMのObjectiveをpythonで再現する

Last updated at 2020-07-07Posted at 2020-07-07

kaggleで人気のライブラリのLightGBMは最小化したい目的関数の勾配を与えることでいい感じに最適化してくれるのが特徴の一つです。
この目的関数を適切に選ぶことは良いモデルを作るためのコツの一つで、デフォルトでもたくさんのobjectiveが実装されていますが、LightGBMではpythonで勾配を計算する関数を渡すことができます。この記事ではLightGBMの公式実装を参考にしながらpythonで同等のobjectiveを作ることでどのような実装がされているか紹介していきたいと思います。

前提条件

この記事では回帰/二クラス分類で、lgb.trainにlgb.Datasetを渡して学習を行う場合を想定しています。多クラス分類のobjectiveを作る際は俵さんの記事(LightGBM で強引に Multi-Task(は???) Regression を行う)あたりを読むと分かりやすいと思います。また、gradとhessを求める理由はよく分かっていないのでその辺りは他の資料をあたってください。

公式の挙動を真似する

objective = "l2"　の時

本題に入ります。lightGBMはコア部分は高速化のためC++で実装されているのでObjectiveの部分もC++で書かれています。objective="l2"の時の挙動について該当箇所のコード(https://github.com/microsoft/LightGBM/blob/master/src/objective/regression_objective.hpp) を読んでみましょう。勾配を計算する部分はGetGradients()で実装されています。

github.com/microsoft/LightGBM/blob/master/src/objective/regression_objective.hpp

  void GetGradients(const double* score, score_t* gradients,
                    score_t* hessians) const override {
    if (weights_ == nullptr) {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        gradients[i] = static_cast<score_t>(score[i] - label_[i]);
        hessians[i] = 1.0f;
      }
    } else {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        gradients[i] = static_cast<score_t>((score[i] - label_[i]) * weights_[i]);
        hessians[i] = static_cast<score_t>(weights_[i]);
      }
    }
  }

遅くなるので実用性はありませんが、これをpythonで再現するとこうなります。

def l2_loss(pred, data):
    true = data.get_label()
    grad = pred - true
    hess = np.ones(len(grad))   
    return grad, hess

objective = "poisson" の時

このobjectiveはpoisson lossを最小化します。poisson lossについてmetricを読んでみると以下のようになっています。

github.com/microsoft/LightGBM/blob/master/src/metric/regression_metric.hpp

class PoissonMetric: public RegressionMetric<PoissonMetric> {
 public:
  explicit PoissonMetric(const Config& config) :RegressionMetric<PoissonMetric>(config) {
  }

  inline static double LossOnPoint(label_t label, double score, const Config&) {
    const double eps = 1e-10f;
    if (score < eps) {
      score = eps;
    }
    return score - label * std::log(score);
  }
  inline static const char* Name() {
    return "poisson";
  }
};

そして、objectiveを読んでみると、以下の様な実装になっています。

github.com/microsoft/LightGBM/blob/master/src/objective/regression_objective.hpp

  void GetGradients(const double* score, score_t* gradients,
                    score_t* hessians) const override {
    if (weights_ == nullptr) {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        gradients[i] = static_cast<score_t>(std::exp(score[i]) - label_[i]);
        hessians[i] = static_cast<score_t>(std::exp(score[i] + max_delta_step_));
      }
    } else {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        gradients[i] = static_cast<score_t>((std::exp(score[i]) - label_[i]) * weights_[i]);
        hessians[i] = static_cast<score_t>(std::exp(score[i] + max_delta_step_) * weights_[i]);
      }
    }
  }

...お気づきになられましたか？実はこのobjectiveのscoreは予測値そのままではなく、score = e^x で表した際のeの指数部xの値が入っているのです。WolframAlphaで実際に数式を入力してみるとわかるでしょう。そのため、objectiveでpoisson系のobjective(他にはgammaやtweedie等)を作った際は、metricも予測値 = e^(pred) で計算しないといけません。

def poisson_metric(pred, data):
    true = data.get_label()
    loss = np.exp(pred) - true*pred
    return "poisson", np.mean(loss), False

def poisson_object(pred, data):
    poisson_max_delta_step = 0.7
    true = data.get_label()
    grad = np.exp(pred) - true
    hess = exp(pred + poisson_max_delta_step)
    return grad, hess

objective = "binary" の時

ダメ押しでニクラス分類の時のobjectiveも見たいと思います。binaryの時のmetricは以下のようになっています。

github.com/microsoft/LightGBM/blob/master/src/metric/binary_metric.hpp

class BinaryLoglossMetric: public BinaryMetric<BinaryLoglossMetric> {
 public:
  explicit BinaryLoglossMetric(const Config& config) :BinaryMetric<BinaryLoglossMetric>(config) {}

  inline static double LossOnPoint(label_t label, double prob) {
    if (label <= 0) {
      if (1.0f - prob > kEpsilon) {
        return -std::log(1.0f - prob);
      }
    } else {
      if (prob > kEpsilon) {
        return -std::log(prob);
      }
    }
    return -std::log(kEpsilon);
  }

  inline static const char* Name() {
    return "binary_logloss";
  }
};

objectiveはis_unbalance=Falseのときsigmoid = 1, label_val = [-1, 1], label_weights = [1, 1]であることを気を付けてください。

github.com/microsoft/LightGBM/blob/master/src/objective/binary_objective.hpp

  void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
    if (!need_train_) {
      return;
    }
    if (weights_ == nullptr) {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        // get label and label weights
        const int is_pos = is_pos_(label_[i]);
        const int label = label_val_[is_pos];
        const double label_weight = label_weights_[is_pos];
        // calculate gradients and hessians
        const double response = -label * sigmoid_ / (1.0f + std::exp(label * sigmoid_ * score[i]));
        const double abs_response = fabs(response);
        gradients[i] = static_cast<score_t>(response * label_weight);
        hessians[i] = static_cast<score_t>(abs_response * (sigmoid_ - abs_response) * label_weight);
      }
    } else {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        // get label and label weights
        const int is_pos = is_pos_(label_[i]);
        const int label = label_val_[is_pos];
        const double label_weight = label_weights_[is_pos];
        // calculate gradients and hessians
        const double response = -label * sigmoid_ / (1.0f + std::exp(label * sigmoid_ * score[i]));
        const double abs_response = fabs(response);
        gradients[i] = static_cast<score_t>(response * label_weight  * weights_[i]);
        hessians[i] = static_cast<score_t>(abs_response * (sigmoid_ - abs_response) * label_weight * weights_[i]);
      }
    }
  }

poissonの時と同様に、scoreが予測値 = sigmoid(score) になっているからこのような勾配になっています。先ほどと同様にWolframAlphaで確認するとlabel=0のとき、label=1のときになるので、pythonでobjectiveを書くと以下のようになります。

def binary_metric(pred, data):
    true = data.get_label()
    loss = -(true * np.log(1/(1+np.exp(-pred))) + (1 - true) * np.log(1 - 1/(1+np.exp(-pred))))
    return "binary", np.mean(loss), False

def binary_objective(pred, data):
    true = data.get_label()
    label = 2*true - 1
    response = -label / (1 + np.exp(label * pred))
    abs_response = np.abs(response)
    grad = response
    hess = abs_response * (1 - abs_response)
    return grad, hess

おわりに

今回はlightGBMの公式実装をpythonで再現しました。今回紹介した基本を理解するとcustom objectiveを自作しやすくなります。いずれ別の記事で私がコンペで実装したobjectiveを紹介したいと思っているのでその際はよろしくお願いします。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up