From 2ac5786431276588de7ef2f6182d25a47e535434 Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Fri, 5 Nov 2021 08:34:55 +0000
Subject: [PATCH 01/84] add parameter data_sample_strategy

---
 include/LightGBM/config.h |  2 ++
 src/io/config.cpp         | 15 +++++++++++++++
 src/io/config_auto.cpp    |  1 +
 3 files changed, 18 insertions(+)

diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 45fffa432819..7ba9b47f7298 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -149,6 +149,8 @@ struct Config {
   // descl2 = **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations
   std::string boosting = "gbdt";
 
+  std::string data_sample_strategy = "bagging";
+
   // alias = train, train_data, train_data_file, data_filename
   // desc = path of training data, LightGBM will train from this data
   // desc = **Note**: can be used only in CLI version
diff --git a/src/io/config.cpp b/src/io/config.cpp
index a42b392dac3e..d5dab13f8413 100644
--- a/src/io/config.cpp
+++ b/src/io/config.cpp
@@ -64,6 +64,20 @@ void GetBoostingType(const std::unordered_map<std::string, std::string>& params,
   }
 }
 
+void GetDataSampleStrategy(const std::unordered_map<std::string, std::string>& params, std::string* strategy) {
+  std::string value;
+  if (Config::GetString(params, "data_sample_strategy", &value)) {
+    std::transform(value.begin(), value.end(), value.begin(), Common::tolower);
+    if (value == std::string("goss")) {
+      *strategy = "goss";
+    } else if (value == std::string("bagging")) {
+      *strategy = "bagging";
+    } else {
+      Log::Fatal("Unknown sample strategy %s", value.c_str());
+    }
+  }
+}
+
 void ParseMetrics(const std::string& value, std::vector<std::string>* out_metric) {
   std::unordered_set<std::string> metric_sets;
   out_metric->clear();
@@ -205,6 +219,7 @@ void Config::Set(const std::unordered_map<std::string, std::string>& params) {
 
   GetTaskType(params, &task);
   GetBoostingType(params, &boosting);
+  GetDataSampleStrategy(params, &data_sample_strategy);
   GetObjectiveType(params, &objective);
   GetMetricType(params, objective, &metric);
   GetDeviceType(params, &device_type);
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 4e3f000a88f5..18225c55a2fc 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -312,6 +312,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
   "gpu_device_id",
   "gpu_use_dp",
   "num_gpu",
+  "data_sample_strategy"
   });
   return params;
 }

From 590aec61b31c2bfd97d705fae20c0f7b8a3be068 Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Tue, 9 Nov 2021 14:46:14 +0000
Subject: [PATCH 02/84] abstract GOSS as a sample strategy (GOSS1), together with
 original GOSS (Normal Bagging has not been abstracted, so do NOT use it now)

---
 include/LightGBM/sample_strategy.h |  42 +++++++++
 src/boosting/gbdt.cpp              |  29 ++++++-
 src/boosting/gbdt.h                |   2 +
 src/boosting/goss1.hpp             | 131 +++++++++++++++++++++++++++++
 src/boosting/sample_strategy.cpp   |  16 ++++
 5 files changed, 218 insertions(+), 2 deletions(-)
 create mode 100644 include/LightGBM/sample_strategy.h
 create mode 100644 src/boosting/goss1.hpp
 create mode 100644 src/boosting/sample_strategy.cpp

diff --git a/include/LightGBM/sample_strategy.h b/include/LightGBM/sample_strategy.h
new file mode 100644
index 000000000000..cb181fc4892d
--- /dev/null
+++ b/include/LightGBM/sample_strategy.h
@@ -0,0 +1,42 @@
+#ifndef LIGHTGBM_SAMPLE_STRATEGY_H_
+#define LIGHTGBM_SAMPLE_STRATEGY_H_
+
+#include <LightGBM/utils/random.h>
+#include <LightGBM/utils/common.h>
+#include <LightGBM/utils/threading.h>
+#include <LightGBM/config.h>
+#include <LightGBM/dataset.h>
+#include <LightGBM/tree_learner.h>
+
+namespace LightGBM {
+
+class SampleStrategy {
+ public:
+  SampleStrategy() : balanced_bagging_(false), bagging_runner_(0, bagging_rand_block_) {};
+  virtual ~SampleStrategy() {};
+  static SampleStrategy* CreateSampleStrategy(const Config* config, const Dataset* train_data, int num_tree_per_iteration);
+  virtual void Bagging(int iter, score_t* gradients, score_t* hessians, TreeLearner* tree_learner) = 0;
+  virtual void Reset() = 0;
+  bool is_use_subset() {return is_use_subset_;}
+  data_size_t bag_data_cnt() {return bag_data_cnt_;}
+  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_data_indices() {return bag_data_indices_;}
+
+ protected:
+  virtual data_size_t Helper(data_size_t start, data_size_t cnt, data_size_t* buffer, score_t* gradients, score_t* hessians) = 0;
+  
+  const Config* config_;
+  const Dataset* train_data_;
+  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_data_indices_;
+  data_size_t bag_data_cnt_;
+  data_size_t num_data_;
+  int num_tree_per_iteration_;
+  std::unique_ptr<Dataset> tmp_subset_;
+  bool is_use_subset_;
+  bool balanced_bagging_;
+  const int bagging_rand_block_ = 1024;
+  std::vector<Random> bagging_rands_;
+  ParallelPartitionRunner<data_size_t, false> bagging_runner_;
+};
+
+} // namespace LightGBM
+#endif // LIGHTGBM_SAMPLE_STRATEGY_H_
\ No newline at end of file
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index d393d46d5133..aca734e28eec 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -10,6 +10,7 @@
 #include <LightGBM/prediction_early_stop.h>
 #include <LightGBM/utils/common.h>
 #include <LightGBM/utils/openmp_wrapper.h>
+#include <LightGBM/sample_strategy.h>
 
 #include <chrono>
 #include <ctime>
@@ -87,6 +88,10 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
     }
   }
 
+  CHECK(!(config_->bagging_freq > 0));  // can not use normal bagging in this version
+  data_sample_strategy_.reset(SampleStrategy::CreateSampleStrategy(config_.get(), train_data_, num_tree_per_iteration_));
+  data_sample_strategy_->Reset();
+
   is_constant_hessian_ = GetIsConstHessian(objective_function);
 
   tree_learner_ = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type,
@@ -107,10 +112,14 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
 
   num_data_ = train_data_->num_data();
   // create buffer for gradients and Hessians
+  size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
   if (objective_function_ != nullptr) {
-    size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
     gradients_.resize(total_size);
     hessians_.resize(total_size);
+  } else {
+    // use customized objective function, only for GOSS
+    gradients_.resize(total_size, 0.0f);
+    hessians_.resize(total_size, 0.0f);
   }
   // get max feature index
   max_feature_idx_ = train_data_->num_total_features() - 1;
@@ -377,9 +386,23 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
     Boosting();
     gradients = gradients_.data();
     hessians = hessians_.data();
+  } else if (gradients != nullptr) {
+    // use customized objective function
+    CHECK(hessians != nullptr && objective_function_ == nullptr);
+    // and will be only used for GOSS
+    CHECK(config_->boosting==std::string("goss") || config_->data_sample_strategy==std::string("goss"));
+    int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
+    #pragma omp parallel for schedule(static)
+    for (int64_t i = 0; i < total_size; ++i) {
+      gradients_[i] = gradients[i];
+      hessians_[i] = hessians[i];
+    }
   }
   // bagging logic
-  Bagging(iter_);
+  data_sample_strategy_->Bagging(iter_, gradients_.data(), hessians_.data(), tree_learner_.get());
+  bag_data_indices_ = data_sample_strategy_->bag_data_indices();
+  bag_data_cnt_ = data_sample_strategy_->bag_data_cnt();
+  is_use_subset_ = data_sample_strategy_->is_use_subset();
 
   bool should_continue = false;
   for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
@@ -733,6 +756,7 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
 
     tree_learner_->ResetTrainingData(train_data, is_constant_hessian_);
     ResetBaggingConfig(config_.get(), true);
+    data_sample_strategy_->Reset();
   } else {
     tree_learner_->ResetIsConstantHessian(is_constant_hessian_);
   }
@@ -757,6 +781,7 @@ void GBDT::ResetConfig(const Config* config) {
   if (train_data_ != nullptr) {
     ResetBaggingConfig(new_config.get(), false);
   }
+  data_sample_strategy_->Reset();
   if (config_.get() != nullptr && config_->forcedsplits_filename != new_config->forcedsplits_filename) {
     // load forced_splits file
     if (!new_config->forcedsplits_filename.empty()) {
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index 472ea1707104..f41e14582f4d 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -11,6 +11,7 @@
 #include <LightGBM/cuda/vector_cudahost.h>
 #include <LightGBM/utils/json11.h>
 #include <LightGBM/utils/threading.h>
+#include <LightGBM/sample_strategy.h>
 
 #include <string>
 #include <algorithm>
@@ -534,6 +535,7 @@ class GBDT : public GBDTBase {
   ParallelPartitionRunner<data_size_t, false> bagging_runner_;
   Json forced_splits_json_;
   bool linear_tree_;
+  std::unique_ptr<SampleStrategy> data_sample_strategy_;
 };
 
 }  // namespace LightGBM
diff --git a/src/boosting/goss1.hpp b/src/boosting/goss1.hpp
new file mode 100644
index 000000000000..b0e49231a5a4
--- /dev/null
+++ b/src/boosting/goss1.hpp
@@ -0,0 +1,131 @@
+#ifndef LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
+#define LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
+
+#include <LightGBM/utils/array_args.h>
+#include <LightGBM/sample_strategy.h>
+
+
+namespace LightGBM {
+
+class GOSS1 : public SampleStrategy {
+ public:
+  GOSS1(const Config* config, const Dataset* train_data, int num_tree_per_iteration) {
+    config_ = config;
+    train_data_ = train_data;
+    num_tree_per_iteration_ = num_tree_per_iteration;
+    num_data_ = train_data->num_data();
+  }
+  
+  ~GOSS1() {
+  }
+
+  void Bagging(int iter, score_t* gradients, score_t* hessians, TreeLearner* tree_learner) override {
+    bag_data_cnt_ = num_data_;
+    // not subsample for first iterations
+    if (iter < static_cast<int>(1.0f / config_->learning_rate)) { return; }
+    auto left_cnt = bagging_runner_.Run<true>(
+        num_data_,
+        [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left, 
+            data_size_t*) {
+          data_size_t cur_left_count = 0;
+          cur_left_count = Helper(cur_start, cur_cnt, left, gradients, hessians);
+          return cur_left_count;
+        },
+        bag_data_indices_.data());
+    bag_data_cnt_ = left_cnt;
+    // set bagging data to tree learner
+    if (!is_use_subset_) {
+      tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
+    } else {
+      // get subset
+      tmp_subset_->ReSize(bag_data_cnt_);
+      tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
+                              bag_data_cnt_, false);
+      tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
+                                    bag_data_cnt_);
+    }
+  }
+
+  void Reset() override {
+    CHECK_LE(config_->top_rate + config_->other_rate, 1.0f);
+    CHECK(config_->top_rate > 0.0f && config_->other_rate > 0.0f);
+    if (config_->bagging_freq > 0 && config_->bagging_fraction != 1.0f) {
+      Log::Fatal("Cannot use bagging in GOSS");
+    }
+    Log::Info("Using GOSS");
+    balanced_bagging_ = false;
+    bag_data_indices_.resize(num_data_);
+    bagging_runner_.ReSize(num_data_);
+    bagging_rands_.clear();
+    for (int i = 0;
+         i < (num_data_ + bagging_rand_block_ - 1) / bagging_rand_block_; ++i) {
+      bagging_rands_.emplace_back(config_->bagging_seed + i);
+    }
+    is_use_subset_ = false;
+    if (config_->top_rate + config_->other_rate <= 0.5) {
+      auto bag_data_cnt = static_cast<data_size_t>((config_->top_rate + config_->other_rate) * num_data_);
+      bag_data_cnt = std::max(1, bag_data_cnt);
+      tmp_subset_.reset(new Dataset(bag_data_cnt));
+      tmp_subset_->CopyFeatureMapperFrom(train_data_);
+      is_use_subset_ = true;
+    }
+    // flag to not bagging first
+    bag_data_cnt_ = num_data_; 
+  }
+
+ protected:
+  data_size_t Helper(data_size_t start, data_size_t cnt, data_size_t* buffer, score_t* gradients, score_t* hessians) override {
+    if (cnt <= 0) {
+      return 0;
+    }
+    std::vector<score_t> tmp_gradients(cnt, 0.0f);
+    for (data_size_t i = 0; i < cnt; ++i) {
+      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
+        size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + start + i;
+        tmp_gradients[i] += std::fabs(gradients[idx] * hessians[idx]);
+      }
+    }
+    data_size_t top_k = static_cast<data_size_t>(cnt * config_->top_rate);
+    data_size_t other_k = static_cast<data_size_t>(cnt * config_->other_rate);
+    top_k = std::max(1, top_k);
+    ArrayArgs<score_t>::ArgMaxAtK(&tmp_gradients, 0, static_cast<int>(tmp_gradients.size()), top_k - 1);
+    score_t threshold = tmp_gradients[top_k - 1];
+
+    score_t multiply = static_cast<score_t>(cnt - top_k) / other_k;
+    data_size_t cur_left_cnt = 0;
+    data_size_t cur_right_pos = cnt;
+    data_size_t big_weight_cnt = 0;
+    for (data_size_t i = 0; i < cnt; ++i) {
+      auto cur_idx = start + i;
+      score_t grad = 0.0f;
+      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
+        size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + cur_idx;
+        grad += std::fabs(gradients[idx] * hessians[idx]);
+      }
+      if (grad >= threshold) {
+        buffer[cur_left_cnt++] = cur_idx;
+        ++big_weight_cnt;
+      } else {
+        data_size_t sampled = cur_left_cnt - big_weight_cnt;
+        data_size_t rest_need = other_k - sampled;
+        data_size_t rest_all = (cnt - i) - (top_k - big_weight_cnt);
+        double prob = (rest_need) / static_cast<double>(rest_all);
+        if (bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() < prob) {
+          buffer[cur_left_cnt++] = cur_idx;
+          for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
+            size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + cur_idx;
+            gradients[idx] *= multiply;
+            hessians[idx] *= multiply;
+          }
+        } else {
+          buffer[--cur_right_pos] = cur_idx;
+        }
+      }
+    }
+    return cur_left_cnt;
+  }
+  
+};
+
+} // namespace LightGBM
+#endif // LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
\ No newline at end of file
diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
new file mode 100644
index 000000000000..cac1badb6c13
--- /dev/null
+++ b/src/boosting/sample_strategy.cpp
@@ -0,0 +1,16 @@
+#include <LightGBM/sample_strategy.h>
+#include "goss1.hpp"
+
+namespace LightGBM {
+
+SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const Dataset* train_data, int num_tree_per_iteration) {
+  bool use_goss_as_boosting = config->boosting == std::string("goss");
+  bool use_goss_as_strategy = config->data_sample_strategy == std::string("goss");
+  if (use_goss_as_boosting || use_goss_as_strategy) {
+      return new GOSS1(config, train_data, num_tree_per_iteration);
+  } else if (config->data_sample_strategy == std::string("bagging")) {
+      return nullptr;
+  }
+}
+
+} // namespace LightGBM
\ No newline at end of file

From c8dce4d83eb0c03b5e6a7e16913f7fcb97618da2 Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Fri, 12 Nov 2021 05:41:23 +0000
Subject: [PATCH 03/84] abstract Bagging as a subclass (BAGGING), but original
 Bagging members in GBDT are still kept

---
 include/LightGBM/sample_strategy.h |  11 +-
 src/boosting/bagging.hpp           | 169 +++++++++++++++++++++++++++++
 src/boosting/gbdt.cpp              |  21 ++--
 src/boosting/goss1.hpp             |   8 +-
 src/boosting/sample_strategy.cpp   |   9 +-
 5 files changed, 198 insertions(+), 20 deletions(-)
 create mode 100644 src/boosting/bagging.hpp

diff --git a/include/LightGBM/sample_strategy.h b/include/LightGBM/sample_strategy.h
index cb181fc4892d..b1770d28ccb4 100644
--- a/include/LightGBM/sample_strategy.h
+++ b/include/LightGBM/sample_strategy.h
@@ -7,6 +7,7 @@
 #include <LightGBM/config.h>
 #include <LightGBM/dataset.h>
 #include <LightGBM/tree_learner.h>
+#include <LightGBM/objective_function.h>
 
 namespace LightGBM {
 
@@ -14,18 +15,20 @@ class SampleStrategy {
  public:
   SampleStrategy() : balanced_bagging_(false), bagging_runner_(0, bagging_rand_block_) {};
   virtual ~SampleStrategy() {};
-  static SampleStrategy* CreateSampleStrategy(const Config* config, const Dataset* train_data, int num_tree_per_iteration);
-  virtual void Bagging(int iter, score_t* gradients, score_t* hessians, TreeLearner* tree_learner) = 0;
+  static SampleStrategy* CreateSampleStrategy(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration);
+  virtual void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) = 0;
   virtual void Reset() = 0;
+  virtual void ResetConfig(const Config* config, bool is_change_dataset, 
+          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& gradients, 
+          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) = 0;
   bool is_use_subset() {return is_use_subset_;}
   data_size_t bag_data_cnt() {return bag_data_cnt_;}
   std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_data_indices() {return bag_data_indices_;}
 
  protected:
-  virtual data_size_t Helper(data_size_t start, data_size_t cnt, data_size_t* buffer, score_t* gradients, score_t* hessians) = 0;
-  
   const Config* config_;
   const Dataset* train_data_;
+  const ObjectiveFunction* objective_function_;
   std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_data_indices_;
   data_size_t bag_data_cnt_;
   data_size_t num_data_;
diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
new file mode 100644
index 000000000000..fb7e230cae16
--- /dev/null
+++ b/src/boosting/bagging.hpp
@@ -0,0 +1,169 @@
+#ifndef LIGHTGBM_SAMPLE_STRATEGY_BAGGING_HPP_
+#define LIGHTGBM_SAMPLE_STRATEGY_BAGGING_HPP_
+
+namespace LightGBM {
+
+class BAGGING : public SampleStrategy {
+ public:
+  BAGGING(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration) 
+    : need_re_bagging_(false) {
+    config_ = config;
+    train_data_ = train_data;
+    num_data_ = train_data->num_data();
+    objective_function_ = objective_function;
+    num_tree_per_iteration_ = num_tree_per_iteration;
+  }
+  ~BAGGING() {}
+
+  void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
+    Common::FunctionTimer fun_timer("GBDT::Bagging", global_timer);
+    // if need bagging
+    if ((bag_data_cnt_ < num_data_ && iter % config_->bagging_freq == 0) ||
+        need_re_bagging_) {
+      need_re_bagging_ = false;
+      auto left_cnt = bagging_runner_.Run<true>(
+          num_data_,
+          [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
+              data_size_t*) {
+            data_size_t cur_left_count = 0;
+            if (balanced_bagging_) {
+              cur_left_count =
+                  BalancedBaggingHelper(cur_start, cur_cnt, left);
+            } else {
+              cur_left_count = BaggingHelper(cur_start, cur_cnt, left);
+            }
+            return cur_left_count;
+          },
+          bag_data_indices_.data());
+      bag_data_cnt_ = left_cnt;
+      Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
+      // set bagging data to tree learner
+      if (!is_use_subset_) {
+        tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
+      } else {
+        // get subset
+        tmp_subset_->ReSize(bag_data_cnt_);
+        tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
+                                bag_data_cnt_, false);
+        tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
+                                      bag_data_cnt_);
+      }
+    }
+  }
+
+  void Reset() override {} 
+
+  void ResetConfig(const Config* config, bool is_change_dataset, 
+          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& gradients, 
+          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) override {
+    // if need bagging, create buffer
+    data_size_t num_pos_data = 0;
+    if (objective_function_ != nullptr) {
+      num_pos_data = objective_function_->NumPositiveData();
+    }
+    bool balance_bagging_cond = (config->pos_bagging_fraction < 1.0 || config->neg_bagging_fraction < 1.0) && (num_pos_data > 0);
+    if ((config->bagging_fraction < 1.0 || balance_bagging_cond) && config->bagging_freq > 0) {
+      need_re_bagging_ = false;
+      if (!is_change_dataset &&
+        config_ != nullptr && config_->bagging_fraction == config->bagging_fraction && config_->bagging_freq == config->bagging_freq
+        && config_->pos_bagging_fraction == config->pos_bagging_fraction && config_->neg_bagging_fraction == config->neg_bagging_fraction) {
+        return;
+      }
+      if (balance_bagging_cond) {
+        balanced_bagging_ = true;
+        bag_data_cnt_ = static_cast<data_size_t>(num_pos_data * config->pos_bagging_fraction)
+                        + static_cast<data_size_t>((num_data_ - num_pos_data) * config->neg_bagging_fraction);
+      } else {
+        bag_data_cnt_ = static_cast<data_size_t>(config->bagging_fraction * num_data_);
+      }
+      bag_data_indices_.resize(num_data_);
+      bagging_runner_.ReSize(num_data_);
+      bagging_rands_.clear();
+      for (int i = 0;
+          i < (num_data_ + bagging_rand_block_ - 1) / bagging_rand_block_; ++i) {
+        bagging_rands_.emplace_back(config_->bagging_seed + i);
+      }
+
+      double average_bag_rate =
+          (static_cast<double>(bag_data_cnt_) / num_data_) / config->bagging_freq;
+      is_use_subset_ = false;
+      const int group_threshold_usesubset = 100;
+      if (average_bag_rate <= 0.5
+          && (train_data_->num_feature_groups() < group_threshold_usesubset)) {
+        if (tmp_subset_ == nullptr || is_change_dataset) {
+          tmp_subset_.reset(new Dataset(bag_data_cnt_));
+          tmp_subset_->CopyFeatureMapperFrom(train_data_);
+        }
+        is_use_subset_ = true;
+        Log::Debug("Use subset for bagging");
+      }
+
+      need_re_bagging_ = true;
+
+      if (is_use_subset_ && bag_data_cnt_ < num_data_) {
+        if (objective_function_ == nullptr) {
+          size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+          gradients.resize(total_size);
+          hessians.resize(total_size); 
+        }
+      }
+    } else {
+      bag_data_cnt_ = num_data_;
+      bag_data_indices_.clear();
+      bagging_runner_.ReSize(0);
+      is_use_subset_ = false;
+    }
+  }
+
+  data_size_t BaggingHelper(data_size_t start, data_size_t cnt, data_size_t* buffer) {
+    if (cnt <= 0) {
+      return 0;
+    }
+    data_size_t cur_left_cnt = 0;
+    data_size_t cur_right_pos = cnt;
+    // random bagging, minimal unit is one record
+    for (data_size_t i = 0; i < cnt; ++i) {
+      auto cur_idx = start + i;
+      if (bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() < config_->bagging_fraction) {
+        buffer[cur_left_cnt++] = cur_idx;
+      } else {
+        buffer[--cur_right_pos] = cur_idx;
+      }
+    }
+    return cur_left_cnt;
+  }
+
+  data_size_t BalancedBaggingHelper(data_size_t start, data_size_t cnt, data_size_t* buffer) {
+    if (cnt <= 0) {
+      return 0;
+    }
+    auto label_ptr = train_data_->metadata().label();
+    data_size_t cur_left_cnt = 0;
+    data_size_t cur_right_pos = cnt;
+    // random bagging, minimal unit is one record
+    for (data_size_t i = 0; i < cnt; ++i) {
+      auto cur_idx = start + i;
+      bool is_pos = label_ptr[start + i] > 0;
+      bool is_in_bag = false;
+      if (is_pos) {
+        is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
+                    config_->pos_bagging_fraction;
+      } else {
+        is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
+                    config_->neg_bagging_fraction;
+      }
+      if (is_in_bag) {
+        buffer[cur_left_cnt++] = cur_idx;
+      } else {
+        buffer[--cur_right_pos] = cur_idx;
+      }
+    }
+    return cur_left_cnt;
+  }
+
+  bool need_re_bagging_;
+};
+
+} // namespace LightGBM
+
+#endif  // LIGHTGBM_SAMPLE_STRATEGY_BAGGING_HPP_
\ No newline at end of file
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index aca734e28eec..84065f87e572 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -88,10 +88,6 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
     }
   }
 
-  CHECK(!(config_->bagging_freq > 0));  // can not use normal bagging in this version
-  data_sample_strategy_.reset(SampleStrategy::CreateSampleStrategy(config_.get(), train_data_, num_tree_per_iteration_));
-  data_sample_strategy_->Reset();
-
   is_constant_hessian_ = GetIsConstHessian(objective_function);
 
   tree_learner_ = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type,
@@ -131,7 +127,10 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
   monotone_constraints_ = config->monotone_constraints;
 
   // if need bagging, create buffer
-  ResetBaggingConfig(config_.get(), true);
+  // ResetBaggingConfig(config_.get(), true);
+  data_sample_strategy_.reset(SampleStrategy::CreateSampleStrategy(config_.get(), train_data_, objective_function_, num_tree_per_iteration_));
+  data_sample_strategy_->ResetConfig(config_.get(), true, gradients_, hessians_);
+  data_sample_strategy_->Reset();
 
   class_need_train_ = std::vector<bool>(num_tree_per_iteration_, true);
   if (objective_function_ != nullptr && objective_function_->SkipEmptyClass()) {
@@ -399,7 +398,7 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
     }
   }
   // bagging logic
-  data_sample_strategy_->Bagging(iter_, gradients_.data(), hessians_.data(), tree_learner_.get());
+  data_sample_strategy_->Bagging(iter_, tree_learner_.get(), gradients_.data(), hessians_.data());
   bag_data_indices_ = data_sample_strategy_->bag_data_indices();
   bag_data_cnt_ = data_sample_strategy_->bag_data_cnt();
   is_use_subset_ = data_sample_strategy_->is_use_subset();
@@ -755,11 +754,12 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
     feature_infos_ = train_data_->feature_infos();
 
     tree_learner_->ResetTrainingData(train_data, is_constant_hessian_);
-    ResetBaggingConfig(config_.get(), true);
-    data_sample_strategy_->Reset();
+    // ResetBaggingConfig(config_.get(), true);
+    data_sample_strategy_->ResetConfig(config_.get(), true, gradients_, hessians_);
   } else {
     tree_learner_->ResetIsConstantHessian(is_constant_hessian_);
   }
+  data_sample_strategy_->Reset();
 }
 
 void GBDT::ResetConfig(const Config* config) {
@@ -779,9 +779,9 @@ void GBDT::ResetConfig(const Config* config) {
     tree_learner_->ResetConfig(new_config.get());
   }
   if (train_data_ != nullptr) {
-    ResetBaggingConfig(new_config.get(), false);
+    // ResetBaggingConfig(new_config.get(), false);
+    data_sample_strategy_->ResetConfig(new_config.get(), false, gradients_, hessians_);
   }
-  data_sample_strategy_->Reset();
   if (config_.get() != nullptr && config_->forcedsplits_filename != new_config->forcedsplits_filename) {
     // load forced_splits file
     if (!new_config->forcedsplits_filename.empty()) {
@@ -798,6 +798,7 @@ void GBDT::ResetConfig(const Config* config) {
     }
   }
   config_.reset(new_config.release());
+  data_sample_strategy_->Reset();
 }
 
 void GBDT::ResetBaggingConfig(const Config* config, bool is_change_dataset) {
diff --git a/src/boosting/goss1.hpp b/src/boosting/goss1.hpp
index b0e49231a5a4..a51e4a1dc79c 100644
--- a/src/boosting/goss1.hpp
+++ b/src/boosting/goss1.hpp
@@ -19,7 +19,7 @@ class GOSS1 : public SampleStrategy {
   ~GOSS1() {
   }
 
-  void Bagging(int iter, score_t* gradients, score_t* hessians, TreeLearner* tree_learner) override {
+  void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
     bag_data_cnt_ = num_data_;
     // not subsample for first iterations
     if (iter < static_cast<int>(1.0f / config_->learning_rate)) { return; }
@@ -73,8 +73,12 @@ class GOSS1 : public SampleStrategy {
     bag_data_cnt_ = num_data_; 
   }
 
+  void ResetConfig(const Config* config, bool is_change_dataset, 
+          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& gradients, 
+          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) override {}
+
  protected:
-  data_size_t Helper(data_size_t start, data_size_t cnt, data_size_t* buffer, score_t* gradients, score_t* hessians) override {
+  data_size_t Helper(data_size_t start, data_size_t cnt, data_size_t* buffer, score_t* gradients, score_t* hessians) {
     if (cnt <= 0) {
       return 0;
     }
diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
index cac1badb6c13..35b56ee24020 100644
--- a/src/boosting/sample_strategy.cpp
+++ b/src/boosting/sample_strategy.cpp
@@ -1,15 +1,16 @@
 #include <LightGBM/sample_strategy.h>
 #include "goss1.hpp"
+#include "bagging.hpp"
 
 namespace LightGBM {
 
-SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const Dataset* train_data, int num_tree_per_iteration) {
+SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration) {
   bool use_goss_as_boosting = config->boosting == std::string("goss");
   bool use_goss_as_strategy = config->data_sample_strategy == std::string("goss");
   if (use_goss_as_boosting || use_goss_as_strategy) {
-      return new GOSS1(config, train_data, num_tree_per_iteration);
-  } else if (config->data_sample_strategy == std::string("bagging")) {
-      return nullptr;
+    return new GOSS1(config, train_data, num_tree_per_iteration);
+  } else {
+    return new BAGGING(config, train_data, objective_function, num_tree_per_iteration);
   }
 }
 

From dd40531b395dcac22243ae1c729510cb3a00b13d Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Fri, 12 Nov 2021 14:16:53 +0000
Subject: [PATCH 04/84] fix some variables

---
 include/LightGBM/sample_strategy.h |  2 +-
 src/boosting/gbdt.cpp              | 21 +++++++++++----------
 src/boosting/rf.hpp                | 20 ++++++++++++--------
 3 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/include/LightGBM/sample_strategy.h b/include/LightGBM/sample_strategy.h
index b1770d28ccb4..e86f5e6aa6f0 100644
--- a/include/LightGBM/sample_strategy.h
+++ b/include/LightGBM/sample_strategy.h
@@ -23,7 +23,7 @@ class SampleStrategy {
           std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) = 0;
   bool is_use_subset() {return is_use_subset_;}
   data_size_t bag_data_cnt() {return bag_data_cnt_;}
-  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_data_indices() {return bag_data_indices_;}
+  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() {return bag_data_indices_;}
 
  protected:
   const Config* config_;
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 84065f87e572..fd68263bbbe3 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -399,9 +399,9 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
   }
   // bagging logic
   data_sample_strategy_->Bagging(iter_, tree_learner_.get(), gradients_.data(), hessians_.data());
-  bag_data_indices_ = data_sample_strategy_->bag_data_indices();
-  bag_data_cnt_ = data_sample_strategy_->bag_data_cnt();
-  is_use_subset_ = data_sample_strategy_->is_use_subset();
+  const bool is_use_subset = data_sample_strategy_->is_use_subset();
+  const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
+  const std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices = data_sample_strategy_->bag_data_indices();
 
   bool should_continue = false;
   for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
@@ -411,10 +411,10 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
       auto grad = gradients + offset;
       auto hess = hessians + offset;
       // need to copy gradients for bagging subset.
-      if (is_use_subset_ && bag_data_cnt_ < num_data_) {
-        for (int i = 0; i < bag_data_cnt_; ++i) {
-          gradients_[offset + i] = grad[bag_data_indices_[i]];
-          hessians_[offset + i] = hess[bag_data_indices_[i]];
+      if (is_use_subset && bag_data_cnt < num_data_) {
+        for (int i = 0; i < bag_data_cnt; ++i) {
+          gradients_[offset + i] = grad[bag_data_indices[i]];
+          hessians_[offset + i] = hess[bag_data_indices[i]];
         }
         grad = gradients_.data() + offset;
         hess = hessians_.data() + offset;
@@ -513,12 +513,13 @@ bool GBDT::EvalAndCheckEarlyStopping() {
 void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
   Common::FunctionTimer fun_timer("GBDT::UpdateScore", global_timer);
   // update training score
-  if (!is_use_subset_) {
+  if (!data_sample_strategy_->is_use_subset()) {
     train_score_updater_->AddScore(tree_learner_.get(), tree, cur_tree_id);
 
+    const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
     // we need to predict out-of-bag scores of data for boosting
-    if (num_data_ - bag_data_cnt_ > 0) {
-      train_score_updater_->AddScore(tree, bag_data_indices_.data() + bag_data_cnt_, num_data_ - bag_data_cnt_, cur_tree_id);
+    if (num_data_ - bag_data_cnt > 0) {
+      train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
     }
 
   } else {
diff --git a/src/boosting/rf.hpp b/src/boosting/rf.hpp
index 5a9eb226fef5..20b80c025c18 100644
--- a/src/boosting/rf.hpp
+++ b/src/boosting/rf.hpp
@@ -48,7 +48,7 @@ class RF : public GBDT {
     shrinkage_rate_ = 1.0f;
     // only boosting one time
     Boosting();
-    if (is_use_subset_ && bag_data_cnt_ < num_data_) {
+    if (data_sample_strategy_->is_use_subset() && data_sample_strategy_->bag_data_cnt() < num_data_) {
       tmp_grad_.resize(num_data_);
       tmp_hess_.resize(num_data_);
     }
@@ -73,7 +73,7 @@ class RF : public GBDT {
     CHECK_EQ(num_tree_per_iteration_, num_class_);
     // only boosting one time
     Boosting();
-    if (is_use_subset_ && bag_data_cnt_ < num_data_) {
+    if (data_sample_strategy_->is_use_subset() && data_sample_strategy_->bag_data_cnt() < num_data_) {
       tmp_grad_.resize(num_data_);
       tmp_hess_.resize(num_data_);
     }
@@ -102,7 +102,11 @@ class RF : public GBDT {
 
   bool TrainOneIter(const score_t* gradients, const score_t* hessians) override {
     // bagging logic
-    Bagging(iter_);
+    data_sample_strategy_ ->Bagging(iter_, tree_learner_.get(), gradients_.data(), hessians_.data());
+    const bool is_use_subset = data_sample_strategy_->is_use_subset();
+    const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
+    const std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices = data_sample_strategy_->bag_data_indices();
+    
     CHECK_EQ(gradients, nullptr);
     CHECK_EQ(hessians, nullptr);
 
@@ -116,10 +120,10 @@ class RF : public GBDT {
         auto hess = hessians + offset;
 
         // need to copy gradients for bagging subset.
-        if (is_use_subset_ && bag_data_cnt_ < num_data_) {
-          for (int i = 0; i < bag_data_cnt_; ++i) {
-            tmp_grad_[i] = grad[bag_data_indices_[i]];
-            tmp_hess_[i] = hess[bag_data_indices_[i]];
+        if (is_use_subset && bag_data_cnt < num_data_) {
+          for (int i = 0; i < bag_data_cnt; ++i) {
+            tmp_grad_[i] = grad[bag_data_indices[i]];
+            tmp_hess_[i] = hess[bag_data_indices[i]];
           }
           grad = tmp_grad_.data();
           hess = tmp_hess_.data();
@@ -132,7 +136,7 @@ class RF : public GBDT {
         double pred = init_scores_[cur_tree_id];
         auto residual_getter = [pred](const label_t* label, int i) {return static_cast<double>(label[i]) - pred; };
         tree_learner_->RenewTreeOutput(new_tree.get(), objective_function_, residual_getter,
-          num_data_, bag_data_indices_.data(), bag_data_cnt_);
+          num_data_, bag_data_indices.data(), bag_data_cnt);
         if (std::fabs(init_scores_[cur_tree_id]) > kEpsilon) {
           new_tree->AddBias(init_scores_[cur_tree_id]);
         }

From 4b6095db63acffd70e9ad57bac2f33177abe76ae Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Fri, 12 Nov 2021 14:35:23 +0000
Subject: [PATCH 05/84] remove GOSS(as boost) and Bagging logic in GBDT

---
 src/boosting/boosting.cpp |   5 +-
 src/boosting/gbdt.cpp     | 153 +------------------------------
 src/boosting/gbdt.h       |  21 -----
 src/boosting/goss.hpp     | 188 --------------------------------------
 4 files changed, 4 insertions(+), 363 deletions(-)
 delete mode 100644 src/boosting/goss.hpp

diff --git a/src/boosting/boosting.cpp b/src/boosting/boosting.cpp
index 91fa318a0f18..98f2554b1388 100644
--- a/src/boosting/boosting.cpp
+++ b/src/boosting/boosting.cpp
@@ -6,7 +6,6 @@
 
 #include "dart.hpp"
 #include "gbdt.h"
-#include "goss.hpp"
 #include "rf.hpp"
 
 namespace LightGBM {
@@ -39,7 +38,7 @@ Boosting* Boosting::CreateBoosting(const std::string& type, const char* filename
     } else if (type == std::string("dart")) {
       return new DART();
     } else if (type == std::string("goss")) {
-      return new GOSS();
+      return new GBDT();
     } else if (type == std::string("rf")) {
       return new RF();
     } else {
@@ -53,7 +52,7 @@ Boosting* Boosting::CreateBoosting(const std::string& type, const char* filename
       } else if (type == std::string("dart")) {
         ret.reset(new DART());
       } else if (type == std::string("goss")) {
-        ret.reset(new GOSS());
+        ret.reset(new GBDT());
       } else if (type == std::string("rf")) {
         return new RF();
       } else {
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index fd68263bbbe3..1f9f7d834a47 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -35,10 +35,7 @@ GBDT::GBDT()
       num_class_(1),
       num_iteration_for_pred_(0),
       shrinkage_rate_(0.1f),
-      num_init_iteration_(0),
-      need_re_bagging_(false),
-      balanced_bagging_(false),
-      bagging_runner_(0, bagging_rand_block_) {
+      num_init_iteration_(0) {
   average_output_ = false;
   tree_learner_ = nullptr;
   linear_tree_ = false;
@@ -127,7 +124,6 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
   monotone_constraints_ = config->monotone_constraints;
 
   // if need bagging, create buffer
-  // ResetBaggingConfig(config_.get(), true);
   data_sample_strategy_.reset(SampleStrategy::CreateSampleStrategy(config_.get(), train_data_, objective_function_, num_tree_per_iteration_));
   data_sample_strategy_->ResetConfig(config_.get(), true, gradients_, hessians_);
   data_sample_strategy_->Reset();
@@ -186,89 +182,6 @@ void GBDT::Boosting() {
     GetGradients(GetTrainingScore(&num_score), gradients_.data(), hessians_.data());
 }
 
-data_size_t GBDT::BaggingHelper(data_size_t start, data_size_t cnt, data_size_t* buffer) {
-  if (cnt <= 0) {
-    return 0;
-  }
-  data_size_t cur_left_cnt = 0;
-  data_size_t cur_right_pos = cnt;
-  // random bagging, minimal unit is one record
-  for (data_size_t i = 0; i < cnt; ++i) {
-    auto cur_idx = start + i;
-    if (bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() < config_->bagging_fraction) {
-      buffer[cur_left_cnt++] = cur_idx;
-    } else {
-      buffer[--cur_right_pos] = cur_idx;
-    }
-  }
-  return cur_left_cnt;
-}
-
-data_size_t GBDT::BalancedBaggingHelper(data_size_t start, data_size_t cnt,
-                                        data_size_t* buffer) {
-  if (cnt <= 0) {
-    return 0;
-  }
-  auto label_ptr = train_data_->metadata().label();
-  data_size_t cur_left_cnt = 0;
-  data_size_t cur_right_pos = cnt;
-  // random bagging, minimal unit is one record
-  for (data_size_t i = 0; i < cnt; ++i) {
-    auto cur_idx = start + i;
-    bool is_pos = label_ptr[start + i] > 0;
-    bool is_in_bag = false;
-    if (is_pos) {
-      is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
-                  config_->pos_bagging_fraction;
-    } else {
-      is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
-                  config_->neg_bagging_fraction;
-    }
-    if (is_in_bag) {
-      buffer[cur_left_cnt++] = cur_idx;
-    } else {
-      buffer[--cur_right_pos] = cur_idx;
-    }
-  }
-  return cur_left_cnt;
-}
-
-void GBDT::Bagging(int iter) {
-  Common::FunctionTimer fun_timer("GBDT::Bagging", global_timer);
-  // if need bagging
-  if ((bag_data_cnt_ < num_data_ && iter % config_->bagging_freq == 0) ||
-      need_re_bagging_) {
-    need_re_bagging_ = false;
-    auto left_cnt = bagging_runner_.Run<true>(
-        num_data_,
-        [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
-            data_size_t*) {
-          data_size_t cur_left_count = 0;
-          if (balanced_bagging_) {
-            cur_left_count =
-                BalancedBaggingHelper(cur_start, cur_cnt, left);
-          } else {
-            cur_left_count = BaggingHelper(cur_start, cur_cnt, left);
-          }
-          return cur_left_count;
-        },
-        bag_data_indices_.data());
-    bag_data_cnt_ = left_cnt;
-    Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
-    // set bagging data to tree learner
-    if (!is_use_subset_) {
-      tree_learner_->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
-    } else {
-      // get subset
-      tmp_subset_->ReSize(bag_data_cnt_);
-      tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
-                              bag_data_cnt_, false);
-      tree_learner_->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
-                                    bag_data_cnt_);
-    }
-  }
-}
-
 void GBDT::Train(int snapshot_freq, const std::string& model_output_path) {
   Common::FunctionTimer fun_timer("GBDT::Train", global_timer);
   bool is_finished = false;
@@ -428,7 +341,7 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
       auto score_ptr = train_score_updater_->score() + offset;
       auto residual_getter = [score_ptr](const label_t* label, int i) {return static_cast<double>(label[i]) - score_ptr[i]; };
       tree_learner_->RenewTreeOutput(new_tree.get(), objective_function_, residual_getter,
-                                     num_data_, bag_data_indices_.data(), bag_data_cnt_);
+                                     num_data_, bag_data_indices.data(), bag_data_cnt);
       // shrinkage by learning rate
       new_tree->Shrinkage(shrinkage_rate_);
       // update score
@@ -755,7 +668,6 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
     feature_infos_ = train_data_->feature_infos();
 
     tree_learner_->ResetTrainingData(train_data, is_constant_hessian_);
-    // ResetBaggingConfig(config_.get(), true);
     data_sample_strategy_->ResetConfig(config_.get(), true, gradients_, hessians_);
   } else {
     tree_learner_->ResetIsConstantHessian(is_constant_hessian_);
@@ -780,7 +692,6 @@ void GBDT::ResetConfig(const Config* config) {
     tree_learner_->ResetConfig(new_config.get());
   }
   if (train_data_ != nullptr) {
-    // ResetBaggingConfig(new_config.get(), false);
     data_sample_strategy_->ResetConfig(new_config.get(), false, gradients_, hessians_);
   }
   if (config_.get() != nullptr && config_->forcedsplits_filename != new_config->forcedsplits_filename) {
@@ -802,64 +713,4 @@ void GBDT::ResetConfig(const Config* config) {
   data_sample_strategy_->Reset();
 }
 
-void GBDT::ResetBaggingConfig(const Config* config, bool is_change_dataset) {
-  // if need bagging, create buffer
-  data_size_t num_pos_data = 0;
-  if (objective_function_ != nullptr) {
-    num_pos_data = objective_function_->NumPositiveData();
-  }
-  bool balance_bagging_cond = (config->pos_bagging_fraction < 1.0 || config->neg_bagging_fraction < 1.0) && (num_pos_data > 0);
-  if ((config->bagging_fraction < 1.0 || balance_bagging_cond) && config->bagging_freq > 0) {
-    need_re_bagging_ = false;
-    if (!is_change_dataset &&
-      config_.get() != nullptr && config_->bagging_fraction == config->bagging_fraction && config_->bagging_freq == config->bagging_freq
-      && config_->pos_bagging_fraction == config->pos_bagging_fraction && config_->neg_bagging_fraction == config->neg_bagging_fraction) {
-      return;
-    }
-    if (balance_bagging_cond) {
-      balanced_bagging_ = true;
-      bag_data_cnt_ = static_cast<data_size_t>(num_pos_data * config->pos_bagging_fraction)
-                      + static_cast<data_size_t>((num_data_ - num_pos_data) * config->neg_bagging_fraction);
-    } else {
-      bag_data_cnt_ = static_cast<data_size_t>(config->bagging_fraction * num_data_);
-    }
-    bag_data_indices_.resize(num_data_);
-    bagging_runner_.ReSize(num_data_);
-    bagging_rands_.clear();
-    for (int i = 0;
-         i < (num_data_ + bagging_rand_block_ - 1) / bagging_rand_block_; ++i) {
-      bagging_rands_.emplace_back(config_->bagging_seed + i);
-    }
-
-    double average_bag_rate =
-        (static_cast<double>(bag_data_cnt_) / num_data_) / config->bagging_freq;
-    is_use_subset_ = false;
-    const int group_threshold_usesubset = 100;
-    if (average_bag_rate <= 0.5
-        && (train_data_->num_feature_groups() < group_threshold_usesubset)) {
-      if (tmp_subset_ == nullptr || is_change_dataset) {
-        tmp_subset_.reset(new Dataset(bag_data_cnt_));
-        tmp_subset_->CopyFeatureMapperFrom(train_data_);
-      }
-      is_use_subset_ = true;
-      Log::Debug("Use subset for bagging");
-    }
-
-    need_re_bagging_ = true;
-
-    if (is_use_subset_ && bag_data_cnt_ < num_data_) {
-      if (objective_function_ == nullptr) {
-        size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
-        gradients_.resize(total_size);
-        hessians_.resize(total_size);
-      }
-    }
-  } else {
-    bag_data_cnt_ = num_data_;
-    bag_data_indices_.clear();
-    bagging_runner_.ReSize(0);
-    is_use_subset_ = false;
-  }
-}
-
 }  // namespace LightGBM
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index f41e14582f4d..13c427bd266c 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -413,18 +413,6 @@ class GBDT : public GBDTBase {
   */
   void ResetBaggingConfig(const Config* config, bool is_change_dataset);
 
-  /*!
-  * \brief Implement bagging logic
-  * \param iter Current interation
-  */
-  virtual void Bagging(int iter);
-
-  virtual data_size_t BaggingHelper(data_size_t start, data_size_t cnt,
-                                    data_size_t* buffer);
-
-  data_size_t BalancedBaggingHelper(data_size_t start, data_size_t cnt,
-                                    data_size_t* buffer);
-
   /*!
   * \brief calculate the object function
   */
@@ -497,10 +485,6 @@ class GBDT : public GBDTBase {
   std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> hessians_;
 #endif
 
-  /*! \brief Store the indices of in-bag data */
-  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_data_indices_;
-  /*! \brief Number of in-bag data */
-  data_size_t bag_data_cnt_;
   /*! \brief Number of training data */
   data_size_t num_data_;
   /*! \brief Number of trees per iterations */
@@ -520,8 +504,6 @@ class GBDT : public GBDTBase {
   /*! \brief Feature names */
   std::vector<std::string> feature_names_;
   std::vector<std::string> feature_infos_;
-  std::unique_ptr<Dataset> tmp_subset_;
-  bool is_use_subset_;
   std::vector<bool> class_need_train_;
   bool is_constant_hessian_;
   std::unique_ptr<ObjectiveFunction> loaded_objective_;
@@ -530,9 +512,6 @@ class GBDT : public GBDTBase {
   bool balanced_bagging_;
   std::string loaded_parameter_;
   std::vector<int8_t> monotone_constraints_;
-  const int bagging_rand_block_ = 1024;
-  std::vector<Random> bagging_rands_;
-  ParallelPartitionRunner<data_size_t, false> bagging_runner_;
   Json forced_splits_json_;
   bool linear_tree_;
   std::unique_ptr<SampleStrategy> data_sample_strategy_;
diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
deleted file mode 100644
index 09b5b90763ce..000000000000
--- a/src/boosting/goss.hpp
+++ /dev/null
@@ -1,188 +0,0 @@
-/*!
- * Copyright (c) 2017 Microsoft Corporation. All rights reserved.
- * Licensed under the MIT License. See LICENSE file in the project root for license information.
- */
-#ifndef LIGHTGBM_BOOSTING_GOSS_H_
-#define LIGHTGBM_BOOSTING_GOSS_H_
-
-#include <LightGBM/boosting.h>
-#include <LightGBM/utils/array_args.h>
-#include <LightGBM/utils/log.h>
-
-#include <string>
-#include <algorithm>
-#include <chrono>
-#include <cstdio>
-#include <cstdint>
-#include <fstream>
-#include <vector>
-
-#include "gbdt.h"
-#include "score_updater.hpp"
-
-namespace LightGBM {
-
-class GOSS: public GBDT {
- public:
-  /*!
-  * \brief Constructor
-  */
-  GOSS() : GBDT() {
-  }
-
-  ~GOSS() {
-  }
-
-  void Init(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function,
-            const std::vector<const Metric*>& training_metrics) override {
-    GBDT::Init(config, train_data, objective_function, training_metrics);
-    ResetGoss();
-    if (objective_function_ == nullptr) {
-      // use customized objective function
-      size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
-      gradients_.resize(total_size, 0.0f);
-      hessians_.resize(total_size, 0.0f);
-    }
-  }
-
-  void ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* objective_function,
-                         const std::vector<const Metric*>& training_metrics) override {
-    GBDT::ResetTrainingData(train_data, objective_function, training_metrics);
-    ResetGoss();
-  }
-
-  void ResetConfig(const Config* config) override {
-    GBDT::ResetConfig(config);
-    ResetGoss();
-  }
-
-  bool TrainOneIter(const score_t* gradients, const score_t* hessians) override {
-    if (gradients != nullptr) {
-      // use customized objective function
-      CHECK(hessians != nullptr && objective_function_ == nullptr);
-      int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
-      #pragma omp parallel for schedule(static)
-      for (int64_t i = 0; i < total_size; ++i) {
-        gradients_[i] = gradients[i];
-        hessians_[i] = hessians[i];
-      }
-      return GBDT::TrainOneIter(gradients_.data(), hessians_.data());
-    } else {
-      CHECK(hessians == nullptr);
-      return GBDT::TrainOneIter(nullptr, nullptr);
-    }
-  }
-
-  void ResetGoss() {
-    CHECK_LE(config_->top_rate + config_->other_rate, 1.0f);
-    CHECK(config_->top_rate > 0.0f && config_->other_rate > 0.0f);
-    if (config_->bagging_freq > 0 && config_->bagging_fraction != 1.0f) {
-      Log::Fatal("Cannot use bagging in GOSS");
-    }
-    Log::Info("Using GOSS");
-    balanced_bagging_ = false;
-    bag_data_indices_.resize(num_data_);
-    bagging_runner_.ReSize(num_data_);
-    bagging_rands_.clear();
-    for (int i = 0;
-         i < (num_data_ + bagging_rand_block_ - 1) / bagging_rand_block_; ++i) {
-      bagging_rands_.emplace_back(config_->bagging_seed + i);
-    }
-    is_use_subset_ = false;
-    if (config_->top_rate + config_->other_rate <= 0.5) {
-      auto bag_data_cnt = static_cast<data_size_t>((config_->top_rate + config_->other_rate) * num_data_);
-      bag_data_cnt = std::max(1, bag_data_cnt);
-      tmp_subset_.reset(new Dataset(bag_data_cnt));
-      tmp_subset_->CopyFeatureMapperFrom(train_data_);
-      is_use_subset_ = true;
-    }
-    // flag to not bagging first
-    bag_data_cnt_ = num_data_;
-  }
-
-  data_size_t BaggingHelper(data_size_t start, data_size_t cnt, data_size_t* buffer) override {
-    if (cnt <= 0) {
-      return 0;
-    }
-    std::vector<score_t> tmp_gradients(cnt, 0.0f);
-    for (data_size_t i = 0; i < cnt; ++i) {
-      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
-        size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + start + i;
-        tmp_gradients[i] += std::fabs(gradients_[idx] * hessians_[idx]);
-      }
-    }
-    data_size_t top_k = static_cast<data_size_t>(cnt * config_->top_rate);
-    data_size_t other_k = static_cast<data_size_t>(cnt * config_->other_rate);
-    top_k = std::max(1, top_k);
-    ArrayArgs<score_t>::ArgMaxAtK(&tmp_gradients, 0, static_cast<int>(tmp_gradients.size()), top_k - 1);
-    score_t threshold = tmp_gradients[top_k - 1];
-
-    score_t multiply = static_cast<score_t>(cnt - top_k) / other_k;
-    data_size_t cur_left_cnt = 0;
-    data_size_t cur_right_pos = cnt;
-    data_size_t big_weight_cnt = 0;
-    for (data_size_t i = 0; i < cnt; ++i) {
-      auto cur_idx = start + i;
-      score_t grad = 0.0f;
-      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
-        size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + cur_idx;
-        grad += std::fabs(gradients_[idx] * hessians_[idx]);
-      }
-      if (grad >= threshold) {
-        buffer[cur_left_cnt++] = cur_idx;
-        ++big_weight_cnt;
-      } else {
-        data_size_t sampled = cur_left_cnt - big_weight_cnt;
-        data_size_t rest_need = other_k - sampled;
-        data_size_t rest_all = (cnt - i) - (top_k - big_weight_cnt);
-        double prob = (rest_need) / static_cast<double>(rest_all);
-        if (bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() < prob) {
-          buffer[cur_left_cnt++] = cur_idx;
-          for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
-            size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + cur_idx;
-            gradients_[idx] *= multiply;
-            hessians_[idx] *= multiply;
-          }
-        } else {
-          buffer[--cur_right_pos] = cur_idx;
-        }
-      }
-    }
-    return cur_left_cnt;
-  }
-
-  void Bagging(int iter) override {
-    bag_data_cnt_ = num_data_;
-    // not subsample for first iterations
-    if (iter < static_cast<int>(1.0f / config_->learning_rate)) { return; }
-    auto left_cnt = bagging_runner_.Run<true>(
-        num_data_,
-        [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
-            data_size_t*) {
-          data_size_t cur_left_count = 0;
-          cur_left_count = BaggingHelper(cur_start, cur_cnt, left);
-          return cur_left_count;
-        },
-        bag_data_indices_.data());
-    bag_data_cnt_ = left_cnt;
-    // set bagging data to tree learner
-    if (!is_use_subset_) {
-      tree_learner_->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
-    } else {
-      // get subset
-      tmp_subset_->ReSize(bag_data_cnt_);
-      tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
-                              bag_data_cnt_, false);
-      tree_learner_->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
-                                    bag_data_cnt_);
-    }
-  }
-
- protected:
-  bool GetIsConstHessian(const ObjectiveFunction*) override {
-    return false;
-  }
-};
-
-}  // namespace LightGBM
-#endif   // LIGHTGBM_BOOSTING_GOSS_H_

From 2acb230593c83b78ec652805ea3704b33d689def Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Fri, 12 Nov 2021 14:37:28 +0000
Subject: [PATCH 06/84] rename GOSS1 to GOSS(as sample strategy)

---
 src/boosting/{goss1.hpp => goss.hpp} | 6 +++---
 src/boosting/sample_strategy.cpp     | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)
 rename src/boosting/{goss1.hpp => goss.hpp} (97%)

diff --git a/src/boosting/goss1.hpp b/src/boosting/goss.hpp
similarity index 97%
rename from src/boosting/goss1.hpp
rename to src/boosting/goss.hpp
index a51e4a1dc79c..605b660bd8c9 100644
--- a/src/boosting/goss1.hpp
+++ b/src/boosting/goss.hpp
@@ -7,16 +7,16 @@
 
 namespace LightGBM {
 
-class GOSS1 : public SampleStrategy {
+class GOSS : public SampleStrategy {
  public:
-  GOSS1(const Config* config, const Dataset* train_data, int num_tree_per_iteration) {
+  GOSS(const Config* config, const Dataset* train_data, int num_tree_per_iteration) {
     config_ = config;
     train_data_ = train_data;
     num_tree_per_iteration_ = num_tree_per_iteration;
     num_data_ = train_data->num_data();
   }
   
-  ~GOSS1() {
+  ~GOSS() {
   }
 
   void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
index 35b56ee24020..53e00fcee14f 100644
--- a/src/boosting/sample_strategy.cpp
+++ b/src/boosting/sample_strategy.cpp
@@ -1,5 +1,5 @@
 #include <LightGBM/sample_strategy.h>
-#include "goss1.hpp"
+#include "goss.hpp"
 #include "bagging.hpp"
 
 namespace LightGBM {
@@ -8,7 +8,7 @@ SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const
   bool use_goss_as_boosting = config->boosting == std::string("goss");
   bool use_goss_as_strategy = config->data_sample_strategy == std::string("goss");
   if (use_goss_as_boosting || use_goss_as_strategy) {
-    return new GOSS1(config, train_data, num_tree_per_iteration);
+    return new GOSS(config, train_data, num_tree_per_iteration);
   } else {
     return new BAGGING(config, train_data, objective_function, num_tree_per_iteration);
   }

From 8b25d657519d4562f89fcaf78b794b5102900854 Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Fri, 12 Nov 2021 14:47:15 +0000
Subject: [PATCH 07/84] add warning about using GOSS as boosting_type

---
 src/boosting/sample_strategy.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
index 53e00fcee14f..656504eef7c7 100644
--- a/src/boosting/sample_strategy.cpp
+++ b/src/boosting/sample_strategy.cpp
@@ -7,6 +7,12 @@ namespace LightGBM {
 SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration) {
   bool use_goss_as_boosting = config->boosting == std::string("goss");
   bool use_goss_as_strategy = config->data_sample_strategy == std::string("goss");
+  if (use_goss_as_boosting) {
+    Log::Warning("Setting goss as `boosting_type` is NOT suggested. Please set `data_sample_strategy = goss` in your config file.");
+    if (use_goss_as_strategy) {
+      Log::Warning("Both `boosting_type` and `data_sample_strategy` are set as GOSS. Only one time of sampling will be conducted. Please check and modify your config file.")
+    }
+  }
   if (use_goss_as_boosting || use_goss_as_strategy) {
     return new GOSS(config, train_data, num_tree_per_iteration);
   } else {

From 05a8d15af99a078cd5c36e0d16454eeca95ec937 Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Fri, 12 Nov 2021 14:52:04 +0000
Subject: [PATCH 08/84] fix missing semicolon after Log::Warning call

---
 src/boosting/sample_strategy.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
index 656504eef7c7..c3d6ea8378c9 100644
--- a/src/boosting/sample_strategy.cpp
+++ b/src/boosting/sample_strategy.cpp
@@ -10,7 +10,7 @@ SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const
   if (use_goss_as_boosting) {
     Log::Warning("Setting goss as `boosting_type` is NOT suggested. Please set `data_sample_strategy = goss` in your config file.");
     if (use_goss_as_strategy) {
-      Log::Warning("Both `boosting_type` and `data_sample_strategy` are set as GOSS. Only one time of sampling will be conducted. Please check and modify your config file.")
+      Log::Warning("Both `boosting_type` and `data_sample_strategy` are set as GOSS. Only one time of sampling will be conducted. Please check and modify your config file.");
     }
   }
   if (use_goss_as_boosting || use_goss_as_strategy) {

From 6f9c8cceb26f283a127ed50815e7f08407824bfa Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Mon, 15 Nov 2021 08:42:08 +0000
Subject: [PATCH 09/84] remove CHECK when "gradients != nullptr"

---
 src/boosting/gbdt.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 1f9f7d834a47..f7392dd29dc2 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -301,8 +301,8 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
   } else if (gradients != nullptr) {
     // use customized objective function
     CHECK(hessians != nullptr && objective_function_ == nullptr);
-    // and will be only used for GOSS
-    CHECK(config_->boosting==std::string("goss") || config_->data_sample_strategy==std::string("goss"));
+    // and will be only used for GOSS (No? But copying gradients will not have effect when bagging)
+    // CHECK(config_->boosting==std::string("goss") || config_->data_sample_strategy==std::string("goss"));
     int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
     #pragma omp parallel for schedule(static)
     for (int64_t i = 0; i < total_size; ++i) {

From 80c4f70b4b7855e8dec91b8a30ef1a573c775d06 Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Sun, 5 Dec 2021 09:13:45 +0000
Subject: [PATCH 10/84] rename DataSampleStrategy to avoid confusion

---
 include/LightGBM/sample_strategy.h |  4 ++--
 src/boosting/bagging.hpp           |  4 ++--
 src/boosting/gbdt.cpp              | 12 ++++++------
 src/boosting/goss.hpp              |  4 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/LightGBM/sample_strategy.h b/include/LightGBM/sample_strategy.h
index e86f5e6aa6f0..96768bbae99f 100644
--- a/include/LightGBM/sample_strategy.h
+++ b/include/LightGBM/sample_strategy.h
@@ -17,8 +17,8 @@ class SampleStrategy {
   virtual ~SampleStrategy() {};
   static SampleStrategy* CreateSampleStrategy(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration);
   virtual void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) = 0;
-  virtual void Reset() = 0;
-  virtual void ResetConfig(const Config* config, bool is_change_dataset, 
+  virtual void ResetGOSS() = 0;
+  virtual void ResetBaggingConfig(const Config* config, bool is_change_dataset, 
           std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& gradients, 
           std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) = 0;
   bool is_use_subset() {return is_use_subset_;}
diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index fb7e230cae16..577ffa7e14a1 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -51,9 +51,9 @@ class BAGGING : public SampleStrategy {
     }
   }
 
-  void Reset() override {} 
+  void ResetGOSS() override {} 
 
-  void ResetConfig(const Config* config, bool is_change_dataset, 
+  void ResetBaggingConfig(const Config* config, bool is_change_dataset, 
           std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& gradients, 
           std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) override {
     // if need bagging, create buffer
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index f7392dd29dc2..559a2d7b1002 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -125,8 +125,8 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
 
   // if need bagging, create buffer
   data_sample_strategy_.reset(SampleStrategy::CreateSampleStrategy(config_.get(), train_data_, objective_function_, num_tree_per_iteration_));
-  data_sample_strategy_->ResetConfig(config_.get(), true, gradients_, hessians_);
-  data_sample_strategy_->Reset();
+  data_sample_strategy_->ResetBaggingConfig(config_.get(), true, gradients_, hessians_);
+  data_sample_strategy_->ResetGOSS();
 
   class_need_train_ = std::vector<bool>(num_tree_per_iteration_, true);
   if (objective_function_ != nullptr && objective_function_->SkipEmptyClass()) {
@@ -668,11 +668,11 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
     feature_infos_ = train_data_->feature_infos();
 
     tree_learner_->ResetTrainingData(train_data, is_constant_hessian_);
-    data_sample_strategy_->ResetConfig(config_.get(), true, gradients_, hessians_);
+    data_sample_strategy_->ResetBaggingConfig(config_.get(), true, gradients_, hessians_);
   } else {
     tree_learner_->ResetIsConstantHessian(is_constant_hessian_);
   }
-  data_sample_strategy_->Reset();
+  data_sample_strategy_->ResetGOSS();
 }
 
 void GBDT::ResetConfig(const Config* config) {
@@ -692,7 +692,7 @@ void GBDT::ResetConfig(const Config* config) {
     tree_learner_->ResetConfig(new_config.get());
   }
   if (train_data_ != nullptr) {
-    data_sample_strategy_->ResetConfig(new_config.get(), false, gradients_, hessians_);
+    data_sample_strategy_->ResetBaggingConfig(new_config.get(), false, gradients_, hessians_);
   }
   if (config_.get() != nullptr && config_->forcedsplits_filename != new_config->forcedsplits_filename) {
     // load forced_splits file
@@ -710,7 +710,7 @@ void GBDT::ResetConfig(const Config* config) {
     }
   }
   config_.reset(new_config.release());
-  data_sample_strategy_->Reset();
+  data_sample_strategy_->ResetGOSS();
 }
 
 }  // namespace LightGBM
diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index 605b660bd8c9..79c15fd278c1 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -46,7 +46,7 @@ class GOSS : public SampleStrategy {
     }
   }
 
-  void Reset() override {
+  void ResetGOSS() override {
     CHECK_LE(config_->top_rate + config_->other_rate, 1.0f);
     CHECK(config_->top_rate > 0.0f && config_->other_rate > 0.0f);
     if (config_->bagging_freq > 0 && config_->bagging_fraction != 1.0f) {
@@ -73,7 +73,7 @@ class GOSS : public SampleStrategy {
     bag_data_cnt_ = num_data_; 
   }
 
-  void ResetConfig(const Config* config, bool is_change_dataset, 
+  void ResetBaggingConfig(const Config* config, bool is_change_dataset, 
           std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& gradients, 
           std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) override {}
 

From 8103d81574b6c6ca905c925d8abfd41b0770028b Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Sun, 5 Dec 2021 09:33:04 +0000
Subject: [PATCH 11/84] remove and add some comments, following convention

---
 include/LightGBM/config.h          |  5 +++++
 include/LightGBM/sample_strategy.h | 15 ++++++++++++---
 src/boosting/gbdt.cpp              |  2 --
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 7ba9b47f7298..238f14126dca 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -149,6 +149,11 @@ struct Config {
   // descl2 = **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations
   std::string boosting = "gbdt";
 
+  // [doc-only]
+  // type = enum
+  // options = bagging, goss
+  // desc = ``bagging``, Randomly Bagging Sampling
+  // desc = ``goss``, Gradient-based One-Side Sampling
   std::string data_sample_strategy = "bagging";
 
   // alias = train, train_data, train_data_file, data_filename
diff --git a/include/LightGBM/sample_strategy.h b/include/LightGBM/sample_strategy.h
index 96768bbae99f..bcb499e42cf2 100644
--- a/include/LightGBM/sample_strategy.h
+++ b/include/LightGBM/sample_strategy.h
@@ -13,16 +13,25 @@ namespace LightGBM {
 
 class SampleStrategy {
  public:
+
   SampleStrategy() : balanced_bagging_(false), bagging_runner_(0, bagging_rand_block_) {};
+ 
   virtual ~SampleStrategy() {};
+ 
   static SampleStrategy* CreateSampleStrategy(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration);
+ 
   virtual void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) = 0;
+ 
   virtual void ResetGOSS() = 0;
+ 
   virtual void ResetBaggingConfig(const Config* config, bool is_change_dataset, 
           std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& gradients, 
           std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) = 0;
-  bool is_use_subset() {return is_use_subset_;}
-  data_size_t bag_data_cnt() {return bag_data_cnt_;}
+ 
+  bool is_use_subset() const { return is_use_subset_; }
+ 
+  data_size_t bag_data_cnt() const { return bag_data_cnt_; }
+ 
   std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() {return bag_data_indices_;}
 
  protected:
@@ -42,4 +51,4 @@ class SampleStrategy {
 };
 
 } // namespace LightGBM
-#endif // LIGHTGBM_SAMPLE_STRATEGY_H_
\ No newline at end of file
+#endif // LIGHTGBM_SAMPLE_STRATEGY_H_
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 559a2d7b1002..3aa141c0b320 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -301,8 +301,6 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
   } else if (gradients != nullptr) {
     // use customized objective function
     CHECK(hessians != nullptr && objective_function_ == nullptr);
-    // and will be only used for GOSS (No? But copying gradients will not have effect when bagging)
-    // CHECK(config_->boosting==std::string("goss") || config_->data_sample_strategy==std::string("goss"));
     int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
     #pragma omp parallel for schedule(static)
     for (int64_t i = 0; i < total_size; ++i) {

From 94a17eec9ac7b847afe6b972ffa93fa2786da28a Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Sun, 5 Dec 2021 09:48:51 +0000
Subject: [PATCH 12/84] =?UTF-8?q?fix=20bug=20about=20GBDT::ResetConfig=20(?=
 =?UTF-8?q?ObjectiveFunction=20inconsistency=20bet=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/LightGBM/sample_strategy.h | 8 ++++++++
 src/boosting/gbdt.cpp              | 2 ++
 src/boosting/goss.hpp              | 8 +++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/include/LightGBM/sample_strategy.h b/include/LightGBM/sample_strategy.h
index bcb499e42cf2..1ea52b999206 100644
--- a/include/LightGBM/sample_strategy.h
+++ b/include/LightGBM/sample_strategy.h
@@ -34,6 +34,14 @@ class SampleStrategy {
  
   std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() {return bag_data_indices_;}
 
+  void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) {
+    objective_function_ = objective_function;
+  }
+
+  void UpdateTrainingData(const Dataset* train_data) {
+    train_data_ = train_data;
+  }
+
  protected:
   const Config* config_;
   const Dataset* train_data_;
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 3aa141c0b320..7e3f509754ab 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -622,6 +622,7 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
   }
 
   objective_function_ = objective_function;
+  data_sample_strategy_->UpdateObjectiveFunction(objective_function);
   if (objective_function_ != nullptr) {
     CHECK_EQ(num_tree_per_iteration_, objective_function_->NumModelPerIteration());
     if (objective_function_->IsRenewTreeOutput() && !config_->monotone_constraints.empty()) {
@@ -639,6 +640,7 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
 
   if (train_data != train_data_) {
     train_data_ = train_data;
+    data_sample_strategy_->UpdateTrainingData(train_data);
     // not same training data, need reset score and others
     // create score tracker
     train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index 79c15fd278c1..28bc06267aa0 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -75,7 +75,13 @@ class GOSS : public SampleStrategy {
 
   void ResetBaggingConfig(const Config* config, bool is_change_dataset, 
           std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& gradients, 
-          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) override {}
+          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) override {
+    // Cannot use bagging in GOSS
+    bag_data_cnt_ = num_data_;
+    bag_data_indices_.clear();
+    bagging_runner_.ReSize(0);
+    is_use_subset_ = false;   
+  } 
 
  protected:
   data_size_t Helper(data_size_t start, data_size_t cnt, data_size_t* buffer, score_t* gradients, score_t* hessians) {

From f000f0a500e84e7d493952360858192ffcdfa840 Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Tue, 7 Dec 2021 07:04:39 +0000
Subject: [PATCH 13/84] add std::ignore to avoid compiler warnings (and
 potential failures)

---
 src/boosting/bagging.hpp | 3 +++
 src/boosting/goss.hpp    | 7 ++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index 577ffa7e14a1..ef32486ab4c6 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -49,6 +49,9 @@ class BAGGING : public SampleStrategy {
                                       bag_data_cnt_);
       }
     }
+    // avoid warnings
+    std::ignore = gradients;
+    std::ignore = hessians;
   }
 
   void ResetGOSS() override {} 
diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index 28bc06267aa0..990b4fec8fc6 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -80,7 +80,12 @@ class GOSS : public SampleStrategy {
     bag_data_cnt_ = num_data_;
     bag_data_indices_.clear();
     bagging_runner_.ReSize(0);
-    is_use_subset_ = false;   
+    is_use_subset_ = false;
+    // avoid warnings
+    std::ignore = config;
+    std::ignore = is_change_dataset;
+    std::ignore = gradients;
+    std::ignore = hessians;
   } 
 
  protected:

From 0ca5cb1400112eece6d8f99983415f99491637b9 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 8 Dec 2021 04:14:07 +0000
Subject: [PATCH 14/84] update Makevars and vcxproj

---
 R-package/src/Makevars.in        | 1 +
 R-package/src/Makevars.win.in    | 1 +
 windows/LightGBM.vcxproj         | 2 ++
 windows/LightGBM.vcxproj.filters | 6 ++++++
 4 files changed, 10 insertions(+)

diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in
index 2490ba0757df..eca4ccc73e0a 100644
--- a/R-package/src/Makevars.in
+++ b/R-package/src/Makevars.in
@@ -26,6 +26,7 @@ OBJECTS = \
     boosting/gbdt_model_text.o \
     boosting/gbdt_prediction.o \
     boosting/prediction_early_stop.o \
+    boosting/sample_strategy.o \
     io/bin.o \
     io/config.o \
     io/config_auto.o \
diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in
index 0fb2de926905..bbefe3c4fe15 100644
--- a/R-package/src/Makevars.win.in
+++ b/R-package/src/Makevars.win.in
@@ -27,6 +27,7 @@ OBJECTS = \
     boosting/gbdt_model_text.o \
     boosting/gbdt_prediction.o \
     boosting/prediction_early_stop.o \
+    boosting/sample_strategy.o \
     io/bin.o \
     io/config.o \
     io/config_auto.o \
diff --git a/windows/LightGBM.vcxproj b/windows/LightGBM.vcxproj
index 59b589a40d51..f309d4fab820 100644
--- a/windows/LightGBM.vcxproj
+++ b/windows/LightGBM.vcxproj
@@ -253,6 +253,7 @@
     <ClInclude Include="..\include\LightGBM\network.h" />
     <ClInclude Include="..\include\LightGBM\objective_function.h" />
     <ClInclude Include="..\include\LightGBM\prediction_early_stop.h" />
+    <ClInclude Include="..\include\LightGBM\sample_strategy.h" />
     <ClInclude Include="..\include\LightGBM\tree.h" />
     <ClInclude Include="..\include\LightGBM\tree_learner.h" />
     <ClInclude Include="..\include\LightGBM\utils\yamc\alternate_shared_mutex.hpp" />
@@ -311,6 +312,7 @@
     <ClCompile Include="..\src\boosting\gbdt_model_text.cpp" />
     <ClCompile Include="..\src\boosting\gbdt_prediction.cpp" />
     <ClCompile Include="..\src\boosting\prediction_early_stop.cpp" />
+    <ClCompile Include="..\src\boosting\sample_strategy.cpp" />
     <ClCompile Include="..\src\c_api.cpp" />
     <ClCompile Include="..\src\io\bin.cpp" />
     <ClCompile Include="..\src\io\config.cpp" />
diff --git a/windows/LightGBM.vcxproj.filters b/windows/LightGBM.vcxproj.filters
index 0f48c7564580..56b4e29287d5 100644
--- a/windows/LightGBM.vcxproj.filters
+++ b/windows/LightGBM.vcxproj.filters
@@ -129,6 +129,9 @@
     <ClInclude Include="..\include\LightGBM\prediction_early_stop.h">
       <Filter>include\LightGBM</Filter>
     </ClInclude>
+    <ClInclude Include="..\include\LightGBM\sample_strategy.h">
+      <Filter>include\LightGBM</Filter>
+    </ClInclude>
     <ClInclude Include="..\include\LightGBM\tree.h">
       <Filter>include\LightGBM</Filter>
     </ClInclude>
@@ -311,6 +314,9 @@
     <ClCompile Include="..\src\boosting\gbdt_model_text.cpp">
       <Filter>src\boosting</Filter>
     </ClCompile>
+    <ClCompile Include="..\src\boosting\sample_strategy.cpp">
+      <Filter>src\boosting</Filter>
+    </ClCompile>
     <ClCompile Include="..\src\io\file_io.cpp">
       <Filter>src\io</Filter>
     </ClCompile>

From 2a58353354a06c80c14a3522bfdba8fcda810db8 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 8 Dec 2021 06:39:41 +0000
Subject: [PATCH 15/84] handle constant hessian

move resize of gradient vectors out of sample strategy
---
 include/LightGBM/sample_strategy.h | 15 +++++----
 src/boosting/bagging.hpp           | 19 +++++++-----
 src/boosting/gbdt.cpp              | 49 ++++++++++++++++++++----------
 src/boosting/goss.hpp              | 24 +++++++++------
 src/boosting/sample_strategy.cpp   |  2 +-
 5 files changed, 70 insertions(+), 39 deletions(-)

diff --git a/include/LightGBM/sample_strategy.h b/include/LightGBM/sample_strategy.h
index 1ea52b999206..7dba6c7b69e9 100644
--- a/include/LightGBM/sample_strategy.h
+++ b/include/LightGBM/sample_strategy.h
@@ -13,8 +13,7 @@ namespace LightGBM {
 
 class SampleStrategy {
  public:
-
-  SampleStrategy() : balanced_bagging_(false), bagging_runner_(0, bagging_rand_block_) {};
+  SampleStrategy() : balanced_bagging_(false), bagging_runner_(0, bagging_rand_block_), need_resize_gradients_(false) {};
  
   virtual ~SampleStrategy() {};
  
@@ -23,10 +22,8 @@ class SampleStrategy {
   virtual void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) = 0;
  
   virtual void ResetGOSS() = 0;
- 
-  virtual void ResetBaggingConfig(const Config* config, bool is_change_dataset, 
-          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& gradients, 
-          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) = 0;
+
+  virtual void ResetBaggingConfig(const Config* config, bool is_change_dataset) = 0;
  
   bool is_use_subset() const { return is_use_subset_; }
  
@@ -42,6 +39,10 @@ class SampleStrategy {
     train_data_ = train_data;
   }
 
+  virtual bool IsHessianChange() const = 0;
+
+  bool NeedResizeGradients() const { return need_resize_gradients_; }
+
  protected:
   const Config* config_;
   const Dataset* train_data_;
@@ -56,6 +57,8 @@ class SampleStrategy {
   const int bagging_rand_block_ = 1024;
   std::vector<Random> bagging_rands_;
   ParallelPartitionRunner<data_size_t, false> bagging_runner_;
+  /*! \brief whether need to resize the gradient vectors */
+  bool need_resize_gradients_;
 };
 
 } // namespace LightGBM
diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index ef32486ab4c6..a9ce2f19d11b 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -13,6 +13,7 @@ class BAGGING : public SampleStrategy {
     objective_function_ = objective_function;
     num_tree_per_iteration_ = num_tree_per_iteration;
   }
+
   ~BAGGING() {}
 
   void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
@@ -54,11 +55,10 @@ class BAGGING : public SampleStrategy {
     std::ignore = hessians;
   }
 
-  void ResetGOSS() override {} 
+  void ResetGOSS() override {}
 
-  void ResetBaggingConfig(const Config* config, bool is_change_dataset, 
-          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& gradients, 
-          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) override {
+  void ResetBaggingConfig(const Config* config, bool is_change_dataset) override {
+    need_resize_gradients_ = false;
     // if need bagging, create buffer
     data_size_t num_pos_data = 0;
     if (objective_function_ != nullptr) {
@@ -105,9 +105,8 @@ class BAGGING : public SampleStrategy {
 
       if (is_use_subset_ && bag_data_cnt_ < num_data_) {
         if (objective_function_ == nullptr) {
-          size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
-          gradients.resize(total_size);
-          hessians.resize(total_size); 
+          // resize gradient vectors to copy the customized gradients for using subset data
+          need_resize_gradients_ = true;
         }
       }
     } else {
@@ -118,6 +117,11 @@ class BAGGING : public SampleStrategy {
     }
   }
 
+  bool IsHessianChange() const {
+    return false;
+  }
+
+ private:
   data_size_t BaggingHelper(data_size_t start, data_size_t cnt, data_size_t* buffer) {
     if (cnt <= 0) {
       return 0;
@@ -164,6 +168,7 @@ class BAGGING : public SampleStrategy {
     return cur_left_cnt;
   }
 
+  /*! \brief whether need restart bagging in continued training */
   bool need_re_bagging_;
 };
 
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 7e3f509754ab..8a0bca95a8dd 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -39,6 +39,7 @@ GBDT::GBDT()
   average_output_ = false;
   tree_learner_ = nullptr;
   linear_tree_ = false;
+  data_sample_strategy_.reset(nullptr);
 }
 
 GBDT::~GBDT() {
@@ -85,7 +86,8 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
     }
   }
 
-  is_constant_hessian_ = GetIsConstHessian(objective_function);
+  data_sample_strategy_.reset(SampleStrategy::CreateSampleStrategy(config_.get(), train_data_, objective_function_, num_tree_per_iteration_));
+  is_constant_hessian_ = GetIsConstHessian(objective_function) && !data_sample_strategy_->IsHessianChange();
 
   tree_learner_ = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type,
                                                                               config_.get()));
@@ -105,14 +107,10 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
 
   num_data_ = train_data_->num_data();
   // create buffer for gradients and Hessians
-  size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
   if (objective_function_ != nullptr) {
+    const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
     gradients_.resize(total_size);
     hessians_.resize(total_size);
-  } else {
-    // use customized objective function, only for GOSS
-    gradients_.resize(total_size, 0.0f);
-    hessians_.resize(total_size, 0.0f);
   }
   // get max feature index
   max_feature_idx_ = train_data_->num_total_features() - 1;
@@ -124,9 +122,14 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
   monotone_constraints_ = config->monotone_constraints;
 
   // if need bagging, create buffer
-  data_sample_strategy_.reset(SampleStrategy::CreateSampleStrategy(config_.get(), train_data_, objective_function_, num_tree_per_iteration_));
-  data_sample_strategy_->ResetBaggingConfig(config_.get(), true, gradients_, hessians_);
+  data_sample_strategy_->ResetBaggingConfig(config_.get(), true);
   data_sample_strategy_->ResetGOSS();
+  if (data_sample_strategy_->NeedResizeGradients()) {
+    // resize gradient vectors to copy the customized gradients for goss or bagging with subset
+    const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+    gradients_.resize(total_size, 0.0f);
+    hessians_.resize(total_size, 0.0f);
+  }
 
   class_need_train_ = std::vector<bool>(num_tree_per_iteration_, true);
   if (objective_function_ != nullptr && objective_function_->SkipEmptyClass()) {
@@ -301,11 +304,13 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
   } else if (gradients != nullptr) {
     // use customized objective function
     CHECK(hessians != nullptr && objective_function_ == nullptr);
-    int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
-    #pragma omp parallel for schedule(static)
-    for (int64_t i = 0; i < total_size; ++i) {
-      gradients_[i] = gradients[i];
-      hessians_[i] = hessians[i];
+    if (config_->boosting == std::string("goss") || config_->data_sample_strategy == std::string("goss")) {
+      int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
+      #pragma omp parallel for schedule(static)
+      for (int64_t i = 0; i < total_size; ++i) {
+        gradients_[i] = gradients[i];
+        hessians_[i] = hessians[i];
+      }
     }
   }
   // bagging logic
@@ -629,7 +634,7 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
       Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName());
     }
   }
-  is_constant_hessian_ = GetIsConstHessian(objective_function);
+  is_constant_hessian_ = GetIsConstHessian(objective_function) && !data_sample_strategy_->IsHessianChange();
 
   // push training metrics
   training_metrics_.clear();
@@ -668,7 +673,13 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
     feature_infos_ = train_data_->feature_infos();
 
     tree_learner_->ResetTrainingData(train_data, is_constant_hessian_);
-    data_sample_strategy_->ResetBaggingConfig(config_.get(), true, gradients_, hessians_);
+    data_sample_strategy_->ResetBaggingConfig(config_.get(), true);
+    if (data_sample_strategy_->NeedResizeGradients()) {
+      // resize gradient vectors to copy the customized gradients for goss or bagging with subset
+      const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+      gradients_.resize(total_size, 0.0f);
+      hessians_.resize(total_size, 0.0f);
+    }
   } else {
     tree_learner_->ResetIsConstantHessian(is_constant_hessian_);
   }
@@ -692,7 +703,13 @@ void GBDT::ResetConfig(const Config* config) {
     tree_learner_->ResetConfig(new_config.get());
   }
   if (train_data_ != nullptr) {
-    data_sample_strategy_->ResetBaggingConfig(new_config.get(), false, gradients_, hessians_);
+    data_sample_strategy_->ResetBaggingConfig(new_config.get(), false);
+    if (data_sample_strategy_->NeedResizeGradients()) {
+      // resize gradient vectors to copy the customized gradients for goss or bagging with subset
+      const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+      gradients_.resize(total_size, 0.0f);
+      hessians_.resize(total_size, 0.0f);
+    }
   }
   if (config_.get() != nullptr && config_->forcedsplits_filename != new_config->forcedsplits_filename) {
     // load forced_splits file
diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index 990b4fec8fc6..d75acbb45c7e 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -15,7 +15,7 @@ class GOSS : public SampleStrategy {
     num_tree_per_iteration_ = num_tree_per_iteration;
     num_data_ = train_data->num_data();
   }
-  
+
   ~GOSS() {
   }
 
@@ -73,22 +73,27 @@ class GOSS : public SampleStrategy {
     bag_data_cnt_ = num_data_; 
   }
 
-  void ResetBaggingConfig(const Config* config, bool is_change_dataset, 
-          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& gradients, 
-          std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>& hessians) override {
+  void ResetBaggingConfig(const Config* config, bool is_change_dataset) override {
     // Cannot use bagging in GOSS
     bag_data_cnt_ = num_data_;
     bag_data_indices_.clear();
     bagging_runner_.ReSize(0);
     is_use_subset_ = false;
+    need_resize_gradients_ = false;
+    if (objective_function_ == nullptr) {
+      // resize gradient vectors to copy the customized gradients for goss
+      need_resize_gradients_ = true;
+    }
     // avoid warnings
     std::ignore = config;
     std::ignore = is_change_dataset;
-    std::ignore = gradients;
-    std::ignore = hessians;
-  } 
+  }
 
- protected:
+  bool IsHessianChange() const {
+    return true;
+  }
+
+ private:
   data_size_t Helper(data_size_t start, data_size_t cnt, data_size_t* buffer, score_t* gradients, score_t* hessians) {
     if (cnt <= 0) {
       return 0;
@@ -143,4 +148,5 @@ class GOSS : public SampleStrategy {
 };
 
 } // namespace LightGBM
-#endif // LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
\ No newline at end of file
+
+#endif // LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
index c3d6ea8378c9..64aa50540ce1 100644
--- a/src/boosting/sample_strategy.cpp
+++ b/src/boosting/sample_strategy.cpp
@@ -20,4 +20,4 @@ SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const
   }
 }
 
-} // namespace LightGBM
\ No newline at end of file
+} // namespace LightGBM

From 8775c055e5ed31d56d1f43f27e9d182f65b8a4c7 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 8 Dec 2021 06:57:31 +0000
Subject: [PATCH 16/84] mark override for IsHessianChange

---
 src/boosting/bagging.hpp | 2 +-
 src/boosting/goss.hpp    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index a9ce2f19d11b..1cdeb31c55db 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -117,7 +117,7 @@ class BAGGING : public SampleStrategy {
     }
   }
 
-  bool IsHessianChange() const {
+  bool IsHessianChange() const override {
     return false;
   }
 
diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index d75acbb45c7e..77bcbfb2591d 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -89,7 +89,7 @@ class GOSS : public SampleStrategy {
     std::ignore = is_change_dataset;
   }
 
-  bool IsHessianChange() const {
+  bool IsHessianChange() const override {
     return true;
   }
 

From 1e888efac5e5f88c49ccec8edc3dbfddda4fb8bc Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 8 Dec 2021 07:10:58 +0000
Subject: [PATCH 17/84] fix lint errors

---
 include/LightGBM/sample_strategy.h | 31 +++++++++++++++++++-----------
 src/boosting/bagging.hpp           | 11 ++++++++---
 src/boosting/goss.hpp              | 17 +++++++++++-----
 src/boosting/rf.hpp                |  2 +-
 src/boosting/sample_strategy.cpp   |  7 ++++++-
 5 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/include/LightGBM/sample_strategy.h b/include/LightGBM/sample_strategy.h
index 7dba6c7b69e9..3bfb37b2c78f 100644
--- a/include/LightGBM/sample_strategy.h
+++ b/include/LightGBM/sample_strategy.h
@@ -1,3 +1,8 @@
+/*!
+ * Copyright (c) 2021 Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See LICENSE file in the project root for license information.
+ */
+
 #ifndef LIGHTGBM_SAMPLE_STRATEGY_H_
 #define LIGHTGBM_SAMPLE_STRATEGY_H_
 
@@ -9,26 +14,29 @@
 #include <LightGBM/tree_learner.h>
 #include <LightGBM/objective_function.h>
 
+#include <memory>
+#include <vector>
+
 namespace LightGBM {
 
 class SampleStrategy {
  public:
-  SampleStrategy() : balanced_bagging_(false), bagging_runner_(0, bagging_rand_block_), need_resize_gradients_(false) {};
- 
-  virtual ~SampleStrategy() {};
- 
+  SampleStrategy() : balanced_bagging_(false), bagging_runner_(0, bagging_rand_block_), need_resize_gradients_(false) {}
+
+  virtual ~SampleStrategy() {}
+
   static SampleStrategy* CreateSampleStrategy(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration);
- 
+
   virtual void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) = 0;
- 
+
   virtual void ResetGOSS() = 0;
 
   virtual void ResetBaggingConfig(const Config* config, bool is_change_dataset) = 0;
- 
+
   bool is_use_subset() const { return is_use_subset_; }
- 
+
   data_size_t bag_data_cnt() const { return bag_data_cnt_; }
- 
+
   std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() {return bag_data_indices_;}
 
   void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) {
@@ -61,5 +69,6 @@ class SampleStrategy {
   bool need_resize_gradients_;
 };
 
-} // namespace LightGBM
-#endif // LIGHTGBM_SAMPLE_STRATEGY_H_
+}  // namespace LightGBM
+
+#endif  // LIGHTGBM_SAMPLE_STRATEGY_H_
diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index 1cdeb31c55db..f897c1af3e3a 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -1,3 +1,8 @@
+/*!
+ * Copyright (c) 2021 Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See LICENSE file in the project root for license information.
+ */
+
 #ifndef LIGHTGBM_SAMPLE_STRATEGY_BAGGING_HPP_
 #define LIGHTGBM_SAMPLE_STRATEGY_BAGGING_HPP_
 
@@ -5,7 +10,7 @@ namespace LightGBM {
 
 class BAGGING : public SampleStrategy {
  public:
-  BAGGING(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration) 
+  BAGGING(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration)
     : need_re_bagging_(false) {
     config_ = config;
     train_data_ = train_data;
@@ -172,6 +177,6 @@ class BAGGING : public SampleStrategy {
   bool need_re_bagging_;
 };
 
-} // namespace LightGBM
+}  // namespace LightGBM
 
-#endif  // LIGHTGBM_SAMPLE_STRATEGY_BAGGING_HPP_
\ No newline at end of file
+#endif  // LIGHTGBM_SAMPLE_STRATEGY_BAGGING_HPP_
diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index 77bcbfb2591d..53decd7c84a5 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -1,9 +1,16 @@
+/*!
+ * Copyright (c) 2021 Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See LICENSE file in the project root for license information.
+ */
+
 #ifndef LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
 #define LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
 
 #include <LightGBM/utils/array_args.h>
 #include <LightGBM/sample_strategy.h>
 
+#include <algorithm>
+#include <vector>
 
 namespace LightGBM {
 
@@ -25,7 +32,7 @@ class GOSS : public SampleStrategy {
     if (iter < static_cast<int>(1.0f / config_->learning_rate)) { return; }
     auto left_cnt = bagging_runner_.Run<true>(
         num_data_,
-        [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left, 
+        [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
             data_size_t*) {
           data_size_t cur_left_count = 0;
           cur_left_count = Helper(cur_start, cur_cnt, left, gradients, hessians);
@@ -70,7 +77,7 @@ class GOSS : public SampleStrategy {
       is_use_subset_ = true;
     }
     // flag to not bagging first
-    bag_data_cnt_ = num_data_; 
+    bag_data_cnt_ = num_data_;
   }
 
   void ResetBaggingConfig(const Config* config, bool is_change_dataset) override {
@@ -144,9 +151,9 @@ class GOSS : public SampleStrategy {
     }
     return cur_left_cnt;
   }
-  
+
 };
 
-} // namespace LightGBM
+}  // namespace LightGBM
 
-#endif // LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
+#endif  // LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
diff --git a/src/boosting/rf.hpp b/src/boosting/rf.hpp
index 20b80c025c18..96638697f1ca 100644
--- a/src/boosting/rf.hpp
+++ b/src/boosting/rf.hpp
@@ -106,7 +106,7 @@ class RF : public GBDT {
     const bool is_use_subset = data_sample_strategy_->is_use_subset();
     const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
     const std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices = data_sample_strategy_->bag_data_indices();
-    
+
     CHECK_EQ(gradients, nullptr);
     CHECK_EQ(hessians, nullptr);
 
diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
index 64aa50540ce1..8e005dbc9c22 100644
--- a/src/boosting/sample_strategy.cpp
+++ b/src/boosting/sample_strategy.cpp
@@ -1,3 +1,8 @@
+/*!
+ * Copyright (c) 2021 Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See LICENSE file in the project root for license information.
+ */
+
 #include <LightGBM/sample_strategy.h>
 #include "goss.hpp"
 #include "bagging.hpp"
@@ -20,4 +25,4 @@ SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const
   }
 }
 
-} // namespace LightGBM
+}  // namespace LightGBM

From 22ad1c873b165584b1717319928677a7c4352305 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 8 Dec 2021 07:15:38 +0000
Subject: [PATCH 18/84] rerun parameter_generator.py

---
 docs/Parameters.rst    | 6 ++++++
 src/io/config_auto.cpp | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 5faa9af9fd31..da796bb0c0a0 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -139,6 +139,12 @@ Core Parameters
 
       -  **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations
 
+-  ``data_sample_strategy`` :raw-html:`<a id="data_sample_strategy" title="Permalink to this parameter" href="#data_sample_strategy">&#x1F517;&#xFE0E;</a>`, default = ``bagging``, type = enum, options: ``bagging``, ``goss``
+
+   -  ``bagging``, Randomly Bagging Sampling
+
+   -  ``goss``, Gradient-based One-Side Sampling
+
 -  ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename``
 
    -  path of training data, LightGBM will train from this data
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 18225c55a2fc..ca8a00f2249a 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -185,6 +185,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
   "task",
   "objective",
   "boosting",
+  "data_sample_strategy",
   "data",
   "valid",
   "num_iterations",
@@ -312,7 +313,6 @@ const std::unordered_set<std::string>& Config::parameter_set() {
   "gpu_device_id",
   "gpu_use_dp",
   "num_gpu",
-  "data_sample_strategy"
   });
   return params;
 }

From e64ad6fe2b8ac8347fc18306ce0bdc873aed0b6f Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 8 Dec 2021 07:21:44 +0000
Subject: [PATCH 19/84] update config_auto.cpp

---
 src/io/config_auto.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 4c772d3a10e6..46097a4741e6 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -764,6 +764,7 @@ const std::string Config::DumpAliases() {
   str_buf << "\"task\": [\"task_type\"], ";
   str_buf << "\"objective\": [\"objective_type\", \"app\", \"application\", \"loss\"], ";
   str_buf << "\"boosting\": [\"boosting_type\", \"boost\"], ";
+  str_buf << "\"data_sample_strategy\": [], ";
   str_buf << "\"data\": [\"train\", \"train_data\", \"train_data_file\", \"data_filename\"], ";
   str_buf << "\"valid\": [\"test\", \"valid_data\", \"valid_data_file\", \"test_data\", \"test_data_file\", \"valid_filenames\"], ";
   str_buf << "\"num_iterations\": [\"num_iteration\", \"n_iter\", \"num_tree\", \"num_trees\", \"num_round\", \"num_rounds\", \"nrounds\", \"num_boost_round\", \"n_estimators\", \"max_iter\"], ";

From 8dec6306ab688aab04741b0f8b77ab3ddf102910 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 8 Dec 2021 07:22:45 +0000
Subject: [PATCH 20/84] delete redundant blank line

---
 src/boosting/goss.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index 53decd7c84a5..401a25ec2c6d 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -151,7 +151,6 @@ class GOSS : public SampleStrategy {
     }
     return cur_left_cnt;
   }
-
 };
 
 }  // namespace LightGBM

From aa63de8df8c22dbb608a7555c07bbbf48e8a01aa Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 8 Dec 2021 09:33:03 +0000
Subject: [PATCH 21/84] update num_data_ when train_data_ is updated

set gradients and hessians when using GOSS
---
 include/LightGBM/sample_strategy.h | 1 +
 src/boosting/gbdt.cpp              | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/include/LightGBM/sample_strategy.h b/include/LightGBM/sample_strategy.h
index 3bfb37b2c78f..816826464b00 100644
--- a/include/LightGBM/sample_strategy.h
+++ b/include/LightGBM/sample_strategy.h
@@ -45,6 +45,7 @@ class SampleStrategy {
 
   void UpdateTrainingData(const Dataset* train_data) {
     train_data_ = train_data;
+    num_data_ = train_data->num_data();
   }
 
   virtual bool IsHessianChange() const = 0;
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 7f428720e734..4b2bb5b35679 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -307,12 +307,15 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
     // use customized objective function
     CHECK(hessians != nullptr && objective_function_ == nullptr);
     if (config_->boosting == std::string("goss") || config_->data_sample_strategy == std::string("goss")) {
+      // need to copy customized gradients when using GOSS
       int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
       #pragma omp parallel for schedule(static)
       for (int64_t i = 0; i < total_size; ++i) {
         gradients_[i] = gradients[i];
         hessians_[i] = hessians[i];
       }
+      gradients = gradients_.data();
+      hessians = hessians_.data();
     }
   }
   // bagging logic

From 6405361e6b5631c794ee00203a4b068f899c71f1 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 8 Dec 2021 10:44:17 +0000
Subject: [PATCH 22/84] check bagging_freq is not zero

---
 include/LightGBM/sample_strategy.h | 2 +-
 src/boosting/bagging.hpp           | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/LightGBM/sample_strategy.h b/include/LightGBM/sample_strategy.h
index 816826464b00..e64c2e193725 100644
--- a/include/LightGBM/sample_strategy.h
+++ b/include/LightGBM/sample_strategy.h
@@ -37,7 +37,7 @@ class SampleStrategy {
 
   data_size_t bag_data_cnt() const { return bag_data_cnt_; }
 
-  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() {return bag_data_indices_;}
+  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices() { return bag_data_indices_; }
 
   void UpdateObjectiveFunction(const ObjectiveFunction* objective_function) {
     objective_function_ = objective_function;
diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index f897c1af3e3a..04661349f49d 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -24,6 +24,9 @@ class BAGGING : public SampleStrategy {
   void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
     Common::FunctionTimer fun_timer("GBDT::Bagging", global_timer);
     // if need bagging
+    if (bag_data_cnt_ < num_data_ && config_->bagging_freq == 0) {
+      Log::Fatal("error !!! bag_data_cnt_ = %d, num_data_ = %d, config_->bagging_freq = %d", bag_data_cnt_, num_data_, config_->bagging_freq);
+    }
     if ((bag_data_cnt_ < num_data_ && iter % config_->bagging_freq == 0) ||
         need_re_bagging_) {
       need_re_bagging_ = false;

From 4d6362a0b6fe3720a0ecfb5a7b69ab2e794f3f6e Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 8 Dec 2021 12:34:24 +0000
Subject: [PATCH 23/84] reset config_ value

merge ResetBaggingConfig and ResetGOSS
---
 include/LightGBM/sample_strategy.h |  4 +---
 src/boosting/bagging.hpp           |  5 ++---
 src/boosting/gbdt.cpp              |  9 +++------
 src/boosting/goss.hpp              | 26 +++++++++-----------------
 4 files changed, 15 insertions(+), 29 deletions(-)

diff --git a/include/LightGBM/sample_strategy.h b/include/LightGBM/sample_strategy.h
index e64c2e193725..d3dbff9be3f7 100644
--- a/include/LightGBM/sample_strategy.h
+++ b/include/LightGBM/sample_strategy.h
@@ -29,9 +29,7 @@ class SampleStrategy {
 
   virtual void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) = 0;
 
-  virtual void ResetGOSS() = 0;
-
-  virtual void ResetBaggingConfig(const Config* config, bool is_change_dataset) = 0;
+  virtual void ResetSampleConfig(const Config* config, bool is_change_dataset) = 0;
 
   bool is_use_subset() const { return is_use_subset_; }
 
diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index 04661349f49d..bb13442661e5 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -63,9 +63,7 @@ class BAGGING : public SampleStrategy {
     std::ignore = hessians;
   }
 
-  void ResetGOSS() override {}
-
-  void ResetBaggingConfig(const Config* config, bool is_change_dataset) override {
+  void ResetSampleConfig(const Config* config, bool is_change_dataset) override {
     need_resize_gradients_ = false;
     // if need bagging, create buffer
     data_size_t num_pos_data = 0;
@@ -80,6 +78,7 @@ class BAGGING : public SampleStrategy {
         && config_->pos_bagging_fraction == config->pos_bagging_fraction && config_->neg_bagging_fraction == config->neg_bagging_fraction) {
         return;
       }
+      config_ = config;
       if (balance_bagging_cond) {
         balanced_bagging_ = true;
         bag_data_cnt_ = static_cast<data_size_t>(num_pos_data * config->pos_bagging_fraction)
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 4b2bb5b35679..44fcbd338106 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -124,8 +124,7 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
   parser_config_str_ = train_data_->parser_config_str();
 
   // if need bagging, create buffer
-  data_sample_strategy_->ResetBaggingConfig(config_.get(), true);
-  data_sample_strategy_->ResetGOSS();
+  data_sample_strategy_->ResetSampleConfig(config_.get(), true);
   if (data_sample_strategy_->NeedResizeGradients()) {
     // resize gradient vectors to copy the customized gradients for goss or bagging with subset
     const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
@@ -679,7 +678,7 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
     parser_config_str_ = train_data_->parser_config_str();
 
     tree_learner_->ResetTrainingData(train_data, is_constant_hessian_);
-    data_sample_strategy_->ResetBaggingConfig(config_.get(), true);
+    data_sample_strategy_->ResetSampleConfig(config_.get(), true);
     if (data_sample_strategy_->NeedResizeGradients()) {
       // resize gradient vectors to copy the customized gradients for goss or bagging with subset
       const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
@@ -689,7 +688,6 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
   } else {
     tree_learner_->ResetIsConstantHessian(is_constant_hessian_);
   }
-  data_sample_strategy_->ResetGOSS();
 }
 
 void GBDT::ResetConfig(const Config* config) {
@@ -709,7 +707,7 @@ void GBDT::ResetConfig(const Config* config) {
     tree_learner_->ResetConfig(new_config.get());
   }
   if (train_data_ != nullptr) {
-    data_sample_strategy_->ResetBaggingConfig(new_config.get(), false);
+    data_sample_strategy_->ResetSampleConfig(new_config.get(), false);
     if (data_sample_strategy_->NeedResizeGradients()) {
       // resize gradient vectors to copy the customized gradients for goss or bagging with subset
       const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
@@ -733,7 +731,6 @@ void GBDT::ResetConfig(const Config* config) {
     }
   }
   config_.reset(new_config.release());
-  data_sample_strategy_->ResetGOSS();
 }
 
 }  // namespace LightGBM
diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index 401a25ec2c6d..e048226ffb0e 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -53,7 +53,15 @@ class GOSS : public SampleStrategy {
     }
   }
 
-  void ResetGOSS() override {
+  void ResetSampleConfig(const Config* config, bool /*is_change_dataset*/) override {
+    // Cannot use bagging in GOSS
+    config_ = config;
+    need_resize_gradients_ = false;
+    if (objective_function_ == nullptr) {
+      // resize gradient vectors to copy the customized gradients for goss
+      need_resize_gradients_ = true;
+    }
+
     CHECK_LE(config_->top_rate + config_->other_rate, 1.0f);
     CHECK(config_->top_rate > 0.0f && config_->other_rate > 0.0f);
     if (config_->bagging_freq > 0 && config_->bagging_fraction != 1.0f) {
@@ -80,22 +88,6 @@ class GOSS : public SampleStrategy {
     bag_data_cnt_ = num_data_;
   }
 
-  void ResetBaggingConfig(const Config* config, bool is_change_dataset) override {
-    // Cannot use bagging in GOSS
-    bag_data_cnt_ = num_data_;
-    bag_data_indices_.clear();
-    bagging_runner_.ReSize(0);
-    is_use_subset_ = false;
-    need_resize_gradients_ = false;
-    if (objective_function_ == nullptr) {
-      // resize gradient vectors to copy the customized gradients for goss
-      need_resize_gradients_ = true;
-    }
-    // avoid warnings
-    std::ignore = config;
-    std::ignore = is_change_dataset;
-  }
-
   bool IsHessianChange() const override {
     return true;
   }

From 21ee487a64b83a67a71a543bdf44e4db88fd0074 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 8 Dec 2021 12:50:53 +0000
Subject: [PATCH 24/84] remove useless check

---
 src/boosting/bagging.hpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index bb13442661e5..7eb2ab881864 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -24,9 +24,6 @@ class BAGGING : public SampleStrategy {
   void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
     Common::FunctionTimer fun_timer("GBDT::Bagging", global_timer);
     // if need bagging
-    if (bag_data_cnt_ < num_data_ && config_->bagging_freq == 0) {
-      Log::Fatal("error !!! bag_data_cnt_ = %d, num_data_ = %d, config_->bagging_freq = %d", bag_data_cnt_, num_data_, config_->bagging_freq);
-    }
     if ((bag_data_cnt_ < num_data_ && iter % config_->bagging_freq == 0) ||
         need_re_bagging_) {
       need_re_bagging_ = false;

From 634fab48109b570cb40c08a515f04691ea889255 Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Fri, 10 Dec 2021 15:14:55 +0000
Subject: [PATCH 25/84] add tests in test_engine.py

---
 tests/python_package_test/test_engine.py | 133 +++++++++++++++++++++++
 1 file changed, 133 insertions(+)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index b75c8945669c..8b849568fc22 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3000,3 +3000,136 @@ def test_force_split_with_feature_fraction(tmp_path):
     for tree in tree_info:
         tree_structure = tree["tree_structure"]
         assert tree_structure['split_feature'] == 0
+
+
+def test_goss_boosting_and_strategy_equivalent():
+    X, y = load_boston(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+    lgb_train = lgb.Dataset(X_train, y_train)
+    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
+    params1 = {
+        'boosting': 'goss',
+        'metric': 'l2',
+        'verbose': -1
+    }
+    evals_result1 = {}
+    gbm = lgb.train(params1, lgb_train,
+                    num_boost_round=50,
+                    valid_sets=lgb_eval,
+                    verbose_eval=False,
+                    evals_result=evals_result1)
+    params2 = {
+        'data_sample_strategy': 'goss',
+        'metric': 'l2',
+        'verbose': -1
+    }
+    evals_result2 = {}
+    gbm = lgb.train(params2, lgb_train,
+                    num_boost_round=50,
+                    valid_sets=lgb_eval,
+                    verbose_eval=False,
+                    evals_result=evals_result2)
+    np.testing.assert_allclose(evals_result1['valid_0']['l2'], evals_result2['valid_0']['l2'])
+
+
+def test_sample_strategy_with_boosting():
+    X, y = load_boston(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+    lgb_train = lgb.Dataset(X_train, y_train)
+    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
+    
+    params = {
+        'boosting': 'dart',
+        'data_sample_strategy': 'goss',
+        'metric': 'l2',
+        'verbose': -1
+    }
+    evals_result = {}
+    gbm = lgb.train(params, lgb_train,
+                    num_boost_round=50,
+                    valid_sets=lgb_eval,
+                    verbose_eval=False,
+                    evals_result=evals_result)
+    ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 14
+    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
+    
+    params = {
+        'boosting': 'gbdt',
+        'data_sample_strategy': 'goss',
+        'metric': 'l2',
+        'verbose': -1
+    }
+    evals_result = {}
+    gbm = lgb.train(params, lgb_train,
+                    num_boost_round=50,
+                    valid_sets=lgb_eval,
+                    verbose_eval=False,
+                    evals_result=evals_result)
+    ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 12
+    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
+
+    params = {
+        'boosting': 'goss',
+        'data_sample_strategy': 'goss',
+        'metric': 'l2',
+        'verbose': -1
+    }
+    evals_result = {}
+    gbm = lgb.train(params, lgb_train,
+                    num_boost_round=50,
+                    valid_sets=lgb_eval,
+                    verbose_eval=False,
+                    evals_result=evals_result)
+    ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 12
+    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
+
+    params = {
+        'boosting': 'dart',
+        'data_sample_strategy': 'bagging',
+        'metric': 'l2',
+        'verbose': -1
+    }
+    evals_result = {}
+    gbm = lgb.train(params, lgb_train,
+                    num_boost_round=50,
+                    valid_sets=lgb_eval,
+                    verbose_eval=False,
+                    evals_result=evals_result)
+    ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 12
+    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
+    
+    params = {
+        'boosting': 'gbdt',
+        'data_sample_strategy': 'bagging',
+        'metric': 'l2',
+        'verbose': -1
+    }
+    evals_result = {}
+    gbm = lgb.train(params, lgb_train,
+                    num_boost_round=50,
+                    valid_sets=lgb_eval,
+                    verbose_eval=False,
+                    evals_result=evals_result)
+    ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 7
+    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
+
+    params = {
+        'boosting': 'goss',
+        'data_sample_strategy': 'bagging',
+        'metric': 'l2',
+        'verbose': -1
+    }
+    evals_result = {}
+    gbm = lgb.train(params, lgb_train,
+                    num_boost_round=50,
+                    valid_sets=lgb_eval,
+                    verbose_eval=False,
+                    evals_result=evals_result)
+    ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 12
+    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)

From a68fc252c35eccf7d5bd0924ff16cb7631c6caac Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Sat, 11 Dec 2021 09:44:22 +0000
Subject: [PATCH 26/84] remove whitespace in blank line

---
 tests/python_package_test/test_engine.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 8b849568fc22..22aab0507aa5 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3037,7 +3037,7 @@ def test_sample_strategy_with_boosting():
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
-    
+
     params = {
         'boosting': 'dart',
         'data_sample_strategy': 'goss',
@@ -3053,7 +3053,7 @@ def test_sample_strategy_with_boosting():
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 14
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
-    
+
     params = {
         'boosting': 'gbdt',
         'data_sample_strategy': 'goss',
@@ -3101,7 +3101,7 @@ def test_sample_strategy_with_boosting():
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 12
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
-    
+
     params = {
         'boosting': 'gbdt',
         'data_sample_strategy': 'bagging',

From ac387b3a66a0b5c2cfced620cd39f8e738e211b3 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Fri, 7 Jan 2022 07:50:58 +0000
Subject: [PATCH 27/84] remove arguments verbose_eval and evals_result

---
 tests/python_package_test/test_engine.py | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index e1777ad1d2bd..08c8cb985995 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3240,8 +3240,7 @@ def test_goss_boosting_and_strategy_equivalent():
     gbm = lgb.train(params1, lgb_train,
                     num_boost_round=50,
                     valid_sets=lgb_eval,
-                    verbose_eval=False,
-                    evals_result=evals_result1)
+                    callbacks=[lgb.record_evaluation(evals_result1)])
     params2 = {
         'data_sample_strategy': 'goss',
         'metric': 'l2',
@@ -3251,8 +3250,7 @@ def test_goss_boosting_and_strategy_equivalent():
     gbm = lgb.train(params2, lgb_train,
                     num_boost_round=50,
                     valid_sets=lgb_eval,
-                    verbose_eval=False,
-                    evals_result=evals_result2)
+                    callbacks=[lgb.record_evaluation(evals_result2)])
     np.testing.assert_allclose(evals_result1['valid_0']['l2'], evals_result2['valid_0']['l2'])
 
 
@@ -3272,8 +3270,7 @@ def test_sample_strategy_with_boosting():
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=50,
                     valid_sets=lgb_eval,
-                    verbose_eval=False,
-                    evals_result=evals_result)
+                    callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 14
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
@@ -3288,8 +3285,7 @@ def test_sample_strategy_with_boosting():
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=50,
                     valid_sets=lgb_eval,
-                    verbose_eval=False,
-                    evals_result=evals_result)
+                    callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 12
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
@@ -3304,8 +3300,7 @@ def test_sample_strategy_with_boosting():
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=50,
                     valid_sets=lgb_eval,
-                    verbose_eval=False,
-                    evals_result=evals_result)
+                    callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 12
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
@@ -3320,8 +3315,7 @@ def test_sample_strategy_with_boosting():
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=50,
                     valid_sets=lgb_eval,
-                    verbose_eval=False,
-                    evals_result=evals_result)
+                    callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 12
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
@@ -3336,8 +3330,7 @@ def test_sample_strategy_with_boosting():
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=50,
                     valid_sets=lgb_eval,
-                    verbose_eval=False,
-                    evals_result=evals_result)
+                    callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 7
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
@@ -3352,8 +3345,7 @@ def test_sample_strategy_with_boosting():
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=50,
                     valid_sets=lgb_eval,
-                    verbose_eval=False,
-                    evals_result=evals_result)
+                    callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
     assert ret < 12
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)

From 6e94059690804502f6b4c6320c8da0e9f8698730 Mon Sep 17 00:00:00 2001
From: GuangdaLiu <90019144+GuangdaLiu@users.noreply.github.com>
Date: Tue, 11 Jan 2022 19:21:50 +0800
Subject: [PATCH 28/84] Update tests/python_package_test/test_engine.py

reduce num_boost_round

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 08c8cb985995..240ce5433928 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3248,7 +3248,7 @@ def test_goss_boosting_and_strategy_equivalent():
     }
     evals_result2 = {}
     gbm = lgb.train(params2, lgb_train,
-                    num_boost_round=50,
+                    num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result2)])
     np.testing.assert_allclose(evals_result1['valid_0']['l2'], evals_result2['valid_0']['l2'])

From 0fe6dc8dd0d80df0701c8def2c497e89438b8f76 Mon Sep 17 00:00:00 2001
From: GuangdaLiu <90019144+GuangdaLiu@users.noreply.github.com>
Date: Tue, 11 Jan 2022 19:22:26 +0800
Subject: [PATCH 29/84] Update tests/python_package_test/test_engine.py

reduce num_boost_round

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 240ce5433928..0916135d9eb8 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3268,7 +3268,7 @@ def test_sample_strategy_with_boosting():
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
-                    num_boost_round=50,
+                    num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))

From ab39d21aac6252d0534ce643dce062203c9c469d Mon Sep 17 00:00:00 2001
From: GuangdaLiu <90019144+GuangdaLiu@users.noreply.github.com>
Date: Tue, 11 Jan 2022 19:22:38 +0800
Subject: [PATCH 30/84] Update tests/python_package_test/test_engine.py

reduce num_boost_round

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 0916135d9eb8..c55b94f79fbb 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3328,7 +3328,7 @@ def test_sample_strategy_with_boosting():
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
-                    num_boost_round=50,
+                    num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))

From 9978c3c3feae653831b951a1fb5c4f2c73c5ce32 Mon Sep 17 00:00:00 2001
From: GuangdaLiu <90019144+GuangdaLiu@users.noreply.github.com>
Date: Tue, 11 Jan 2022 19:22:46 +0800
Subject: [PATCH 31/84] Update tests/python_package_test/test_engine.py

reduce num_boost_round

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index c55b94f79fbb..468b149eeeb0 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3343,7 +3343,7 @@ def test_sample_strategy_with_boosting():
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
-                    num_boost_round=50,
+                    num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))

From 7ba17504edceb9f52c4d4d7dfa3175bf7d8e4439 Mon Sep 17 00:00:00 2001
From: GuangdaLiu <90019144+GuangdaLiu@users.noreply.github.com>
Date: Tue, 11 Jan 2022 19:22:54 +0800
Subject: [PATCH 32/84] Update tests/python_package_test/test_engine.py

reduce num_boost_round

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 468b149eeeb0..00a0a29e65e2 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3313,7 +3313,7 @@ def test_sample_strategy_with_boosting():
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
-                    num_boost_round=50,
+                    num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))

From ecaaabe8fd9d6650e02f53746a609f02b2f014a8 Mon Sep 17 00:00:00 2001
From: GuangdaLiu <90019144+GuangdaLiu@users.noreply.github.com>
Date: Tue, 11 Jan 2022 19:23:03 +0800
Subject: [PATCH 33/84] Update tests/python_package_test/test_engine.py

reduce num_boost_round

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 00a0a29e65e2..1f9d031e7559 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3298,7 +3298,7 @@ def test_sample_strategy_with_boosting():
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
-                    num_boost_round=50,
+                    num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))

From c1f1b91a713cfc7ba736e5228af50b4a6839914b Mon Sep 17 00:00:00 2001
From: GuangdaLiu <90019144+GuangdaLiu@users.noreply.github.com>
Date: Tue, 11 Jan 2022 19:24:39 +0800
Subject: [PATCH 34/84] Update src/boosting/sample_strategy.cpp

modify warning about setting goss as `boosting_type`

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 src/boosting/sample_strategy.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
index 8e005dbc9c22..0d8d581efb01 100644
--- a/src/boosting/sample_strategy.cpp
+++ b/src/boosting/sample_strategy.cpp
@@ -13,7 +13,7 @@ SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const
   bool use_goss_as_boosting = config->boosting == std::string("goss");
   bool use_goss_as_strategy = config->data_sample_strategy == std::string("goss");
   if (use_goss_as_boosting) {
-    Log::Warning("Setting goss as `boosting_type` is NOT suggested. Please set `data_sample_strategy = goss` in your config file.");
+    Log::Warning("Setting goss as `boosting_type` is not recommended. Please set `data_sample_strategy = goss` instead.");
     if (use_goss_as_strategy) {
       Log::Warning("Both `boosting_type` and `data_sample_strategy` are set as GOSS. Only one time of sampling will be conducted. Please check and modify your config file.");
     }

From 006de872210d046f4097b365e5507c365c9eb24b Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Tue, 11 Jan 2022 12:15:39 +0000
Subject: [PATCH 35/84] Update tests/python_package_test/test_engine.py

replace load_boston() with make_regression()

remove value checks of mean_squared_error in test_sample_strategy_with_boosting()
---
 tests/python_package_test/test_engine.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 1f9d031e7559..7a5356747194 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -12,7 +12,7 @@
 import psutil
 import pytest
 from scipy.sparse import csr_matrix, isspmatrix_csc, isspmatrix_csr
-from sklearn.datasets import load_svmlight_file, make_multilabel_classification
+from sklearn.datasets import load_svmlight_file, make_multilabel_classification, make_regression
 from sklearn.metrics import average_precision_score, log_loss, mean_absolute_error, mean_squared_error, roc_auc_score
 from sklearn.model_selection import GroupKFold, TimeSeriesSplit, train_test_split
 
@@ -3227,7 +3227,7 @@ def test_force_split_with_feature_fraction(tmp_path):
 
 
 def test_goss_boosting_and_strategy_equivalent():
-    X, y = load_boston(return_X_y=True)
+    X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
@@ -3238,7 +3238,7 @@ def test_goss_boosting_and_strategy_equivalent():
     }
     evals_result1 = {}
     gbm = lgb.train(params1, lgb_train,
-                    num_boost_round=50,
+                    num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result1)])
     params2 = {
@@ -3255,7 +3255,7 @@ def test_goss_boosting_and_strategy_equivalent():
 
 
 def test_sample_strategy_with_boosting():
-    X, y = load_boston(return_X_y=True)
+    X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
@@ -3272,7 +3272,6 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 14
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3283,11 +3282,10 @@ def test_sample_strategy_with_boosting():
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
-                    num_boost_round=50,
+                    num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 12
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3302,7 +3300,6 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 12
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3317,7 +3314,6 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 12
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3332,7 +3328,6 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 7
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3347,5 +3342,4 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 12
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)

From 20ddcb4c84a2129d7551739f090b4b59cc567693 Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Sat, 15 Jan 2022 08:44:18 +0000
Subject: [PATCH 36/84] Update tests/python_package_test/test_engine.py

add value checks of mean_squared_error in test_sample_strategy_with_boosting()
---
 tests/python_package_test/test_engine.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 7a5356747194..030b0a51c809 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3272,6 +3272,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 10000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3286,6 +3287,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 10000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3300,6 +3302,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 10000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3314,6 +3317,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 10000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3328,6 +3332,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 10000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3342,4 +3347,5 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret < 10000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)

From 73d7db76785a5a4b13bacf4c1026dbeb6ba62b64 Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Sat, 15 Jan 2022 08:49:48 +0000
Subject: [PATCH 37/84] Modify warning about using goss as boosting type

---
 src/boosting/sample_strategy.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
index 0d8d581efb01..059eba9ecc42 100644
--- a/src/boosting/sample_strategy.cpp
+++ b/src/boosting/sample_strategy.cpp
@@ -13,10 +13,7 @@ SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const
   bool use_goss_as_boosting = config->boosting == std::string("goss");
   bool use_goss_as_strategy = config->data_sample_strategy == std::string("goss");
   if (use_goss_as_boosting) {
-    Log::Warning("Setting goss as `boosting_type` is not recommended. Please set `data_sample_strategy = goss` instead.");
-    if (use_goss_as_strategy) {
-      Log::Warning("Both `boosting_type` and `data_sample_strategy` are set as GOSS. Only one time of sampling will be conducted. Please check and modify your config file.");
-    }
+    Log::Warning("Found boosting_type=goss. For backwards compatibility reasons, LightGBM interprets this as boosting_type=gbdt, data_sample_strategy=goss. To suppress this warning, set data_sample_strategy=goss instead.");
   }
   if (use_goss_as_boosting || use_goss_as_strategy) {
     return new GOSS(config, train_data, num_tree_per_iteration);

From beaaf19f711d88e9d31ac1ada5dc63d062638926 Mon Sep 17 00:00:00 2001
From: Guangda Liu <v-guangdaliu@microsoft.com>
Date: Tue, 18 Jan 2022 06:27:42 +0000
Subject: [PATCH 38/84] Update tests/python_package_test/test_engine.py

add random_state=42 for make_regression()

reduce the threshold of mean_square_error
---
 tests/python_package_test/test_engine.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 030b0a51c809..0b654be4ce08 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3227,7 +3227,7 @@ def test_force_split_with_feature_fraction(tmp_path):
 
 
 def test_goss_boosting_and_strategy_equivalent():
-    X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5)
+    X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5, random_state=42)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
@@ -3255,7 +3255,7 @@ def test_goss_boosting_and_strategy_equivalent():
 
 
 def test_sample_strategy_with_boosting():
-    X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5)
+    X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5, random_state=42)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
@@ -3272,7 +3272,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 10000
+    assert ret < 4000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3287,7 +3287,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 10000
+    assert ret < 4000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3302,7 +3302,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 10000
+    assert ret < 4000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3317,7 +3317,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 10000
+    assert ret < 4000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3332,7 +3332,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 10000
+    assert ret < 4000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
 
     params = {
@@ -3347,5 +3347,5 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 10000
+    assert ret < 4000
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)

From 1dbbee4cbd3ff78750ca7bd5f66fcddf8fbf0502 Mon Sep 17 00:00:00 2001
From: shiyu1994 <shiyu_k1994@qq.com>
Date: Tue, 15 Mar 2022 16:32:00 +0800
Subject: [PATCH 39/84] Update src/boosting/sample_strategy.cpp

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 src/boosting/sample_strategy.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
index 059eba9ecc42..a49a08a0a36f 100644
--- a/src/boosting/sample_strategy.cpp
+++ b/src/boosting/sample_strategy.cpp
@@ -13,7 +13,7 @@ SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const
   bool use_goss_as_boosting = config->boosting == std::string("goss");
   bool use_goss_as_strategy = config->data_sample_strategy == std::string("goss");
   if (use_goss_as_boosting) {
-    Log::Warning("Found boosting_type=goss. For backwards compatibility reasons, LightGBM interprets this as boosting_type=gbdt, data_sample_strategy=goss. To suppress this warning, set data_sample_strategy=goss instead.");
+    Log::Warning("Found boosting=goss. For backwards compatibility reasons, LightGBM interprets this as boosting=gbdt, data_sample_strategy=goss. To suppress this warning, set data_sample_strategy=goss instead.");
   }
   if (use_goss_as_boosting || use_goss_as_strategy) {
     return new GOSS(config, train_data, num_tree_per_iteration);

From cddfcd69674bf19d6b897852cdf3e6eb9d3d8ce6 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Tue, 15 Mar 2022 09:08:08 +0000
Subject: [PATCH 40/84] remove goss from boosting types in documentation

---
 docs/Parameters.rst       | 4 +---
 include/LightGBM/config.h | 3 +--
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index d51c0e6001ee..61355c08b880 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -127,7 +127,7 @@ Core Parameters
 
       -  label should be ``int`` type, and larger number represents the higher relevance (e.g. 0:bad, 1:fair, 2:good, 3:perfect)
 
--  ``boosting`` :raw-html:`<a id="boosting" title="Permalink to this parameter" href="#boosting">&#x1F517;&#xFE0E;</a>`, default = ``gbdt``, type = enum, options: ``gbdt``, ``rf``, ``dart``, ``goss``, aliases: ``boosting_type``, ``boost``
+-  ``boosting`` :raw-html:`<a id="boosting" title="Permalink to this parameter" href="#boosting">&#x1F517;&#xFE0E;</a>`, default = ``gbdt``, type = enum, options: ``gbdt``, ``rf``, ``dart``, aliases: ``boosting_type``, ``boost``
 
    -  ``gbdt``, traditional Gradient Boosting Decision Tree, aliases: ``gbrt``
 
@@ -135,8 +135,6 @@ Core Parameters
 
    -  ``dart``, `Dropouts meet Multiple Additive Regression Trees <https://arxiv.org/abs/1505.01866>`__
 
-   -  ``goss``, Gradient-based One-Side Sampling
-
       -  **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations
 
 -  ``data_sample_strategy`` :raw-html:`<a id="data_sample_strategy" title="Permalink to this parameter" href="#data_sample_strategy">&#x1F517;&#xFE0E;</a>`, default = ``bagging``, type = enum, options: ``bagging``, ``goss``
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 086c0f453966..8ebcc62b51bc 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -141,11 +141,10 @@ struct Config {
   // [doc-only]
   // type = enum
   // alias = boosting_type, boost
-  // options = gbdt, rf, dart, goss
+  // options = gbdt, rf, dart
   // desc = ``gbdt``, traditional Gradient Boosting Decision Tree, aliases: ``gbrt``
   // desc = ``rf``, Random Forest, aliases: ``random_forest``
   // desc = ``dart``, `Dropouts meet Multiple Additive Regression Trees <https://arxiv.org/abs/1505.01866>`__
-  // desc = ``goss``, Gradient-based One-Side Sampling
   // descl2 = **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations
   std::string boosting = "gbdt";
 

From df523f31b237845bfec2fb85be7dc93155d40cec Mon Sep 17 00:00:00 2001
From: shiyu1994 <shiyu_k1994@qq.com>
Date: Tue, 15 Mar 2022 18:38:08 +0800
Subject: [PATCH 41/84] Update src/boosting/bagging.hpp

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
---
 src/boosting/bagging.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index 7eb2ab881864..de4728e1e6de 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -3,8 +3,8 @@
  * Licensed under the MIT License. See LICENSE file in the project root for license information.
  */
 
-#ifndef LIGHTGBM_SAMPLE_STRATEGY_BAGGING_HPP_
-#define LIGHTGBM_SAMPLE_STRATEGY_BAGGING_HPP_
+#ifndef LIGHTGBM_BOOSTING_BAGGING_HPP_
+#define LIGHTGBM_BOOSTING_BAGGING_HPP_
 
 namespace LightGBM {
 

From 85e7fd161de495100d734accde6d9590f5249a39 Mon Sep 17 00:00:00 2001
From: shiyu1994 <shiyu_k1994@qq.com>
Date: Tue, 15 Mar 2022 18:38:42 +0800
Subject: [PATCH 42/84] Update src/boosting/bagging.hpp

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
---
 src/boosting/bagging.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index de4728e1e6de..2c02c6285372 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -178,4 +178,4 @@ class BAGGING : public SampleStrategy {
 
 }  // namespace LightGBM
 
-#endif  // LIGHTGBM_SAMPLE_STRATEGY_BAGGING_HPP_
+#endif  // LIGHTGBM_BOOSTING_BAGGING_HPP_

From efb5e286861063bb78f992b768a93e38811ee9f9 Mon Sep 17 00:00:00 2001
From: shiyu1994 <shiyu_k1994@qq.com>
Date: Tue, 15 Mar 2022 18:38:56 +0800
Subject: [PATCH 43/84] Update src/boosting/goss.hpp

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
---
 src/boosting/goss.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index e048226ffb0e..30d463564e45 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -3,8 +3,8 @@
  * Licensed under the MIT License. See LICENSE file in the project root for license information.
  */
 
-#ifndef LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
-#define LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
+#ifndef LIGHTGBM_BOOSTING_GOSS_HPP_
+#define LIGHTGBM_BOOSTING_GOSS_HPP_
 
 #include <LightGBM/utils/array_args.h>
 #include <LightGBM/sample_strategy.h>

From beb9f8c71d6b38a9a70741a603f1b3cda61d27af Mon Sep 17 00:00:00 2001
From: shiyu1994 <shiyu_k1994@qq.com>
Date: Tue, 15 Mar 2022 18:39:06 +0800
Subject: [PATCH 44/84] Update src/boosting/goss.hpp

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
---
 src/boosting/goss.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index 30d463564e45..0265a1b3656d 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -147,4 +147,4 @@ class GOSS : public SampleStrategy {
 
 }  // namespace LightGBM
 
-#endif  // LIGHTGBM_SAMPLE_STRATEGY_GOSS_HPP_
+#endif  // LIGHTGBM_BOOSTING_GOSS_HPP_

From 4bdcdd5ba53b2b8599858dd25f7880b3dbdb0aa1 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Sat, 19 Mar 2022 03:05:19 +0000
Subject: [PATCH 45/84] rename GOSS with GOSSStrategy

---
 include/LightGBM/config.h          |  3 ++-
 python-package/lightgbm/sklearn.py |  1 -
 src/boosting/bagging.hpp           | 11 ++++-------
 src/boosting/goss.hpp              |  6 +++---
 src/boosting/sample_strategy.cpp   |  4 ++--
 5 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 8ebcc62b51bc..d7277ea25a6f 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -153,6 +153,7 @@ struct Config {
   // options = bagging, goss
   // desc = ``bagging``, Randomly Bagging Sampling
   // desc = ``goss``, Gradient-based One-Side Sampling
+  // desc = ``bagging`` is only effective when bagging_freq > 0 and bagging_fraction < 1.0
   std::string data_sample_strategy = "bagging";
 
   // alias = train, train_data, train_data_file, data_filename
@@ -253,7 +254,7 @@ struct Config {
   // desc = enabling this is recommended when:
   // descl2 = the number of data points is large, and the total number of bins is relatively small
   // descl2 = ``num_threads`` is relatively small, e.g. ``<= 16``
-  // descl2 = you want to use small ``bagging_fraction`` or ``goss`` boosting to speed up
+  // descl2 = you want to use small ``bagging_fraction`` or ``goss`` sample strategy to speed up
   // desc = **Note**: setting this to ``true`` will double the memory cost for Dataset object. If you have not enough memory, you can try setting ``force_col_wise=true``
   // desc = **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
   // desc = **Note**: this parameter cannot be used at the same time with ``force_col_wise``, choose only one of them
diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 7ebba0bc962c..f0f141fc33d6 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -373,7 +373,6 @@ def __init__(
         boosting_type : str, optional (default='gbdt')
             'gbdt', traditional Gradient Boosting Decision Tree.
             'dart', Dropouts meet Multiple Additive Regression Trees.
-            'goss', Gradient-based One-Side Sampling.
             'rf', Random Forest.
         num_leaves : int, optional (default=31)
             Maximum tree leaves for base learners.
diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index 7eb2ab881864..e0f5c5806f4a 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -8,9 +8,9 @@
 
 namespace LightGBM {
 
-class BAGGING : public SampleStrategy {
+class BaggingSampleStrategy : public SampleStrategy {
  public:
-  BAGGING(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration)
+  BaggingSampleStrategy(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration)
     : need_re_bagging_(false) {
     config_ = config;
     train_data_ = train_data;
@@ -19,9 +19,9 @@ class BAGGING : public SampleStrategy {
     num_tree_per_iteration_ = num_tree_per_iteration;
   }
 
-  ~BAGGING() {}
+  ~BaggingSampleStrategy() {}
 
-  void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
+  void Bagging(int iter, TreeLearner* tree_learner, score_t* /*gradients*/, score_t* /*hessians*/) override {
     Common::FunctionTimer fun_timer("GBDT::Bagging", global_timer);
     // if need bagging
     if ((bag_data_cnt_ < num_data_ && iter % config_->bagging_freq == 0) ||
@@ -55,9 +55,6 @@ class BAGGING : public SampleStrategy {
                                       bag_data_cnt_);
       }
     }
-    // avoid warnings
-    std::ignore = gradients;
-    std::ignore = hessians;
   }
 
   void ResetSampleConfig(const Config* config, bool is_change_dataset) override {
diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index e048226ffb0e..a0766cc16673 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -14,16 +14,16 @@
 
 namespace LightGBM {
 
-class GOSS : public SampleStrategy {
+class GOSSStrategy : public SampleStrategy {
  public:
-  GOSS(const Config* config, const Dataset* train_data, int num_tree_per_iteration) {
+  GOSSStrategy(const Config* config, const Dataset* train_data, int num_tree_per_iteration) {
     config_ = config;
     train_data_ = train_data;
     num_tree_per_iteration_ = num_tree_per_iteration;
     num_data_ = train_data->num_data();
   }
 
-  ~GOSS() {
+  ~GOSSStrategy() {
   }
 
   void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
index a49a08a0a36f..77c992e166d3 100644
--- a/src/boosting/sample_strategy.cpp
+++ b/src/boosting/sample_strategy.cpp
@@ -16,9 +16,9 @@ SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const
     Log::Warning("Found boosting=goss. For backwards compatibility reasons, LightGBM interprets this as boosting=gbdt, data_sample_strategy=goss. To suppress this warning, set data_sample_strategy=goss instead.");
   }
   if (use_goss_as_boosting || use_goss_as_strategy) {
-    return new GOSS(config, train_data, num_tree_per_iteration);
+    return new GOSSStrategy(config, train_data, num_tree_per_iteration);
   } else {
-    return new BAGGING(config, train_data, objective_function, num_tree_per_iteration);
+    return new BaggingSampleStrategy(config, train_data, objective_function, num_tree_per_iteration);
   }
 }
 

From 3291d7e8da1e29f4be5f8a77549b4d11e34916de Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Sat, 19 Mar 2022 03:05:52 +0000
Subject: [PATCH 46/84] update doc

---
 docs/Parameters.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 61355c08b880..52c84634c0a2 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -143,6 +143,8 @@ Core Parameters
 
    -  ``goss``, Gradient-based One-Side Sampling
 
+   -  ``bagging`` is only effective when bagging_freq > 0 and bagging_fraction < 1.0
+
 -  ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename``
 
    -  path of training data, LightGBM will train from this data
@@ -268,7 +270,7 @@ Learning Control Parameters
 
       -  ``num_threads`` is relatively small, e.g. ``<= 16``
 
-      -  you want to use small ``bagging_fraction`` or ``goss`` boosting to speed up
+      -  you want to use small ``bagging_fraction`` or ``goss`` sample strategy to speed up
 
    -  **Note**: setting this to ``true`` will double the memory cost for Dataset object. If you have not enough memory, you can try setting ``force_col_wise=true``
 

From 93a87629c65677bee68f71b0db259897e1611ae5 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Sat, 19 Mar 2022 03:43:54 +0000
Subject: [PATCH 47/84] address comments

---
 docs/Development-Guide.rst       | 2 +-
 src/boosting/bagging.hpp         | 3 ++-
 src/boosting/sample_strategy.cpp | 6 +++++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/docs/Development-Guide.rst b/docs/Development-Guide.rst
index c8b30173da79..cea967dd8dde 100644
--- a/docs/Development-Guide.rst
+++ b/docs/Development-Guide.rst
@@ -19,7 +19,7 @@ Important Classes
 +-------------------------+----------------------------------------------------------------------------------------+
 | ``Bin``                 | Data structure used for storing feature discrete values (converted from float values)  |
 +-------------------------+----------------------------------------------------------------------------------------+
-| ``Boosting``            | Boosting interface (GBDT, DART, GOSS, etc.)                                            |
+| ``Boosting``            | Boosting interface (GBDT, DART, etc.)                                            |
 +-------------------------+----------------------------------------------------------------------------------------+
 | ``Config``              | Stores parameters and configurations                                                   |
 +-------------------------+----------------------------------------------------------------------------------------+
diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index e617121d5cd1..aec37bb8d5ba 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -92,7 +92,8 @@ class BaggingSampleStrategy : public SampleStrategy {
           (static_cast<double>(bag_data_cnt_) / num_data_) / config->bagging_freq;
       is_use_subset_ = false;
       const int group_threshold_usesubset = 100;
-      if (average_bag_rate <= 0.5
+      const double average_bag_rate_threshold = 0.5;
+      if (average_bag_rate <= average_bag_rate_threshold
           && (train_data_->num_feature_groups() < group_threshold_usesubset)) {
         if (tmp_subset_ == nullptr || is_change_dataset) {
           tmp_subset_.reset(new Dataset(bag_data_cnt_));
diff --git a/src/boosting/sample_strategy.cpp b/src/boosting/sample_strategy.cpp
index 77c992e166d3..e991efd01045 100644
--- a/src/boosting/sample_strategy.cpp
+++ b/src/boosting/sample_strategy.cpp
@@ -9,7 +9,11 @@
 
 namespace LightGBM {
 
-SampleStrategy* SampleStrategy::CreateSampleStrategy(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function, int num_tree_per_iteration) {
+SampleStrategy* SampleStrategy::CreateSampleStrategy(
+  const Config* config,
+  const Dataset* train_data,
+  const ObjectiveFunction* objective_function,
+  int num_tree_per_iteration) {
   bool use_goss_as_boosting = config->boosting == std::string("goss");
   bool use_goss_as_strategy = config->data_sample_strategy == std::string("goss");
   if (use_goss_as_boosting) {

From 7e1167aca598a69838f9e17be31b4107968c00d1 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Sat, 19 Mar 2022 03:48:22 +0000
Subject: [PATCH 48/84] fix table in doc

---
 docs/Development-Guide.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/Development-Guide.rst b/docs/Development-Guide.rst
index cea967dd8dde..6c4819e45209 100644
--- a/docs/Development-Guide.rst
+++ b/docs/Development-Guide.rst
@@ -19,7 +19,7 @@ Important Classes
 +-------------------------+----------------------------------------------------------------------------------------+
 | ``Bin``                 | Data structure used for storing feature discrete values (converted from float values)  |
 +-------------------------+----------------------------------------------------------------------------------------+
-| ``Boosting``            | Boosting interface (GBDT, DART, etc.)                                            |
+| ``Boosting``            | Boosting interface (GBDT, DART, etc.)                                                  |
 +-------------------------+----------------------------------------------------------------------------------------+
 | ``Config``              | Stores parameters and configurations                                                   |
 +-------------------------+----------------------------------------------------------------------------------------+

From a1b6bd18b6cc27458fa93afdadcaf691781a44d6 Mon Sep 17 00:00:00 2001
From: shiyu1994 <shiyu_k1994@qq.com>
Date: Mon, 21 Mar 2022 11:29:05 +0800
Subject: [PATCH 49/84] Update include/LightGBM/config.h

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
---
 include/LightGBM/config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index d7277ea25a6f..ec6e04cbf81b 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -152,8 +152,8 @@ struct Config {
   // type = enum
   // options = bagging, goss
   // desc = ``bagging``, Randomly Bagging Sampling
+  // descl2 = **Note**: ``bagging`` is only effective when ``bagging_freq > 0`` and ``bagging_fraction < 1.0``
   // desc = ``goss``, Gradient-based One-Side Sampling
-  // desc = ``bagging`` is only effective when bagging_freq > 0 and bagging_fraction < 1.0
   std::string data_sample_strategy = "bagging";
 
   // alias = train, train_data, train_data_file, data_filename

From 4499113b2d9e4b5b5b403728c28adfbd3437ed6b Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Mon, 21 Mar 2022 03:38:26 +0000
Subject: [PATCH 50/84] update documentation

---
 docs/Parameters.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 52c84634c0a2..3c9284e92737 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -141,9 +141,9 @@ Core Parameters
 
    -  ``bagging``, Randomly Bagging Sampling
 
-   -  ``goss``, Gradient-based One-Side Sampling
+      -  **Note**: ``bagging`` is only effective when ``bagging_freq > 0`` and ``bagging_fraction < 1.0``
 
-   -  ``bagging`` is only effective when bagging_freq > 0 and bagging_fraction < 1.0
+   -  ``goss``, Gradient-based One-Side Sampling
 
 -  ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename``
 

From 3a2235e5a3f75a5ed3da467f6d76ab046feec4a7 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Mon, 21 Mar 2022 06:22:18 +0000
Subject: [PATCH 51/84] update test case

---
 tests/python_package_test/test_engine.py | 34 +++++++++++++++---------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index c865060ef507..1728db1fe751 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -18,7 +18,7 @@
 
 import lightgbm as lgb
 
-from .utils import (load_boston, load_breast_cancer, load_digits, load_iris, make_synthetic_regression,
+from utils import (load_boston, load_breast_cancer, load_digits, load_iris, make_synthetic_regression,
                     sklearn_multiclass_custom_objective, softmax)
 
 decreasing_generator = itertools.count(0, -1)
@@ -3311,24 +3311,32 @@ def test_goss_boosting_and_strategy_equivalent():
     params1 = {
         'boosting': 'goss',
         'metric': 'l2',
-        'verbose': -1
+        'verbose': -1,
+        'bagging_seed': 0,
+        'learning_rate': 0.05,
+        'num_threads': 1,
+        'force_row_wise': True,
     }
     evals_result1 = {}
-    gbm = lgb.train(params1, lgb_train,
-                    num_boost_round=10,
-                    valid_sets=lgb_eval,
-                    callbacks=[lgb.record_evaluation(evals_result1)])
+    lgb.train(params1, lgb_train,
+              num_boost_round=10,
+              valid_sets=lgb_eval,
+              callbacks=[lgb.record_evaluation(evals_result1)])
     params2 = {
         'data_sample_strategy': 'goss',
         'metric': 'l2',
-        'verbose': -1
+        'verbose': -1,
+        'bagging_seed': 0,
+        'learning_rate': 0.05,
+        'num_threads': 1,
+        'force_row_wise': True,
     }
     evals_result2 = {}
-    gbm = lgb.train(params2, lgb_train,
-                    num_boost_round=10,
-                    valid_sets=lgb_eval,
-                    callbacks=[lgb.record_evaluation(evals_result2)])
-    np.testing.assert_allclose(evals_result1['valid_0']['l2'], evals_result2['valid_0']['l2'])
+    lgb.train(params2, lgb_train,
+              num_boost_round=10,
+              valid_sets=lgb_eval,
+              callbacks=[lgb.record_evaluation(evals_result2)])
+    np.testing.assert_equal(evals_result1['valid_0']['l2'], evals_result2['valid_0']['l2'])
 
 
 def test_sample_strategy_with_boosting():
@@ -3577,3 +3585,5 @@ def test_boost_from_average_with_single_leaf_trees():
     preds = model.predict(X)
     mean_preds = np.mean(preds)
     assert y.min() <= mean_preds <= y.max()
+
+test_goss_boosting_and_strategy_equivalent()

From 1e4c11a8f3f5d5f969627387a76c44d924f28464 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Mon, 21 Mar 2022 06:33:58 +0000
Subject: [PATCH 52/84] revert useless change in test_engine.py

---
 tests/python_package_test/test_engine.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 1728db1fe751..89395cca221f 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -18,7 +18,7 @@
 
 import lightgbm as lgb
 
-from utils import (load_boston, load_breast_cancer, load_digits, load_iris, make_synthetic_regression,
+from .utils import (load_boston, load_breast_cancer, load_digits, load_iris, make_synthetic_regression,
                     sklearn_multiclass_custom_objective, softmax)
 
 decreasing_generator = itertools.count(0, -1)
@@ -3585,5 +3585,3 @@ def test_boost_from_average_with_single_leaf_trees():
     preds = model.predict(X)
     mean_preds = np.mean(preds)
     assert y.min() <= mean_preds <= y.max()
-
-test_goss_boosting_and_strategy_equivalent()

From e72fb01f1de343b22579288d1291f0467356a30f Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Tue, 7 Jun 2022 09:02:41 +0000
Subject: [PATCH 53/84] add tests for evaluation results in
 test_sample_strategy_with_boosting

---
 tests/python_package_test/test_engine.py | 53 ++++++++++--------------
 1 file changed, 23 insertions(+), 30 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 871d6a976267..19c8e7289ae8 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3482,9 +3482,9 @@ def test_sample_strategy_with_boosting():
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
-    ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 4000
-    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
+    ret1 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret1 == pytest.approx(3149.393862)
+    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret1)
 
     params = {
         'boosting': 'gbdt',
@@ -3497,9 +3497,9 @@ def test_sample_strategy_with_boosting():
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
-    ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 4000
-    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
+    ret2 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret2 == pytest.approx(2547.715968)
+    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret2)
 
     params = {
         'boosting': 'goss',
@@ -3512,13 +3512,18 @@ def test_sample_strategy_with_boosting():
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
-    ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 4000
-    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
+    ret3 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret3 == pytest.approx(2547.715968)
+    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret3)
+
+    assert ret1 != ret2
+    assert ret2 == ret3
 
     params = {
         'boosting': 'dart',
         'data_sample_strategy': 'bagging',
+        'bagging_freq': 1,
+        'bagging_fraction': 0.5,
         'metric': 'l2',
         'verbose': -1
     }
@@ -3527,13 +3532,15 @@ def test_sample_strategy_with_boosting():
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
-    ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 4000
-    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
+    ret4 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret4 == pytest.approx(3134.866931)
+    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret4)
 
     params = {
         'boosting': 'gbdt',
         'data_sample_strategy': 'bagging',
+        'bagging_freq': 1,
+        'bagging_fraction': 0.5,
         'metric': 'l2',
         'verbose': -1
     }
@@ -3542,24 +3549,10 @@ def test_sample_strategy_with_boosting():
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
-    ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 4000
-    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
-
-    params = {
-        'boosting': 'goss',
-        'data_sample_strategy': 'bagging',
-        'metric': 'l2',
-        'verbose': -1
-    }
-    evals_result = {}
-    gbm = lgb.train(params, lgb_train,
-                    num_boost_round=10,
-                    valid_sets=lgb_eval,
-                    callbacks=[lgb.record_evaluation(evals_result)])
-    ret = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret < 4000
-    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret)
+    ret5 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert ret5 == pytest.approx(2539.792378)
+    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret5)
+    assert ret4 != ret5
 
 
 def test_record_evaluation_with_train():

From 05292ffbc45b722630eb0afc9757be9e6f6db345 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Thu, 9 Jun 2022 06:58:18 +0000
Subject: [PATCH 54/84] include <string>

---
 src/boosting/bagging.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index a87b7a9c1814..bf26381dd1b7 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -6,6 +6,8 @@
 #ifndef LIGHTGBM_BOOSTING_BAGGING_HPP_
 #define LIGHTGBM_BOOSTING_BAGGING_HPP_
 
+#include <string>
+
 namespace LightGBM {
 
 class BaggingSampleStrategy : public SampleStrategy {

From 6ec78125b6cf122d1bdbf6f3aa7b8c7ad9d34e30 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Thu, 9 Jun 2022 06:59:31 +0000
Subject: [PATCH 55/84] change to assert_allclose in
 test_goss_boosting_and_strategy_equivalent

---
 tests/python_package_test/test_engine.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 19c8e7289ae8..849b407afcbd 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3463,6 +3463,7 @@ def test_goss_boosting_and_strategy_equivalent():
               valid_sets=lgb_eval,
               callbacks=[lgb.record_evaluation(evals_result2)])
     np.testing.assert_equal(evals_result1['valid_0']['l2'], evals_result2['valid_0']['l2'])
+    np.testing.assert_allclose(evals_result1['valid_0']['l2'], evals_result2['valid_0']['l2'], atol=1e-5)
 
 
 def test_sample_strategy_with_boosting():

From 9f749fa10a14b15665e4ecad60c6d8b14ddb2301 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Thu, 9 Jun 2022 08:09:37 +0000
Subject: [PATCH 56/84] more tolerance in result checking, due to minor
 difference in results of gpu versions

---
 tests/python_package_test/test_engine.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 849b407afcbd..3becadf6eb90 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3484,7 +3484,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret1 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret1 == pytest.approx(3149.393862)
+    assert ret1 == pytest.approx(3149.393862, abs=1.0)
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret1)
 
     params = {
@@ -3499,7 +3499,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret2 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret2 == pytest.approx(2547.715968)
+    assert ret2 == pytest.approx(2547.715968, abs=1.0)
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret2)
 
     params = {
@@ -3514,7 +3514,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret3 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret3 == pytest.approx(2547.715968)
+    assert ret3 == pytest.approx(2547.715968, abs=1.0)
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret3)
 
     assert ret1 != ret2
@@ -3534,7 +3534,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret4 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret4 == pytest.approx(3134.866931)
+    assert ret4 == pytest.approx(3134.866931, abs=1.0)
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret4)
 
     params = {
@@ -3551,7 +3551,7 @@ def test_sample_strategy_with_boosting():
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     ret5 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret5 == pytest.approx(2539.792378)
+    assert ret5 == pytest.approx(2539.792378, abs=1.0)
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret5)
     assert ret4 != ret5
 

From 808ccc609929a4d4427e5e5b0f5c8bb1d1da7b56 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Thu, 9 Jun 2022 12:16:00 +0000
Subject: [PATCH 57/84] change == to np.testing.assert_allclose

---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 3becadf6eb90..4b3650115875 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3518,7 +3518,7 @@ def test_sample_strategy_with_boosting():
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret3)
 
     assert ret1 != ret2
-    assert ret2 == ret3
+    assert np.testing.assert_allclose(ret1, ret2)
 
     params = {
         'boosting': 'dart',

From 35f4eb50bfff279db557c7e410df6911c20db739 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Mon, 13 Jun 2022 10:41:39 +0000
Subject: [PATCH 58/84] fix test case

---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 4b3650115875..0a42de8f9f7f 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3518,7 +3518,7 @@ def test_sample_strategy_with_boosting():
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret3)
 
     assert ret1 != ret2
-    assert np.testing.assert_allclose(ret1, ret2)
+    np.testing.assert_allclose(ret2, ret3)
 
     params = {
         'boosting': 'dart',

From 7fe6a944fdc04f1d857f743adaf3b4a87e2804a6 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 27 Jul 2022 15:06:41 +0000
Subject: [PATCH 59/84] set gpu_use_dp to true

---
 tests/python_package_test/test_engine.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 0a42de8f9f7f..92f9faa9b304 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3476,7 +3476,8 @@ def test_sample_strategy_with_boosting():
         'boosting': 'dart',
         'data_sample_strategy': 'goss',
         'metric': 'l2',
-        'verbose': -1
+        'verbose': -1,
+        'gpu_use_dp': True
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3491,7 +3492,8 @@ def test_sample_strategy_with_boosting():
         'boosting': 'gbdt',
         'data_sample_strategy': 'goss',
         'metric': 'l2',
-        'verbose': -1
+        'verbose': -1,
+        'gpu_use_dp': True
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3506,7 +3508,8 @@ def test_sample_strategy_with_boosting():
         'boosting': 'goss',
         'data_sample_strategy': 'goss',
         'metric': 'l2',
-        'verbose': -1
+        'verbose': -1,
+        'gpu_use_dp': True
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3518,7 +3521,7 @@ def test_sample_strategy_with_boosting():
     assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret3)
 
     assert ret1 != ret2
-    np.testing.assert_allclose(ret2, ret3)
+    assert ret2 == ret3
 
     params = {
         'boosting': 'dart',
@@ -3526,7 +3529,8 @@ def test_sample_strategy_with_boosting():
         'bagging_freq': 1,
         'bagging_fraction': 0.5,
         'metric': 'l2',
-        'verbose': -1
+        'verbose': -1,
+        'gpu_use_dp': True
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3543,7 +3547,8 @@ def test_sample_strategy_with_boosting():
         'bagging_freq': 1,
         'bagging_fraction': 0.5,
         'metric': 'l2',
-        'verbose': -1
+        'verbose': -1,
+        'gpu_use_dp': True
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,

From 7f108181577cb86b036c3c924352feaa631f8e38 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 27 Jul 2022 15:26:13 +0000
Subject: [PATCH 60/84] change --report to --report-level for rstcheck

---
 .ci/test.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.ci/test.sh b/.ci/test.sh
index 0ba52ecbb998..d7e83cb48925 100755
--- a/.ci/test.sh
+++ b/.ci/test.sh
@@ -47,9 +47,9 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then
             rstcheck || exit -1
     # check reStructuredText formatting
     cd $BUILD_DIRECTORY/python-package
-    rstcheck --report warning $(find . -type f -name "*.rst") || exit -1
+    rstcheck --report-level warning $(find . -type f -name "*.rst") || exit -1
     cd $BUILD_DIRECTORY/docs
-    rstcheck --report warning --ignore-directives=autoclass,autofunction,doxygenfile $(find . -type f -name "*.rst") || exit -1
+    rstcheck --report-level warning --ignore-directives=autoclass,autofunction,doxygenfile $(find . -type f -name "*.rst") || exit -1
     # build docs
     make html || exit -1
     if [[ $TASK == "check-links" ]]; then

From 755cb3a38c51cc5361e5ac7966d4af53e5fced3f Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Fri, 29 Jul 2022 03:47:46 +0000
Subject: [PATCH 61/84] use gpu_use_dp=true in
 test_goss_boosting_and_strategy_equivalent

---
 tests/python_package_test/test_engine.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 92f9faa9b304..99b646ae0919 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1089,7 +1089,7 @@ def test_feature_name_with_non_ascii():
     X_train = np.random.normal(size=(100, 4))
     y_train = np.random.random(100)
     # This has non-ascii strings.
-    feature_names = [u'F_零', u'F_一', u'F_二', u'F_三']
+    feature_names = [u'F_�?', u'F_一', u'F_�?', u'F_�?']
     params = {'verbose': -1}
     lgb_train = lgb.Dataset(X_train, y_train)
 
@@ -3442,6 +3442,7 @@ def test_goss_boosting_and_strategy_equivalent():
         'learning_rate': 0.05,
         'num_threads': 1,
         'force_row_wise': True,
+        'gpu_use_dp': True,
     }
     evals_result1 = {}
     lgb.train(params1, lgb_train,
@@ -3456,6 +3457,7 @@ def test_goss_boosting_and_strategy_equivalent():
         'learning_rate': 0.05,
         'num_threads': 1,
         'force_row_wise': True,
+        'gpu_use_dp': True,
     }
     evals_result2 = {}
     lgb.train(params2, lgb_train,
@@ -3463,7 +3465,6 @@ def test_goss_boosting_and_strategy_equivalent():
               valid_sets=lgb_eval,
               callbacks=[lgb.record_evaluation(evals_result2)])
     np.testing.assert_equal(evals_result1['valid_0']['l2'], evals_result2['valid_0']['l2'])
-    np.testing.assert_allclose(evals_result1['valid_0']['l2'], evals_result2['valid_0']['l2'], atol=1e-5)
 
 
 def test_sample_strategy_with_boosting():
@@ -3477,7 +3478,7 @@ def test_sample_strategy_with_boosting():
         'data_sample_strategy': 'goss',
         'metric': 'l2',
         'verbose': -1,
-        'gpu_use_dp': True
+        'gpu_use_dp': True,
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3493,7 +3494,7 @@ def test_sample_strategy_with_boosting():
         'data_sample_strategy': 'goss',
         'metric': 'l2',
         'verbose': -1,
-        'gpu_use_dp': True
+        'gpu_use_dp': True,
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3509,7 +3510,7 @@ def test_sample_strategy_with_boosting():
         'data_sample_strategy': 'goss',
         'metric': 'l2',
         'verbose': -1,
-        'gpu_use_dp': True
+        'gpu_use_dp': True,
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3530,7 +3531,7 @@ def test_sample_strategy_with_boosting():
         'bagging_fraction': 0.5,
         'metric': 'l2',
         'verbose': -1,
-        'gpu_use_dp': True
+        'gpu_use_dp': True,
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3548,7 +3549,7 @@ def test_sample_strategy_with_boosting():
         'bagging_fraction': 0.5,
         'metric': 'l2',
         'verbose': -1,
-        'gpu_use_dp': True
+        'gpu_use_dp': True,
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,

From b431c2c9e751228d9293edea4b6134894ba41bc8 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Fri, 29 Jul 2022 04:11:25 +0000
Subject: [PATCH 62/84] revert unexpected changes of non-ascii characters

---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 05e2d1ad0e3e..57c83ee3f93c 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1090,7 +1090,7 @@ def test_feature_name_with_non_ascii():
     X_train = np.random.normal(size=(100, 4))
     y_train = np.random.random(100)
     # This has non-ascii strings.
-    feature_names = [u'F_�?', u'F_一', u'F_�?', u'F_�?']
+    feature_names = [u'F_��', u'F_һ', u'F_��', u'F_��']
     params = {'verbose': -1}
     lgb_train = lgb.Dataset(X_train, y_train)
 

From 43480d14fca741b2650339e01f0023cb2d2fdfc3 Mon Sep 17 00:00:00 2001
From: shiyu1994 <shiyu_k1994@qq.com>
Date: Fri, 29 Jul 2022 12:13:21 +0800
Subject: [PATCH 63/84] revert unexpected changes of non-ascii characters

---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 57c83ee3f93c..cd1c776645b4 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1090,7 +1090,7 @@ def test_feature_name_with_non_ascii():
     X_train = np.random.normal(size=(100, 4))
     y_train = np.random.random(100)
     # This has non-ascii strings.
-    feature_names = [u'F_��', u'F_һ', u'F_��', u'F_��']
+    feature_names = [u'F_零', u'F_一', u'F_二', u'F_三']
     params = {'verbose': -1}
     lgb_train = lgb.Dataset(X_train, y_train)
 

From 92971572b0db43d8c619a08925c82ca95b04c7d8 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Tue, 16 Aug 2022 08:03:48 +0000
Subject: [PATCH 64/84] remove useless changes

---
 src/boosting/gbdt.cpp | 105 ------------------------------------------
 src/boosting/goss.hpp |   1 +
 src/main.cpp          |  10 +---
 3 files changed, 3 insertions(+), 113 deletions(-)

diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 89ed39271fc9..686ffca215d4 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -233,111 +233,6 @@ void GBDT::Boosting() {
     GetGradients(GetTrainingScore(&num_score), gradients_pointer_, hessians_pointer_);
 }
 
-// <<<<<<< HEAD
-// =======
-// data_size_t GBDT::BaggingHelper(data_size_t start, data_size_t cnt, data_size_t* buffer) {
-//   if (cnt <= 0) {
-//     return 0;
-//   }
-//   data_size_t cur_left_cnt = 0;
-//   data_size_t cur_right_pos = cnt;
-//   // random bagging, minimal unit is one record
-//   for (data_size_t i = 0; i < cnt; ++i) {
-//     auto cur_idx = start + i;
-//     if (bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() < config_->bagging_fraction) {
-//       buffer[cur_left_cnt++] = cur_idx;
-//     } else {
-//       buffer[--cur_right_pos] = cur_idx;
-//     }
-//   }
-//   return cur_left_cnt;
-// }
-
-// data_size_t GBDT::BalancedBaggingHelper(data_size_t start, data_size_t cnt,
-//                                         data_size_t* buffer) {
-//   if (cnt <= 0) {
-//     return 0;
-//   }
-//   auto label_ptr = train_data_->metadata().label();
-//   data_size_t cur_left_cnt = 0;
-//   data_size_t cur_right_pos = cnt;
-//   // random bagging, minimal unit is one record
-//   for (data_size_t i = 0; i < cnt; ++i) {
-//     auto cur_idx = start + i;
-//     bool is_pos = label_ptr[start + i] > 0;
-//     bool is_in_bag = false;
-//     if (is_pos) {
-//       is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
-//                   config_->pos_bagging_fraction;
-//     } else {
-//       is_in_bag = bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() <
-//                   config_->neg_bagging_fraction;
-//     }
-//     if (is_in_bag) {
-//       buffer[cur_left_cnt++] = cur_idx;
-//     } else {
-//       buffer[--cur_right_pos] = cur_idx;
-//     }
-//   }
-//   return cur_left_cnt;
-// }
-
-// void GBDT::Bagging(int iter) {
-//   Common::FunctionTimer fun_timer("GBDT::Bagging", global_timer);
-//   // if need bagging
-//   if ((bag_data_cnt_ < num_data_ && iter % config_->bagging_freq == 0) ||
-//       need_re_bagging_) {
-//     need_re_bagging_ = false;
-//     auto left_cnt = bagging_runner_.Run<true>(
-//         num_data_,
-//         [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
-//             data_size_t*) {
-//           data_size_t cur_left_count = 0;
-//           if (balanced_bagging_) {
-//             cur_left_count =
-//                 BalancedBaggingHelper(cur_start, cur_cnt, left);
-//           } else {
-//             cur_left_count = BaggingHelper(cur_start, cur_cnt, left);
-//           }
-//           return cur_left_count;
-//         },
-//         bag_data_indices_.data());
-//     bag_data_cnt_ = left_cnt;
-//     Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
-//     // set bagging data to tree learner
-//     if (!is_use_subset_) {
-//       #ifdef USE_CUDA_EXP
-//       if (config_->device_type == std::string("cuda_exp")) {
-//         CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
-//         tree_learner_->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
-//       } else {
-//       #endif  // USE_CUDA_EXP
-//         tree_learner_->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
-//       #ifdef USE_CUDA_EXP
-//       }
-//       #endif  // USE_CUDA_EXP
-//     } else {
-//       // get subset
-//       tmp_subset_->ReSize(bag_data_cnt_);
-//       tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
-//                               bag_data_cnt_, false);
-//       #ifdef USE_CUDA_EXP
-//       if (config_->device_type == std::string("cuda_exp")) {
-//         CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
-//         tree_learner_->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
-//                                       bag_data_cnt_);
-//       } else {
-//       #endif  // USE_CUDA_EXP
-//         tree_learner_->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
-//                                       bag_data_cnt_);
-//       #ifdef USE_CUDA_EXP
-//       }
-//       #endif  // USE_CUDA_EXP
-//     }
-//   }
-// }
-
-// >>>>>>> LightGBM/master
 void GBDT::Train(int snapshot_freq, const std::string& model_output_path) {
   Common::FunctionTimer fun_timer("GBDT::Train", global_timer);
   bool is_finished = false;
diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index e0ad4697c35d..34b099e051bb 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -10,6 +10,7 @@
 #include <LightGBM/sample_strategy.h>
 
 #include <algorithm>
+#include <string>
 #include <vector>
 
 namespace LightGBM {
diff --git a/src/main.cpp b/src/main.cpp
index 4d69c53a1aec..8034da826811 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -8,16 +8,10 @@
 
 #include "network/linkers.h"
 
-int main(int /*argc*/, char** /*argv*/) {
+int main(int argc, char** argv) {
   bool success = false;
   try {
-    const std::string config_str = std::string("config=train.conf");
-    char* argv = new char[config_str.size() + 1];
-    for (size_t i = 0; i < config_str.size(); ++i) {
-      argv[i] = config_str[i];
-    }
-    argv[config_str.size()] = '\0';
-    LightGBM::Application app(2, &argv - 1);
+    LightGBM::Application app(argc, argv);
     app.Run();
 
 #ifdef USE_MPI

From 7a5fede86372ec0abee7ade24f71a405e2f15d51 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 24 Aug 2022 06:37:00 +0000
Subject: [PATCH 65/84] allocate gradients_pointer_ and hessians_pointer_ when
 necessary

---
 include/LightGBM/tree_learner.h               |  6 ++
 src/boosting/bagging.hpp                      |  6 +-
 src/boosting/gbdt.cpp                         | 77 ++++++++++---------
 src/boosting/gbdt.h                           |  4 +-
 .../cuda/cuda_single_gpu_tree_learner.cpp     | 10 +++
 .../cuda/cuda_single_gpu_tree_learner.hpp     |  4 +-
 6 files changed, 66 insertions(+), 41 deletions(-)

diff --git a/include/LightGBM/tree_learner.h b/include/LightGBM/tree_learner.h
index 197a80f18cd7..772e1422ff69 100644
--- a/include/LightGBM/tree_learner.h
+++ b/include/LightGBM/tree_learner.h
@@ -50,6 +50,12 @@ class TreeLearner {
   */
   virtual void ResetConfig(const Config* config) = 0;
 
+  /*!
+  * \brief Reset boosting_on_gpu_
+  * \param boosting_on_gpu flag for boosting on GPU
+  */
+  virtual void ResetBoostingOnGPU(const bool /*boosting_on_gpu*/) {}
+
   virtual void SetForcedSplit(const Json* forced_split_json) = 0;
 
   /*!
diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index 27b1589f81c8..03b62f6c65e9 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -50,7 +50,7 @@ class BaggingSampleStrategy : public SampleStrategy {
         #ifdef USE_CUDA_EXP
         if (config_->device_type == std::string("cuda_exp")) {
           CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
-          tree_learner_->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
+          tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
         } else {
         #endif  // USE_CUDA_EXP
           tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
@@ -65,8 +65,8 @@ class BaggingSampleStrategy : public SampleStrategy {
         #ifdef USE_CUDA_EXP
         if (config_->device_type == std::string("cuda_exp")) {
           CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
-          tree_learner_->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
-                                        bag_data_cnt_);
+          tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
+                                       bag_data_cnt_);
         } else {
         #endif  // USE_CUDA_EXP
           tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 686ffca215d4..d2c61b85aafb 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -42,6 +42,7 @@ GBDT::GBDT()
   data_sample_strategy_.reset(nullptr);
   gradients_pointer_ = nullptr;
   hessians_pointer_ = nullptr;
+  boosting_on_gpu_ = false;
 }
 
 GBDT::~GBDT() {
@@ -95,11 +96,12 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
   }
 
   data_sample_strategy_.reset(SampleStrategy::CreateSampleStrategy(config_.get(), train_data_, objective_function_, num_tree_per_iteration_));
-  is_constant_hessian_ = GetIsConstHessian(objective_function) && !data_sample_strategy_->IsHessianChange();
+  is_constant_hessian_ = GetIsConstHessian(objective_function);
 
-  const bool boosting_on_gpu = objective_function_ != nullptr && objective_function_->IsCUDAObjective();
+  boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
+                               !data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
   tree_learner_ = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type,
-                                                                              config_.get(), boosting_on_gpu));
+                                                                              config_.get(), boosting_on_gpu_));
 
   // init tree learner
   tree_learner_->Init(train_data_, is_constant_hessian_);
@@ -114,7 +116,7 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
 
   #ifdef USE_CUDA_EXP
   if (config_->device_type == std::string("cuda_exp")) {
-    train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu));
+    train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
   } else {
   #endif  // USE_CUDA_EXP
     train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
@@ -127,7 +129,7 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
   if (objective_function_ != nullptr) {
     const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
     #ifdef USE_CUDA_EXP
-    if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu) {
+    if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu_) {
       if (gradients_pointer_ != nullptr) {
         CHECK_NOTNULL(hessians_pointer_);
         DeallocateCUDAMemory<score_t>(&gradients_pointer_, __FILE__, __LINE__);
@@ -144,21 +146,14 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
     #ifdef USE_CUDA_EXP
     }
     #endif  // USE_CUDA_EXP
-  #ifndef USE_CUDA_EXP
-  }
-  #else  // USE_CUDA_EXP
-  } else {
-    if (config_->device_type == std::string("cuda_exp")) {
-      if (gradients_pointer_ != nullptr) {
-        CHECK_NOTNULL(hessians_pointer_);
-        DeallocateCUDAMemory<score_t>(&gradients_pointer_, __FILE__, __LINE__);
-        DeallocateCUDAMemory<score_t>(&hessians_pointer_, __FILE__, __LINE__);
-      }
-      AllocateCUDAMemory<score_t>(&gradients_pointer_, total_size, __FILE__, __LINE__);
-      AllocateCUDAMemory<score_t>(&hessians_pointer_, total_size, __FILE__, __LINE__);
-    }
+  } else if (data_sample_strategy_->IsHessianChange()) {
+    const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+    gradients_.resize(total_size);
+    hessians_.resize(total_size);
+    gradients_pointer_ = gradients_.data();
+    hessians_pointer_ = hessians_.data();
   }
-  #endif  // USE_CUDA_EXP
+
   // get max feature index
   max_feature_idx_ = train_data_->num_total_features() - 1;
   // get label index
@@ -352,7 +347,7 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
   } else {
     // use customized objective function
     CHECK(objective_function_ == nullptr);
-    if (config_->boosting == std::string("goss") || config_->data_sample_strategy == std::string("goss")) {
+    if (data_sample_strategy_->IsHessianChange()) {
       // need to copy customized gradients when using GOSS
       int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
       #pragma omp parallel for schedule(static)
@@ -360,24 +355,25 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
         gradients_[i] = gradients[i];
         hessians_[i] = hessians[i];
       }
-      gradients = gradients_.data();
-      hessians = hessians_.data();
-    }
-    #ifdef USE_CUDA_EXP
-    if (config_->device_type == std::string("cuda_exp")) {
-      const size_t total_size = static_cast<size_t>(num_data_ * num_class_);
-      CopyFromHostToCUDADevice<score_t>(gradients_pointer_, gradients, total_size, __FILE__, __LINE__);
-      CopyFromHostToCUDADevice<score_t>(hessians_pointer_, hessians, total_size, __FILE__, __LINE__);
+      CHECK_EQ(gradients_pointer_, gradients_.data());
+      CHECK_EQ(hessians_pointer_, hessians_.data());
       gradients = gradients_pointer_;
       hessians = hessians_pointer_;
     }
-    #endif  // USE_CUDA_EXP
   }
 
   // bagging logic
   data_sample_strategy_->Bagging(iter_, tree_learner_.get(), gradients_.data(), hessians_.data());
   const bool is_use_subset = data_sample_strategy_->is_use_subset();
   const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
+  if (gradients != nullptr && is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_ && !data_sample_strategy_->IsHessianChange()) {
+    // allocate gradients_ and hessians_ for copy gradients for using data subset
+    int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
+    gradients_.resize(total_size);
+    hessians_.resize(total_size);
+    gradients_pointer_ = gradients_.data();
+    hessians_pointer_ = hessians_.data();
+  }
   const std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices = data_sample_strategy_->bag_data_indices();
 
   bool should_continue = false;
@@ -388,7 +384,7 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
       auto grad = gradients + offset;
       auto hess = hessians + offset;
       // need to copy gradients for bagging subset.
-      if (is_use_subset && bag_data_cnt < num_data_ && config_->device_type != std::string("cuda_exp")) {
+      if (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_) {
         for (int i = 0; i < bag_data_cnt; ++i) {
           gradients_pointer_[offset + i] = grad[bag_data_indices[i]];
           hessians_pointer_[offset + i] = hess[bag_data_indices[i]];
@@ -493,7 +489,7 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
     if (num_data_ - bag_data_cnt > 0) {
       #ifdef USE_CUDA_EXP
       if (config_->device_type == std::string("cuda_exp")) {
-        train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().data().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
+        train_score_updater_->AddScore(tree, data_sample_strategy_->cuda_bag_data_indices().RawData() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
       } else {
       #endif  // USE_CUDA_EXP
         train_score_updater_->AddScore(tree, data_sample_strategy_->bag_data_indices().data() + bag_data_cnt, num_data_ - bag_data_cnt, cur_tree_id);
@@ -720,7 +716,7 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
       Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName());
     }
   }
-  is_constant_hessian_ = GetIsConstHessian(objective_function) && !data_sample_strategy_->IsHessianChange();
+  is_constant_hessian_ = GetIsConstHessian(objective_function);
 
   // push training metrics
   training_metrics_.clear();
@@ -730,7 +726,9 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
   training_metrics_.shrink_to_fit();
 
   #ifdef USE_CUDA_EXP
-  const bool boosting_on_gpu = objective_function_ != nullptr && objective_function_->IsCUDAObjective();
+  boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
+                    !data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
+  tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
   #endif  // USE_CUDA_EXP
 
   if (train_data != train_data_) {
@@ -740,7 +738,7 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
     // create score tracker
     #ifdef USE_CUDA_EXP
     if (config_->device_type == std::string("cuda_exp")) {
-      train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu));
+      train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
     } else {
     #endif  // USE_CUDA_EXP
       train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
@@ -762,7 +760,7 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
     const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
     if (objective_function_ != nullptr) {
       #ifdef USE_CUDA_EXP
-      if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu) {
+      if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu_) {
         if (gradients_pointer_ != nullptr) {
           CHECK_NOTNULL(hessians_pointer_);
           DeallocateCUDAMemory<score_t>(&gradients_pointer_, __FILE__, __LINE__);
@@ -825,6 +823,13 @@ void GBDT::ResetConfig(const Config* config) {
   if (tree_learner_ != nullptr) {
     tree_learner_->ResetConfig(new_config.get());
   }
+
+  #ifdef USE_CUDA_EXP
+  boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
+                    !data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
+  tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
+  #endif  // USE_CUDA_EXP
+
   if (train_data_ != nullptr) {
     data_sample_strategy_->ResetSampleConfig(new_config.get(), false);
     if (data_sample_strategy_->NeedResizeGradients()) {
@@ -832,7 +837,7 @@ void GBDT::ResetConfig(const Config* config) {
       const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
       if (objective_function_ != nullptr) {
         #ifdef USE_CUDA_EXP
-        if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu) {
+        if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu_) {
           if (gradients_pointer_ != nullptr) {
             CHECK_NOTNULL(hessians_pointer_);
             DeallocateCUDAMemory<score_t>(&gradients_pointer_, __FILE__, __LINE__);
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index 5625ee552495..15fb14ae6a70 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -400,7 +400,7 @@ class GBDT : public GBDTBase {
 
  protected:
   virtual bool GetIsConstHessian(const ObjectiveFunction* objective_function) {
-    if (objective_function != nullptr && config_->boosting != std::string("goss") && config_->data_sample_strategy != std::string("goss")) {
+    if (objective_function != nullptr && !data_sample_strategy_->IsHessianChange()) {
       return objective_function->IsConstantHessian();
     } else {
       return false;
@@ -493,6 +493,8 @@ class GBDT : public GBDTBase {
   score_t* gradients_pointer_;
   /*! \brief Pointer to hessian vector, can be on CPU or GPU */
   score_t* hessians_pointer_;
+  /*! \brief Whether boosting is done on GPU, used for cuda_exp */
+  bool boosting_on_gpu_;
   #ifdef USE_CUDA_EXP
   /*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with cuda_exp */
   mutable std::vector<double> host_score_;
diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
index 55595765fcc6..fd48201e2c3a 100644
--- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
+++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
@@ -447,6 +447,16 @@ void CUDASingleGPUTreeLearner::AllocateBitset() {
   cuda_bitset_inner_len_ = 0;
 }
 
+void CUDASingleGPUTreeLearner::ResetBoostingOnGPU(const bool boosting_on_cuda) {
+  boosting_on_cuda_ = boosting_on_cuda;
+  DeallocateCUDAMemory<score_t>(&cuda_gradients_, __FILE__, __LINE__);
+  DeallocateCUDAMemory<score_t>(&cuda_hessians_, __FILE__, __LINE__);
+  if (!boosting_on_cuda_) {
+    AllocateCUDAMemory<score_t>(&cuda_gradients_, static_cast<size_t>(num_data_), __FILE__, __LINE__);
+    AllocateCUDAMemory<score_t>(&cuda_hessians_, static_cast<size_t>(num_data_), __FILE__, __LINE__);
+  }
+}
+
 #ifdef DEBUG
 void CUDASingleGPUTreeLearner::CheckSplitValid(
   const int left_leaf,
diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
index 942a6d1cb17d..1c17aa009c84 100644
--- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
+++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
@@ -49,6 +49,8 @@ class CUDASingleGPUTreeLearner: public SerialTreeLearner {
   Tree* FitByExistingTree(const Tree* old_tree, const std::vector<int>& leaf_pred,
                           const score_t* gradients, const score_t* hessians) const override;
 
+  virtual void ResetBoostingOnGPU(const bool boosting_on_gpu) override;
+
  protected:
   void BeforeTrain() override;
 
@@ -119,7 +121,7 @@ class CUDASingleGPUTreeLearner: public SerialTreeLearner {
   /*! \brief hessians on CUDA */
   score_t* cuda_hessians_;
   /*! \brief whether boosting is done on CUDA */
-  const bool boosting_on_cuda_;
+  bool boosting_on_cuda_;
 
   #ifdef DEBUG
   /*! \brief gradients on CPU */

From b4a014f2874decf3649a0985eaecc9dbcbb17cfc Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 24 Aug 2022 06:41:54 +0000
Subject: [PATCH 66/84] add spaces

---
 src/boosting/gbdt.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index d2c61b85aafb..029f39b2fb80 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -99,7 +99,7 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
   is_constant_hessian_ = GetIsConstHessian(objective_function);
 
   boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
-                               !data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
+                               !data_sample_strategy_->IsHessianChange();  // for sample strategy with Hessian change, fall back to boosting on CPU
   tree_learner_ = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type,
                                                                               config_.get(), boosting_on_gpu_));
 
@@ -727,7 +727,7 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
 
   #ifdef USE_CUDA_EXP
   boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
-                    !data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
+                    !data_sample_strategy_->IsHessianChange();  // for sample strategy with Hessian change, fall back to boosting on CPU
   tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
   #endif  // USE_CUDA_EXP
 
@@ -826,7 +826,7 @@ void GBDT::ResetConfig(const Config* config) {
 
   #ifdef USE_CUDA_EXP
   boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
-                    !data_sample_strategy_->IsHessianChange(); // for sample strategy with Hessian change, fall back to boosting on CPU
+                    !data_sample_strategy_->IsHessianChange();  // for sample strategy with Hessian change, fall back to boosting on CPU
   tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
   #endif  // USE_CUDA_EXP
 

From f783a611b24f8401f91b0e047bc04a4e9c935ced Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 24 Aug 2022 06:43:08 +0000
Subject: [PATCH 67/84] remove redundant virtual

---
 src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
index 1c17aa009c84..a55f9df8fc15 100644
--- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
+++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
@@ -49,7 +49,7 @@ class CUDASingleGPUTreeLearner: public SerialTreeLearner {
   Tree* FitByExistingTree(const Tree* old_tree, const std::vector<int>& leaf_pred,
                           const score_t* gradients, const score_t* hessians) const override;
 
-  virtual void ResetBoostingOnGPU(const bool boosting_on_gpu) override;
+  void ResetBoostingOnGPU(const bool boosting_on_gpu) override;
 
  protected:
   void BeforeTrain() override;

From 204517b00cb46bd1efcaaca1499dc9737bb56864 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 24 Aug 2022 08:27:36 +0000
Subject: [PATCH 68/84] include <LightGBM/utils/log.h> for USE_CUDA

---
 include/LightGBM/cuda/cuda_utils.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/LightGBM/cuda/cuda_utils.h b/include/LightGBM/cuda/cuda_utils.h
index ee88c52a0404..f1c28213d9f3 100644
--- a/include/LightGBM/cuda/cuda_utils.h
+++ b/include/LightGBM/cuda/cuda_utils.h
@@ -10,10 +10,10 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <stdio.h>
+#include <LightGBM/utils/log.h>
 #endif  // USE_CUDA || USE_CUDA_EXP
 
 #ifdef USE_CUDA_EXP
-#include <LightGBM/utils/log.h>
 #include <vector>
 #endif  // USE_CUDA_EXP
 

From e5d4605f8222cee61dce9f44d3ce5bb95deaa658 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Mon, 29 Aug 2022 12:36:35 +0000
Subject: [PATCH 69/84] check for identity in
 test_goss_boosting_and_strategy_equivalent

---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 0fd135f5f30e..23e7a970d823 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3601,7 +3601,7 @@ def test_goss_boosting_and_strategy_equivalent():
               num_boost_round=10,
               valid_sets=lgb_eval,
               callbacks=[lgb.record_evaluation(evals_result2)])
-    np.testing.assert_equal(evals_result1['valid_0']['l2'], evals_result2['valid_0']['l2'])
+    assert evals_result1['valid_0']['l2'] == evals_result2['valid_0']['l2']
 
 
 def test_sample_strategy_with_boosting():

From 469f6bbe6c0d6a425f637cd11f52acae73d180e6 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Mon, 29 Aug 2022 12:55:46 +0000
Subject: [PATCH 70/84] check for identity in
 test_sample_strategy_with_boosting

---
 tests/python_package_test/test_engine.py | 58 ++++++++++++++++--------
 1 file changed, 40 insertions(+), 18 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 23e7a970d823..c79e87a8c528 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3615,51 +3615,65 @@ def test_sample_strategy_with_boosting():
         'data_sample_strategy': 'goss',
         'metric': 'l2',
         'verbose': -1,
+        'num_threads': 1,
+        'force_row_wise': True,
         'gpu_use_dp': True,
+        'device': 'cuda'
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
-    ret1 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret1 == pytest.approx(3149.393862, abs=1.0)
-    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret1)
+    eval_res1 = evals_result['valid_0']['l2'][-1]
+    test_res1 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert test_res1 == pytest.approx(3149.393862, abs=1.0)
+    assert eval_res1 == pytest.approx(test_res1)
 
     params = {
         'boosting': 'gbdt',
         'data_sample_strategy': 'goss',
         'metric': 'l2',
         'verbose': -1,
+        'num_threads': 1,
+        'force_row_wise': True,
         'gpu_use_dp': True,
+        'device': 'cuda'
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
-    ret2 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret2 == pytest.approx(2547.715968, abs=1.0)
-    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret2)
+    eval_res2 = evals_result['valid_0']['l2'][-1]
+    test_res2 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert test_res2 == pytest.approx(2547.715968, abs=1.0)
+    assert eval_res2 == pytest.approx(test_res2)
 
     params = {
         'boosting': 'goss',
         'data_sample_strategy': 'goss',
         'metric': 'l2',
         'verbose': -1,
+        'num_threads': 1,
+        'force_row_wise': True,
         'gpu_use_dp': True,
+        'device': 'cuda'
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
-    ret3 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret3 == pytest.approx(2547.715968, abs=1.0)
-    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret3)
+    eval_res3 = evals_result['valid_0']['l2'][-1]
+    test_res3 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert test_res3 == pytest.approx(2547.715968, abs=1.0)
+    assert eval_res3 == pytest.approx(test_res3)
 
-    assert ret1 != ret2
-    assert ret2 == ret3
+    assert test_res1 != test_res2
+    assert eval_res1 != eval_res2
+    assert test_res2 == test_res3
+    assert eval_res2 == eval_res3
 
     params = {
         'boosting': 'dart',
@@ -3668,16 +3682,20 @@ def test_sample_strategy_with_boosting():
         'bagging_fraction': 0.5,
         'metric': 'l2',
         'verbose': -1,
+        'num_threads': 1,
+        'force_row_wise': True,
         'gpu_use_dp': True,
+        'device': 'cuda'
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
-    ret4 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret4 == pytest.approx(3134.866931, abs=1.0)
-    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret4)
+    eval_res4 = evals_result['valid_0']['l2'][-1]
+    test_res4 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert test_res4 == pytest.approx(3134.866931, abs=1.0)
+    assert eval_res4 == pytest.approx(test_res4)
 
     params = {
         'boosting': 'gbdt',
@@ -3686,17 +3704,21 @@ def test_sample_strategy_with_boosting():
         'bagging_fraction': 0.5,
         'metric': 'l2',
         'verbose': -1,
+        'num_threads': 1,
+        'force_row_wise': True,
         'gpu_use_dp': True,
+        'device': 'cuda'
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
-    ret5 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert ret5 == pytest.approx(2539.792378, abs=1.0)
-    assert evals_result['valid_0']['l2'][-1] == pytest.approx(ret5)
-    assert ret4 != ret5
+    eval_res5 = evals_result['valid_0']['l2'][-1]
+    test_res5 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert test_res5 == pytest.approx(2539.792378, abs=1.0)
+    assert eval_res5 == pytest.approx(test_res5)
+    assert eval_res5 != test_res5
 
 
 def test_record_evaluation_with_train():

From 512718889183ce2dba8e506bd96828421c3cb8b5 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Mon, 29 Aug 2022 14:32:46 +0000
Subject: [PATCH 71/84] remove cuda device option in
 test_sample_strategy_with_boosting

---
 tests/python_package_test/test_engine.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index c79e87a8c528..66cfe34748df 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3618,7 +3618,6 @@ def test_sample_strategy_with_boosting():
         'num_threads': 1,
         'force_row_wise': True,
         'gpu_use_dp': True,
-        'device': 'cuda'
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3638,7 +3637,6 @@ def test_sample_strategy_with_boosting():
         'num_threads': 1,
         'force_row_wise': True,
         'gpu_use_dp': True,
-        'device': 'cuda'
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3658,7 +3656,6 @@ def test_sample_strategy_with_boosting():
         'num_threads': 1,
         'force_row_wise': True,
         'gpu_use_dp': True,
-        'device': 'cuda'
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3685,7 +3682,6 @@ def test_sample_strategy_with_boosting():
         'num_threads': 1,
         'force_row_wise': True,
         'gpu_use_dp': True,
-        'device': 'cuda'
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
@@ -3707,7 +3703,6 @@ def test_sample_strategy_with_boosting():
         'num_threads': 1,
         'force_row_wise': True,
         'gpu_use_dp': True,
-        'device': 'cuda'
     }
     evals_result = {}
     gbm = lgb.train(params, lgb_train,

From cc28c8a8a73486ed1280f01e5eb15fa996c67deb Mon Sep 17 00:00:00 2001
From: shiyu1994 <shiyu_k1994@qq.com>
Date: Mon, 5 Sep 2022 10:14:47 +0800
Subject: [PATCH 72/84] Update tests/python_package_test/test_engine.py

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
---
 tests/python_package_test/test_engine.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 66cfe34748df..f554d43296ce 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3713,7 +3713,8 @@ def test_sample_strategy_with_boosting():
     test_res5 = mean_squared_error(y_test, gbm.predict(X_test))
     assert test_res5 == pytest.approx(2539.792378, abs=1.0)
     assert eval_res5 == pytest.approx(test_res5)
-    assert eval_res5 != test_res5
+    assert test_res4 != test_res5
+    assert eval_res4 != eval_res5
 
 
 def test_record_evaluation_with_train():

From 42f3de9db9bb8a6a7af22072e24af2cdfded08a1 Mon Sep 17 00:00:00 2001
From: shiyu1994 <shiyu_k1994@qq.com>
Date: Mon, 5 Sep 2022 10:14:58 +0800
Subject: [PATCH 73/84] Update tests/python_package_test/test_engine.py

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 tests/python_package_test/test_engine.py | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index f554d43296ce..ab63d1f9f0ab 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3571,8 +3571,7 @@ def test_goss_boosting_and_strategy_equivalent():
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
-    params1 = {
-        'boosting': 'goss',
+    base_params = {
         'metric': 'l2',
         'verbose': -1,
         'bagging_seed': 0,
@@ -3581,21 +3580,13 @@ def test_goss_boosting_and_strategy_equivalent():
         'force_row_wise': True,
         'gpu_use_dp': True,
     }
+    params1 = {**base_params, 'boosting': 'goss'}
     evals_result1 = {}
     lgb.train(params1, lgb_train,
               num_boost_round=10,
               valid_sets=lgb_eval,
               callbacks=[lgb.record_evaluation(evals_result1)])
-    params2 = {
-        'data_sample_strategy': 'goss',
-        'metric': 'l2',
-        'verbose': -1,
-        'bagging_seed': 0,
-        'learning_rate': 0.05,
-        'num_threads': 1,
-        'force_row_wise': True,
-        'gpu_use_dp': True,
-    }
+    params2 = {**base_params, 'data_sample_strategy': 'goss'}
     evals_result2 = {}
     lgb.train(params2, lgb_train,
               num_boost_round=10,

From ea95e86482eacda91b6b739299407792ca7d5d93 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 7 Sep 2022 07:21:11 +0000
Subject: [PATCH 74/84] ResetGradientBuffers after ResetSampleConfig

---
 src/boosting/gbdt.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index bc86b5ae1465..fe2759681aba 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -126,7 +126,6 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
   #endif  // USE_CUDA_EXP
 
   num_data_ = train_data_->num_data();
-  ResetGradientBuffers();
 
   // get max feature index
   max_feature_idx_ = train_data_->num_total_features() - 1;
@@ -141,6 +140,7 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
 
   // if need bagging, create buffer
   data_sample_strategy_->ResetSampleConfig(config_.get(), true);
+  ResetGradientBuffers();
 
   class_need_train_ = std::vector<bool>(num_tree_per_iteration_, true);
   if (objective_function_ != nullptr && objective_function_->SkipEmptyClass()) {

From 18a54ef40bcd1ed096d8b2283c81b8e642d35e25 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Fri, 9 Sep 2022 05:06:12 +0000
Subject: [PATCH 75/84] ResetGradientBuffers after ResetSampleConfig

---
 src/boosting/gbdt.cpp | 3 +--
 src/boosting/gbdt.h   | 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index fe2759681aba..be4fabe9e078 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -733,8 +733,6 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
 
     num_data_ = train_data_->num_data();
 
-    ResetGradientBuffers();
-
     max_feature_idx_ = train_data_->num_total_features() - 1;
     label_idx_ = train_data_->label_idx();
     feature_names_ = train_data_->feature_names();
@@ -743,6 +741,7 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
 
     tree_learner_->ResetTrainingData(train_data, is_constant_hessian_);
     data_sample_strategy_->ResetSampleConfig(config_.get(), true);
+    ResetGradientBuffers();
   } else {
     tree_learner_->ResetIsConstantHessian(is_constant_hessian_);
   }
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index 2f18a575a1d6..1f8a778d619b 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -443,6 +443,9 @@ class GBDT : public GBDTBase {
 
   double BoostFromAverage(int class_id, bool update_scorer);
 
+  /*!
+  * \brief Reset gradient buffers, must be called after sample strategy is reset
+  */
   void ResetGradientBuffers();
 
   /*! \brief current iteration */

From beb12b99c76b124de74213efe5c271f599d71784 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Fri, 9 Sep 2022 06:52:30 +0000
Subject: [PATCH 76/84] ResetGradientBuffers after bagging

---
 include/LightGBM/cuda/cuda_utils.h |  1 +
 src/boosting/gbdt.cpp              | 27 +++++++++++++++++----------
 src/boosting/gbdt.h                |  4 ++++
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/include/LightGBM/cuda/cuda_utils.h b/include/LightGBM/cuda/cuda_utils.h
index 2d419c260207..da73e8bc7b73 100644
--- a/include/LightGBM/cuda/cuda_utils.h
+++ b/include/LightGBM/cuda/cuda_utils.h
@@ -121,6 +121,7 @@ class CUDAVector {
   void Resize(size_t size) {
     if (size == 0) {
       Clear();
+      return;
     }
     T* new_data = nullptr;
     AllocateCUDAMemory<T>(&new_data, size, __FILE__, __LINE__);
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index be4fabe9e078..32d680f39608 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -342,6 +342,10 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
   const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
   const std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices = data_sample_strategy_->bag_data_indices();
 
+  if (gradients != nullptr && is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_ && !data_sample_strategy_->IsHessianChange()) {
+    ResetGradientBuffers();
+  }
+
   bool should_continue = false;
   for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
     const size_t offset = static_cast<size_t>(cur_tree_id) * num_data_;
@@ -800,17 +804,18 @@ void GBDT::ResetGradientBuffers() {
   if (objective_function_ != nullptr) {
     #ifdef USE_CUDA_EXP
     if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu_) {
-      if (gradients_pointer_ != nullptr) {
-        CHECK_NOTNULL(hessians_pointer_);
-        DeallocateCUDAMemory<score_t>(&gradients_pointer_, __FILE__, __LINE__);
-        DeallocateCUDAMemory<score_t>(&hessians_pointer_, __FILE__, __LINE__);
+      if (cuda_gradients_.Size() < total_size) {
+        cuda_gradients_.Resize(total_size);
+        cuda_hessians_.Resize(total_size);
       }
-      AllocateCUDAMemory<score_t>(&gradients_pointer_, total_size, __FILE__, __LINE__);
-      AllocateCUDAMemory<score_t>(&hessians_pointer_, total_size, __FILE__, __LINE__);
+      gradients_pointer_ = cuda_gradients_.RawData();
+      hessians_pointer_ = cuda_hessians_.RawData();
     } else {
     #endif  // USE_CUDA_EXP
-      gradients_.resize(total_size);
-      hessians_.resize(total_size);
+      if (gradients_.size() < total_size) {
+        gradients_.resize(total_size);
+        hessians_.resize(total_size);
+      }
       gradients_pointer_ = gradients_.data();
       hessians_pointer_ = hessians_.data();
     #ifdef USE_CUDA_EXP
@@ -818,8 +823,10 @@ void GBDT::ResetGradientBuffers() {
     #endif  // USE_CUDA_EXP
   } else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) {
     const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
-    gradients_.resize(total_size);
-    hessians_.resize(total_size);
+    if (gradients_.size() < total_size) {
+      gradients_.resize(total_size);
+      hessians_.resize(total_size);
+    }
     gradients_pointer_ = gradients_.data();
     hessians_pointer_ = hessians_.data();
   }
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index 1f8a778d619b..c3934f5a0e73 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -501,6 +501,10 @@ class GBDT : public GBDTBase {
   /*! \brief Whether boosting is done on GPU, used for cuda_exp */
   bool boosting_on_gpu_;
   #ifdef USE_CUDA_EXP
+  /*! \brief Gradient vector on GPU */
+  CUDAVector<score_t> cuda_gradients_;
+  /*! \brief Hessian vector on GPU */
+  CUDAVector<score_t> cuda_hessians_;
   /*! \brief Buffer for scores when boosting is on GPU but evaluation is not, used only with cuda_exp */
   mutable std::vector<double> host_score_;
   /*! \brief Buffer for scores when boosting is not on GPU but evaluation is, used only with cuda_exp */

From c6175188d6e78d0de7317c2c3e7f19a15a7e1d38 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Fri, 9 Sep 2022 06:53:46 +0000
Subject: [PATCH 77/84] remove useless code

---
 src/boosting/gbdt.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 32d680f39608..8681740bb298 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -822,7 +822,6 @@ void GBDT::ResetGradientBuffers() {
     }
     #endif  // USE_CUDA_EXP
   } else if (data_sample_strategy_->IsHessianChange() || (is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_)) {
-    const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
     if (gradients_.size() < total_size) {
       gradients_.resize(total_size);
       hessians_.resize(total_size);

From 87b3e0e6e522997bd8fe008f0e146d245628ca2a Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Fri, 9 Sep 2022 09:24:18 +0000
Subject: [PATCH 78/84] check objective_function_ instead of gradients

---
 src/boosting/gbdt.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 8681740bb298..65d730bc4740 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -342,7 +342,7 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
   const data_size_t bag_data_cnt = data_sample_strategy_->bag_data_cnt();
   const std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>>& bag_data_indices = data_sample_strategy_->bag_data_indices();
 
-  if (gradients != nullptr && is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_ && !data_sample_strategy_->IsHessianChange()) {
+  if (objective_function_ == nullptr && is_use_subset && bag_data_cnt < num_data_ && !boosting_on_gpu_ && !data_sample_strategy_->IsHessianChange()) {
     ResetGradientBuffers();
   }
 

From 58356e4a066e0b1f7195f9a7cf0bcaf2b1ea53c5 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 14 Sep 2022 03:46:32 +0000
Subject: [PATCH 79/84] enable rf with goss

simplify params in test cases
---
 src/boosting/rf.hpp                      |   8 +-
 tests/python_package_test/test_engine.py | 110 +++++++++++------------
 2 files changed, 58 insertions(+), 60 deletions(-)

diff --git a/src/boosting/rf.hpp b/src/boosting/rf.hpp
index 40a54ad8626d..30efb8fdd5c1 100644
--- a/src/boosting/rf.hpp
+++ b/src/boosting/rf.hpp
@@ -32,8 +32,12 @@ class RF : public GBDT {
 
   void Init(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function,
     const std::vector<const Metric*>& training_metrics) override {
-    CHECK(config->bagging_freq > 0 && config->bagging_fraction < 1.0f && config->bagging_fraction > 0.0f);
-    CHECK(config->feature_fraction <= 1.0f && config->feature_fraction > 0.0f);
+    if (config->data_sample_strategy == std::string("bagging")) {
+      CHECK((config->bagging_freq > 0 && config->bagging_fraction < 1.0f && config->bagging_fraction > 0.0f) ||
+            (config->feature_fraction <= 1.0f && config->feature_fraction > 0.0f));
+    } else {
+      CHECK_EQ(config->data_sample_strategy, std::string("goss"));
+    }
     GBDT::Init(config, train_data, objective_function, training_metrics);
 
     if (num_init_iteration_ > 0) {
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 0b51a108dbb1..f9e37634ccde 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1198,7 +1198,7 @@ def test_feature_name_with_non_ascii():
     X_train = np.random.normal(size=(100, 4))
     y_train = np.random.random(100)
     # This has non-ascii strings.
-    feature_names = [u'F_零', u'F_一', u'F_二', u'F_三']
+    feature_names = [u'F1', u'F2', u'F3', u'F4']
     params = {'verbose': -1}
     lgb_train = lgb.Dataset(X_train, y_train)
 
@@ -3607,17 +3607,17 @@ def test_sample_strategy_with_boosting():
     lgb_train = lgb.Dataset(X_train, y_train)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
 
-    params = {
-        'boosting': 'dart',
-        'data_sample_strategy': 'goss',
+    base_params = {
         'metric': 'l2',
         'verbose': -1,
         'num_threads': 1,
         'force_row_wise': True,
         'gpu_use_dp': True,
     }
+
+    params1 = {**base_params, 'boosting': 'dart', 'data_sample_strategy': 'goss'}
     evals_result = {}
-    gbm = lgb.train(params, lgb_train,
+    gbm = lgb.train(params1, lgb_train,
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
@@ -3626,17 +3626,9 @@ def test_sample_strategy_with_boosting():
     assert test_res1 == pytest.approx(3149.393862, abs=1.0)
     assert eval_res1 == pytest.approx(test_res1)
 
-    params = {
-        'boosting': 'gbdt',
-        'data_sample_strategy': 'goss',
-        'metric': 'l2',
-        'verbose': -1,
-        'num_threads': 1,
-        'force_row_wise': True,
-        'gpu_use_dp': True,
-    }
+    params2 = {**base_params, 'boosting': 'gbdt', 'data_sample_strategy': 'goss'}
     evals_result = {}
-    gbm = lgb.train(params, lgb_train,
+    gbm = lgb.train(params2, lgb_train,
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
@@ -3645,17 +3637,9 @@ def test_sample_strategy_with_boosting():
     assert test_res2 == pytest.approx(2547.715968, abs=1.0)
     assert eval_res2 == pytest.approx(test_res2)
 
-    params = {
-        'boosting': 'goss',
-        'data_sample_strategy': 'goss',
-        'metric': 'l2',
-        'verbose': -1,
-        'num_threads': 1,
-        'force_row_wise': True,
-        'gpu_use_dp': True,
-    }
+    params3 = {**base_params, 'boosting': 'goss', 'data_sample_strategy': 'goss'}
     evals_result = {}
-    gbm = lgb.train(params, lgb_train,
+    gbm = lgb.train(params3, lgb_train,
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
@@ -3664,54 +3648,64 @@ def test_sample_strategy_with_boosting():
     assert test_res3 == pytest.approx(2547.715968, abs=1.0)
     assert eval_res3 == pytest.approx(test_res3)
 
-    assert test_res1 != test_res2
-    assert eval_res1 != eval_res2
-    assert test_res2 == test_res3
-    assert eval_res2 == eval_res3
-
-    params = {
-        'boosting': 'dart',
-        'data_sample_strategy': 'bagging',
-        'bagging_freq': 1,
-        'bagging_fraction': 0.5,
-        'metric': 'l2',
-        'verbose': -1,
-        'num_threads': 1,
-        'force_row_wise': True,
-        'gpu_use_dp': True,
-    }
+    params4 = {**base_params, 'boosting': 'rf', 'data_sample_strategy': 'goss'}
     evals_result = {}
-    gbm = lgb.train(params, lgb_train,
+    gbm = lgb.train(params4, lgb_train,
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     eval_res4 = evals_result['valid_0']['l2'][-1]
     test_res4 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert test_res4 == pytest.approx(3134.866931, abs=1.0)
+    assert test_res4 == pytest.approx(2095.538735, abs=1.0)
     assert eval_res4 == pytest.approx(test_res4)
 
-    params = {
-        'boosting': 'gbdt',
-        'data_sample_strategy': 'bagging',
-        'bagging_freq': 1,
-        'bagging_fraction': 0.5,
-        'metric': 'l2',
-        'verbose': -1,
-        'num_threads': 1,
-        'force_row_wise': True,
-        'gpu_use_dp': True,
-    }
+    assert test_res1 != test_res2
+    assert eval_res1 != eval_res2
+    assert test_res2 == test_res3
+    assert eval_res2 == eval_res3
+    assert eval_res1 != eval_res4
+    assert test_res1 != test_res4
+    assert eval_res2 != eval_res4
+    assert test_res2 != test_res4
+
+    params5 = {**base_params, 'boosting': 'dart', 'data_sample_strategy': 'bagging', 'bagging_freq': 1, 'bagging_fraction': 0.5}
     evals_result = {}
-    gbm = lgb.train(params, lgb_train,
+    gbm = lgb.train(params5, lgb_train,
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     callbacks=[lgb.record_evaluation(evals_result)])
     eval_res5 = evals_result['valid_0']['l2'][-1]
     test_res5 = mean_squared_error(y_test, gbm.predict(X_test))
-    assert test_res5 == pytest.approx(2539.792378, abs=1.0)
+    assert test_res5 == pytest.approx(3134.866931, abs=1.0)
     assert eval_res5 == pytest.approx(test_res5)
-    assert test_res4 != test_res5
-    assert eval_res4 != eval_res5
+
+    params6 = {**base_params, 'boosting': 'gbdt', 'data_sample_strategy': 'bagging', 'bagging_freq': 1, 'bagging_fraction': 0.5}
+    evals_result = {}
+    gbm = lgb.train(params6, lgb_train,
+                    num_boost_round=10,
+                    valid_sets=lgb_eval,
+                    callbacks=[lgb.record_evaluation(evals_result)])
+    eval_res6 = evals_result['valid_0']['l2'][-1]
+    test_res6 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert test_res6 == pytest.approx(2539.792378, abs=1.0)
+    assert eval_res6 == pytest.approx(test_res6)
+    assert test_res5 != test_res6
+    assert eval_res5 != eval_res6
+
+    params7 = {**base_params, 'boosting': 'rf', 'data_sample_strategy': 'bagging', 'bagging_freq': 1, 'bagging_fraction': 0.5}
+    evals_result = {}
+    gbm = lgb.train(params7, lgb_train,
+                    num_boost_round=10,
+                    valid_sets=lgb_eval,
+                    callbacks=[lgb.record_evaluation(evals_result)])
+    eval_res7 = evals_result['valid_0']['l2'][-1]
+    test_res7 = mean_squared_error(y_test, gbm.predict(X_test))
+    assert test_res7 == pytest.approx(1518.704481, abs=1.0)
+    assert eval_res7 == pytest.approx(test_res7)
+    assert test_res5 != test_res7
+    assert eval_res5 != eval_res7
+    assert test_res6 != test_res7
+    assert eval_res6 != eval_res7
 
 
 def test_record_evaluation_with_train():

From 47957b1bc2f0507e8e6862d1220c0bb5993fd8ca Mon Sep 17 00:00:00 2001
From: shiyu1994 <shiyu_k1994@qq.com>
Date: Wed, 14 Sep 2022 20:16:56 +0800
Subject: [PATCH 80/84] remove useless changes

---
 tests/python_package_test/test_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index f9e37634ccde..e6e1faf78d97 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1198,7 +1198,7 @@ def test_feature_name_with_non_ascii():
     X_train = np.random.normal(size=(100, 4))
     y_train = np.random.random(100)
     # This has non-ascii strings.
-    feature_names = [u'F1', u'F2', u'F3', u'F4']
+    feature_names = [u'F_零', u'F_一', u'F_二', u'F_三']
     params = {'verbose': -1}
     lgb_train = lgb.Dataset(X_train, y_train)
 

From 1eb96d63cd10583cc322a2f11d49f3e998117650 Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Wed, 14 Sep 2022 14:21:32 +0000
Subject: [PATCH 81/84] allow rf with feature subsampling alone

---
 src/boosting/bagging.hpp | 11 ++++++-----
 src/boosting/rf.hpp      | 10 +++++++---
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/boosting/bagging.hpp b/src/boosting/bagging.hpp
index 98570252af18..65a937435105 100644
--- a/src/boosting/bagging.hpp
+++ b/src/boosting/bagging.hpp
@@ -91,19 +91,20 @@ class BaggingSampleStrategy : public SampleStrategy {
       if (!is_change_dataset &&
         config_ != nullptr && config_->bagging_fraction == config->bagging_fraction && config_->bagging_freq == config->bagging_freq
         && config_->pos_bagging_fraction == config->pos_bagging_fraction && config_->neg_bagging_fraction == config->neg_bagging_fraction) {
+        config_ = config;
         return;
       }
       config_ = config;
       if (balance_bagging_cond) {
         balanced_bagging_ = true;
-        bag_data_cnt_ = static_cast<data_size_t>(num_pos_data * config->pos_bagging_fraction)
-                        + static_cast<data_size_t>((num_data_ - num_pos_data) * config->neg_bagging_fraction);
+        bag_data_cnt_ = static_cast<data_size_t>(num_pos_data * config_->pos_bagging_fraction)
+                        + static_cast<data_size_t>((num_data_ - num_pos_data) * config_->neg_bagging_fraction);
       } else {
-        bag_data_cnt_ = static_cast<data_size_t>(config->bagging_fraction * num_data_);
+        bag_data_cnt_ = static_cast<data_size_t>(config_->bagging_fraction * num_data_);
       }
       bag_data_indices_.resize(num_data_);
       #ifdef USE_CUDA_EXP
-      if (config->device_type == std::string("cuda_exp")) {
+      if (config_->device_type == std::string("cuda_exp")) {
         cuda_bag_data_indices_.Resize(num_data_);
       }
       #endif  // USE_CUDA_EXP
@@ -115,7 +116,7 @@ class BaggingSampleStrategy : public SampleStrategy {
       }
 
       double average_bag_rate =
-          (static_cast<double>(bag_data_cnt_) / num_data_) / config->bagging_freq;
+          (static_cast<double>(bag_data_cnt_) / num_data_) / config_->bagging_freq;
       is_use_subset_ = false;
       if (config_->device_type != std::string("cuda_exp")) {
         const int group_threshold_usesubset = 100;
diff --git a/src/boosting/rf.hpp b/src/boosting/rf.hpp
index 30efb8fdd5c1..9a87e982483e 100644
--- a/src/boosting/rf.hpp
+++ b/src/boosting/rf.hpp
@@ -34,7 +34,7 @@ class RF : public GBDT {
     const std::vector<const Metric*>& training_metrics) override {
     if (config->data_sample_strategy == std::string("bagging")) {
       CHECK((config->bagging_freq > 0 && config->bagging_fraction < 1.0f && config->bagging_fraction > 0.0f) ||
-            (config->feature_fraction <= 1.0f && config->feature_fraction > 0.0f));
+            (config->feature_fraction < 1.0f && config->feature_fraction > 0.0f));
     } else {
       CHECK_EQ(config->data_sample_strategy, std::string("goss"));
     }
@@ -59,8 +59,12 @@ class RF : public GBDT {
   }
 
   void ResetConfig(const Config* config) override {
-    CHECK(config->bagging_freq > 0 && config->bagging_fraction < 1.0f && config->bagging_fraction > 0.0f);
-    CHECK(config->feature_fraction <= 1.0f && config->feature_fraction > 0.0f);
+    if (config->data_sample_strategy == std::string("bagging")) {
+      CHECK((config->bagging_freq > 0 && config->bagging_fraction < 1.0f && config->bagging_fraction > 0.0f) ||
+            (config->feature_fraction < 1.0f && config->feature_fraction > 0.0f));
+    } else {
+      CHECK_EQ(config->data_sample_strategy, std::string("goss"));
+    }
     GBDT::ResetConfig(config);
     // not shrinkage rate for the RF
     shrinkage_rate_ = 1.0f;

From 90a2b8fabba289cc8472bbf2b568cd3ff1b1b2ed Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Thu, 15 Sep 2022 10:53:57 +0000
Subject: [PATCH 82/84] change position of ResetGradientBuffers

---
 src/boosting/gbdt.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 65d730bc4740..f7ac445131dd 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -737,6 +737,8 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
 
     num_data_ = train_data_->num_data();
 
+    ResetGradientBuffers();
+
     max_feature_idx_ = train_data_->num_total_features() - 1;
     label_idx_ = train_data_->label_idx();
     feature_names_ = train_data_->feature_names();
@@ -745,7 +747,6 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
 
     tree_learner_->ResetTrainingData(train_data, is_constant_hessian_);
     data_sample_strategy_->ResetSampleConfig(config_.get(), true);
-    ResetGradientBuffers();
   } else {
     tree_learner_->ResetIsConstantHessian(is_constant_hessian_);
   }

From c3d49338fcec764d2b2c35f70be578ea3576c85a Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Thu, 1 Dec 2022 02:54:04 +0000
Subject: [PATCH 83/84] check for dask

---
 python-package/lightgbm/dask.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py
index 2152ac8e35cb..85021e65d7c0 100644
--- a/python-package/lightgbm/dask.py
+++ b/python-package/lightgbm/dask.py
@@ -1041,6 +1041,8 @@ def _lgb_dask_fit(
         eval_at: Optional[Iterable[int]] = None,
         **kwargs: Any
     ) -> "_DaskLGBMModel":
+        if not DASK_INSTALLED:
+            raise LightGBMError('dask is required for lightgbm.dask')
         if not all((DASK_INSTALLED, PANDAS_INSTALLED, SKLEARN_INSTALLED)):
             raise LightGBMError('dask, pandas and scikit-learn are required for lightgbm.dask')
 

From ced7b06cb87c9e940e31d1baff62c9604a23dc7f Mon Sep 17 00:00:00 2001
From: Yu Shi <shiyu_k1994@qq.com>
Date: Thu, 22 Dec 2022 02:10:05 +0000
Subject: [PATCH 84/84] add parameter types for data_sample_strategy

---
 src/io/config_auto.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 5220b394edfe..b1dbcc378a27 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -901,6 +901,7 @@ const std::unordered_map<std::string, std::string>& Config::ParameterTypes() {
     {"config", "string"},
     {"objective", "string"},
     {"boosting", "string"},
+    {"data_sample_strategy", "string"},
     {"data", "string"},
     {"valid", "vector<string>"},
     {"num_iterations", "int"},