From c3b4cc58320759e3de8058134e72928e4e4e1043 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 13 Dec 2024 16:39:51 +0800 Subject: [PATCH] Various small cleanups. - Add deduction guide for Span. - Merge data split mode setter into the `MetaInfo` column sync method. - Check CUDA warning for cross space function calls. --- cmake/Utils.cmake | 9 ++++++++- include/xgboost/c_api.h | 2 ++ include/xgboost/data.h | 25 ++++++++++--------------- include/xgboost/span.h | 7 +++++++ include/xgboost/tree_model.h | 3 ++- src/common/host_device_vector.cu | 8 ++++---- src/common/transform_iterator.h | 9 +++++++++ src/data/data.cc | 3 ++- src/data/iterative_dmatrix.cu | 1 - src/data/proxy_dmatrix.h | 2 +- src/data/simple_dmatrix.cc | 13 ++++++------- src/data/simple_dmatrix.cu | 13 ++++++------- src/data/simple_dmatrix.h | 6 +++--- tests/cpp/common/test_span.cc | 5 +++++ 14 files changed, 65 insertions(+), 41 deletions(-) diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index ec47bf6eb62a..8bec8c5489e2 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -81,7 +81,8 @@ function(xgboost_set_cuda_flags target) $<$:--expt-relaxed-constexpr> $<$:-Xcompiler=${OpenMP_CXX_FLAGS}> $<$:-Xfatbin=-compress-all> - $<$:--default-stream per-thread>) + $<$:--default-stream per-thread> + ) if(FORCE_COLORED_OUTPUT) if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND @@ -200,6 +201,12 @@ macro(xgboost_target_properties target) if(WIN32 AND MINGW) target_compile_options(${target} PUBLIC -static-libstdc++) endif() + + if (NOT WIN32 AND ENABLE_ALL_WARNINGS) + target_compile_options(${target} PRIVATE + $<$:-Werror=cross-execution-space-call> + ) + endif() endmacro() # Custom definitions used in xgboost. 
diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 5adc689554d2..111e9cacc95b 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -899,6 +899,8 @@ XGB_DLL int XGBoosterFree(BoosterHandle handle); * @brief Reset the booster object to release data caches used for training. * * @since 3.0.0 + * + * @return 0 when success, -1 when failure happens */ XGB_DLL int XGBoosterReset(BoosterHandle handle); diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 6a24ce7b08f0..7d363893eb25 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -172,7 +172,6 @@ class MetaInfo { * columns. */ void Extend(MetaInfo const& that, bool accumulate_rows, bool check_column); - /** * @brief Synchronize the number of columns across all workers. * @@ -180,23 +179,20 @@ class MetaInfo { * in vertical federated learning, since each worker loads its own list of columns, * we need to sum them. */ - void SynchronizeNumberOfColumns(Context const* ctx); - - /*! \brief Whether the data is split row-wise. */ - bool IsRowSplit() const { - return data_split_mode == DataSplitMode::kRow; - } + void SynchronizeNumberOfColumns(Context const* ctx, DataSplitMode split_mode); + /** @brief Whether the data is split row-wise. */ + [[nodiscard]] bool IsRowSplit() const { return data_split_mode == DataSplitMode::kRow; } /** @brief Whether the data is split column-wise. */ - bool IsColumnSplit() const { return data_split_mode == DataSplitMode::kCol; } + [[nodiscard]] bool IsColumnSplit() const { return data_split_mode == DataSplitMode::kCol; } /** @brief Whether this is a learning to rank data. */ - bool IsRanking() const { return !group_ptr_.empty(); } + [[nodiscard]] bool IsRanking() const { return !group_ptr_.empty(); } - /*! 
- * \brief A convenient method to check if we are doing vertical federated learning, which requires + /** + * @brief A convenient method to check if we are doing vertical federated learning, which requires * some special processing. */ - bool IsVerticalFederated() const; + [[nodiscard]] bool IsVerticalFederated() const; /*! * \brief A convenient method to check if the MetaInfo should contain labels. @@ -330,10 +326,9 @@ struct HostSparsePageView { common::Span offset; common::Span data; - Inst operator[](size_t i) const { + [[nodiscard]] Inst operator[](std::size_t i) const { auto size = *(offset.data() + i + 1) - *(offset.data() + i); - return {data.data() + *(offset.data() + i), - static_cast(size)}; + return {data.data() + *(offset.data() + i), static_cast(size)}; } [[nodiscard]] size_t Size() const { return offset.size() == 0 ? 0 : offset.size() - 1; } diff --git a/include/xgboost/span.h b/include/xgboost/span.h index e0277022a4cb..6692d2f68b73 100644 --- a/include/xgboost/span.h +++ b/include/xgboost/span.h @@ -37,6 +37,7 @@ #include // numeric_limits #include #include // for move +#include // for vector #if defined(__CUDACC__) #include @@ -707,6 +708,12 @@ class IterSpan { return it_ + size(); } }; + +template +Span(std::vector const&) -> Span; + +template +Span(std::vector&) -> Span; } // namespace xgboost::common diff --git a/include/xgboost/tree_model.h b/include/xgboost/tree_model.h index 32b93c5cacaf..e9f661215653 100644 --- a/include/xgboost/tree_model.h +++ b/include/xgboost/tree_model.h @@ -186,7 +186,8 @@ class RegTree : public Model { return this->DefaultLeft() ? this->LeftChild() : this->RightChild(); } /*! \brief feature index of split condition */ - [[nodiscard]] XGBOOST_DEVICE unsigned SplitIndex() const { + [[nodiscard]] XGBOOST_DEVICE bst_feature_t SplitIndex() const { + static_assert(!std::is_signed_v); return sindex_ & ((1U << 31) - 1U); } /*! 
\brief when feature is unknown, whether goes to left child */ diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu index 00055ec69a7e..f4ec79539678 100644 --- a/src/common/host_device_vector.cu +++ b/src/common/host_device_vector.cu @@ -413,13 +413,13 @@ template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; -template class HostDeviceVector; // bst_node_t -template class HostDeviceVector; -template class HostDeviceVector; +template class HostDeviceVector; // bst_node_t +template class HostDeviceVector; +template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; -template class HostDeviceVector; // bst_feature_t +template class HostDeviceVector; // bst_feature_t template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; diff --git a/src/common/transform_iterator.h b/src/common/transform_iterator.h index 8125bd852cc5..15e2279fa159 100644 --- a/src/common/transform_iterator.h +++ b/src/common/transform_iterator.h @@ -62,6 +62,15 @@ class IndexTransformIter { ++(*this); return ret; } + IndexTransformIter &operator--() { + iter_--; + return *this; + } + IndexTransformIter operator--(int) { + auto ret = *this; + --(*this); + return ret; + } IndexTransformIter &operator+=(difference_type n) { iter_ += n; return *this; diff --git a/src/data/data.cc b/src/data/data.cc index ed3a17eaf841..1cb4c2bc0385 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -770,7 +770,8 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col } } -void MetaInfo::SynchronizeNumberOfColumns(Context const* ctx) { +void MetaInfo::SynchronizeNumberOfColumns(Context const* ctx, DataSplitMode split_mode) { + this->data_split_mode = split_mode; auto op = IsColumnSplit() ? 
collective::Op::kSum : collective::Op::kMax; auto rc = collective::Allreduce(ctx, linalg::MakeVec(&num_col_, 1), op); collective::SafeColl(rc); diff --git a/src/data/iterative_dmatrix.cu b/src/data/iterative_dmatrix.cu index 98b0b75ee249..79f62c1c9805 100644 --- a/src/data/iterative_dmatrix.cu +++ b/src/data/iterative_dmatrix.cu @@ -100,7 +100,6 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p, } iter.Reset(); - // Synchronise worker columns } IterativeDMatrix::IterativeDMatrix(std::shared_ptr ellpack, MetaInfo const& info, diff --git a/src/data/proxy_dmatrix.h b/src/data/proxy_dmatrix.h index 098343a53414..06cfbd9946a1 100644 --- a/src/data/proxy_dmatrix.h +++ b/src/data/proxy_dmatrix.h @@ -173,7 +173,7 @@ struct ExternalDataInfo { info.num_row_ = this->accumulated_rows; info.num_col_ = this->n_features; info.num_nonzero_ = this->nnz; - info.SynchronizeNumberOfColumns(ctx); + info.SynchronizeNumberOfColumns(ctx, DataSplitMode::kRow); this->Validate(); } }; diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index 241dea89b13c..821139ddaa3c 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -78,10 +78,10 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) { return out; } -void SimpleDMatrix::ReindexFeatures(Context const* ctx) { - if (info_.IsColumnSplit() && collective::GetWorldSize() > 1) { +void SimpleDMatrix::ReindexFeatures(Context const* ctx, DataSplitMode split_mode) { + if (split_mode == DataSplitMode::kCol && collective::GetWorldSize() > 1) { std::vector buffer(collective::GetWorldSize()); - buffer[collective::GetRank()] = info_.num_col_; + buffer[collective::GetRank()] = this->Info().num_col_; auto rc = collective::Allgather(ctx, linalg::MakeVec(buffer.data(), buffer.size())); SafeColl(rc); auto offset = std::accumulate(buffer.cbegin(), buffer.cbegin() + collective::GetRank(), 0); @@ -287,10 +287,9 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int 
nthread, info_.num_col_ = adapter->NumColumns(); } - // Synchronise worker columns - info_.data_split_mode = data_split_mode; - ReindexFeatures(&ctx); - info_.SynchronizeNumberOfColumns(&ctx); + // Must be called before the column sync + this->ReindexFeatures(&ctx, data_split_mode); + this->Info().SynchronizeNumberOfColumns(&ctx, data_split_mode); if (adapter->NumRows() == kAdapterUnknownSize) { using IteratorAdapterT = diff --git a/src/data/simple_dmatrix.cu b/src/data/simple_dmatrix.cu index 85988ffabb00..98c59b3bdc81 100644 --- a/src/data/simple_dmatrix.cu +++ b/src/data/simple_dmatrix.cu @@ -41,15 +41,14 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthr CopyToSparsePage(&ctx, adapter->Value(), device, missing, sparse_page_.get()); info_.num_col_ = adapter->NumColumns(); info_.num_row_ = adapter->NumRows(); - // Synchronise worker columns - info_.data_split_mode = data_split_mode; - info_.SynchronizeNumberOfColumns(&ctx); + + this->Info().SynchronizeNumberOfColumns(&ctx, data_split_mode); this->fmat_ctx_ = ctx; } -template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing, - int nthread, DataSplitMode data_split_mode); -template SimpleDMatrix::SimpleDMatrix(CupyAdapter* adapter, float missing, - int nthread, DataSplitMode data_split_mode); +template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing, std::int32_t nthread, + DataSplitMode data_split_mode); +template SimpleDMatrix::SimpleDMatrix(CupyAdapter* adapter, float missing, std::int32_t nthread, + DataSplitMode data_split_mode); } // namespace xgboost::data diff --git a/src/data/simple_dmatrix.h b/src/data/simple_dmatrix.h index ac757591cdb2..cef8e01ecdfe 100644 --- a/src/data/simple_dmatrix.h +++ b/src/data/simple_dmatrix.h @@ -21,7 +21,7 @@ class SimpleDMatrix : public DMatrix { public: SimpleDMatrix() = default; template - explicit SimpleDMatrix(AdapterT* adapter, float missing, int nthread, + explicit SimpleDMatrix(AdapterT* adapter, float 
missing, std::int32_t nthread, DataSplitMode data_split_mode = DataSplitMode::kRow); explicit SimpleDMatrix(dmlc::Stream* in_stream); @@ -61,14 +61,14 @@ class SimpleDMatrix : public DMatrix { bool SparsePageExists() const override { return true; } /** - * \brief Reindex the features based on a global view. + * @brief Reindex the features based on a global view. * * In some cases (e.g. column-wise data split and vertical federated learning), features are * loaded locally with indices starting from 0. However, all the algorithms assume the features * are globally indexed, so we reindex the features based on the offset needed to obtain the * global view. */ - void ReindexFeatures(Context const* ctx); + void ReindexFeatures(Context const* ctx, DataSplitMode split_mode); private: // Context used only for DMatrix initialization. diff --git a/tests/cpp/common/test_span.cc b/tests/cpp/common/test_span.cc index 53ce77d16c69..f7656b2a5fa2 100644 --- a/tests/cpp/common/test_span.cc +++ b/tests/cpp/common/test_span.cc @@ -24,6 +24,11 @@ TEST(Span, TestStatus) { int status = 1; TestTestStatus {&status}(); ASSERT_EQ(status, -1); + + std::vector foo; + auto bar = Span{foo}; + ASSERT_FALSE(bar.data()); + ASSERT_EQ(bar.size(), 0); } TEST(Span, DlfConstructors) {