From 1b107cf85cf2d0edb9db463cb264811e0ab949f1 Mon Sep 17 00:00:00 2001 From: rhdong Date: Thu, 27 Oct 2022 14:38:24 +0800 Subject: [PATCH] [Refactor] Change the project/repo name to HierarchicalKV --- CMakeLists.txt | 6 ++--- CONTRIBUTING.md | 14 +++++------ README.md | 40 +++++++++++++++--------------- STYLE_GUIDE.md | 2 +- docs/README.md | 12 ++++----- docs/source/conf.py | 8 +++--- docs/source/index.rst | 2 +- docs/source/toc.yaml | 2 +- include/merlin/flexible_buffer.cuh | 2 +- include/merlin/utils.cuh | 2 +- include/merlin_hashtable.cuh | 18 +++++++------- 11 files changed, 54 insertions(+), 54 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b0d393e88..8370a259b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,9 +57,9 @@ link_directories( file(GLOB_RECURSE merlin_hkvs_src RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp *.cu) # TODO: -# add_library(embed_kv STATIC ${embed_kv_src}) -# target_compile_features(embed_kv PUBLIC cxx_std_14) -# target_link_libraries(embed_kv PUBLIC ...) +# add_library(hierarchical_kv STATIC ${hierarchical_kv_src}) +# target_compile_features(hierarchical_kv PUBLIC cxx_std_14) +# target_link_libraries(hierarchical_kv PUBLIC ...) add_executable(merlin_hashtable_benchmark benchmark/merlin_hashtable_benchmark.cc.cu) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2a8337d82..4ea049cdc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,21 +1,21 @@ # Contributing -## About EmbedKV +## About HierarchicalKV -EmbedKV is a part of NVIDIA Merlin and provides hierarchical key-value storage to meet RecSys requirements. +HierarchicalKV is a part of NVIDIA Merlin and provides hierarchical key-value storage to meet RecSys requirements. -The key capability of EmbedKV is to store key-value (feature-embedding) on high-bandwidth memory (HBM) of GPUs and in host memory. +The key capability of HierarchicalKV is to store key-value (feature-embedding) on high-bandwidth memory (HBM) of GPUs and in host memory. 
You can also use the library for generic key-value storage. ## Maintainership -EmbedKV is co-maintianed by [NVIDIA Merlin Team](https://github.com/NVIDIA-Merlin) and NVIDIA product end-users, +HierarchicalKV is co-maintained by [NVIDIA Merlin Team](https://github.com/NVIDIA-Merlin) and NVIDIA product end-users, and also open for public contributions, bug fixes, and documentation. This project adheres to NVIDIA's Code of Conduct. ## Contributing -We’re grateful for your interest in EmbedKV and value your contributions. +We’re grateful for your interest in HierarchicalKV and value your contributions. We welcome contributions via pull requests(PR). Before sending out a pull request for significant change on the end-user API, we recommend you open an issue and @@ -24,7 +24,7 @@ All submissions require review by project reviewers. ### Coding Style -Refer to the [Style Guide](http://github.com/NVIDIA-Merlin/embed-kv/STYLE_GUIDE.md) +Refer to the [Style Guide](https://github.com/NVIDIA-Merlin/HierarchicalKV/blob/master/STYLE_GUIDE.md) ### Additional Requirements @@ -34,7 +34,7 @@ In addition to the above requirements, contribution also needs to meet the follo ## Community -* EmbedKV code (https://github.com/NVIDIA-Merlin/embed-kv) +* HierarchicalKV code (https://github.com/NVIDIA-Merlin/HierarchicalKV) ## Licence Apache License 2.0 diff --git a/README.md b/README.md index 5afb37cec..4dd6b23ed 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,18 @@ -# [NVIDIA EmbedKV](https://github.com/NVIDIA-Merlin/embed-kv) +# [NVIDIA HierarchicalKV](https://github.com/NVIDIA-Merlin/HierarchicalKV) -[![Version](https://img.shields.io/github/v/release/NVIDIA-Merlin/embed-kv?color=orange)](https://github.com/NVIDIA-Merlin/embed-kv/releases) -[![GitHub License](https://img.shields.io/github/license/NVIDIA-Merlin/embed-kv)](https://github.com/NVIDIA-Merlin/embed-kv/blob/master/LICENSE) 
-[![Documentation](https://img.shields.io/badge/documentation-blue.svg)](https://nvidia-merlin.github.io/embed-kv/master/README.html) +[![Version](https://img.shields.io/github/v/release/NVIDIA-Merlin/HierarchicalKV?color=orange)](https://github.com/NVIDIA-Merlin/HierarchicalKV/releases) +[![GitHub License](https://img.shields.io/github/license/NVIDIA-Merlin/HierarchicalKV)](https://github.com/NVIDIA-Merlin/HierarchicalKV/blob/master/LICENSE) +[![Documentation](https://img.shields.io/badge/documentation-blue.svg)](https://nvidia-merlin.github.io/HierarchicalKV/master/README.html) -## About EmbedKV +## About HierarchicalKV -EmbedKV is a part of NVIDIA Merlin and provides hierarchical key-value storage to meet RecSys requirements. +HierarchicalKV is a part of NVIDIA Merlin and provides hierarchical key-value storage to meet RecSys requirements. -The key capability of EmbedKV is to store key-value (feature-embedding) on high-bandwidth memory (HBM) of GPUs and in host memory. +The key capability of HierarchicalKV is to store key-value (feature-embedding) on high-bandwidth memory (HBM) of GPUs and in host memory. You can also use the library for generic key-value storage. -## Benefits of EmbedKV +## Benefits of HierarchicalKV When building large recommender systems, machine learning (ML) engineers face the following challenges: @@ -21,7 +21,7 @@ When building large recommender systems, machine learning (ML) engineers face th - It is difficult to efficiently control consumption growth of limited HBM with customized strategies. - Most generic key-value libraries provide low HBM and host memory utilization. -EmbedKV alleviates these challenges and helps the machine learning engineers in RecSys with the following benefits: +HierarchicalKV alleviates these challenges and helps the machine learning engineers in RecSys with the following benefits: - Supports training large RecSys models on **HBM and host memory** at the same time. 
- Provides better performance by **full bypassing CPUs** and reducing the communication workload. @@ -29,19 +29,19 @@ EmbedKV alleviates these challenges and helps the machine learning engineers in The strategies are implemented by CUDA kernels. - Operates at a high working-status load factor that is close to 1.0. -EmbedKV makes NVIDIA GPUs more suitable for training large and super-large models of ***search, recommendations, and advertising***. +HierarchicalKV makes NVIDIA GPUs more suitable for training large and super-large models of ***search, recommendations, and advertising***. The library simplifies the common challenges to building, evaluating, and serving sophisticated recommenders models. ## API Documentation The main classes and structs are below, and it's recommended to read the comments in the source code directly: -- [`class HashTable`](https://github.com/NVIDIA-Merlin/embed-kv/blob/master/include/merlin_hashtable.cuh#L101) -- [`class EvictStrategy`](https://github.com/NVIDIA-Merlin/embed-kv/blob/master/include/merlin_hashtable.cuh#L106) -- [`struct HashTableOptions`](https://github.com/NVIDIA-Merlin/embed-kv/blob/master/include/merlin_hashtable.cuh#L34) -- [`Struct HashTable::Vector`](https://github.com/NVIDIA-Merlin/embed-kv/blob/master/include/merlin_hashtable.cuh#L106) +- [`class HashTable`](https://github.com/NVIDIA-Merlin/HierarchicalKV/blob/master/include/merlin_hashtable.cuh#L101) +- [`class EvictStrategy`](https://github.com/NVIDIA-Merlin/HierarchicalKV/blob/master/include/merlin_hashtable.cuh#L106) +- [`struct HashTableOptions`](https://github.com/NVIDIA-Merlin/HierarchicalKV/blob/master/include/merlin_hashtable.cuh#L34) +- [`Struct HashTable::Vector`](https://github.com/NVIDIA-Merlin/HierarchicalKV/blob/master/include/merlin_hashtable.cuh#L106) -For regular API doc, please refer to [API Docs](https://nvidia-merlin.github.io/embed-kv/master/api/index.html) +For regular API doc, please refer to [API 
Docs](https://nvidia-merlin.github.io/HierarchicalKV/master/api/index.html) ## Usage restrictions @@ -49,16 +49,16 @@ For regular API doc, please refer to [API Docs](https://nvidia-merlin.github.io/ ## Contributors -EmbedKV is co-maintianed by [NVIDIA Merlin Team](https://github.com/NVIDIA-Merlin) and NVIDIA product end-users, +HierarchicalKV is co-maintained by [NVIDIA Merlin Team](https://github.com/NVIDIA-Merlin) and NVIDIA product end-users, and also open for public contributions, bug fixes, and documentation. [[Contribute](CONTRIBUTING.md)] ## How to build -Basically, EmbedKV is a headers only library, the commands below only create binaries for benchmark and unit testing. +Basically, HierarchicalKV is a header-only library; the commands below only create binaries for benchmark and unit testing. ```shell -git clone --recursive https://github.com/NVIDIA-Merlin/embed-kv.git -cd embed-kv && mkdir -p build && cd build +git clone --recursive https://github.com/NVIDIA-Merlin/HierarchicalKV.git +cd HierarchicalKV && mkdir -p build && cd build cmake -DCMAKE_BUILD_TYPE=Release -Dsm=80 .. && make -j // for Benchmark @@ -117,7 +117,7 @@ Your environment must meet the following requirements: ### Support and Feedback: -If you encounter any issues or have questions, go to [https://github.com/NVIDIA-Merlin/embed-kv/issues](https://github.com/NVIDIA-Merlin/embed-kv/issues) and submit an issue so that we can provide you with the necessary resolutions and answers. +If you encounter any issues or have questions, go to [https://github.com/NVIDIA-Merlin/HierarchicalKV/issues](https://github.com/NVIDIA-Merlin/HierarchicalKV/issues) and submit an issue so that we can provide you with the necessary resolutions and answers. ### Acknowledgment We are very grateful to external initial contributors [@Zhangyafei](https://github.com/zhangyafeikimi) and [@Lifan](https://github.com/Lifann) for their design, coding, and review work. 
diff --git a/STYLE_GUIDE.md b/STYLE_GUIDE.md index dd2e14327..b1687fb1f 100644 --- a/STYLE_GUIDE.md +++ b/STYLE_GUIDE.md @@ -1,7 +1,7 @@ #### C++ C++ code should conform to [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). -EmbedKV uses [clang-format](https://clang.llvm.org/docs/ClangFormat.html) +HierarchicalKV uses [clang-format](https://clang.llvm.org/docs/ClangFormat.html) to check your C/C++ changes. Sometimes you have some manually formatted code that you don’t want clang-format to touch. You can disable formatting like this: diff --git a/docs/README.md b/docs/README.md index e1d8f3538..2adcee53b 100755 --- a/docs/README.md +++ b/docs/README.md @@ -1,7 +1,7 @@ # Documentation -This folder contains the scripts necessary to build the documentation for EmbedKV. -You can view the generated [EmbedKV documentation](https://nvidia-merlin.github.io/embed-kv/master/README.html). +This folder contains the scripts necessary to build the documentation for HierarchicalKV. +You can view the generated [HierarchicalKV documentation](https://nvidia-merlin.github.io/HierarchicalKV/master/README.html). ## Contributing to Docs @@ -20,7 +20,7 @@ pip install -r docs/requirements-doc.txt The preceding command runs Sphinx in your shell and outputs to build/html/index.html. -The build process for EmbedKV is unique among the Merlin projects because it +The build process for HierarchicalKV is unique among the Merlin projects because it uses Doxygen, Breathe, and Exhale to create API documentation from the C++ source. ## Preview the changes @@ -45,7 +45,7 @@ for the `master` branch and the six most recent tags. The job runs daily, but you can trigger it manually by going to the following URL and clicking the *Run workflow* button. - + ### Source management: README and index files @@ -82,7 +82,7 @@ You can use a relative path for the link. 
For example--both the `README.md` fil directory, you could add a link to a heading in the `README.md` file like this: ```markdown -To build EmbedKV from scratch, refer to +To build HierarchicalKV from scratch, refer to [How to Build](./README.md#how-to-build) in the `README` file. ``` @@ -99,7 +99,7 @@ the repository: ```markdown ## Coding Style -Refer to the [Style Guide](http://github.com/NVIDIA-Merlin/embed-kv/STYLE_GUIDE.md) +Refer to the [Style Guide](http://github.com/NVIDIA-Merlin/HierarchicalKV/STYLE_GUIDE.md) in the GitHub repository for more details. ``` diff --git a/docs/source/conf.py b/docs/source/conf.py index 7c9a6bc34..a0928b72a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -133,16 +133,16 @@ source_suffix = [".rst", ".md"] breathe_projects = { - "EmbedKV": "/tmp/doxygen/xml" + "HierarchicalKV": "/tmp/doxygen/xml" } -breathe_default_project = "EmbedKV" +breathe_default_project = "HierarchicalKV" exhale_args = { "containmentFolder": "./api", "rootFileName": "index.rst", "doxygenStripFromPath": "../../include", - "rootFileTitle": "EmbedKV C++ API Documentation", - "fullApiSubSectionTitle": "Complete EmbedKV API", + "rootFileTitle": "HierarchicalKV C++ API Documentation", + "fullApiSubSectionTitle": "Complete HierarchicalKV API", "createTreeView": False, "exhaleExecutesDoxygen": True, "exhaleDoxygenStdin": """ diff --git a/docs/source/index.rst b/docs/source/index.rst index 0972f48bf..63d6ef271 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -9,7 +9,7 @@ Related Resources ----------------- Merlin Key-Value Storage GitHub Repository - ``_ + ``_ About Merlin Merlin is the overarching project that brings together the Merlin projects. 
diff --git a/docs/source/toc.yaml b/docs/source/toc.yaml index 6cecc4f0b..29a9d4fe1 100755 --- a/docs/source/toc.yaml +++ b/docs/source/toc.yaml @@ -7,7 +7,7 @@ subtrees: - file: api/index.rst title: API Documentation - file: CONTRIBUTING.md - title: Contributing to EmbedKV + title: Contributing to HierarchicalKV # The multi-modal data example uses several notebooks to demonstrate how to use of multi-modal data (text and images) diff --git a/include/merlin/flexible_buffer.cuh b/include/merlin/flexible_buffer.cuh index c4cb97c5c..cfbc8b2d0 100644 --- a/include/merlin/flexible_buffer.cuh +++ b/include/merlin/flexible_buffer.cuh @@ -38,7 +38,7 @@ class FlexPinnedBuffer { try { if (!ptr_) CUDA_CHECK(cudaFreeHost(ptr_)); } catch (const nv::merlin::CudaException& e) { - cerr << "[embed-kv] Failed to free FlexPinnedBuffer!" << endl; + cerr << "[HierarchicalKV] Failed to free FlexPinnedBuffer!" << endl; } } diff --git a/include/merlin/utils.cuh b/include/merlin/utils.cuh index 79d834fcc..9407aea9b 100644 --- a/include/merlin/utils.cuh +++ b/include/merlin/utils.cuh @@ -123,7 +123,7 @@ inline void merlin_check_(bool cond, const std::string& msg, const char* file, int line) { if (!cond) { throw CudaException(std::string(file) + ":" + std::to_string(line) + - ": EmbedKV error " + msg); + ": HierarchicalKV error " + msg); } } diff --git a/include/merlin_hashtable.cuh b/include/merlin_hashtable.cuh index d6d9c9cdc..e7fd93d0e 100644 --- a/include/merlin_hashtable.cuh +++ b/include/merlin_hashtable.cuh @@ -49,7 +49,7 @@ enum class EvictStrategy { }; /** - * @brief The options struct of EmbedKV. + * @brief The options struct of HierarchicalKV. */ struct HashTableOptions { size_t init_capacity = 0; ///< The initial capacity of the hash table. 
@@ -103,7 +103,7 @@ using EraseIfPredict = bool (*)( ); /** - * A EmbedKV hash table is a concurrent and hierarchical hash table that is + * A HierarchicalKV hash table is a concurrent and hierarchical hash table that is * powered by GPUs and can use HBM and host memory as storage for key-value * pairs. Support for SSD storage is a future consideration. * @@ -129,7 +129,7 @@ template class HashTable { public: /** - * @brief The value type of a EmbedKV hash table. + * @brief The value type of a HierarchicalKV hash table. */ struct Vector { using value_type = V; @@ -194,7 +194,7 @@ class HashTable { options_.block_size = SAFE_GET_BLOCK_SIZE(options_.block_size); reach_max_capacity_ = (options_.init_capacity * 2 > options_.max_capacity); MERLIN_CHECK((!(options_.io_by_cpu && options_.max_hbm_for_vectors != 0)), - "[embed-kv] `io_by_cpu` should not be true when " + "[HierarchicalKV] `io_by_cpu` should not be true when " "`max_hbm_for_vectors` is not 0!"); initialized_ = true; CudaCheckError(); @@ -309,7 +309,7 @@ class HashTable { { static_assert(sizeof(value_type*) == sizeof(uint64_t), - "[embed-kv] illegal conversation. value_type pointer " + "[HierarchicalKV] illegal conversation. value_type pointer " "should be 64 bit!"); const size_t N = n; @@ -457,7 +457,7 @@ class HashTable { if (!is_fast_mode()) { static_assert(sizeof(value_type*) == sizeof(uint64_t), - "[embed-kv] illegal conversation. value_type pointer must " + "[HierarchicalKV] illegal conversation. value_type pointer must " "be 64 bit!"); const size_t N = n; @@ -554,7 +554,7 @@ class HashTable { { static_assert(sizeof(value_type*) == sizeof(uint64_t), - "[embed-kv] illegal conversation. value_type pointer " + "[HierarchicalKV] illegal conversation. 
value_type pointer " "must be 64 bit!"); const size_t N = n; @@ -752,7 +752,7 @@ class HashTable { MERLIN_CHECK( (block_size > 0), - "[embed-kv] block_size <= 0, the K-V-M size may be too large!"); + "[HierarchicalKV] block_size <= 0, the K-V-M size may be too large!"); const size_t shared_size = (sizeof(key_type) + sizeof(vector_type) + meta_size) * block_size; const int grid_size = (n - 1) / (block_size) + 1; @@ -851,7 +851,7 @@ class HashTable { MERLIN_CHECK( (block_size > 0), - "[embed-kv] block_size <= 0, the K-V-M size may be too large!"); + "[HierarchicalKV] block_size <= 0, the K-V-M size may be too large!"); const size_t shared_size = (sizeof(key_type) + sizeof(vector_type) + meta_size) * block_size; const int grid_size = (n - 1) / (block_size) + 1;