From c48c41066fd4739930cc102587f978012ab5af6f Mon Sep 17 00:00:00 2001 From: Yingqi Zhao <60782732+AidenPearce-ZYQ@users.noreply.github.com> Date: Wed, 27 Nov 2024 11:50:42 +0800 Subject: [PATCH] Add community K-hop implementation --- procedures/CMakeLists.txt | 10 +++ procedures/community/README.md | 11 ++- procedures/community/khop_core.cpp | 58 +++++++++++++ procedures/community/khop_standalone.cpp | 100 +++++++++++++++++++++++ 4 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 procedures/community/khop_core.cpp create mode 100644 procedures/community/khop_standalone.cpp diff --git a/procedures/CMakeLists.txt b/procedures/CMakeLists.txt index 0a2852a309..b90c0c3e1e 100644 --- a/procedures/CMakeLists.txt +++ b/procedures/CMakeLists.txt @@ -46,6 +46,15 @@ function(add_v2procedure APP) LIBRARY_OUTPUT_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/algo") endfunction() +function(add_standalone_from_community APP) + add_executable(${APP}_standalone community/${APP}_core.cpp community/${APP}_standalone.cpp ${LGRAPH_API_SRC}) + target_link_libraries(${APP}_standalone ${Boost_LIBRARIES} libstdc++fs.a libgomp.a dl) + set_target_properties( ${APP}_standalone PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/algo") + target_include_directories(${APP}_standalone PUBLIC ${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/src) +endfunction() + add_standalone(apsp) add_standalone(bfs) add_standalone(pagerank) @@ -81,6 +90,7 @@ add_standalone(wlpa) add_standalone(subgraph_isomorphism) add_standalone(sybilrank) add_standalone(leiden) +add_standalone_from_community(khop) add_embed(apsp) add_embed(bfs) diff --git a/procedures/community/README.md b/procedures/community/README.md index 464b31b919..33adb89493 100644 --- a/procedures/community/README.md +++ b/procedures/community/README.md @@ -1,3 +1,12 @@ # community procedures - In this directory, The procedures are contribution from the commmunity. 
\ No newline at end of file + In this directory, the procedures are contributions from the community. + +## K-hop Algorithm + The K-hop algorithm is used to find all nodes within K hops from a given starting node in a graph. It is particularly useful in social network analysis, recommendation systems, and network topology analysis. + +### Contributor + This algorithm was contributed by [Yingqi Zhao](https://github.com/AidenPearce-ZYQ), [Junjie Wang](https://github.com/iwanttoknowwhy) and [Haibo Zheng](https://github.com/ZhengHeber). + +### Usage + For detailed usage, please refer to the TuGraph-DB OLAP C++ API documentation. After installation, you can run `khop_standalone --help` for additional information. \ No newline at end of file diff --git a/procedures/community/khop_core.cpp b/procedures/community/khop_core.cpp new file mode 100644 index 0000000000..510373bd51 --- /dev/null +++ b/procedures/community/khop_core.cpp @@ -0,0 +1,58 @@ +/* + * Copyright 2024 Yingqi Zhao + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <atomic>
+
+#include "lgraph/olap_base.h"
+#include "./algo.h"
+
+using namespace lgraph_api;
+using namespace lgraph_api::olap;
+
+/**
+ * @brief Breadth-first expansion from a root vertex, limited to k levels.
+ *
+ * Follows out-edges level by level, starting at root_vid, for at most k
+ * levels. Each vertex reached for the first time is appended to `result`.
+ * The root itself is neither stored in `result` nor counted.
+ *
+ * @param graph     Graph to traverse.
+ * @param root_vid  Id of the start vertex; used directly as an index into a
+ *                  vertex array of `graph`, so it must be a valid vertex id.
+ * @param result    Output buffer. Slots [0, return value) receive the ids of
+ *                  the discovered vertices; other slots are left untouched.
+ *                  It needs room for every vertex that can be discovered.
+ *                  The order of the ids is not deterministic if the callback
+ *                  passed to ProcessVertexActive runs concurrently.
+ * @param k         Maximum number of hops to expand.
+ * @return Number of distinct vertices, other than the root, reached within
+ *         k hops.
+ *
+ * NOTE(review): OlapBase, ParallelVector, AllocVertexArray and
+ * ProcessVertexActive are written here without template arguments; confirm
+ * against lgraph/olap_base.h whether explicit arguments are required.
+ */
+size_t k_hop(OlapBase& graph, size_t root_vid, ParallelVector& result, size_t k) {
+    // Vertex 0 is a legal id (and a legal parent), so 0 cannot double as the
+    // "not visited yet" marker; use the all-ones value instead.
+    const size_t kUnvisited = static_cast<size_t>(-1);
+
+    auto active_in = graph.AllocVertexSubset();
+    active_in.Add(root_vid);
+    auto active_out = graph.AllocVertexSubset();
+    auto parent = graph.AllocVertexArray();
+    parent.Fill(kUnvisited);
+    parent[root_vid] = root_vid;
+
+    // Next free slot in `result`. The lock taken in the callback is per
+    // destination vertex, so it does not serialise callers that claim
+    // different vertices; an atomic counter gives each of them its own slot.
+    // (Presumably ProcessVertexActive invokes the callback from several
+    // threads -- the per-vertex lock suggests so.)
+    std::atomic<size_t> next_slot(0);
+    size_t discovered_vertices = 0;
+    for (size_t ii = 0; ii < k; ii++) {
+        active_out.Clear();
+        const size_t num_activations = graph.ProcessVertexActive(
+            [&](size_t vi) {
+                size_t local_activations = 0;
+                for (auto& edge : graph.OutEdges(vi)) {
+                    size_t dst = edge.neighbour;
+                    // Cheap unlocked test first; repeat it under the vertex
+                    // lock so that exactly one caller claims dst.
+                    if (parent[dst] == kUnvisited) {
+                        auto lock = graph.GuardVertexLock(dst);
+                        if (parent[dst] == kUnvisited) {
+                            parent[dst] = vi;
+                            local_activations += 1;
+                            active_out.Add(dst);
+                            result[next_slot.fetch_add(1, std::memory_order_relaxed)] = dst;
+                        }
+                    }
+                }
+                return local_activations;
+            },
+            active_in);
+        printf("activates(%zu) <= %zu \n", ii + 1, num_activations);
+        discovered_vertices += num_activations;
+        // An empty frontier cannot activate anything on later levels.
+        if (num_activations == 0) break;
+        active_in.Swap(active_out);
+    }
+    return discovered_vertices;
+}
diff --git a/procedures/community/khop_standalone.cpp b/procedures/community/khop_standalone.cpp
new file mode 100644
index 0000000000..eeff9322a2
--- /dev/null
+++ b/procedures/community/khop_standalone.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2024 Yingqi Zhao
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief Standalone driver for the k-hop algorithm.
+ *
+ * Command-line parameters added by this program:
+ *   root     Identifier of the root node.
+ *   value_k  Number of search layers (value of k in K-hop algorithm).
+ *
+ * Prints the number of nodes found within value_k hops of the root, together
+ * with the time spent preparing, computing and writing the output.
+ */
+#include "olap/olap_on_disk.h"
+#include "tools/json.hpp"
+#include "./algo.h"
+using namespace lgraph_api;
+using namespace lgraph_api::olap;
+using json = nlohmann::json;
+
+/**
+ * Command-line configuration of the k-hop driver.
+ *
+ * Extends ConfigBase with the `root` and `value_k` parameters. The
+ * constructor registers the parameters, parses argc/argv and prints the
+ * resulting settings.
+ *
+ * NOTE(review): ConfigBase and OlapOnDisk are written in this file without
+ * template arguments; confirm against the lgraph headers whether explicit
+ * arguments are required.
+ */
+class MyConfig : public ConfigBase {
+ public:
+    std::string root = "0";                  // root identifier, kept as text
+    std::string name = std::string("khop");  // passed to graph.Write() in main
+    size_t value_k = 3;                      // number of hops to expand
+
+    /** Registers the base-class parameters, then `root` and `value_k`. */
+    void AddParameter(fma_common::Configuration& config) {
+        ConfigBase::AddParameter(config);
+        config.Add(root, "root", true).Comment("Identifier of the root node.");
+        config.Add(value_k, "value_k", true).Comment(
+            "Number of search layers(value of k in K-hop algorithm).");
+    }
+
+    /** Prints the base-class settings followed by name, root and value_k. */
+    void Print() {
+        ConfigBase::Print();
+        std::cout << " name: " << name << std::endl;
+        std::cout << " root: " << root << std::endl;
+        std::cout << " value_k: " << value_k << std::endl;
+    }
+
+    /** Parses the command line (exits after printing help) and prints the result. */
+    MyConfig(int& argc, char**& argv) : ConfigBase(argc, argv) {
+        fma_common::Configuration config;
+        AddParameter(config);
+        config.ExitAfterHelp(true);
+        config.ParseAndFinalize(argc, argv);
+        Print();
+    }
+};
+
+extern size_t k_hop(OlapBase& graph, size_t root_vid,
+                    ParallelVector& result, size_t k);
+
+/**
+ * Loads the graph, runs k_hop from the configured root, optionally writes the
+ * result array to config.output_dir and prints timing information.
+ *
+ * @return 0 on success, 1 if the root does not denote a vertex of the graph.
+ */
+int main(int argc, char** argv) {
+    double start_time;
+    MemUsage memUsage;
+    memUsage.startMemRecord();
+    start_time = get_time();
+    MyConfig config(argc, argv);
+    OlapOnDisk graph;
+    graph.Load(config, INPUT_SYMMETRIC);
+    size_t root_vid;
+    auto result = graph.AllocVertexArray();
+    result.Fill(0);
+    if (config.id_mapping) {
+        root_vid = graph.hash_list_.find(config.root);
+    } else {
+        // stoull rather than stoi: vertex ids are size_t, so parsing through
+        // int would reject large ids and silently wrap negative ones.
+        root_vid = std::stoull(config.root);
+    }
+    // k_hop uses the root as an array index; refuse ids outside the graph.
+    if (root_vid >= graph.NumVertices()) {
+        fprintf(stderr, "root \"%s\" is not a vertex of the loaded graph\n",
+                config.root.c_str());
+        return 1;
+    }
+    size_t value_k = config.value_k;
+    memUsage.print();
+    memUsage.reset();
+    auto prepare_cost = get_time()- start_time;
+    printf("prepare_cost = %.2lf(s)\n", prepare_cost);
+
+    start_time = get_time();
+    size_t count_result = k_hop(graph, root_vid, result, value_k);
+    memUsage.print();
+    memUsage.reset();
+    auto core_cost = get_time()- start_time;
+
+    start_time = get_time();
+    if (config.output_dir != "") {
+        graph.Write(config, result, graph.NumVertices(), config.name);
+    }
+    printf("\n================\n");
+    printf("Find %zu vertexes in %zu-hop from node NO.%zu", count_result, value_k, root_vid);
+    printf("\n================\n");
+    auto output_cost = get_time()- start_time;
+
+    printf("core_cost = %.2lf(s)\n", core_cost);
+    printf("output_cost = %.2lf(s)\n", output_cost);
+    printf("total_cost = %.2lf(s)\n", prepare_cost + core_cost + output_cost);
+    printf("DONE.\n");
+    return 0;
+}