Skip to content

Commit

Permalink
Add community K-hop implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
AidenPearce-ZYQ authored Nov 27, 2024
1 parent 055d41c commit c48c410
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 1 deletion.
10 changes: 10 additions & 0 deletions procedures/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ function(add_v2procedure APP)
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/algo")
endfunction()

# Declares the executable target <APP>_standalone for a community-contributed
# algorithm. The sources are community/<APP>_core.cpp and
# community/<APP>_standalone.cpp plus ${LGRAPH_API_SRC}; the binary is emitted
# into the "algo" sub-directory of CMAKE_RUNTIME_OUTPUT_DIRECTORY.
function(add_standalone_from_community APP)
    # Function-scoped shorthand for the target name.
    set(standalone_target ${APP}_standalone)

    add_executable(${standalone_target}
        community/${APP}_core.cpp
        community/${APP}_standalone.cpp
        ${LGRAPH_API_SRC})

    target_include_directories(${standalone_target} PUBLIC
        ${CMAKE_SOURCE_DIR}/include
        ${CMAKE_SOURCE_DIR}/src)

    target_link_libraries(${standalone_target}
        ${Boost_LIBRARIES}
        libstdc++fs.a
        libgomp.a
        dl)

    set_target_properties(${standalone_target} PROPERTIES
        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/algo")
endfunction()

add_standalone(apsp)
add_standalone(bfs)
add_standalone(pagerank)
Expand Down Expand Up @@ -81,6 +90,7 @@ add_standalone(wlpa)
add_standalone(subgraph_isomorphism)
add_standalone(sybilrank)
add_standalone(leiden)
# khop's sources live under community/, so it is registered through the
# community-specific helper instead of add_standalone.
add_standalone_from_community(khop)

add_embed(apsp)
add_embed(bfs)
Expand Down
11 changes: 10 additions & 1 deletion procedures/community/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# community procedures

The procedures in this directory are contributions from the community.
The procedures in this directory are contributions from the community.

## K-hop Algorithm
The K-hop algorithm is used to find all nodes within K hops from a given starting node in a graph. It is particularly useful in social network analysis, recommendation systems, and network topology analysis.

### Contributor
This algorithm was contributed by [Yingqi Zhao](https://github.com/AidenPearce-ZYQ), [Junjie Wang](https://github.com/iwanttoknowwhy) and [Haibo Zheng](https://github.com/ZhengHeber).

### Usage
For detailed usage, please refer to the TuGraph-DB OLAP C++ API documentation. After installation, you can run `khop_standalone --help` for additional information.
58 changes: 58 additions & 0 deletions procedures/community/khop_core.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright 2024 Yingqi Zhao
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <atomic>

#include "lgraph/olap_base.h"
#include "./algo.h"

using namespace lgraph_api;
using namespace lgraph_api::olap;

/**
 * @brief Level-by-level K-hop search from a root vertex along out-edges.
 *
 * Starting from root_vid, expands the frontier k times. Every vertex reached
 * for the first time is recorded in `result` and counted. The root itself is
 * neither recorded nor counted.
 *
 * @param graph     Graph to traverse.
 * @param root_vid  Id of the start vertex. It is used directly as an array
 *                  index and is not validated here.
 * @param result    Output array. Discovered vertex ids are written to
 *                  result[0 .. return value - 1]; their order is unspecified
 *                  when the callback runs concurrently. Entries past that
 *                  range are left untouched. NOTE(review): assumed to have at
 *                  least NumVertices() - 1 entries -- confirm against callers.
 * @param k         Number of frontier expansions (hops) to perform.
 * @return Number of distinct vertices discovered within k hops, root excluded.
 */
size_t k_hop(OlapBase<Empty>& graph, size_t root_vid, ParallelVector<size_t>& result, size_t k) {
    // Marker for "not visited yet". It must not be 0: 0 is a valid vertex id,
    // so a vertex whose parent is vertex 0 would still look unvisited and be
    // discovered (and counted) again.
    const size_t kUnvisited = static_cast<size_t>(-1);

    auto active_in = graph.AllocVertexSubset();
    active_in.Add(root_vid);
    auto active_out = graph.AllocVertexSubset();
    auto parent = graph.AllocVertexArray<size_t>();
    parent.Fill(kUnvisited);
    parent[root_vid] = root_vid;  // marks the root as visited

    // Next free position in `result`. The vertex lock taken below is per
    // destination vertex, so it does not serialise this counter; it has to be
    // atomic in case the callback is executed by several threads.
    std::atomic<size_t> next_slot(0);
    size_t discovered_vertices = 0;

    for (size_t hop = 0; hop < k; hop++) {
        active_out.Clear();
        size_t num_activations = graph.ProcessVertexActive<size_t>(
            [&](size_t vi) {
                size_t local_activations = 0;
                for (auto& edge : graph.OutEdges(vi)) {
                    size_t dst = edge.neighbour;
                    // Cheap unlocked pre-check, re-checked under the lock.
                    if (parent[dst] == kUnvisited) {
                        auto lock = graph.GuardVertexLock(dst);
                        if (parent[dst] == kUnvisited) {
                            parent[dst] = vi;
                            local_activations += 1;
                            active_out.Add(dst);
                            result[next_slot.fetch_add(1)] = dst;
                        }
                    }
                }
                return local_activations;
            },
            active_in);
        printf("activates(%lu) <= %lu \n", hop + 1, num_activations);
        discovered_vertices += num_activations;
        // The vertices found in this hop form the frontier of the next one.
        active_in.Swap(active_out);
    }
    return discovered_vertices;
}
100 changes: 100 additions & 0 deletions procedures/community/khop_standalone.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright 2024 Yingqi Zhao
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @brief The k-hop algorithm.
*
* @param root Identifier of root node.
* @param value_k Number of search layers (value of k in K-hop algorithm).
* @return Number of nodes in K-hop.
*/
#include "olap/olap_on_disk.h"
#include "tools/json.hpp"
#include "./algo.h"
using namespace lgraph_api;
using namespace lgraph_api::olap;
using json = nlohmann::json;

/**
 * Command-line configuration of the standalone K-hop program.
 *
 * Adds the options "root" and "value_k" to those registered by
 * ConfigBase<Empty>. The constructor parses argc/argv immediately and prints
 * the resulting configuration.
 */
class MyConfig : public ConfigBase<Empty> {
 public:
    // Identifier of the root vertex, kept as text; main() converts it.
    std::string root = "0";
    // Algorithm name; main() passes it to graph.Write() when output is enabled.
    std::string name = std::string("khop");
    // Number of search layers (the k in K-hop).
    size_t value_k = 3;

    // Registers the base-class options followed by the K-hop specific ones.
    void AddParameter(fma_common::Configuration& config) {
        ConfigBase<Empty>::AddParameter(config);
        config.Add(root, "root", true).Comment("Identifier of the root node.");
        config.Add(value_k, "value_k", true).Comment(
            "Number of search layers(value of k in K-hop algorithm).");
    }

    // Prints the base-class settings followed by name, root and value_k.
    void Print() {
        ConfigBase<Empty>::Print();
        std::cout << " name: " << name << std::endl;
        // Print the root identifier itself, not the algorithm name.
        std::cout << " root: " << root << std::endl;
        std::cout << " value_k: " << value_k << std::endl;
    }

    // Parses the command line (exits after printing help when requested) and
    // echoes the final configuration.
    MyConfig(int& argc, char**& argv) : ConfigBase<Empty>(argc, argv) {
        fma_common::Configuration config;
        AddParameter(config);
        config.ExitAfterHelp(true);
        config.ParseAndFinalize(argc, argv);
        Print();
    }
};

// K-hop core routine, defined in khop_core.cpp. Writes the discovered vertex
// ids into `result` and returns how many vertices were found.
extern size_t k_hop(OlapBase<Empty>& graph, size_t root_vid,
ParallelVector<size_t>& result, size_t k);

/**
 * Entry point of the standalone K-hop program.
 *
 * Loads the graph described by the command-line configuration, runs k_hop()
 * from the configured root, optionally writes the result array, and prints
 * timing and memory statistics for the prepare / core / output phases.
 *
 * @return 0 on success, 1 when the configured root is not a valid vertex.
 */
int main(int argc, char** argv) {
    double start_time;
    MemUsage memUsage;
    memUsage.startMemRecord();

    // ---- prepare: parse options, load the graph, resolve the root ----
    start_time = get_time();
    MyConfig config(argc, argv);
    OlapOnDisk<Empty> graph;
    graph.Load(config, INPUT_SYMMETRIC);
    size_t root_vid;
    auto result = graph.AllocVertexArray<size_t>();
    result.Fill(0);
    if (config.id_mapping)
        root_vid = graph.hash_list_.find(config.root);
    else
        // Parse as an unsigned 64-bit value: going through int (std::stoi)
        // rejects ids above INT_MAX and silently wraps negative input.
        root_vid = std::stoull(config.root);
    // k_hop() indexes per-vertex arrays with root_vid, so reject ids outside
    // the loaded graph instead of reading/writing out of bounds.
    if (root_vid >= graph.NumVertices()) {
        fprintf(stderr, "root \"%s\" is not a valid vertex (graph has %lu vertices)\n",
                config.root.c_str(), static_cast<unsigned long>(graph.NumVertices()));
        return 1;
    }
    size_t value_k = config.value_k;
    memUsage.print();
    memUsage.reset();
    auto prepare_cost = get_time()- start_time;
    printf("prepare_cost = %.2lf(s)\n", prepare_cost);

    // ---- core: run the K-hop search ----
    start_time = get_time();
    size_t count_result = k_hop(graph, root_vid, result, value_k);
    memUsage.print();
    memUsage.reset();
    auto core_cost = get_time()- start_time;

    // ---- output: optional dump of the result array plus a summary ----
    start_time = get_time();
    if (config.output_dir != "") {
        graph.Write<size_t>(config, result, graph.NumVertices(), config.name);
    }
    printf("\n================\n");
    printf("Find %lu vertexes in %lu-hop from node NO.%lu", count_result, value_k, root_vid);
    printf("\n================\n");
    auto output_cost = get_time()- start_time;

    printf("core_cost = %.2lf(s)\n", core_cost);
    printf("output_cost = %.2lf(s)\n", output_cost);
    printf("total_cost = %.2lf(s)\n", prepare_cost + core_cost + output_cost);
    printf("DONE.\n");
    return 0;
}

0 comments on commit c48c410

Please sign in to comment.