From 1423c0b0ae6db84b675c3649c12b08bc1781ccdb Mon Sep 17 00:00:00 2001 From: limingyao001 <185339530@qq.com> Date: Tue, 10 Oct 2023 17:50:47 +0800 Subject: [PATCH] plugin three --- procedures/CMakeLists.txt | 6 + procedures/algo_cpp/algo.h | 40 ++ procedures/algo_cpp/leiden_core.cpp | 558 ++++++++++++++++++ procedures/algo_cpp/leiden_procedure.cpp | 87 +++ procedures/algo_cpp/leiden_standalone.cpp | 99 ++++ .../algo_cpp/subgraph_isomorphism_core.cpp | 208 +++++++ .../subgraph_isomorphism_procedure.cpp | 64 ++ .../subgraph_isomorphism_standalone.cpp | 88 +++ procedures/algo_cpp/sybilrank_core.cpp | 67 +++ procedures/algo_cpp/sybilrank_procedure.cpp | 79 +++ procedures/algo_cpp/sybilrank_standalone.cpp | 95 +++ test/integration/test_algo.py | 55 ++ 12 files changed, 1446 insertions(+) create mode 100644 procedures/algo_cpp/leiden_core.cpp create mode 100644 procedures/algo_cpp/leiden_procedure.cpp create mode 100644 procedures/algo_cpp/leiden_standalone.cpp create mode 100644 procedures/algo_cpp/subgraph_isomorphism_core.cpp create mode 100644 procedures/algo_cpp/subgraph_isomorphism_procedure.cpp create mode 100644 procedures/algo_cpp/subgraph_isomorphism_standalone.cpp create mode 100644 procedures/algo_cpp/sybilrank_core.cpp create mode 100644 procedures/algo_cpp/sybilrank_procedure.cpp create mode 100644 procedures/algo_cpp/sybilrank_standalone.cpp diff --git a/procedures/CMakeLists.txt b/procedures/CMakeLists.txt index 364e7bb835..76d9a612c2 100644 --- a/procedures/CMakeLists.txt +++ b/procedures/CMakeLists.txt @@ -70,6 +70,9 @@ add_standalone(mis) add_standalone(trustrank) add_standalone(slpa) add_standalone(wlpa) +add_standalone(subgraph_isomorphism) +add_standalone(sybilrank) +add_standalone(leiden) add_embed(bfs) add_embed(pagerank) @@ -102,6 +105,9 @@ add_embed(mis) add_embed(trustrank) add_embed(slpa) add_embed(wlpa) +add_embed(subgraph_isomorphism) +add_embed(sybilrank) +add_embed(leiden) add_embed2(khop_kth) add_embed2(khop_within) diff --git 
a/procedures/algo_cpp/algo.h b/procedures/algo_cpp/algo.h index d9d0e84e92..a580ccb5cf 100644 --- a/procedures/algo_cpp/algo.h +++ b/procedures/algo_cpp/algo.h @@ -14,6 +14,7 @@ #pragma once +#include #include "lgraph/olap_base.h" using namespace lgraph_api; @@ -110,6 +111,21 @@ void WCCCore(OlapBase& graph, ParallelVector& label); */ double LCCCore(OlapBase& graph, ParallelVector& score); +/** + * @brief Compute the leiden algorithm. + * + * @param[in] graph The graph to compute on. + * @param[in,out] label The community of vertex. + * @param[in] random_seed The number of random seed for leiden algorithm. + * @param[in] theta Determine the degree of randomness in the selection + * of a community and within a range of roughly [0.0005, 0.1] + * @param[in] gamma The resolution parameter. + * Higher resolutions lead to more communities + * @param[in] threshold Terminate when active_vertices < num_vertices / threshold + */ +void LeidenCore(OlapBase& graph, ParallelVector& label, unsigned random_seed, + double theta, double gamma, size_t threshold); + /** * @brief Compute the number of rings of length k. * @@ -273,6 +289,30 @@ void MotifCore(OlapBase& graph, ParallelVector& motif_vertices, i */ void MSSPCore(OlapBase& graph, std::vector roots, ParallelVector& distance); +/** + * @brief Compute the Subgraph Isomorphism Algorithm. + * + * @param[in] graph The graph to compute on. + * @param[in] query The outgoing adjacency list of subgraph. + * @param[in, out] counts The ParallelVector to store number of subgraph. + * + * @return return total number of subgraph. + */ +size_t SubgraphIsomorphismCore(OlapBase& graph, + std::vector>& query, + ParallelVector& counts); + +/** + * @brief Compute the Sybil Rank Algorithm. + * + * @param[in] graph The graph to compute on. + * @param[in] trust_seeds The ParallelVector for trusted nodes. + * @param[in, out] curr The ParallelVector to store sybil rank value. 
+ * + */ +void SybilRankCore(OlapBase& graph, ParallelVector& trust_seeds, + ParallelVector& curr); + /** * @brief Compute the Triangle Counting algorithm. * diff --git a/procedures/algo_cpp/leiden_core.cpp b/procedures/algo_cpp/leiden_core.cpp new file mode 100644 index 0000000000..0eed93e856 --- /dev/null +++ b/procedures/algo_cpp/leiden_core.cpp @@ -0,0 +1,558 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#include +#include +#include "lgraph/olap_base.h" +#include "./algo.h" + +using namespace lgraph_api; +using namespace lgraph_api::olap; + +size_t choice(unsigned seed, std::unordered_map *probability_distributions) { + std::vector probability(probability_distributions->size()); + std::vector community(probability_distributions->size()); + size_t idx = 0; + for (auto it : *probability_distributions) { + community[idx] = it.first; + probability[idx] = it.second; + idx++; + } + std::default_random_engine generator{seed}; + std::discrete_distribution distribution{probability.begin(), probability.end()}; + return community[distribution(generator)]; +} + +class LeidenGraph { + OlapBase *graph; + bool if_base = false; + ParallelBitset active; + ParallelBitset well_connected; + ParallelVector k; + ParallelVector e_tot; + ParallelVector label_size; + double m; + size_t real_nodes; + size_t num_community; + size_t num_vertices; + double Q; + unsigned random_seed; + double theta; + double gamma; + size_t threshold; + + ParallelVector sub_index; + ParallelVector label; + ParallelVector sub_label; + OlapBase 
*sub_graph = nullptr; + LeidenGraph *sub_leiden = nullptr; + + public: + LeidenGraph(OlapBase *myGraph, ParallelVector &_label, + unsigned _random_seed = 0, double _theta = 0.1, double _gamma = 0.2, size_t _threshold = 0) + : active(myGraph->AllocVertexSubset()), + well_connected(myGraph->AllocVertexSubset()), + k(myGraph->AllocVertexArray()), + e_tot(myGraph->AllocVertexArray()), + label_size(myGraph->AllocVertexArray()), + sub_index(myGraph->AllocVertexArray()), + label(1), + sub_label(1) { + graph = myGraph; + theta = _theta; + gamma = _gamma; + random_seed = _random_seed; + if (_threshold <= 0) { + threshold = graph->NumVertices(); + } else { + threshold = _threshold; + } + label.Swap(_label); + active.Fill(); + num_vertices = graph->NumVertices(); + + real_nodes = graph->ProcessVertexInRange( + [&](size_t v) { + k[v] = 0; + label_size[v] = 0; + if (graph->OutDegree(v) > 0) { + return 1; + } + return 0; + }, + 0, num_vertices); + + m = graph->ProcessVertexInRange( + [&](size_t v) { + for (auto &e : graph->OutEdges(v)) { + k[v] += e.edge_data; + } + return k[v]; + }, + 0, num_vertices); + + double normalized = m / graph->NumEdges(); + m = graph->ProcessVertexInRange( + [&](size_t v) { + for (auto &e : graph->OutEdges(v)) { + e.edge_data /= normalized; + } + k[v] /= normalized; + return k[v]; + }, + 0, num_vertices) / + 2; + + std::cout << "m = " << m << std::endl; + + min_label(); + update_e_tot(); + update_num_community(true); + update_label_size(); + update_Q(true); + std::cout << "init" << std::endl; + } + + ~LeidenGraph() { + // if (!if_base) { + // delete sub_graph; + // } + } + + ParallelVector &get_label() { return label; } + + void set_base(bool _if_base) { if_base = _if_base; } + + void min_label() { + auto min_label = graph->AllocVertexArray(); + min_label.Fill(-1); + graph->ProcessVertexInRange( + [&](size_t v) { + write_min(&min_label[label[v]], v); + return 0; + }, + 0, num_vertices); + graph->ProcessVertexInRange( + [&](size_t v) { + label[v] = 
min_label[label[v]]; + return 0; + }, + 0, num_vertices); + } + + void update_label_size(ParallelVector *l = nullptr) { + if (l == nullptr) { + l = &label; + } + label_size.Fill(0); + graph->ProcessVertexInRange( + [&](size_t v) { + write_add(&label_size[(*l)[v]], 1); + return 0; + }, + 0, num_vertices); + } + + void update_e_tot(ParallelVector *l = nullptr) { + if (l == nullptr) { + l = &label; + } + e_tot.Fill(0); + graph->ProcessVertexInRange( + [&](size_t v) { + write_add(&e_tot[(*l)[v]], k[v]); + return 0; + }, + 0, num_vertices); + } + + size_t update_num_community(bool is_print = true) { + auto bit = graph->AllocVertexSubset(); + + num_community = graph->ProcessVertexInRange( + [&](size_t v) { + if (e_tot[v] < 1e-5) { + return 0; + } else { + return 1; + } + return 0; + }, + 0, num_vertices); + if (is_print) { + std::cout << "number of communities is " << num_community << std::endl; + } + return num_community; + } + + size_t get_num_community() { return num_community; } + + double update_Q(bool is_print = true) { + Q = graph->ProcessVertexInRange( + [&](size_t v) { + double q = 0.0; + for (auto e : graph->OutEdges(v)) { + size_t nbr = e.neighbour; + if (label[v] == label[nbr]) q += e.edge_data; + } + q -= 1.0 * gamma * k[v] * e_tot[label[v]] / (2 * m); + return q; + }, + 0, num_vertices) / + (2.0 * m); + if (is_print) { + std::cout << "Q = " << Q << std::endl; + } + return Q; + } + + double get_Q() { return Q; } + + ParallelVector singleton_partition() { + auto l = graph->AllocVertexArray(); + graph->ProcessVertexInRange( + [&](size_t vtx) { + l[vtx] = vtx; + return 0; + }, + 0, num_vertices); + update_e_tot(&l); + update_label_size(&l); + return l; + } + + void aggregate_graph(ParallelVector &l) { + std::cout << std::endl; + size_t num_sub_vertices = 0; + auto label_bitmap = graph->AllocVertexSubset(); + sub_index.Fill((size_t)-1); + for (size_t v_i = 0; v_i < num_vertices; v_i++) { + if (label_bitmap.Has(l[v_i])) { + sub_index[v_i] = sub_index[l[v_i]]; + } 
else { + sub_index[l[v_i]] = num_sub_vertices; + sub_index[v_i] = num_sub_vertices; + num_sub_vertices++; + label_bitmap.Add(l[v_i]); + } + } + + std::vector *> sub_edges(num_sub_vertices); + for (auto &m_ptr : sub_edges) { + m_ptr = new std::unordered_map(); + } + + for (size_t v_i = 0; v_i < num_vertices; v_i++) { + for (auto &e : graph->OutEdges(v_i)) { + auto v_sub_node = sub_index[v_i]; + auto neighbour_sub_node = sub_index[e.neighbour]; + auto edge_data = e.edge_data; + auto pair_it = sub_edges[v_sub_node]->find(neighbour_sub_node); + if (pair_it == sub_edges[v_sub_node]->end()) { + sub_edges[v_sub_node]->insert({neighbour_sub_node, edge_data}); + } else { + pair_it->second += edge_data; + } + } + } + + size_t num_sub_edges = 0; + for (size_t i = 0; i < num_sub_vertices; i++) { + num_sub_edges += sub_edges[i]->size(); + } + + EdgeUnit *sub_edge_array = new EdgeUnit[num_sub_edges]; + size_t sub_edge_index = 0; + for (size_t v_i = 0; v_i < num_sub_vertices; v_i++) { + for (auto &edge_pair : *sub_edges[v_i]) { + sub_edge_array[sub_edge_index].src = v_i; + sub_edge_array[sub_edge_index].dst = edge_pair.first; + sub_edge_array[sub_edge_index].edge_data = edge_pair.second; + sub_edge_index++; + } + delete sub_edges[v_i]; + } + assert(sub_edge_index == num_sub_edges); + + sub_graph = new OlapBase; + sub_graph->LoadFromArray((char*)sub_edge_array, num_sub_vertices, + num_sub_edges, INPUT_SYMMETRIC); + delete[] sub_edge_array; + + sub_graph->AllocVertexArray().Swap(sub_label); + sub_label.Fill(0); + + graph->ProcessVertexActive( + [&](size_t v_i) { + sub_label[sub_index[v_i]] = sub_index[label[v_i]]; + return 1; + }, + label_bitmap); + } + + bool move_nodes_fast() { + auto active_next = graph->AllocVertexSubset(); + int iters = 0; + size_t active_vertices = 1; + size_t all_vertices = 0; + while (active_vertices != 0) { + active_next.Clear(); + active_vertices = graph->ProcessVertexActive( + [&](size_t v) { + std::unordered_map count; + for (auto e : 
graph->OutEdges(v)) { + if (v == e.neighbour) continue; + size_t nbr_label = label[e.neighbour]; + auto it = count.find(nbr_label); + if (it == count.end()) { + count[nbr_label] = e.edge_data; + } else { + it->second += e.edge_data; + } + } + size_t old_label = label[v]; + double k_in_out = 0.0; + if (count.find(old_label) != count.end()) { + k_in_out = count[old_label]; + } + double delta_in = k[v] * (e_tot[old_label] - k[v]) * gamma - 2.0 * k_in_out * m; + + double delta_in_max = -delta_in; + size_t label_max = old_label; + for (auto &ele : count) { + size_t new_label = ele.first; + if (old_label == new_label) continue; + double k_in_in = ele.second; + delta_in = 2.0 * k_in_in * m - k[v] * (e_tot[new_label]) * gamma; + if (delta_in > delta_in_max) { + delta_in_max = delta_in; + label_max = new_label; + } else if (delta_in == delta_in_max && new_label < label_max) { + delta_in_max = delta_in; + label_max = new_label; + } + } + + if (delta_in_max > 0 && label_max != old_label) { + write_sub(&e_tot[old_label], k[v]); + write_sub(&label_size[old_label], 1); + label[v] = label_max; + write_add(&e_tot[label_max], k[v]); + write_add(&label_size[label_max], 1); + for (auto &edge : graph->OutEdges(v)) { + if (label[edge.neighbour] != label_max) active_next.Add(edge.neighbour); + } + return 1; + } + if (delta_in_max < 0) { + write_sub(&e_tot[old_label], k[v]); + write_sub(&label_size[old_label], 1); + label[v] = v; + write_add(&e_tot[v], k[v]); + write_add(&label_size[v], 1); + for (auto &edge : graph->OutEdges(v)) { + if (label[edge.neighbour] != v) active_next.Add(edge.neighbour); + } + return 1; + } + return 0; + }, + active); + std::cout << "active_vertices(" << iters << ") = " << active_vertices << std::endl; + iters++; + active.Swap(active_next); + all_vertices += active_vertices; + } + active.Fill(); + update_num_community(); + return (all_vertices <= num_vertices / threshold); + } + + ParallelVector refine_partition() { + auto l = singleton_partition(); + auto 
comm_size = graph->AllocVertexArray(); + comm_size.Fill(1); + auto comm_degree = graph->AllocVertexArray(); + comm_degree.Fill(0); + auto sub_size = graph->AllocVertexArray(); + sub_size.Fill(0); + auto degree_in_sub = graph->AllocVertexArray(); + degree_in_sub.Fill(0); + well_connected.Clear(); + + graph->ProcessVertexInRange( + [&](size_t vtx) { + double c_degree = 0; + double in_degree = 0; + write_add(&sub_size[label[vtx]], (size_t)1); + for (auto &edge : graph->OutEdges(vtx)) { + size_t nbr = edge.neighbour; + if (nbr == vtx || label[vtx] != label[nbr]) { + continue; + } + in_degree += edge.edge_data; + if (l[vtx] != l[nbr]) { + c_degree += edge.edge_data; + } + write_add(&degree_in_sub[vtx], in_degree); + write_add(&comm_degree[l[vtx]], c_degree); + } + return 0; + }, + 0, num_vertices); + + graph->ProcessVertexInRange( + [&](size_t vtx) { + if (degree_in_sub[vtx] >= gamma * (double)(sub_size[label[vtx]] - 1)) { + well_connected.Add(vtx); + } + return 0; + }, + 0, num_vertices); + + graph->ProcessVertexActive( + [&](size_t vtx) { + if (comm_size[l[vtx]] != 1) { + return 0; + } + std::unordered_map count; + std::unordered_map pr; + std::unordered_map v_degree; + + for (auto e : graph->OutEdges(vtx)) { + size_t nbr = e.neighbour; + if (vtx == nbr || label[nbr] != label[vtx]) { + continue; + } + size_t nbr_label = l[nbr]; + auto it_d = v_degree.find(nbr_label); + if (it_d != v_degree.end()) { + it_d->second += e.edge_data; + } else { + v_degree[nbr_label] = e.edge_data; + } + auto it = count.find(nbr_label); + if (it != count.end()) { + it->second += e.edge_data; + } else if (comm_degree[nbr_label] >= + gamma * comm_size[nbr_label] * + (sub_size[label[vtx]] - comm_size[nbr_label])) { + count[nbr_label] = e.edge_data; + } + } + + size_t old_label = l[vtx]; + pr[old_label] = 1; + double k_in_in = 0.0; + double delta_in = 0.0; + + for (auto &ele : count) { + size_t new_label = ele.first; + if (old_label == new_label) continue; + k_in_in = ele.second; + + delta_in = 
k_in_in - k[vtx] * (e_tot[new_label]) / 2.0 / m * gamma; + if (delta_in >= 0) { + pr[new_label] = std::exp(1.0 / theta * delta_in); + } + } + + size_t chosen_label = choice(random_seed, &pr); + write_add(&e_tot[old_label], -k[vtx]); + write_add(&comm_size[old_label], -1); + write_add(&label_size[old_label], -1); + write_add(&comm_degree[old_label], -k[vtx] + 2 * v_degree[old_label]); + l[vtx] = chosen_label; + write_add(&e_tot[chosen_label], k[vtx]); + write_add(&comm_size[chosen_label], 1); + write_add(&label_size[chosen_label], 1); + write_add(&comm_degree[chosen_label], k[vtx] - 2 * v_degree[chosen_label]); + return 0; + }, + well_connected); + + return l; + } + + void leiden() { + bool changed = move_nodes_fast(); + if (!changed && num_community != graph->NumVertices()) { + auto l = refine_partition(); + aggregate_graph(l); + sub_leiden = + new LeidenGraph(sub_graph, sub_label, random_seed, theta, gamma, threshold); + bool flag = true; + while (flag) { + flag = subgraph_leiden(l); + } + + graph->ProcessVertexInRange( + [&](size_t v) { + size_t sub_vtx = sub_index[v]; + if (sub_vtx == (size_t)-1) { + return 0; + } + size_t sub_index_v_comm = sub_leiden->label[sub_vtx]; + label[v] = sub_index_v_comm; + return 0; + }, + 0, num_vertices); + delete sub_leiden; + } + } + + bool subgraph_leiden(ParallelVector &original_l) { + bool not_changed = sub_leiden->move_nodes_fast(); + bool ret = !not_changed && sub_leiden->num_community != sub_graph->NumVertices(); + if (ret) { + auto l = sub_leiden->refine_partition(); + sub_leiden->aggregate_graph(l); + LeidenGraph *sub_sub_leiden = new LeidenGraph( + sub_leiden->sub_graph, sub_leiden->sub_label, random_seed, theta, gamma, threshold); + + graph->ProcessVertexInRange( + [&](size_t v) { + auto sub_v = sub_index[v]; + sub_index[v] = sub_leiden->sub_index[sub_v]; + return 0; + }, + 0, num_vertices); + delete sub_leiden; + sub_leiden = sub_sub_leiden; + } + return ret; + } +}; + +void LeidenCore(OlapBase &graph, ParallelVector 
&label, unsigned random_seed = 0, + double theta = 0.1, double gamma = 0.2, size_t threshold = 0) { + graph.ProcessVertexInRange( + [&](size_t v) { + label[v] = v; + return 0; + }, + 0, graph.NumVertices()); + + LeidenGraph leiden_graph(&graph, label, random_seed, theta, gamma, threshold); + leiden_graph.set_base(true); + leiden_graph.leiden(); + + leiden_graph.update_e_tot(); + leiden_graph.update_label_size(); + leiden_graph.update_num_community(false); + leiden_graph.update_Q(false); + std::cout << "final number of communities is " << leiden_graph.get_num_community() << std::endl; + std::cout << "final Q is " << leiden_graph.get_Q() << std::endl; + label.Swap(leiden_graph.get_label()); +} diff --git a/procedures/algo_cpp/leiden_procedure.cpp b/procedures/algo_cpp/leiden_procedure.cpp new file mode 100644 index 0000000000..3209862ff0 --- /dev/null +++ b/procedures/algo_cpp/leiden_procedure.cpp @@ -0,0 +1,87 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */
+#include <stdexcept>
+#include "lgraph/olap_on_db.h"
+#include "tools/json.hpp"
+#include "./algo.h"
+// NOTE(review): <...> template arguments were stripped from this patch; re-inferred from usage.
+using namespace lgraph_api;
+using namespace lgraph_api::olap;
+using json = nlohmann::json;
+// Plugin entry point: runs LeidenCore on an undirected snapshot and reports sizes/timings as JSON.
+extern "C" bool Process(GraphDB& db, const std::string& request, std::string& response) {
+    auto start_time = get_time();
+
+    // prepare: read parameters from the request and build the in-memory snapshot
+    start_time = get_time();
+    double gamma = 0.2;
+    double theta = 0.1;
+    unsigned random_seed = 0;
+    size_t threshold = 0;
+    std::string weight = "";
+    std::cout << "Input: " << request << std::endl;
+    try {
+        json input = json::parse(request);
+        parse_from_json(gamma, "gamma", input);
+        if (!(gamma > 0 && gamma <= 1)) throw std::runtime_error("gamma must be in (0, 1]");
+        parse_from_json(theta, "theta", input);
+        if (!(theta > 0 && theta <= 1)) throw std::runtime_error("theta must be in (0, 1]");
+        parse_from_json(random_seed, "random_seed", input);
+        parse_from_json(weight, "weight", input);
+        parse_from_json(threshold, "threshold", input);
+    } catch (std::exception& e) {
+        response = "json parse error: " + std::string(e.what());
+        std::cout << response << std::endl;
+        return false;
+    }
+    auto txn = db.CreateReadTxn();
+    auto edge_convert = [&](OutEdgeIterator& eit, double& edge_data) -> bool {
+        if (weight.length() == 0) {
+            edge_data = 1;  // no weight field requested: every edge weighs 1
+            return true;
+        }
+        edge_data = eit.GetField(weight).real();
+        if (edge_data > 0.0) return true;
+        return false;  // non-positive weight; presumably makes the snapshot skip the edge - confirm
+    };
+    OlapOnDB<double> olapondb(db, txn, SNAPSHOT_PARALLEL | SNAPSHOT_UNDIRECTED, nullptr,
+                              edge_convert);
+    printf("|V| = %lu\n", olapondb.NumVertices());
+    printf("|E| = %lu\n", olapondb.NumEdges());
+    auto prepare_cost = get_time() - start_time;
+
+    // core
+    start_time = get_time();
+    ParallelVector<size_t> label = olapondb.AllocVertexArray<size_t>();
+    LeidenCore(olapondb, label, random_seed, theta, gamma, threshold);
+    printf("label.size=%lu\n", label.Size());
+    auto core_cost = get_time() - start_time;
+
+    // output: nothing is emitted here; `label` is neither returned nor stored by this function
+    start_time = get_time();
+    double output_cost = get_time() - start_time;
+
+    // return
+    {
+        json output;
+        output["num_vertices"] = olapondb.NumVertices();
+        output["num_edges"] = olapondb.NumEdges();
+        output["prepare_cost"] = prepare_cost;
+        output["core_cost"] = core_cost;
+        output["output_cost"] = output_cost;
+        output["total_cost"] = prepare_cost + core_cost + output_cost;
+        response = output.dump();
+    }
+    return true;
+}
diff --git a/procedures/algo_cpp/leiden_standalone.cpp b/procedures/algo_cpp/leiden_standalone.cpp
new file mode 100644
index 0000000000..ff905bd899
--- /dev/null
+++ b/procedures/algo_cpp/leiden_standalone.cpp
@@ -0,0 +1,99 @@
+/**
+ * Copyright 2022 AntGroup CO., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#include "olap/olap_on_disk.h"
+#include "tools/json.hpp"
+#include "./algo.h"
+// NOTE(review): <...> template arguments were stripped from this patch; re-inferred from usage.
+using namespace lgraph_api;
+using namespace lgraph_api::olap;
+using json = nlohmann::json;
+// Command-line options of the standalone Leiden driver; parsed and printed by the constructor.
+class MyConfig : public ConfigBase<double> {
+ public:
+    double gamma = 0.2;
+    double theta = 0.1;
+    unsigned random_seed = 0;
+    size_t threshold = 0;
+    std::string name = std::string("leiden");
+    void AddParameter(fma_common::Configuration & config) {
+        ConfigBase<double>::AddParameter(config);
+        config.Add(gamma, "gamma", true)
+            .Comment("the gamma of leiden");
+        config.Add(theta, "theta", true)
+            .Comment("the theta of leiden");
+        config.Add(random_seed, "random_seed", true)
+            .Comment("the random_seed of leiden");
+        config.Add(threshold, "threshold", true)
+            .Comment("the threshold of leiden");
+    }
+
+    void Print() {
+        ConfigBase<double>::Print();
+        std::cout << " name: " << name << std::endl;
+        std::cout << " gamma: " << gamma << std::endl;
+        std::cout << " theta: " << theta << std::endl;
+        std::cout << " random_seed: " << random_seed << std::endl;  // each label prints its own field
+        std::cout << " threshold: " << threshold << std::endl;
+    }
+
+    MyConfig(int &argc, char** &argv) : ConfigBase<double>(argc, argv) {
+        parse_line = parse_line_weighted<double>;
+        fma_common::Configuration config;
+        AddParameter(config);
+        config.ExitAfterHelp(true);
+        config.ParseAndFinalize(argc, argv);
+        Print();
+    }
+};
+// Standalone driver: loads the configured graph with MAKE_SYMMETRIC and times LeidenCore on it.
+int main(int argc, char** argv) {
+    double start_time;
+    MemUsage memUsage;
+    memUsage.startMemRecord();
+
+    // prepare
+    MyConfig config(argc, argv);
+    start_time = get_time();
+    std::string output_file = config.output_dir;
+    double gamma = config.gamma;
+    double theta = config.theta;
+    unsigned random_seed = config.random_seed;
+    size_t threshold = config.threshold;
+
+    OlapOnDisk<double> graph;
+    graph.Load(config, MAKE_SYMMETRIC);
+    printf("|V|=%lu, |E|=%lu\n", graph.NumVertices(), graph.NumEdges());
+    auto prepare_cost = get_time() - start_time;
+
+    // core
+    start_time = get_time();
+    auto label = graph.AllocVertexArray<size_t>();
+    LeidenCore(graph, label, random_seed, theta, gamma, threshold);
+    memUsage.print();
+    memUsage.reset();
+    auto core_cost = get_time() - start_time;
+
+    // output
+    start_time = get_time();
+    // TODO(any): write to file (`label` and `output_file` are currently unused after this point)
+    auto output_cost = get_time() - start_time;
+    printf("prepare_cost = %.2lf(s)\n", prepare_cost);
+    printf("core_cost = %.2lf(s)\n", core_cost);
+    printf("output_cost = %.2lf(s)\n", output_cost);
+    printf("total_cost = %.2lf(s)\n", prepare_cost + core_cost + output_cost);
+    printf("DONE.");
+
+    return 0;
+}
diff --git a/procedures/algo_cpp/subgraph_isomorphism_core.cpp b/procedures/algo_cpp/subgraph_isomorphism_core.cpp
new file mode 100644
index 0000000000..a4704bb996
--- /dev/null
+++ b/procedures/algo_cpp/subgraph_isomorphism_core.cpp
@@ -0,0 +1,208 @@
+/**
+ * Copyright 2022 AntGroup CO., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#include <algorithm>
+#include <unordered_map>
+#include "lgraph/olap_base.h"
+#include "./algo.h"
+// NOTE(review): system header names and <...> arguments were stripped from this patch; re-inferred.
+using namespace lgraph_api;
+using namespace lgraph_api::olap;
+// qsort comparator: orders AdjUnit records by ascending neighbour id and returns 0 for equal ids.
+int subgraph_sort_compare(const void *a, const void *b) {
+    const auto *ptr_a = (const AdjUnit<Empty> *)a;
+    const auto *ptr_b = (const AdjUnit<Empty> *)b;
+    return (ptr_a->neighbour > ptr_b->neighbour) - (ptr_a->neighbour < ptr_b->neighbour);
+}
+bool subgraph_adj_compare(const AdjUnit<Empty> &a, const AdjUnit<Empty> &b) {
+    return (a.neighbour < b.neighbour);
+}
+// Comparator for std::upper_bound: true when vertex id `a` sorts before b.neighbour.
+bool subgraph_upper_compare(size_t a, const AdjUnit<Empty> &b) { return (a < b.neighbour); }
+// Returns true when `it` already occurs among the first n entries of vtx_list.
+bool subgraph_check_match(const size_t *vtx_list, size_t it, size_t n) {
+    for (size_t i = 0; i < n; i++) {
+        if (vtx_list[i] == it) return true;
+    }
+    return false;
+}
+// Backtracking search extending the binding in solution[0] to full matches; returns their count.
+size_t subquery_merge(size_t *solution, const size_t *src_list, OlapBase<Empty> &graph,
+                      const std::vector<std::unordered_set<size_t>> &query,
+                      std::unordered_map<size_t, size_t> *candidate,
+                      ParallelVector<size_t> &match_times) {
+    size_t query_vtx = 1;  // query vertex 0 is pre-bound by the caller in solution[0]
+    size_t local_num = 0;
+    while (query_vtx > 0) {  // stepping back below vertex 1 ends the search
+        size_t it;
+        if (solution[query_vtx] == (size_t)-1) {
+            if (src_list[query_vtx] != (size_t)-1) {
+                auto edges = graph.OutEdges(solution[src_list[query_vtx]]);
+                auto pos = edges.begin();
+                if (pos == edges.end()) {
+                    solution[query_vtx] = (size_t)-1;
+                    query_vtx--;
+                    continue;
+                }
+                it = pos->neighbour;
+                solution[query_vtx] = it;
+            } else {
+                auto pos = candidate[query_vtx].begin();
+                it = pos->first;
+                solution[query_vtx] = it;
+            }
+        } else {
+            if (src_list[query_vtx] != (size_t)-1) {
+                auto edges = graph.OutEdges(solution[src_list[query_vtx]]);
+                auto pos = std::upper_bound(edges.begin(), edges.end(), solution[query_vtx],
+                                            subgraph_upper_compare);
+                if (pos == edges.end()) {
+                    solution[query_vtx] = (size_t)-1;
+                    query_vtx--;
+                    continue;
+                }
+                it = pos->neighbour;
+                solution[query_vtx] = it;
+            } else {
+                auto pos = candidate[query_vtx].find(solution[query_vtx]);
+                ++pos;
+                if (pos == candidate[query_vtx].end()) {
+                    solution[query_vtx] = (size_t)-1;
+                    query_vtx--;
+                    continue;
+                }
+                it = pos->first;
+                solution[query_vtx] = it;
+            }
+        }
+        // Skip bindings that reuse an already-bound data vertex or are not in the candidate set.
+        if (subgraph_check_match(solution, it, query_vtx)) {
+            continue;
+        }
+        if (candidate[query_vtx].count(solution[query_vtx]) == 0) {
+            continue;
+        }
+        // Each query edge from query_vtx to an earlier vertex must be an out-edge of `it`.
+        bool search_ret = true;
+        for (auto &query_dst : query[query_vtx]) {
+            if (query_dst >= query_vtx) {
+                continue;
+            }
+
+            auto edges = graph.OutEdges(it);
+            AdjUnit<Empty> dst{solution[query_dst]};
+            if (!std::binary_search(edges.begin(), edges.end(), dst, subgraph_adj_compare)) {
+                search_ret = false;
+                break;
+            }
+        }
+        if (!search_ret) {
+            continue;
+        }
+        // Each query edge from an earlier vertex to query_vtx must be an in-edge of `it`.
+        for (size_t query_src = 0; query_src < query_vtx; query_src++) {
+            if (query[query_src].count(query_vtx) == 0) {
+                continue;
+            }
+            auto edges = graph.InEdges(it);
+            AdjUnit<Empty> src{solution[query_src]};
+            if (!std::binary_search(edges.begin(), edges.end(), src, subgraph_adj_compare)) {
+                search_ret = false;
+                break;
+            }
+        }
+        if (!search_ret) {
+            continue;
+        }
+        query_vtx++;
+        // All query vertices bound: credit every matched vertex, then step back for more matches.
+        if (query_vtx == query.size()) {
+            for (size_t i = 0; i < query_vtx; i++) {
+                write_add(&match_times[solution[i]], (size_t)1);
+            }
+            local_num++;
+            query_vtx--;
+        }
+    }
+    return local_num;
+}
+// Sorts every adjacency list, then sums the matches found from each start vertex of the graph.
+size_t SubgraphIsomorphismCore(OlapBase<Empty> &graph,
+                               std::vector<std::unordered_set<size_t>> &query,
+                               ParallelVector<size_t> &counts) {
+    graph.ProcessVertexInRange<size_t>(
+        [&](size_t vtx) {
+            auto edges = graph.OutEdges(vtx);
+            qsort(edges.begin(), graph.OutDegree(vtx), sizeof(AdjUnit<Empty>),
+                  subgraph_sort_compare);
+            edges = graph.InEdges(vtx);
+            qsort(edges.begin(), graph.InDegree(vtx), sizeof(AdjUnit<Empty>),
+                  subgraph_sort_compare);
+            return 0;
+        },
+        0, graph.NumVertices());
+    printf("sorted\n");
+    size_t num_vertices = graph.NumVertices();
+    size_t num_subgraph;
+    int k = query.size();
+    // NOTE(review): subquery_merge starts at query vertex 1, so k < 2 indexes past `solution`.
+    std::vector<std::unordered_map<size_t, size_t>> candidate(k);  // released on return
+    graph.ProcessVertexInRange<size_t>(
+        [&](size_t vtx) {
+            for (size_t i = 0; i < graph.NumVertices(); i++) {
+                candidate[vtx].insert(std::pair<size_t, size_t>(i, (size_t)0));
+            }
+            return 0;
+        },
+        0, k);
+    printf("candidate\n");
+
+    std::vector<size_t> src(k);  // released on return
+    graph.ProcessVertexInRange<size_t>(
+        [&](size_t v) {
+            src[v] = (size_t)-1;
+            return 0;
+        },
+        0, k);
+    graph.ProcessVertexInRange<size_t>(
+        [&](size_t vtx) {
+            for (auto &dst : query[vtx]) {
+                if (src[dst] == (size_t)-1 && vtx < dst) {
+                    write_min(&src[dst], vtx);
+                }
+            }
+            return 0;
+        },
+        0, k);
+
+    num_subgraph = graph.ProcessVertexInRange<size_t>(
+        [&](size_t vtx) {
+            if (candidate[0].count(vtx) == 0) {
+                return (size_t)0;
+            } else {
+                // Private partial match for this start vertex: slot i holds the data vertex
+                // bound to query vertex i, or (size_t)-1 while that query vertex is unbound.
+                std::vector<size_t> solution(k, (size_t)-1);
+                solution[0] = vtx;
+                // The vector releases its buffer on every exit path, so no delete[] is
+                // needed once the search returns.
+                return subquery_merge(solution.data(), src.data(), graph, query,
+                                      candidate.data(), counts);
+            }
+        },
+        0, num_vertices);
+
+    printf("match %zu subgraph\n", num_subgraph);
+    return num_subgraph;
+}
diff --git a/procedures/algo_cpp/subgraph_isomorphism_procedure.cpp b/procedures/algo_cpp/subgraph_isomorphism_procedure.cpp
new file mode 100644
index 0000000000..3f7f377e21
--- /dev/null
+++ b/procedures/algo_cpp/subgraph_isomorphism_procedure.cpp
@@ -0,0 +1,64 @@
+/**
+ * Copyright 2022 AntGroup CO., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#include "lgraph/olap_on_db.h"
+#include "tools/json.hpp"
+#include "./algo.h"
+// NOTE(review): <...> template arguments were stripped from this patch; re-inferred from usage.
+using namespace lgraph_api;
+using namespace lgraph_api::olap;
+using json = nlohmann::json;
+// Plugin entry point: counts matches of the request's "query" pattern and reports count/timings.
+extern "C" bool Process(GraphDB& db, const std::string& request, std::string& response) {
+    auto start_time = get_time();
+
+    std::vector<std::unordered_set<size_t>> query;
+    std::cout << "Input: " << request << std::endl;
+    try {
+        json input = json::parse(request);
+        parse_from_json(query, "query", input);
+    } catch (std::exception& e) {
+        response = "json parse error: " + std::string(e.what());
+        std::cout << response << std::endl;
+        return false;
+    }
+
+    if (query.empty()) {
+        // Empty query does not fail (throw is disabled): fall back to the pattern 0->1, 0->2, 1->2.
+        query.emplace_back(std::unordered_set<size_t>{1, 2});
+        query.emplace_back(std::unordered_set<size_t>{2});
+        query.emplace_back(std::unordered_set<size_t>{});
+    }
+
+    auto txn = db.CreateReadTxn();
+
+    OlapOnDB<Empty> olapondb(db, txn, SNAPSHOT_PARALLEL);
+    auto preprocessing_time = get_time();
+    auto counts = olapondb.AllocVertexArray<size_t>();
+    counts.Fill(0);  // the core only increments; zero-init by AllocVertexArray is not shown
+    size_t subgraph_num = SubgraphIsomorphismCore(olapondb, query, counts);
+    auto exec_time = get_time();
+
+    json output;
+    // TODO(any): write count back to graph
+    output["match_subgraph_num"] = subgraph_num;
+    auto output_time = get_time();
+
+    output["preprocessing_time"] = (double)(preprocessing_time - start_time);
+    output["exec_time"] = (double)(exec_time - preprocessing_time);
+    output["output_time"] = (double)(output_time - exec_time);
+    output["sum_time"] = (double)(output_time - start_time);
+    response = output.dump();
+    return true;
+}
diff --git a/procedures/algo_cpp/subgraph_isomorphism_standalone.cpp b/procedures/algo_cpp/subgraph_isomorphism_standalone.cpp
new file mode 100644
index 0000000000..fce131e251
--- /dev/null
+++ b/procedures/algo_cpp/subgraph_isomorphism_standalone.cpp
@@ -0,0 +1,88 @@
+/**
+ * Copyright 2022 AntGroup CO., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#include <stdexcept>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "olap/olap_on_disk.h"
+#include "tools/json.hpp"
+#include "./algo.h"
+
+using namespace lgraph_api;
+using namespace lgraph_api::olap;
+using json = nlohmann::json;
+
+/**
+ * @brief   Command-line configuration of the standalone subgraph isomorphism tool.
+ *
+ * Adds the "query" option to the options of ConfigBase and turns its JSON text into the
+ * adjacency list handed to SubgraphIsomorphismCore.
+ */
+class MyConfig : public ConfigBase<Empty> {
+ public:
+    std::string name = "subgraph_isomorphism";
+    // Outgoing adjacency list of the pattern, decoded from query_s.
+    std::vector<std::unordered_set<size_t>> query;
+    // Raw text of the "query" option, e.g. [[1,2],[2],[]].
+    std::string query_s;
+    void AddParameter(fma_common::Configuration& config) {
+        ConfigBase<Empty>::AddParameter(config);
+        config.Add(query_s, "query", false).Comment("edges of subgraph");
+    }
+    void Print() {
+        ConfigBase<Empty>::Print();
+        std::cout << " name: " << name << std::endl;
+        std::cout << " query_vertices: " << query.size() << std::endl;
+    }
+    /**
+     * @brief   Parse the command line and decode the pattern.
+     *
+     * @exception   std::runtime_error  The pattern names a vertex that is not part of it.
+     *                                  Malformed JSON surfaces as the exception thrown by
+     *                                  the JSON library.
+     */
+    MyConfig(int& argc, char**& argv) : ConfigBase<Empty>(argc, argv) {
+        fma_common::Configuration config;
+        AddParameter(config);
+        config.ExitAfterHelp(true);
+        config.ParseAndFinalize(argc, argv);
+        printf("query = %s\n", query_s.c_str());
+
+        // Wrap the bare array in an object so it can be read back through a named member.
+        json input = json::parse("{\"query\":" + query_s + "}");
+        printf("input = %s\n", input.dump().c_str());
+        query = input["query"].get<std::vector<std::unordered_set<size_t>>>();
+
+        // The core uses every neighbour id as an index into arrays that hold one slot per
+        // pattern vertex, so an id outside the pattern would be an out-of-bounds access.
+        for (const auto& neighbours : query) {
+            for (const size_t dst : neighbours) {
+                if (dst >= query.size()) {
+                    throw std::runtime_error("invalid query: vertex " + std::to_string(dst) +
+                                             " is out of range");
+                }
+            }
+        }
+        Print();
+    }
+};
+
+/**
+ * @brief   Load a graph from disk, count the matches of the configured pattern and
+ *          optionally write the per-vertex counts to the output directory.
+ */
+int main(int argc, char** argv) {
+    MemUsage memUsage;
+    memUsage.startMemRecord();
+
+    // prepare
+    double start_time = get_time();
+    OlapOnDisk<Empty> graph;
+    MyConfig config(argc, argv);
+
+    graph.Load(config, DUAL_DIRECTION);
+    memUsage.print();
+    memUsage.reset();
+    std::cout << " num_vertices = " << graph.NumVertices() << std::endl;
+    std::cout << " num_edges = " << graph.NumEdges() << std::endl;
+    const auto prepare_cost = get_time() - start_time;
+
+    // core
+    start_time = get_time();
+    ParallelVector<size_t> counts = graph.AllocVertexArray<size_t>();
+    SubgraphIsomorphismCore(graph, config.query, counts);
+    const auto core_cost = get_time() - start_time;
+
+    // output
+    start_time = get_time();
+    if (config.output_dir != "") {
+        graph.Write<size_t>(config, counts, graph.NumVertices(), config.name);
+    }
+    const auto output_cost = get_time() - start_time;
+
+    printf("prepare_cost = %.2lf(s)\n", prepare_cost);
+    printf("core_cost = %.2lf(s)\n", core_cost);
+    printf("output_cost = %.2lf(s)\n", output_cost);
+    printf("total_cost = %.2lf(s)\n", prepare_cost + core_cost + output_cost);
+    printf("ALL DONE.\n");
+
+    return 0;
+}
diff --git a/procedures/algo_cpp/sybilrank_core.cpp b/procedures/algo_cpp/sybilrank_core.cpp
new file mode 100644
index 0000000000..1b7202b675
--- /dev/null
+++ b/procedures/algo_cpp/sybilrank_core.cpp
@@ -0,0 +1,67 @@
+/**
+ * Copyright 2022 AntGroup CO., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#include <cmath>
+#include <iostream>
+
+#include "lgraph/olap_base.h"
+#include "./algo.h"
+
+/**
+ * @brief   Propagate trust from the seed vertices along outgoing edges (SybilRank).
+ *
+ * Runs ceil(ln(NumVertices)) rounds. In every round each vertex adds its current value
+ * to all of its out-neighbours. Between rounds the values are kept divided by the
+ * out-degree of their vertex, so that the next round can add them unchanged; the values
+ * of the last round are left undivided.
+ *
+ * @param[in]       graph       The graph to compute on.
+ * @param[in]       trust_seeds Trusted vertices; each id is used as an index into curr and
+ *                              must therefore be smaller than graph.NumVertices().
+ * @param[in,out]   curr        Per-vertex array, overwritten with the resulting rank.
+ */
+void SybilRankCore(OlapBase<Empty> &graph, ParallelVector<size_t> &trust_seeds,
+                   ParallelVector<double> &curr) {
+    const size_t num_vertices = graph.NumVertices();
+    auto next = graph.AllocVertexArray<double>();
+    curr.Fill(0);
+    // Every seed starts with one unit of trust, stored already divided by its out-degree.
+    for (auto &s : trust_seeds) {
+        curr[s] = 1;
+        const auto out_degree = graph.OutDegree(s);
+        if (out_degree > 0) {
+            curr[s] /= out_degree;
+        }
+    }
+
+    // std::log(0) is -inf and converting that to int is undefined, so an empty graph gets
+    // zero rounds explicitly.
+    const int iterations =
+        num_vertices > 0 ? static_cast<int>(std::ceil(std::log(num_vertices))) : 0;
+    std::cout << "iterations=" << iterations << std::endl;
+
+    for (int i_i = 0; i_i < iterations; i_i++) {
+        next.Fill(0);
+        // Push phase: several sources may share a destination, hence the atomic add.
+        graph.ProcessVertexInRange<int>(
+            [&](size_t src) {
+                for (auto &edge : graph.OutEdges(src)) {
+                    write_add(&next[edge.neighbour], curr[src]);
+                }
+                return 1;
+            },
+            0, num_vertices);
+
+        // delta is only reported; it does not influence the number of rounds.
+        double delta;
+        if (i_i != iterations - 1) {
+            delta = graph.ProcessVertexInRange<double>(
+                [&](size_t src) {
+                    const auto out_degree = graph.OutDegree(src);
+                    if (out_degree > 0) {
+                        next[src] /= out_degree;
+                        // Scale back so the difference is expressed in undivided trust.
+                        return std::fabs(next[src] - curr[src]) * out_degree;
+                    }
+                    return std::fabs(next[src] - curr[src]);
+                },
+                0, num_vertices);
+        } else {
+            // NOTE(review): next is undivided here while curr still holds the divided
+            // values of the previous round, so this last delta mixes two scales - confirm
+            // whether that is intended.
+            delta = graph.ProcessVertexInRange<double>(
+                [&](size_t src) { return std::fabs(next[src] - curr[src]); }, 0, num_vertices);
+        }
+        curr.Swap(next);
+        std::cout << "delta[" << i_i << "] = " << delta << std::endl;
+    }
+}
diff --git a/procedures/algo_cpp/sybilrank_procedure.cpp b/procedures/algo_cpp/sybilrank_procedure.cpp
new file mode 100644
index 0000000000..ba3558a2bd
--- /dev/null
+++ b/procedures/algo_cpp/sybilrank_procedure.cpp
@@ -0,0 +1,79 @@
+/**
+ * Copyright 2022 AntGroup CO., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#include <string>
+#include <vector>
+
+#include "lgraph/olap_on_db.h"
+#include "tools/json.hpp"
+#include "./algo.h"
+
+using namespace lgraph_api;
+using namespace lgraph_api::olap;
+using json = nlohmann::json;
+
+/**
+ * @brief   Plugin entry point: run SybilRank from the requested trust seeds and report
+ *          the highest rank value.
+ *
+ * @param[in]   db          Database the read transaction and the OLAP snapshot are built on.
+ * @param[in]   request     JSON text. "trust_seeds" lists the seed vertices by the value of
+ *                          their indexed field, "label" and "field" name that index
+ *                          (defaults "node" and "id").
+ * @param[out]  response    On success the dumped JSON with "max_sybilrank" and the timing
+ *                          fields, otherwise a plain error message.
+ *
+ * @return  true on success, false when the request cannot be parsed.
+ */
+extern "C" bool Process(GraphDB& db, const std::string& request, std::string& response) {
+    const auto start_time = get_time();
+
+    std::vector<std::string> vertices_id;
+    std::string label = "node";
+    std::string field = "id";
+    std::cout << "Input: " << request << std::endl;
+    try {
+        json input = json::parse(request);
+        parse_from_json(vertices_id, "trust_seeds", input);
+        parse_from_json(label, "label", input);
+        parse_from_json(field, "field", input);
+    } catch (const std::exception& e) {
+        response = "json parse error: " + std::string(e.what());
+        std::cout << response << std::endl;
+        return false;
+    }
+
+    auto txn = db.CreateReadTxn();
+
+    OlapOnDB<Empty> olapondb(db, txn, SNAPSHOT_PARALLEL);
+
+    // Without any requested seed the vertex whose indexed field equals "0" is trusted.
+    if (vertices_id.empty()) {
+        vertices_id.emplace_back("0");
+    }
+
+    // Translate the user-visible ids into the vertex numbering of the OLAP snapshot.
+    // NOTE(review): the result of the index lookup is used without checking that the
+    // vertex was found - confirm how GetVid() behaves for an id that does not exist.
+    ParallelVector<size_t> vertices_vid;
+    vertices_vid.ReAlloc(vertices_id.size());
+    for (const auto& id : vertices_id) {
+        const int64_t vid = txn.GetVertexIndexIterator(label, field, id, id).GetVid();
+        vertices_vid.Append(olapondb.MappedVid(vid));
+    }
+
+    const auto preprocessing_time = get_time();
+    auto curr = olapondb.AllocVertexArray<double>();
+    SybilRankCore(olapondb, vertices_vid, curr);
+    // Reduce over all vertices to the index holding the largest rank.
+    const auto max_sr_vi = olapondb.ProcessVertexInRange<size_t>(
+        [&](size_t vi) { return vi; }, 0, olapondb.NumVertices(), 0,
+        [&](size_t a, size_t b) { return curr[a] > curr[b] ? a : b; });
+    const auto exec_time = get_time();
+
+    json output;
+    // TODO(any): write sybilrank back to graph
+    // max_sr_vi is a size_t, which %zu prints portably (%ld assumed it was a long).
+    printf("max rank value is sybilrank[%zu] = %lf\n", max_sr_vi, curr[max_sr_vi]);
+    output["max_sybilrank"] = curr[max_sr_vi];
+    const auto output_time = get_time();
+
+    output["preprocessing_time"] = static_cast<double>(preprocessing_time - start_time);
+    output["exec_time"] = static_cast<double>(exec_time - preprocessing_time);
+    output["output_time"] = static_cast<double>(output_time - exec_time);
+    output["sum_time"] = static_cast<double>(output_time - start_time);
+    response = output.dump();
+    return true;
+}
diff --git a/procedures/algo_cpp/sybilrank_standalone.cpp b/procedures/algo_cpp/sybilrank_standalone.cpp
new file mode 100644
index 0000000000..71c8d68575
--- /dev/null
+++ b/procedures/algo_cpp/sybilrank_standalone.cpp
@@ -0,0 +1,95 @@
+/**
+ * Copyright 2022 AntGroup CO., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#include <algorithm>
+#include <sstream>
+#include <string>
+
+#include "olap/olap_on_disk.h"
+#include "tools/json.hpp"
+#include "./algo.h"
+
+using namespace lgraph_api;
+using namespace lgraph_api::olap;
+using json = nlohmann::json;
+
+/**
+ * @brief   Command-line configuration of the standalone SybilRank tool.
+ *
+ * Adds the "trust_seeds" option, a comma separated list of vertex ids, to the options of
+ * ConfigBase and reads the input as unweighted edges.
+ */
+class MyConfig : public ConfigBase<Empty> {
+ public:
+    std::string name = "sybilrank";
+    // Raw text of the "trust_seeds" option, e.g. 0,5,17.
+    std::string trust_seeds;
+    void AddParameter(fma_common::Configuration& config) {
+        ConfigBase<Empty>::AddParameter(config);
+        config.Add(trust_seeds, "trust_seeds", false).Comment("vertices of trust seeds");
+    }
+    void Print() {
+        ConfigBase<Empty>::Print();
+        std::cout << " name: " << name << std::endl;
+        std::cout << " trust_seeds:" << trust_seeds << std::endl;
+    }
+    MyConfig(int& argc, char**& argv) : ConfigBase<Empty>(argc, argv) {
+        parse_line = parse_line_unweighted<Empty>;
+        fma_common::Configuration config;
+        AddParameter(config);
+        config.ExitAfterHelp(true);
+        config.ParseAndFinalize(argc, argv);
+        Print();
+    }
+};
+
+/**
+ * @brief   Load a graph from disk, run SybilRank from the configured seeds, print the
+ *          highest rank and optionally write all ranks to the output directory.
+ *
+ * @return  0 on success, 1 when a trust seed is not a vertex of the loaded graph.
+ */
+int main(int argc, char** argv) {
+    MemUsage memUsage;
+    memUsage.startMemRecord();
+
+    // prepare
+    double start_time = get_time();
+    OlapOnDisk<Empty> graph;
+    MyConfig config(argc, argv);
+    // One more than the number of commas is an upper bound for the number of seeds.
+    const size_t cnt =
+        std::count(config.trust_seeds.begin(), config.trust_seeds.end(), ',') + 1;
+    ParallelVector<size_t> vertices_ids(cnt);
+    std::stringstream ss(config.trust_seeds);
+    std::string vid;
+    while (getline(ss, vid, ',')) {
+        // "1,,2" yields an empty token, which std::stoul would reject with an exception.
+        if (vid.empty()) {
+            continue;
+        }
+        vertices_ids.Append(std::stoul(vid));
+    }
+
+    graph.Load(config, DUAL_DIRECTION);
+    memUsage.print();
+    memUsage.reset();
+    std::cout << " num_vertices = " << graph.NumVertices() << std::endl;
+    std::cout << " num_edges = " << graph.NumEdges() << std::endl;
+
+    // SybilRankCore uses every seed as an index into its per-vertex array, so a seed the
+    // loaded graph does not contain has to be rejected before the core runs.
+    const size_t num_vertices = graph.NumVertices();
+    for (auto& seed : vertices_ids) {
+        if (seed >= num_vertices) {
+            fprintf(stderr, "trust seed %zu is out of range, the graph has %zu vertices\n",
+                    seed, num_vertices);
+            return 1;
+        }
+    }
+    const auto prepare_cost = get_time() - start_time;
+
+    // core
+    start_time = get_time();
+    auto curr = graph.AllocVertexArray<double>();
+    SybilRankCore(graph, vertices_ids, curr);
+    // Reduce over all vertices to the index holding the largest rank.
+    const auto max_sr_vi = graph.ProcessVertexInRange<size_t>(
+        [&](size_t vi) { return vi; }, 0, graph.NumVertices(), 0,
+        [&](size_t a, size_t b) { return curr[a] > curr[b] ? a : b; });
+    const auto core_cost = get_time() - start_time;
+
+    // output
+    start_time = get_time();
+    // max_sr_vi is a size_t, which %zu prints portably (%ld assumed it was a long).
+    printf("max rank value is sybilrank[%zu] = %lf\n", max_sr_vi, curr[max_sr_vi]);
+    if (config.output_dir != "") {
+        graph.Write<double>(config, curr, graph.NumVertices(), config.name);
+    }
+    const auto output_cost = get_time() - start_time;
+
+    printf("prepare_cost = %.2lf(s)\n", prepare_cost);
+    printf("core_cost = %.2lf(s)\n", core_cost);
+    printf("output_cost = %.2lf(s)\n", output_cost);
+    printf("total_cost = %.2lf(s)\n", prepare_cost + core_cost + output_cost);
+    printf("ALL DONE.\n");
+
+    return 0;
+}
diff --git a/test/integration/test_algo.py b/test/integration/test_algo.py
index 6891f08d13..f79e07e2d5 100644
--- a/test/integration/test_algo.py
+++ b/test/integration/test_algo.py
@@ -332,6 +332,23 @@ def test_exec_lcc_python_embed(self, importor, algo):
     def test_exec_lcc_python_standalone(self, algo):
         pass
 
+    LEIDENEMBEDOPT = {
+        "cmd": "algo/leiden_embed ./testdb",
+        "result": ['''final Q is 0.9''']
+    }
+    @pytest.mark.parametrize("importor", [IMPORTOPT], indirect=True)
+    @pytest.mark.parametrize("algo", [LEIDENEMBEDOPT], indirect=True)
+    def test_exec_leiden_embed(self, importor, algo):
+        pass
+
+    LEIDENSTANDOPT = {
+        "cmd": "algo/leiden_standalone --type text --input_dir ./data/algo/fb_weighted",
+        "result": ["final Q is 0.9"]
+    }
+    @pytest.mark.parametrize("algo", [LEIDENSTANDOPT], indirect=True)
+    def test_exec_leiden_standalone(self, algo):
+        pass
+
     LCIMPORTOPT = {
         "cmd": "OMP_NUM_THREADS=6 ./lgraph_import -c ./data/algo/lc.conf -d ./locate_cycle_db --overwrite 1",
         "cleanup_dir": ["./locate_cycle_db", "./.import_tmp"]
@@ -652,6 +669,44 @@ def test_exec_sssp_python_embed(self, importor, algo):
     def test_exec_sssp_python_standalone(self, algo):
         pass
 
+    SUBGRAPHISOMORPHISMEMBEDOPT = {
+        "cmd": "OMP_NUM_THREADS=6 algo/subgraph_isomorphism_embed ./testdb",
+        "result": ['''"match_subgraph_num":1612010''']
+    }
+
+    @pytest.mark.parametrize("importor", [IMPORTOPT], indirect=True)
+    @pytest.mark.parametrize("algo", [SUBGRAPHISOMORPHISMEMBEDOPT], indirect=True)
+    def test_exec_subgraph_isomorphism_embed(self, importor, algo):
+        pass
+
+    SUBGRAPHISOMORPHISMSTANDOPT = {
+        "cmd": "OMP_NUM_THREADS=6 algo/subgraph_isomorphism_standalone --type text --input_dir ./data/algo/fb_unweighted --query [[1,2],[2],[]]",
+        "result": ["match 1612010 subgraph"]
+    }
+
+    @pytest.mark.parametrize("algo", [SUBGRAPHISOMORPHISMSTANDOPT], indirect=True)
+    def test_exec_subgraph_isomorphism_standalone(self, algo):
+        pass
+
+    SYBILRANKEMBEDOPT = {
+        "cmd": "OMP_NUM_THREADS=6 algo/sybilrank_embed ./testdb",
+        "result": ['''"max_sybilrank":0.0008''']
+    }
+
+    @pytest.mark.parametrize("importor", [IMPORTOPT], indirect=True)
+    @pytest.mark.parametrize("algo", [SYBILRANKEMBEDOPT], indirect=True)
+    def test_exec_sybilrank_embed(self, importor, algo):
+        pass
+
+    SYBILRANKSTANDOPT = {
+        "cmd": "OMP_NUM_THREADS=6 algo/sybilrank_standalone --type text --input_dir ./data/algo/fb_unweighted --trust_seeds 0",
+        "result": ["max rank value is sybilrank[332] = 0.000849"]
+    }
+
+    @pytest.mark.parametrize("algo", [SYBILRANKSTANDOPT], indirect=True)
+    def test_exec_sybilrank_standalone(self, algo):
+        pass
+
     TRIANGLEEMBEDOPT = {
         "cmd": "OMP_NUM_THREADS=6 algo/triangle_embed ./testdb",
         "result": ['''"discovered_triangles":1612010''']