Skip to content

Commit

Permalink
feat(c++): support construct multi-label chunks and label filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
Elssky committed Sep 19, 2024
1 parent c260513 commit 846db0c
Show file tree
Hide file tree
Showing 25 changed files with 1,483 additions and 102 deletions.
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ if (BUILD_TESTS)
add_test(test_chunk_info_reader SRCS test/test_chunk_info_reader.cc)
add_test(test_arrow_chunk_reader SRCS test/test_arrow_chunk_reader.cc)
add_test(test_graph SRCS test/test_graph.cc)
add_test(test_multi_label SRCS test/test_multi_label.cc)

# enable_testing()
endif()
Expand All @@ -476,6 +477,7 @@ if (BUILD_BENCHMARKS)
target_link_libraries(${target} PRIVATE benchmark::benchmark_main graphar ${CMAKE_DL_LIBS})
endmacro()
add_benchmark(arrow_chunk_reader_benchmark SRCS benchmarks/arrow_chunk_reader_benchmark.cc)
add_benchmark(label_filter_benchmark SRCS benchmarks/label_filter_benchmark.cc)
add_benchmark(graph_info_benchmark SRCS benchmarks/graph_info_benchmark.cc)
endif()

Expand Down
6 changes: 6 additions & 0 deletions cpp/benchmarks/benchmark_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,18 @@ class BenchmarkFixture : public ::benchmark::Fixture {
path_ = std::string(c_root) + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
auto maybe_graph_info = GraphInfo::Load(path_);
graph_info_ = maybe_graph_info.value();

second_path_ = std::string(c_root) + "/ldbc_large/parquet/ldbc.graph.yml";
auto second_maybe_graph_info = GraphInfo::Load(second_path_);
second_graph_info_ = second_maybe_graph_info.value();
}

// Intentionally empty override: no per-benchmark cleanup is performed here.
void TearDown(const ::benchmark::State& state) override {}

protected:
std::string path_;
std::shared_ptr<GraphInfo> graph_info_;
std::string second_path_;
std::shared_ptr<GraphInfo> second_graph_info_;
};
} // namespace graphar
131 changes: 131 additions & 0 deletions cpp/benchmarks/label_filter_benchmark.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "benchmark/benchmark.h"

#include "./benchmark_util.h"
#include "graphar/api/high_level_reader.h"
#include "graphar/api/info.h"

namespace graphar {

/// Benchmark helper: filters the "comment" vertices of a graph down to those
/// carrying the "Chrome" label via VerticesCollection::verticesWithLabel.
///
/// Only the filter call itself is issued, so a benchmark that times this
/// function measures the label filter and nothing else.
///
/// @param graph_info Graph metadata handed to the filter.
/// @return The filtered vertices collection (also used as the input set for
///         LabelFilterFromSet).
/// @note The filter result is unwrapped with .value() without a prior error
///       check; a failed lookup is not handled here.
std::shared_ptr<graphar::VerticesCollection> SingleLabelFilter(
    const std::shared_ptr<graphar::GraphInfo>& graph_info) {
  std::string type = "comment";
  std::string filter_label = "Chrome";
  auto maybe_filtered =
      VerticesCollection::verticesWithLabel(filter_label, graph_info, type);
  return maybe_filtered.value();
}

/// Benchmark helper: same query as SingleLabelFilter ("comment" vertices with
/// the "Chrome" label) but routed through
/// VerticesCollection::verticesWithLabelbyAcero.
///
/// Only the filter call itself is issued, so a benchmark that times this
/// function measures the label filter and nothing else.
///
/// @param graph_info Graph metadata handed to the filter.
/// @note The filter result is unwrapped with .value() without a prior error
///       check; a failed lookup is not handled here.
void SingleLabelFilterbyAcero(
    const std::shared_ptr<graphar::GraphInfo>& graph_info) {
  std::string type = "comment";
  std::string filter_label = "Chrome";
  auto maybe_filtered = VerticesCollection::verticesWithLabelbyAcero(
      filter_label, graph_info, type);
  // Unwrap so the lookup result is actually materialised before returning.
  auto filtered = maybe_filtered.value();
}

/// Benchmark helper: filters the "comment" vertices by the label list
/// {"Chrome", "Firefox"} via VerticesCollection::verticesWithMultipleLabels.
///
/// Only the filter call itself is issued, so a benchmark that times this
/// function measures the label filter and nothing else.
///
/// @param graph_info Graph metadata handed to the filter.
/// @note The filter result is unwrapped with .value() without a prior error
///       check; a failed lookup is not handled here.
void MultiLabelFilter(const std::shared_ptr<graphar::GraphInfo>& graph_info) {
  std::string type = "comment";
  std::vector<std::string> filter_label = {"Chrome", "Firefox"};
  auto maybe_filtered = VerticesCollection::verticesWithMultipleLabels(
      filter_label, graph_info, type);
  // Unwrap so the lookup result is actually materialised before returning.
  auto filtered = maybe_filtered.value();
}

/// Benchmark helper: same query as MultiLabelFilter ("comment" vertices, label
/// list {"Chrome", "Firefox"}) but routed through
/// VerticesCollection::verticesWithMultipleLabelsbyAcero.
///
/// Only the filter call itself is issued, so a benchmark that times this
/// function measures the label filter and nothing else.
///
/// @param graph_info Graph metadata handed to the filter.
/// @note The filter result is unwrapped with .value() without a prior error
///       check; a failed lookup is not handled here.
void MultiLabelFilterbyAcero(
    const std::shared_ptr<graphar::GraphInfo>& graph_info) {
  std::string type = "comment";
  std::vector<std::string> filter_label = {"Chrome", "Firefox"};
  auto maybe_filtered = VerticesCollection::verticesWithMultipleLabelsbyAcero(
      filter_label, graph_info, type);
  // Unwrap so the lookup result is actually materialised before returning.
  auto filtered = maybe_filtered.value();
}

/// Benchmark helper: applies the label list {"Firefox", "Internet Explorer"}
/// to an already-filtered vertices collection, using the overload of
/// VerticesCollection::verticesWithMultipleLabels that takes a collection.
///
/// The first parameter is kept so existing call sites stay valid, but it is
/// not needed: the filter operates on `vertices_collection` alone.
///
/// @param vertices_collection Previously filtered vertex set to filter again.
/// @note The filter result is unwrapped with .value() without a prior error
///       check; a failed lookup is not handled here.
void LabelFilterFromSet(
    const std::shared_ptr<graphar::GraphInfo>& /*graph_info*/,
    const std::shared_ptr<VerticesCollection>& vertices_collection) {
  std::vector<std::string> filter_label = {"Firefox", "Internet Explorer"};
  auto maybe_filtered = VerticesCollection::verticesWithMultipleLabels(
      filter_label, vertices_collection);
  // Unwrap so the lookup result is actually materialised before returning.
  auto filtered = maybe_filtered.value();
}

// Fixture benchmark: each timed iteration runs SingleLabelFilter once against
// second_graph_info_. The returned collection is handed to DoNotOptimize so
// the result counts as used and the call cannot be treated as dead code.
BENCHMARK_DEFINE_F(BenchmarkFixture, SingleLabelFilter)
(::benchmark::State& state) {  // NOLINT
  for (auto _ : state) {
    auto filtered = SingleLabelFilter(second_graph_info_);
    ::benchmark::DoNotOptimize(filtered);
  }
}

// Fixture benchmark: each timed iteration runs SingleLabelFilterbyAcero once
// against second_graph_info_.
// NOTE(review): the BENCHMARK_REGISTER_F line for this benchmark is commented
// out further down in this file, so it is presumably defined but never run.
BENCHMARK_DEFINE_F(BenchmarkFixture, SingleLabelFilterbyAcero)
(::benchmark::State& state) { // NOLINT
for (auto _ : state) {
SingleLabelFilterbyAcero(second_graph_info_);
}
}

// Fixture benchmark: each timed iteration runs MultiLabelFilter once against
// second_graph_info_.
// NOTE(review): the BENCHMARK_REGISTER_F line for this benchmark is commented
// out further down in this file, so it is presumably defined but never run.
BENCHMARK_DEFINE_F(BenchmarkFixture, MultiLabelFilter)
(::benchmark::State& state) { // NOLINT
for (auto _ : state) {
MultiLabelFilter(second_graph_info_);
}
}

// Fixture benchmark: each timed iteration runs MultiLabelFilterbyAcero once
// against second_graph_info_.
// NOTE(review): the BENCHMARK_REGISTER_F line for this benchmark is commented
// out further down in this file, so it is presumably defined but never run.
BENCHMARK_DEFINE_F(BenchmarkFixture, MultiLabelFilterbyAcero)
(::benchmark::State& state) { // NOLINT
for (auto _ : state) {
MultiLabelFilterbyAcero(second_graph_info_);
}
}

// Fixture benchmark: measures LabelFilterFromSet on a pre-filtered collection.
// The input collection is rebuilt with SingleLabelFilter on every iteration,
// but that step sits between PauseTiming() and ResumeTiming() so that only the
// LabelFilterFromSet call falls inside the timed region.
BENCHMARK_DEFINE_F(BenchmarkFixture, LabelFilterFromSet)
(::benchmark::State& state) { // NOLINT
for (auto _ : state) {
// Build the input set with the timer paused.
state.PauseTiming();
auto vertices_collection = SingleLabelFilter(second_graph_info_);
state.ResumeTiming();
// Timed part: filter the already-filtered set again.
LabelFilterFromSet(second_graph_info_, vertices_collection);
}
}

// Benchmark registration. Each registered benchmark is pinned to exactly 10
// iterations. Only SingleLabelFilter and LabelFilterFromSet are registered;
// the other three defined above are left disabled (commented out).
BENCHMARK_REGISTER_F(BenchmarkFixture, SingleLabelFilter)->Iterations(10);
// BENCHMARK_REGISTER_F(BenchmarkFixture, SingleLabelFilterbyAcero)
// ->Iterations(10);
// BENCHMARK_REGISTER_F(BenchmarkFixture, MultiLabelFilter)->Iterations(10);
// BENCHMARK_REGISTER_F(BenchmarkFixture, MultiLabelFilterbyAcero)->Iterations(10);
BENCHMARK_REGISTER_F(BenchmarkFixture, LabelFilterFromSet)->Iterations(10);

} // namespace graphar
2 changes: 1 addition & 1 deletion cpp/examples/bgl_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ int main(int argc, char* argv[]) {
int chunk_size = 100;
auto version = graphar::InfoVersion::Parse("gar/v1").value();
auto new_info = graphar::CreateVertexInfo(vertex_type, chunk_size, {group},
vertex_prefix, version);
{}, vertex_prefix, version);
// dump new vertex info
ASSERT(new_info->IsValidated());
ASSERT(new_info->Dump().status().ok());
Expand Down
4 changes: 2 additions & 2 deletions cpp/examples/construct_info_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ int main(int argc, char* argv[]) {
graphar::CreatePropertyGroup(property_vector_2, graphar::FileType::ORC);

// create vertex info
auto vertex_info = graphar::CreateVertexInfo(type, chunk_size, {group1},
auto vertex_info = graphar::CreateVertexInfo(type, chunk_size, {group1}, {},
vertex_prefix, version);

ASSERT(vertex_info != nullptr);
Expand Down Expand Up @@ -150,7 +150,7 @@ int main(int argc, char* argv[]) {

// create graph info
auto graph_info = graphar::CreateGraphInfo(name, {vertex_info}, {edge_info},
prefix, version);
{}, prefix, version);
ASSERT(graph_info->GetName() == name);
ASSERT(graph_info->GetPrefix() == prefix);
ASSERT(graph_info->GetVertexInfos().size() == 1);
Expand Down
125 changes: 125 additions & 0 deletions cpp/examples/high_level_label_reader_example.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <iostream>

#include "arrow/api.h"
#include "arrow/filesystem/api.h"

#include "./config.h"
#include "graphar/api/high_level_reader.h"

/// Demonstrates label-based filtering of "organisation" vertices with the
/// high-level reader. Three queries are run, and for each the number of
/// matching vertices is printed to stdout:
///   1. vertices carrying the label "company";
///   2. vertices from result (1) that also carry the label "public";
///   3. vertices matched by the label list {"company", "public"} in one call.
///
/// Every query result is checked with ASSERT before it is unwrapped.
///
/// @param graph_info Graph metadata to query.
void vertices_collection(
    const std::shared_ptr<graphar::GraphInfo>& graph_info) {
  std::string type = "organisation";

  std::cout << "Query vertices with a specific label" << std::endl;
  std::cout << "--------------------------------------" << std::endl;

  auto maybe_filter_vertices_collection =
      graphar::VerticesCollection::verticesWithLabel(std::string("company"),
                                                     graph_info, type);

  ASSERT(!maybe_filter_vertices_collection.has_error());
  auto filter_vertices = maybe_filter_vertices_collection.value();
  std::cout << "valid vertices num: " << filter_vertices->size() << std::endl;

  // for (auto it = filter_vertices->begin(); it != filter_vertices->end();
  //      ++it) {
  //   // get a node's all labels
  //   auto label_result = it.label();
  //   std::cout << "id: " << it.id() << " ";
  //   if (!label_result.has_error()) {
  //     for (auto label : label_result.value()) {
  //       std::cout << label << " ";
  //     }
  //   }
  //   std::cout << "name: ";
  //   auto property = it.property<std::string>("name").value();
  //   std::cout << property << " ";
  //   std::cout << std::endl;
  // }

  std::cout << std::endl;
  std::cout << "Query vertices with specific label in a filtered vertices set"
            << std::endl;
  std::cout << "--------------------------------------" << std::endl;

  // Second query narrows the first result instead of re-scanning the graph.
  auto maybe_filter_vertices_collection_2 =
      graphar::VerticesCollection::verticesWithLabel(std::string("public"),
                                                     filter_vertices);
  ASSERT(!maybe_filter_vertices_collection_2.has_error());
  auto filter_vertices_2 = maybe_filter_vertices_collection_2.value();
  std::cout << "valid vertices num: " << filter_vertices_2->size()
            << std::endl;

  // for (auto it = filter_vertices_2->begin(); it != filter_vertices_2->end();
  //      ++it) {
  //   auto label_result = it.label();
  //   std::cout << "id: " << it.id() << " ";
  //   if (!label_result.has_error()) {
  //     for (auto label : label_result.value()) {
  //       std::cout << label << " ";
  //     }
  //   }
  //   std::cout << "name: ";
  //   auto property = it.property<std::string>("name").value();
  //   std::cout << property << " ";
  //   std::cout << std::endl;
  // }

  std::cout << std::endl;
  std::cout << "Test vertices with multi labels" << std::endl;
  std::cout << "--------------------------------------" << std::endl;
  auto maybe_filter_vertices_collection_3 =
      graphar::VerticesCollection::verticesWithMultipleLabels(
          {"company", "public"}, graph_info, type);
  ASSERT(!maybe_filter_vertices_collection_3.has_error());
  auto filter_vertices_3 = maybe_filter_vertices_collection_3.value();
  std::cout << "valid vertices num: " << filter_vertices_3->size()
            << std::endl;

  // for (auto it = filter_vertices_3->begin(); it != filter_vertices_3->end();
  //      ++it) {
  //   // get a node's all labels
  //   auto label_result = it.label();
  //   std::cout << "id: " << it.id() << " ";
  //   if (!label_result.has_error()) {
  //     for (auto label : label_result.value()) {
  //       std::cout << label << " ";
  //     }
  //   }
  //   std::cout << "name: ";
  //   auto property = it.property<std::string>("name").value();
  //   std::cout << property << " ";
  //   std::cout << std::endl;
  // }
}

int main(int argc, char* argv[]) {
// read file and construct graph info
std::string path = GetTestingResourceRoot() + "/ldbc/parquet/ldbc.graph.yml";
auto graph_info = graphar::GraphInfo::Load(path).value();

// vertices collection
std::cout << "Vertices collection" << std::endl;
std::cout << "-------------------" << std::endl;
vertices_collection(graph_info);
std::cout << std::endl;
}
6 changes: 3 additions & 3 deletions cpp/examples/snap_dataset_to_graphar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ int main(int argc, char* argv[]) {
std::string type = "node", vertex_prefix = "vertex/node/";

// create vertex info
auto vertex_info = graphar::CreateVertexInfo(type, VERTEX_CHUNK_SIZE, {},
auto vertex_info = graphar::CreateVertexInfo(type, VERTEX_CHUNK_SIZE, {}, {},
vertex_prefix, version);

// save & dump
Expand All @@ -75,8 +75,8 @@ int main(int argc, char* argv[]) {

/*------------------construct graph info------------------*/
// create graph info
auto graph_info = graphar::CreateGraphInfo(graph_name, {vertex_info},
{edge_info}, save_path, version);
auto graph_info = graphar::CreateGraphInfo(
graph_name, {vertex_info}, {edge_info}, {}, save_path, version);
// save & dump
ASSERT(!graph_info->Dump().has_error());
ASSERT(graph_info->Save(save_path + graph_name + ".graph.yml").ok());
Expand Down
Loading

0 comments on commit 846db0c

Please sign in to comment.