Skip to content

Commit

Permalink
feat(c++): label filtering API, benchmarks, and examples (#654)
Browse files Browse the repository at this point in the history
  • Loading branch information
Elssky authored Nov 11, 2024
1 parent dcef976 commit b86304a
Show file tree
Hide file tree
Showing 8 changed files with 874 additions and 19 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ if (BUILD_BENCHMARKS)
target_link_libraries(${target} PRIVATE benchmark::benchmark_main graphar ${CMAKE_DL_LIBS})
endmacro()
add_benchmark(arrow_chunk_reader_benchmark SRCS benchmarks/arrow_chunk_reader_benchmark.cc)
add_benchmark(label_filter_benchmark SRCS benchmarks/label_filter_benchmark.cc)
add_benchmark(graph_info_benchmark SRCS benchmarks/graph_info_benchmark.cc)
endif()

Expand Down
6 changes: 6 additions & 0 deletions cpp/benchmarks/benchmark_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,18 @@ class BenchmarkFixture : public ::benchmark::Fixture {
path_ = std::string(c_root) + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
auto maybe_graph_info = GraphInfo::Load(path_);
graph_info_ = maybe_graph_info.value();

second_path_ = std::string(c_root) + "/ldbc/parquet/ldbc.graph.yml";
auto second_maybe_graph_info = GraphInfo::Load(second_path_);
second_graph_info_ = second_maybe_graph_info.value();
}

void TearDown(const ::benchmark::State& state) override {}

protected:
std::string path_;
std::shared_ptr<GraphInfo> graph_info_;
std::string second_path_;
std::shared_ptr<GraphInfo> second_graph_info_;
};
} // namespace graphar
136 changes: 136 additions & 0 deletions cpp/benchmarks/label_filter_benchmark.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "benchmark/benchmark.h"

#include "./benchmark_util.h"
#include "graphar/api/high_level_reader.h"
#include "graphar/api/info.h"

namespace graphar {

// Benchmark helper: filters the "organisation" vertices of `graph_info` with
// the single label "university" via VerticesCollection::verticesWithLabel.
//
// @param graph_info  graph metadata handed straight to the filter call.
// @return the filtered vertex collection produced by the filter call.
//
// The result is unwrapped with value() without a prior error check, so a
// failed filter surfaces at that call (exact failure mode is defined by the
// project's Result type).
std::shared_ptr<graphar::VerticesCollection> SingleLabelFilter(
    const std::shared_ptr<graphar::GraphInfo>& graph_info) {
  const std::string type = "organisation";
  const std::string filter_label = "university";
  // Only the filter call itself lives here: this function runs inside timed
  // benchmark loops, so no unrelated metadata lookups are performed.
  auto maybe_filter_vertices_collection =
      VerticesCollection::verticesWithLabel(filter_label, graph_info, type);
  return maybe_filter_vertices_collection.value();
}

// Benchmark helper: filters the "organisation" vertices of `graph_info` with
// the single label "university" via
// VerticesCollection::verticesWithLabelbyAcero.
//
// @param graph_info  graph metadata handed straight to the filter call.
//
// Nothing is returned; the result is still unwrapped with value() so that a
// failed filter surfaces at that call instead of being silently ignored.
void SingleLabelFilterbyAcero(
    const std::shared_ptr<graphar::GraphInfo>& graph_info) {
  const std::string type = "organisation";
  const std::string filter_label = "university";
  // Only the filter call itself lives here: this function runs inside timed
  // benchmark loops, so no unrelated metadata lookups are performed.
  auto maybe_filter_vertices_collection =
      VerticesCollection::verticesWithLabelbyAcero(filter_label, graph_info,
                                                   type);
  auto filter_vertices = maybe_filter_vertices_collection.value();
}

// Benchmark helper: filters the "organisation" vertices of `graph_info` with
// the label list {"university", "company"} via
// VerticesCollection::verticesWithMultipleLabels. How the labels are combined
// is defined by that API.
//
// @param graph_info  graph metadata handed straight to the filter call.
//
// Nothing is returned; the result is still unwrapped with value() so that a
// failed filter surfaces at that call instead of being silently ignored.
void MultiLabelFilter(const std::shared_ptr<graphar::GraphInfo>& graph_info) {
  const std::string type = "organisation";
  const std::vector<std::string> filter_label = {"university", "company"};
  // Only the filter call itself lives here: this function runs inside timed
  // benchmark loops, so no unrelated metadata lookups are performed.
  auto maybe_filter_vertices_collection =
      VerticesCollection::verticesWithMultipleLabels(filter_label, graph_info,
                                                     type);
  auto filter_vertices = maybe_filter_vertices_collection.value();
}

// Benchmark helper: filters the "organisation" vertices of `graph_info` with
// the label list {"university", "company"} via
// VerticesCollection::verticesWithMultipleLabelsbyAcero. How the labels are
// combined is defined by that API.
//
// @param graph_info  graph metadata handed straight to the filter call.
//
// Nothing is returned; the result is still unwrapped with value() so that a
// failed filter surfaces at that call instead of being silently ignored.
void MultiLabelFilterbyAcero(
    const std::shared_ptr<graphar::GraphInfo>& graph_info) {
  const std::string type = "organisation";
  const std::vector<std::string> filter_label = {"university", "company"};
  // Only the filter call itself lives here: this function runs inside timed
  // benchmark loops, so no unrelated metadata lookups are performed.
  auto maybe_filter_vertices_collection =
      VerticesCollection::verticesWithMultipleLabelsbyAcero(filter_label,
                                                            graph_info, type);
  auto filter_vertices = maybe_filter_vertices_collection.value();
}

// Benchmark helper: filters an existing vertex collection with the label list
// {"company", "public"} via the collection-based overload of
// VerticesCollection::verticesWithMultipleLabels. How the labels are combined
// is defined by that API.
//
// @param graph_info           not needed by the collection-based overload;
//                             kept so existing callers compile unchanged.
// @param vertices_collection  the vertex set to filter further.
// @return the filtered vertex collection produced by the filter call.
//
// The result is unwrapped with value() without a prior error check, so a
// failed filter surfaces at that call.
std::shared_ptr<graphar::VerticesCollection> LabelFilterFromSet(
    const std::shared_ptr<graphar::GraphInfo>& graph_info,
    const std::shared_ptr<VerticesCollection>& vertices_collection) {
  // Silences unused-parameter diagnostics; the parameter only remains for
  // signature compatibility.
  static_cast<void>(graph_info);
  const std::vector<std::string> filter_label = {"company", "public"};
  auto maybe_filter_vertices_collection =
      VerticesCollection::verticesWithMultipleLabels(filter_label,
                                                     vertices_collection);
  return maybe_filter_vertices_collection.value();
}

// Benchmark: each iteration runs SingleLabelFilter() on the fixture's
// second_graph_info_ graph. The returned collection is discarded.
BENCHMARK_DEFINE_F(BenchmarkFixture, SingleLabelFilter)
(::benchmark::State& state) { // NOLINT
for (auto _ : state) {
SingleLabelFilter(second_graph_info_);
}
}

// Benchmark: each iteration runs SingleLabelFilterbyAcero() on the fixture's
// second_graph_info_ graph.
BENCHMARK_DEFINE_F(BenchmarkFixture, SingleLabelFilterbyAcero)
(::benchmark::State& state) { // NOLINT
for (auto _ : state) {
SingleLabelFilterbyAcero(second_graph_info_);
}
}

// Benchmark: each iteration runs MultiLabelFilter() on the fixture's
// second_graph_info_ graph.
BENCHMARK_DEFINE_F(BenchmarkFixture, MultiLabelFilter)
(::benchmark::State& state) { // NOLINT
for (auto _ : state) {
MultiLabelFilter(second_graph_info_);
}
}

// Benchmark: each iteration runs MultiLabelFilterbyAcero() on the fixture's
// second_graph_info_ graph.
BENCHMARK_DEFINE_F(BenchmarkFixture, MultiLabelFilterbyAcero)
(::benchmark::State& state) { // NOLINT
for (auto _ : state) {
MultiLabelFilterbyAcero(second_graph_info_);
}
}

// Benchmark: measures label filtering applied to an already-filtered vertex
// set. Each iteration rebuilds its input between PauseTiming() and
// ResumeTiming(), so only the final LabelFilterFromSet() call is timed.
// NOTE(review): the input construction looks loop-invariant; if the filter
// does not mutate its input collection it could be built once before the loop
// to avoid the per-iteration Pause/Resume overhead — confirm before changing.
BENCHMARK_DEFINE_F(BenchmarkFixture, LabelFilterFromSet)
(::benchmark::State& state) { // NOLINT
for (auto _ : state) {
// Untimed setup: single-label filter, then one pass of the set-based filter.
state.PauseTiming();
auto vertices_collection = SingleLabelFilter(second_graph_info_);
auto vertices_collection_2 =
LabelFilterFromSet(second_graph_info_, vertices_collection);
state.ResumeTiming();
// Timed region: filter the already-filtered set once more.
LabelFilterFromSet(second_graph_info_, vertices_collection_2);
}
}

// Register every label-filter benchmark defined above with the framework,
// each configured with Iterations(10).
BENCHMARK_REGISTER_F(BenchmarkFixture, SingleLabelFilter)->Iterations(10);
BENCHMARK_REGISTER_F(BenchmarkFixture, SingleLabelFilterbyAcero)
->Iterations(10);
BENCHMARK_REGISTER_F(BenchmarkFixture, MultiLabelFilter)->Iterations(10);
BENCHMARK_REGISTER_F(BenchmarkFixture, MultiLabelFilterbyAcero)->Iterations(10);
BENCHMARK_REGISTER_F(BenchmarkFixture, LabelFilterFromSet)->Iterations(10);

} // namespace graphar
95 changes: 95 additions & 0 deletions cpp/examples/label_filtering_example.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <iostream>

#include "arrow/api.h"
#include "arrow/filesystem/api.h"

#include "./config.h"
#include "graphar/api/arrow_reader.h"
#include "graphar/api/high_level_reader.h"

// Walks through the label-filtering API on the "organisation" vertices of
// `graph_info` and prints the results to stdout:
//   1. filter the vertex type by the single label "company";
//   2. filter that result again by the label "public";
//   3. filter the vertex type by the label list {"company", "public"} and
//      print each resulting vertex's id, labels and "name" property.
//
// @param graph_info  graph metadata handed to the filter calls.
//
// Every filter result is checked with ASSERT before it is unwrapped.
void vertices_collection(
    const std::shared_ptr<graphar::GraphInfo>& graph_info) {
  const std::string type = "organisation";

  std::cout << "Query vertices with a specific label" << std::endl;
  std::cout << "--------------------------------------" << std::endl;

  auto maybe_filter_vertices_collection =
      graphar::VerticesCollection::verticesWithLabel(std::string("company"),
                                                     graph_info, type);

  ASSERT(!maybe_filter_vertices_collection.has_error());
  auto filter_vertices = maybe_filter_vertices_collection.value();
  std::cout << "valid vertices num: " << filter_vertices->size() << std::endl;

  std::cout << std::endl;
  std::cout << "Query vertices with specific label in a filtered vertices set"
            << std::endl;
  std::cout << "--------------------------------------" << std::endl;

  // Second overload: filter an existing collection instead of a vertex type.
  auto maybe_filter_vertices_collection_2 =
      graphar::VerticesCollection::verticesWithLabel(std::string("public"),
                                                     filter_vertices);
  ASSERT(!maybe_filter_vertices_collection_2.has_error());
  auto filter_vertices_2 = maybe_filter_vertices_collection_2.value();
  std::cout << "valid vertices num: " << filter_vertices_2->size() << std::endl;

  std::cout << std::endl;
  std::cout << "Test vertices with multi labels" << std::endl;
  std::cout << "--------------------------------------" << std::endl;
  auto maybe_filter_vertices_collection_3 =
      graphar::VerticesCollection::verticesWithMultipleLabels(
          {"company", "public"}, graph_info, type);
  ASSERT(!maybe_filter_vertices_collection_3.has_error());
  auto filter_vertices_3 = maybe_filter_vertices_collection_3.value();
  std::cout << "valid vertices num: " << filter_vertices_3->size() << std::endl;

  for (auto it = filter_vertices_3->begin(); it != filter_vertices_3->end();
       ++it) {
    // get a node's all labels
    auto label_result = it.label();
    std::cout << "id: " << it.id() << " ";
    // Labels are printed only when the lookup succeeded; an error is skipped.
    if (!label_result.has_error()) {
      // Bind by const reference so each label string is not copied.
      for (const auto& label : label_result.value()) {
        std::cout << label << " ";
      }
    }
    std::cout << "name: ";
    // Unlike the labels, the property result is unwrapped without a check.
    auto property = it.property<std::string>("name").value();
    std::cout << property << " ";
    std::cout << std::endl;
  }
}

// Entry point of the label-filtering example: loads the LDBC graph metadata
// from the testing resource directory and runs the vertices_collection demo.
int main(int argc, char* argv[]) {
  // Build the path to the graph description file and load it. The result is
  // unwrapped directly, without an error check.
  const std::string graph_yaml =
      GetTestingResourceRoot() + "/ldbc/parquet/ldbc.graph.yml";
  auto graph_info = graphar::GraphInfo::Load(graph_yaml).value();

  // Section banner, followed by the demo itself.
  std::cout << "Vertices collection" << std::endl
            << "-------------------" << std::endl;
  vertices_collection(graph_info);
  std::cout << std::endl;
  return 0;
}
Loading

0 comments on commit b86304a

Please sign in to comment.