Skip to content

Commit

Permalink
Add API: filter property and return VerticesCollection
Browse files Browse the repository at this point in the history
  • Loading branch information
Elssky committed Sep 25, 2024
1 parent 404324c commit 042f46a
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 60 deletions.
92 changes: 45 additions & 47 deletions cpp/examples/high_level_label_reader_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include "./config.h"
#include "graphar/api/high_level_reader.h"
#include "graphar/api/arrow_reader.h"

void vertices_collection(
const std::shared_ptr<graphar::GraphInfo>& graph_info) {
Expand All @@ -42,22 +43,6 @@ void vertices_collection(
auto filter_vertices = maybe_filter_vertices_collection.value();
std::cout << "valid vertices num: " << filter_vertices->size() << std::endl;

// for (auto it = filter_vertices->begin(); it != filter_vertices->end();
// ++it) {
// // get a node's all labels
// auto label_result = it.label();
// std::cout<< "id: " << it.id()<<" ";
// if (!label_result.has_error()) {
// for (auto label : label_result.value()) {
// std::cout << label << " ";
// }
// }
// std::cout << "name: ";
// auto property = it.property<std::string>("name").value();
// std::cout << property << " ";
// std::cout<<std::endl;
// }

std::cout << std::endl;
std::cout << "Query vertices with specific label in a filtered vertices set"
<< std::endl;
Expand All @@ -70,21 +55,6 @@ void vertices_collection(
auto filter_vertices_2 = maybe_filter_vertices_collection_2.value();
std::cout << "valid vertices num: " << filter_vertices_2->size() << std::endl;

// for (auto it = filter_vertices_2->begin(); it != filter_vertices_2->end();
// ++it) {
// auto label_result = it.label();
// std::cout<< "id: " << it.id()<<" ";
// if (!label_result.has_error()) {
// for (auto label : label_result.value()) {
// std::cout << label << " ";
// }
// }
// std::cout << "name: ";
// auto property = it.property<std::string>("name").value();
// std::cout << property << " ";
// std::cout<<std::endl;
// }

std::cout << std::endl;
std::cout << "Test vertices with multi labels" << std::endl;
std::cout << "--------------------------------------" << std::endl;
Expand All @@ -95,23 +65,51 @@ void vertices_collection(
auto filter_vertices_3 = maybe_filter_vertices_collection_3.value();
std::cout << "valid vertices num: " << filter_vertices_3->size() << std::endl;

// for (auto it = filter_vertices_3->begin(); it != filter_vertices_3->end();
// ++it) {
// // get a node's all labels
// auto label_result = it.label();
// std::cout<< "id: " << it.id()<<" ";
// if (!label_result.has_error()) {
// for (auto label : label_result.value()) {
// std::cout << label << " ";
// }
// }
// std::cout << "name: ";
// auto property = it.property<std::string>("name").value();
// std::cout << property << " ";
// std::cout<<std::endl;
// }
}
for (auto it = filter_vertices_3->begin(); it != filter_vertices_3->end();
++it) {
// get a node's all labels
auto label_result = it.label();
std::cout << "id: " << it.id() << " ";
if (!label_result.has_error()) {
for (auto label : label_result.value()) {
std::cout << label << " ";
}
}
std::cout << "name: ";
auto property = it.property<std::string>("name").value();
std::cout << property << " ";
std::cout << std::endl;
}


std::cout << "Test vertices with property" << std::endl;
std::cout << "--------------------------------------" << std::endl;
auto filter = graphar::_Equal(graphar::_Property("name"),
graphar::_Literal("Safi_Airways"));
auto maybe_filter_vertices_collection_4 =
graphar::VerticesCollection::verticesWithProperty(
std::string("name"), filter, graph_info, type);
ASSERT(!maybe_filter_vertices_collection_4.has_error());
auto filter_vertices_4 = maybe_filter_vertices_collection_4.value();
std::cout << "valid vertices num: " << filter_vertices_4->size() << std::endl;

for (auto it = filter_vertices_4->begin(); it != filter_vertices_4->end();
++it) {
// get a node's all labels
auto label_result = it.label();
std::cout << "id: " << it.id() << " ";
if (!label_result.has_error()) {
for (auto label : label_result.value()) {
std::cout << label << " ";
}
}
std::cout << "name: ";
auto property = it.property<std::string>("name").value();
std::cout << property << " ";
std::cout << std::endl;
}

}
int main(int argc, char* argv[]) {
// read file and construct graph info
std::string path = GetTestingResourceRoot() + "/ldbc/parquet/ldbc.graph.yml";
Expand Down
91 changes: 84 additions & 7 deletions cpp/src/graphar/high-level/graph_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,68 @@ Result<std::vector<IdType>> VerticesCollection::filter_by_acero(
return indices64;
}

/**
 * @brief Collect the ids of the vertices that satisfy a property filter.
 *
 * A chunk reader is created for the property group that contains
 * `property_name`, the filter expression is pushed down to it, and the
 * values of the vertex index column of every returned row are gathered.
 *
 * - If this collection is already filtered, only the chunks listed in
 *   `valid_chunk_` are read; the index of each chunk that still contains at
 *   least one match is appended to `new_valid_chunk` when it is provided.
 * - Otherwise every chunk is read in order and the index of each chunk with
 *   at least one match is appended to `valid_chunk_`.
 *
 * @param property_name Name of the property used to locate the property
 *        group to read.
 * @param filter_expression Expression handed to the chunk reader's Filter().
 * @param new_valid_chunk Optional output (may be nullptr) receiving the
 *        indices of the matching chunks when the collection is already
 *        filtered.
 * @return The matching vertex ids, or an error status if the property group
 *         is missing, the reader cannot be created or positioned, a chunk
 *         cannot be read, or the index column is absent from a chunk.
 */
Result<std::vector<IdType>> VerticesCollection::filter(
    std::string property_name,
    std::shared_ptr<Expression> filter_expression,
    std::vector<IdType>* new_valid_chunk) {
  // Keep row/chunk arithmetic in IdType so large graphs do not overflow int.
  const IdType total_rows = vertex_num_;
  const IdType chunk_size = vertex_info_->GetChunkSize();

  auto property_group = vertex_info_->GetPropertyGroup(property_name);
  if (property_group == nullptr) {
    return Status::KeyError("No property group contains the property ",
                            property_name, ".");
  }

  auto maybe_filter_reader = graphar::VertexPropertyArrowChunkReader::Make(
      vertex_info_, property_group, prefix_, {});
  if (maybe_filter_reader.has_error()) {
    return maybe_filter_reader.error();
  }
  auto filter_reader = maybe_filter_reader.value();
  filter_reader->Filter(filter_expression);

  std::vector<IdType> matched_ids;

  // Appends every non-null value of the vertex index column of `table` to
  // matched_ids. Returns false when the table has no such column.
  // NOTE(review): the column is read as arrow::Int64Array, exactly as the
  // previous implementation did — confirm the index column is always int64.
  auto collect_ids = [&matched_ids](const auto& table) -> bool {
    const int index_col =
        table->schema()->GetFieldIndex(GeneralParams::kVertexIndexCol);
    if (index_col < 0) {
      return false;
    }
    const auto column = table->column(index_col);
    // Walk all chunks of the column, not just the first one.
    for (int i = 0; i < column->num_chunks(); ++i) {
      const auto ids =
          std::static_pointer_cast<arrow::Int64Array>(column->chunk(i));
      for (int64_t row = 0; row < ids->length(); ++row) {
        if (!ids->IsNull(row)) {
          matched_ids.push_back(ids->Value(row));
        }
      }
    }
    return true;
  };

  if (is_filtered_) {
    // Re-filter only the chunks that survived the previous filter.
    for (const IdType chunk_idx : valid_chunk_) {
      auto seek_status = filter_reader->seek(chunk_idx * chunk_size);
      if (!seek_status.ok()) {
        return seek_status;
      }
      auto filter_result = filter_reader->GetChunk();
      if (filter_result.has_error()) {
        return filter_result.error();
      }
      auto filter_table = filter_result.value();
      if (filter_table->num_rows() == 0) {
        continue;
      }
      // The chunk list is an optional output; the ids are always recorded.
      if (new_valid_chunk != nullptr) {
        new_valid_chunk->emplace_back(chunk_idx);
      }
      if (!collect_ids(filter_table)) {
        return Status::Invalid("The vertex index column is missing from the ",
                               "filtered chunk.");
      }
    }
  } else {
    // First filter on this collection: scan every chunk sequentially.
    for (IdType chunk_idx = 0; chunk_idx * chunk_size < total_rows;
         ++chunk_idx) {
      auto filter_result = filter_reader->GetChunk();
      if (filter_result.has_error()) {
        return filter_result.error();
      }
      auto filter_table = filter_result.value();
      // Advance right after reading; the result is deliberately ignored, as
      // before, because the loop bound already decides when to stop.
      filter_reader->next_chunk();
      if (filter_table->num_rows() == 0) {
        continue;
      }
      valid_chunk_.emplace_back(chunk_idx);
      if (!collect_ids(filter_table)) {
        return Status::Invalid("The vertex index column is missing from the ",
                               "filtered chunk.");
      }
    }
  }
  return matched_ids;
}


Result<std::shared_ptr<VerticesCollection>>
VerticesCollection::verticesWithLabel(
const std::string& filter_label,
Expand Down Expand Up @@ -380,19 +442,34 @@ VerticesCollection::verticesWithMultipleLabels(
return new_vertices_collection;
}

/**
 * @brief Build a VerticesCollection restricted to the vertices of `type`
 *        whose property satisfies `filter`.
 *
 * A collection is created from the vertex info of `type` and the graph
 * prefix, the property filter is run on it, and the resulting ids are stored
 * as the collection's filtered ids.
 *
 * @param property_name Name of the property the filter is evaluated on.
 * @param filter Filter expression passed to VerticesCollection::filter.
 * @param graph_info Graph info providing the prefix and the vertex info.
 * @param type Vertex type to query.
 * @return The filtered collection, or an error status if the vertex type is
 *         unknown or the filter cannot be evaluated.
 */
Result<std::shared_ptr<VerticesCollection>>
VerticesCollection::verticesWithProperty(
    const std::string property_name,
    const graphar::util::Filter filter,
    const std::shared_ptr<GraphInfo>& graph_info,
    const std::string& type) {
  auto prefix = graph_info->GetPrefix();
  auto vertex_info = graph_info->GetVertexInfo(type);
  if (vertex_info == nullptr) {
    return Status::KeyError("The vertex ", type, " doesn't exist.");
  }

  auto vertices_collection =
      std::make_shared<VerticesCollection>(vertex_info, prefix);

  // Propagate a filter failure to the caller instead of unwrapping blindly.
  auto maybe_filtered_ids = vertices_collection->filter(property_name, filter);
  if (maybe_filtered_ids.has_error()) {
    return maybe_filtered_ids.error();
  }
  vertices_collection->filtered_ids_ = std::move(maybe_filtered_ids.value());
  vertices_collection->is_filtered_ = true;
  return vertices_collection;
}

// Result<std::shared_ptr<VerticesCollection>>
// VerticesCollection::verticesWithLabelAndProperty(
// const std::string& filter_label,
// const std::vector<std::shared_ptr<Property>>& filter_properties,
// const std::vector<std::string>& filter_properties_val,
// const std::shared_ptr<GraphInfo>& graph_info,
// const std::string& type) {
// VerticesCollection::verticesWithProperty(
// const graphar::util::Filter filter,
// const std::shared_ptr<VerticesCollection>& vertices_collection
// ) {
// auto prefix = graph_info->GetPrefix();
// auto vertex_info = graph_info->GetVertexInfo(type);
// auto labels = vertex_info->GetLabels();
// auto vertices_collection =
// std::make_shared<VerticesCollection>(vertex_info, prefix); auto
// filtered_ids = vertices_collection->filter({filter_label});

// std::vector<IdType> new_filtered_ids;
// for (auto it = vertices_collection->begin(); it !=
// vertices_collection->end(); ++it) {
Expand Down
18 changes: 12 additions & 6 deletions cpp/src/graphar/high-level/graph_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,9 @@ class VerticesCollection {

Result<std::vector<IdType>> filter_by_acero(
std::vector<std::string> filter_labels) const;

/**
 * @brief Filter the vertices by a property expression and return their ids.
 *
 * @param property_name Name of the property used to look up the property
 *        group that is read.
 * @param filter_expression Expression applied by the chunk reader.
 * @param new_valid_chunk Optional output; when the collection is already
 *        filtered, the indices of the chunks that contain at least one match
 *        are appended to it.
 * @return The values of the vertex index column for the rows that pass the
 *         filter.
 */
Result<std::vector<IdType>> filter(
std::string property_name, std::shared_ptr<Expression> filter_expression, std::vector<IdType>* new_valid_chunk=nullptr);

/**
* @brief Query vertices with a specific label
Expand Down Expand Up @@ -443,9 +446,8 @@ class VerticesCollection {
const std::shared_ptr<VerticesCollection>& vertices_collection);

/**
* @brief Query vertices with a specific label and properties
* @brief Query vertices with specific properties
*
* @param filter_label The label to query vertices by
* @param properties A vector of shared pointers to Property objects that the
* vertices should match
* @param graph_info A smart pointer to GraphInfo that contains details about
Expand All @@ -455,12 +457,16 @@ class VerticesCollection {
* specified label and match the given properties
*/
static Result<std::shared_ptr<VerticesCollection>>
verticesWithLabelAndProperty(
const std::string& filter_label,
const std::vector<std::shared_ptr<Property>>& filter_properties,
const std::vector<std::string>& filter_properties_val,
verticesWithProperty(
const std::string property_name,
const graphar::util::Filter filter,
const std::shared_ptr<GraphInfo>& graph_info, const std::string& type);

/**
 * @brief Overload of verticesWithProperty that takes an existing
 *        VerticesCollection instead of graph info and a vertex type.
 *
 * NOTE(review): the definition is not visible here; presumably the query is
 * restricted to the vertices of `vertices_collection` — confirm.
 *
 * @param property_name Name of the property the filter refers to.
 * @param filter Filter expression to apply.
 * @param vertices_collection The collection to query.
 */
static Result<std::shared_ptr<VerticesCollection>>
verticesWithProperty(
const std::string property_name,
const graphar::util::Filter filter,
const std::shared_ptr<VerticesCollection>& vertices_collection);
/**
* @brief Construct a VerticesCollection from graph info and vertex label.
*
Expand Down

0 comments on commit 042f46a

Please sign in to comment.