diff --git a/src/import/graphar_config.h b/src/import/graphar_config.h index bbf7ba9385..f174f3cf3a 100644 --- a/src/import/graphar_config.h +++ b/src/import/graphar_config.h @@ -22,12 +22,12 @@ namespace lgraph { namespace import_v3 { -typedef std::map Dict; +typedef std::vector> Properties; /** * @brief Parse the gar DataType to FieldType in config. * - * @param data_type The GraphAr DataType of the vetex or property. + * @param data_type The GraphAr DataType of the vetex or edge property. * @param type_name The FieldType string which used to make json object. */ inline void ParseType(const GraphArchive::DataType& data_type, std::string& type_name) { @@ -76,14 +76,14 @@ inline void CheckAdjListType(const GraphArchive::EdgeInfo& edge_info, /** * Traverse all properties of the vertex, get the primary key, the properties and the property * names. Keep the original order in yml config. - * + * * @param ver_info The gar vertex information. * @param primary The primary key of the vertex. * @param props All the properties of the vertex. One of it maybe {"name":"id","type":"INT64"}. * @param prop_names All the property names of the vertex. One of it maybe "id". */ inline void WalkVertex(const GraphArchive::VertexInfo& ver_info, std::string& primary, - std::vector& props, std::vector& prop_names) { + Properties& props, std::vector& prop_names) { auto& ver_groups = ver_info.GetPropertyGroups(); for (auto& ver_props : ver_groups) { for (auto& prop : ver_props.GetProperties()) { @@ -100,12 +100,12 @@ inline void WalkVertex(const GraphArchive::VertexInfo& ver_info, std::string& pr /** * Traverse all properties of the edge, get the properties and the property names. * Keep the original order in yml config. Similar to WalkVertex, but don't get primary. - * + * * @param edge_info The gar edge information. * @param props All the properties of the vertex. One of it maybe {"name":"id","type":"INT64"}. * @param prop_names All the property names of the vertex. One of it maybe "id". 
*/ -inline void WalkEdge(const GraphArchive::EdgeInfo& edge_info, std::vector& props, +inline void WalkEdge(const GraphArchive::EdgeInfo& edge_info, Properties& props, std::vector& prop_names) { GraphArchive::AdjListType adj_list_type = GraphArchive::AdjListType::ordered_by_dest; CheckAdjListType(edge_info, adj_list_type); @@ -121,6 +121,33 @@ inline void WalkEdge(const GraphArchive::EdgeInfo& edge_info, std::vector& } }
+/**
+ * Check whether two edge property lists declare the same (name, type) pairs,
+ * ignoring order. Both lists are sorted in place as a side effect.
+ *
+ * @param props1 The first edge properties; each entry needs "name" and "type".
+ * @param props2 The second edge properties; each entry needs "name" and "type".
+ * @return true if both lists hold the same (name, type) pairs, otherwise false.
+ */
+inline bool CheckEdgePropsEqual(Properties& props1, Properties& props2) {
+    if (props1.size() != props2.size()) {
+        return false;
+    }
+    // Tie-break on type so entries sharing a name are ordered deterministically.
+    const auto by_name_then_type = [](const auto& lhs, const auto& rhs) {
+        const auto& lhs_name = lhs.at("name");
+        const auto& rhs_name = rhs.at("name");
+        return lhs_name != rhs_name ? lhs_name < rhs_name : lhs.at("type") < rhs.at("type");
+    };
+    std::sort(props1.begin(), props1.end(), by_name_then_type);
+    std::sort(props2.begin(), props2.end(), by_name_then_type);
+    // Sizes already match, so a pairwise walk by reference is enough (no map copies).
+    const auto same_entry = [](const auto& lhs, const auto& rhs) {
+        return lhs.at("name") == rhs.at("name") && lhs.at("type") == rhs.at("type");
+    };
+    return std::equal(props1.begin(), props1.end(), props2.begin(), same_entry);
+}
+
 /** * @brief Read the gar yml file to construct the import config in json form.
* @@ -131,14 +158,13 @@ inline void ParserGraphArConf(nlohmann::json& gar_conf, const std::string& path) auto graph_info = GraphArchive::GraphInfo::Load(path).value(); gar_conf["schema"] = {}; gar_conf["files"] = {}; - std::unordered_set labels; auto vertex_infos = graph_info.GetVertexInfos(); for (const auto& [key, value] : vertex_infos) { nlohmann::json schema_node; schema_node["label"] = value.GetLabel(); schema_node["type"] = "VERTEX"; std::string primary; - std::vector properties; + Properties properties; std::vector prop_names; WalkVertex(value, primary, properties, prop_names); schema_node["primary"] = primary; @@ -154,18 +180,26 @@ inline void ParserGraphArConf(nlohmann::json& gar_conf, const std::string& path) } auto edge_infos = graph_info.GetEdgeInfos(); + std::unordered_map edge_labels; for (const auto& [key, edge_info] : edge_infos) { std::string label = edge_info.GetEdgeLabel(); - auto result = labels.insert(label); - std::vector properties; + Properties properties; std::vector prop_names = {"SRC_ID", "DST_ID"}; WalkEdge(edge_info, properties, prop_names); - if (result.second) { + if (!edge_labels.count(label)) { + edge_labels[label] = properties; nlohmann::json schema_node; schema_node["label"] = label; schema_node["type"] = "EDGE"; - schema_node["properties"] = properties; + if (properties.size()) { + schema_node["properties"] = properties; + } gar_conf["schema"].push_back(schema_node); + } else { + if (!CheckEdgePropsEqual(properties, edge_labels[label])) { + FMA_LOG() << "Same edge label has different properties." 
<< path; + throw std::runtime_error("The edge [" + label + "] is not supported."); + } } nlohmann::json file_node; diff --git a/test/resource/data/gar_test/edge_test/actor.vertex.yml b/test/resource/data/gar_test/edge_test/actor.vertex.yml new file mode 100644 index 0000000000..f30562352a --- /dev/null +++ b/test/resource/data/gar_test/edge_test/actor.vertex.yml @@ -0,0 +1,10 @@ +label: actor +chunk_size: 4096 +prefix: vertex/actor/ +property_groups: + - file_type: parquet + properties: + - name: id + data_type: int64 + is_primary: true +version: gar/v1 diff --git a/test/resource/data/gar_test/edge_test/actor_comment_movie.edge.yml b/test/resource/data/gar_test/edge_test/actor_comment_movie.edge.yml new file mode 100644 index 0000000000..2b281a5e0d --- /dev/null +++ b/test/resource/data/gar_test/edge_test/actor_comment_movie.edge.yml @@ -0,0 +1,19 @@ +src_label: actor +edge_label: comment +dst_label: movie +chunk_size: 1024 +src_chunk_size: 100 +dst_chunk_size: 100 +directed: false +prefix: edge/actor_comment_movie/ +adj_lists: + - ordered: true + aligned_by: src + file_type: parquet + property_groups: + - file_type: parquet + properties: + - name: diffculty + data_type: string + is_primary: false +version: gar/v1 diff --git a/test/resource/data/gar_test/edge_test/movie.graph.yml b/test/resource/data/gar_test/edge_test/movie.graph.yml new file mode 100644 index 0000000000..e63ad58342 --- /dev/null +++ b/test/resource/data/gar_test/edge_test/movie.graph.yml @@ -0,0 +1,9 @@ +name: movie +vertices: + - viewer.vertex.yml + - actor.vertex.yml + - movie.vertex.yml +edges: + - viewer_comment_movie.edge.yml + - actor_comment_movie.edge.yml +version: gar/v1 diff --git a/test/resource/data/gar_test/edge_test/movie.vertex.yml b/test/resource/data/gar_test/edge_test/movie.vertex.yml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/resource/data/gar_test/edge_test/viewer.vertex.yml b/test/resource/data/gar_test/edge_test/viewer.vertex.yml new file mode 100644 
index 0000000000..3ebc44d900 --- /dev/null +++ b/test/resource/data/gar_test/edge_test/viewer.vertex.yml @@ -0,0 +1,10 @@ +label: viewer +chunk_size: 4096 +prefix: vertex/viewer/ +property_groups: + - file_type: parquet + properties: + - name: id + data_type: int64 + is_primary: true +version: gar/v1 diff --git a/test/resource/data/gar_test/edge_test/viewer_comment_movie.edge.yml b/test/resource/data/gar_test/edge_test/viewer_comment_movie.edge.yml new file mode 100644 index 0000000000..807525b8b3 --- /dev/null +++ b/test/resource/data/gar_test/edge_test/viewer_comment_movie.edge.yml @@ -0,0 +1,19 @@ +src_label: viewer +edge_label: comment +dst_label: movie +chunk_size: 1024 +src_chunk_size: 100 +dst_chunk_size: 100 +directed: false +prefix: edge/viewer_comment_movie/ +adj_lists: + - ordered: true + aligned_by: src + file_type: parquet + property_groups: + - file_type: parquet + properties: + - name: rate + data_type: string + is_primary: false +version: gar/v1 diff --git a/test/test_import_gar.cpp b/test/test_import_gar.cpp index d88e428f04..307512bc41 100644 --- a/test/test_import_gar.cpp +++ b/test/test_import_gar.cpp @@ -29,9 +29,22 @@ using namespace import_v3; class TestImportGar : public TuGraphTest {}; +// The path "/test/resource/data/gar_test/edge_test" is for TestEdgeLabel // The path "/test/resource/data/gar_test/ldbc/" is for TestGarConfig // The path "/test/resource/data/gar_test/ldbc_parquet" is for TestGarData +TEST_F(TestImportGar, TestEdgeLabel) { + // Edge label "comment" has two property sets in the fixture (rate vs diffculty), so parsing must throw. NOTE(review): movie.vertex.yml is added with no content in this patch; confirm that is not what throws. + Importer::Config config; + std::string tugraph_path = std::filesystem::path(__FILE__).parent_path().parent_path(); + config.config_file = tugraph_path + "/test/resource/data/gar_test/edge_test/movie.graph.yml"; + config.is_graphar = true; + config.delete_if_exists = true; + + nlohmann::json conf; + UT_EXPECT_ANY_THROW(ParserGraphArConf(conf, config.config_file)); +} + TEST_F(TestImportGar, TestGarConfig) { // test parse gar config UT_LOG() << 
"Parsing gar yaml config to lgraph_import json config";