Skip to content

Commit

Permalink
add metapath unit test
Browse files Browse the repository at this point in the history
  • Loading branch information
AlCatt91 committed Oct 16, 2024
1 parent ea845ab commit 05e3ee3
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 20 deletions.
17 changes: 10 additions & 7 deletions src/kg_topology_toolbox/topology_toolbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,9 +278,9 @@ def edge_metapath_count(
composition_workers: int = min(32, mp.cpu_count() - 1 or 1),
) -> pd.DataFrame:
"""
For each edge in the KG, compute the number of triangles of different
metapaths (i.e., the unique tuples (r1, r2) of relation types
of the two additional edges of the triangle).
For each edge in the KG, compute the number of triangles supported on it
distinguishing between different metapaths (i.e., the unique tuples (r1, r2)
of relation types of the two additional edges of the triangle).
:param filter_relations:
If not empty, compute the output only for the edges with relation
Expand All @@ -293,10 +293,11 @@ def edge_metapath_count(
on number of available threads (max: 32).
:return:
The output dataframe has one row for each (h, t, r1, r2) such that
there exists at least one triangle of metapath (r1, r2) over (any) edge
connecting h, t.
The output dataframe has one row for each (h, r, t, r1, r2) such that
there exists at least one triangle of metapath (r1, r2) over (h, r, t).
The number of metapath triangles is given in the column **n_triangles**.
The column **index** provides the index of the edge (h, r, t) in the
original Knowledge Graph dataframe.
"""
# discard loops as edges of a triangle
df_wo_loops = self.df[self.df.h != self.df.t]
Expand All @@ -313,14 +314,16 @@ def edge_metapath_count(
rel_df = self.df
df_triangles = df_wo_loops

return composition_count(
counts = composition_count(
df_triangles,
chunk_size=composition_chunk_size,
workers=composition_workers,
metapaths=True,
directed=True,
)

return rel_df.reset_index().merge(counts, on=["h", "t"], how="inner")

def edge_degree_cardinality_summary(
self, filter_relations: list[int] = [], aggregate_by_r: bool = False
) -> pd.DataFrame:
Expand Down
21 changes: 16 additions & 5 deletions tests/test_edge_topology_toolbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,19 @@
)


@pytest.mark.parametrize("return_metapath_list", [True, False])
def test_small_graph_metrics(return_metapath_list: bool) -> None:
# Define a small graph with all the features tested by
# the edge_topology_toolbox
def test_edge_metapath_count() -> None:
    """Compare every column of ``edge_metapath_count()`` with its expected values."""
    res = kgtt.edge_metapath_count()
    # Expected content of the output, column by column (two rows in total).
    expected_columns = {
        "index": [2, 2],
        "h": [0, 0],
        "r": [0, 0],
        "t": [2, 2],
        "r1": [0, 1],
        "r2": [1, 1],
        "n_triangles": [1, 1],
    }
    # Columns are checked in the listed order; the first mismatch fails the test.
    for column, expected in expected_columns.items():
        assert np.allclose(res[column], expected)


# entity degrees statistics
def test_edge_degree_cardinality_summary() -> None:
# edge degrees statistics
res = kgtt.edge_degree_cardinality_summary()
assert np.allclose(res["h_unique_rel"], [2, 2, 2, 1, 2, 2, 1, 2])
assert np.allclose(res["h_degree"], [3, 3, 3, 2, 3, 3, 2, 3])
Expand Down Expand Up @@ -60,6 +67,9 @@ def test_small_graph_metrics(return_metapath_list: bool) -> None:
"M:M",
]


@pytest.mark.parametrize("return_metapath_list", [True, False])
def test_edge_pattern_summary(return_metapath_list: bool) -> None:
# relation pattern symmetry
res = kgtt.edge_pattern_summary(return_metapath_list=return_metapath_list)
assert np.allclose(
Expand Down Expand Up @@ -92,6 +102,7 @@ def test_small_graph_metrics(return_metapath_list: bool) -> None:
def test_filter_relations() -> None:
for rels in [[0], [1], [0, 1]]:
for method in [
kgtt.edge_metapath_count,
kgtt.edge_degree_cardinality_summary,
partial(kgtt.edge_pattern_summary, return_metapath_list=True),
]:
Expand Down
5 changes: 1 addition & 4 deletions tests/test_node_topology_toolbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@


@pytest.mark.parametrize("return_relation_list", [True, False])
def test_small_graph_metrics(return_relation_list: bool) -> None:
# Define a small graph with all the features tested by
# the node_topology_toolbox

def test_node_degree_summary(return_relation_list: bool) -> None:
# entity degrees statistics
res = kgtt.node_degree_summary(return_relation_list=return_relation_list)
assert np.allclose(res["h_degree"], [3, 1, 3])
Expand Down
5 changes: 1 addition & 4 deletions tests/test_relation_topology_toolbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@
kgtt = KGTopologyToolbox(df, head_column="H", relation_column="R", tail_column="T")


def test_small_graph_metrics() -> None:
# Define a small graph on five nodes with all the features tested by
# the relation_topology_toolbox

def test_aggregate_by_r() -> None:
dcs = kgtt.edge_degree_cardinality_summary(aggregate_by_r=True)
eps = kgtt.edge_pattern_summary(return_metapath_list=True, aggregate_by_r=True)

Expand Down

0 comments on commit 05e3ee3

Please sign in to comment.