Skip to content

Commit

Permalink
fixed tests for DFS
Browse files Browse the repository at this point in the history
  • Loading branch information
cmungall committed Dec 21, 2023
1 parent 8ccb570 commit 90cb757
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 19 deletions.
91 changes: 84 additions & 7 deletions src/oaklib/utilities/obograph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,19 +182,25 @@ def filter_by_predicates(graph: Graph, predicates: List[PRED_CURIE], graph_id: s


def as_multi_digraph(
graph: Graph, reverse: bool = True, filter_reflexive: bool = True
graph: Graph,
reverse: bool = True,
filter_reflexive: bool = True,
predicates: Optional[List[PRED_CURIE]] = None,
) -> nx.MultiDiGraph:
"""
Convert to a networkx :class:`.MultiDiGraph`
:param graph: OBOGraph
:param reverse:
:return:
:param filter_reflexive: if true, remove edges where sub == obj
:param predicates: if not None, only include edges with these predicates
:return: networkx MultiDiGraph
"""
mdg = nx.MultiDiGraph()
for edge in graph.edges:
if filter_reflexive and reflexive(edge):
continue
if predicates is not None and edge.pred not in predicates:
continue
edge_attrs = {"predicate": edge.pred}
if reverse:
mdg.add_edge(edge.obj, edge.sub, **edge_attrs)
Expand Down Expand Up @@ -343,13 +349,72 @@ def shortest_paths(
logging.info(f"No path between {start_curie} and {end_curie}")


def depth_first_ordering(graph: Graph) -> List[CURIE]:
def edges_from_tree(tree: dict, pred=IS_A) -> List[Edge]:
    """
    Flatten a nested parent-to-children mapping into a list of edges.

    Each key of ``tree`` is a parent node. Its value is either a list of
    leaf children, or a dict that maps each child to that child's own
    children (again a list or a dict). Every edge points from the child
    (``sub``) to its parent (``obj``); node identifiers are converted
    with ``str``.

    >>> from oaklib.utilities.obograph_utils import edges_from_tree
    >>> for e in edges_from_tree({1: {2: [3, 4]}}):
    ...     print(e.sub, e.obj)
    2 1
    3 2
    4 2

    :param tree: nested dict of parent -> children
    :param pred: predicate assigned to every generated edge; defaults to IS_A
    :return: list of edges; a child's edge to its parent precedes the
        edges of that child's own subtree
    """
    edges = []

    def _edge(s: Any, o: Any) -> None:
        # Nodes may be arbitrary objects (ints in the doctest); edges store strings.
        edges.append(Edge(sub=str(s), pred=pred, obj=str(o)))

    for parent, children in tree.items():
        if isinstance(children, list):
            # leaf nodes
            for child in children:
                _edge(child, parent)
        else:
            # non-leaf nodes: link the child, then recurse into its subtree
            for child, grandchildren in children.items():
                _edge(child, parent)
                edges.extend(edges_from_tree({child: grandchildren}, pred=pred))
    return edges


def depth_first_ordering(
    graph: Graph, predicates: Optional[List[PRED_CURIE]] = None
) -> List[CURIE]:
    """
    Return a depth-first ordering of the nodes in the graph.

    The graph is converted with ``as_digraph`` and a depth-first pre-order
    traversal is started from every node that has no incoming edge. Each
    node is reported once, at the position where it is first reached.
    Nodes that are not reachable from any such root are omitted.

    >>> from oaklib.datamodels.obograph import Graph
    >>> from oaklib.utilities.obograph_utils import depth_first_ordering, edges_from_tree
    >>> ## Chains have a deterministic DF ordering
    >>> edges = edges_from_tree({1: {2: [3]}})
    >>> list(depth_first_ordering(Graph("test", edges=edges)))
    ['1', '2', '3']
    >>> list(depth_first_ordering(Graph("test", edges=list(reversed(edges)))))
    ['1', '2', '3']
    >>> edges2 = edges_from_tree({5: [3, 4]})
    >>> ordered = list(depth_first_ordering(Graph("test", edges=edges + edges2)))
    >>> assert ordered.index('1') < ordered.index('2')

    :param graph: OBOGraph
    :param predicates: forwarded unchanged to ``as_digraph``
    :return: node identifiers in depth-first pre-order
    """
    g = as_digraph(graph, predicates=predicates)
    roots = [n for n, d in g.in_degree() if d == 0]
    ordered = []
    # Set mirror of ``ordered``: keeps the first-seen check O(1) instead of
    # rescanning the whole list for every visited node.
    seen = set()
    for root in roots:
        for n in nx.dfs_preorder_nodes(g, root):
            if n not in seen:
                seen.add(n)
                ordered.append(n)
    return ordered
six = index_graph_edges_by_subject(graph)
oix = index_graph_edges_by_object(graph)
stack = list(set(oix.keys()) - set(six.keys()))
Expand Down Expand Up @@ -544,8 +609,20 @@ def index_graph_edges_by_predicate(graph: Graph) -> Dict[CURIE, List[Edge]]:
return d


def topological_sort(graph: Graph, predicates: List[PRED_CURIE]) -> List[CURIE]:
dg = as_multi_digraph(graph)
def topological_sort(
    graph: Graph, predicates: Optional[List[PRED_CURIE]] = None
) -> List[CURIE]:
    """
    Returns a topological sort of the graph.

    A topological sort is a nonunique permutation of the nodes of a
    directed graph such that an edge from u to v implies that u
    appears before v in the topological sort order. This ordering is
    valid only if the graph has no directed cycles.

    The graph is built with ``as_multi_digraph`` using its default
    ``reverse=True``, which adds each edge as obj -> sub, so the object
    of an edge appears before its subject in the result.

    :param graph: OBOGraph
    :param predicates: if not None, only edges with these predicates are used
    :return: list of node identifiers in topological order
    :raises networkx.NetworkXUnfeasible: if the graph contains a directed cycle
    """
    dg = as_multi_digraph(graph, predicates=predicates)
    # nx.topological_sort yields lazily; materialize it so the result
    # matches the declared List return type (indexable, re-iterable).
    return list(nx.topological_sort(dg))


Expand Down
56 changes: 44 additions & 12 deletions tests/test_utilities/test_obograph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@
IMBO,
INPUT_DIR,
INTRACELLULAR,
MEMBRANE,
NUCLEAR_MEMBRANE,
NUCLEUS,
ORGANELLE,
OUTPUT_DIR,
PLASMA_MEMBRANE,
VACUOLE,
)

Expand Down Expand Up @@ -158,6 +160,11 @@ def test_trim_ancestors(self):
raise NotImplementedError

def test_shortest_paths(self):
"""
Test that the shortest paths are correct.
:return:
"""
oi = self.oi
both = [IS_A, PART_OF]
hi = 1.0
Expand Down Expand Up @@ -188,17 +195,42 @@ def test_shortest_paths(self):
self.assertNotIn(x, path)

def test_depth_first_ordering(self):
"""
Test that the depth first ordering of the graph is correct.
Note that DF ordering may be non-deterministic if the graph is not a tree.
This test conservatively checks conditions that are guaranteed to hold
even with DAGs
:return:
"""
oi = self.oi
graph = oi.descendant_graph([CELLULAR_COMPONENT], predicates=[IS_A, PART_OF])
ordered = depth_first_ordering(graph)
self.assertEqual(ordered[0], CELLULAR_COMPONENT)
expected_order = [
(CELLULAR_COMPONENT, CELLULAR_ANATOMICAL_ENTITY),
(CELLULAR_ANATOMICAL_ENTITY, ORGANELLE),
(ORGANELLE, NUCLEUS),
# (CYTOPLASM, NUCLEUS),
(IMBO, NUCLEUS),
(NUCLEUS, NUCLEAR_MEMBRANE),
expected = [
(
[CELLULAR_COMPONENT],
[IS_A, PART_OF],
[
(CELLULAR_COMPONENT, CELLULAR_ANATOMICAL_ENTITY),
(CELLULAR_ANATOMICAL_ENTITY, ORGANELLE),
(CELLULAR_ANATOMICAL_ENTITY, NUCLEUS),
],
),
(
[CELLULAR_COMPONENT],
[IS_A],
[
(CELLULAR_COMPONENT, CELLULAR_ANATOMICAL_ENTITY),
(CELLULAR_ANATOMICAL_ENTITY, ORGANELLE),
(CELLULAR_ANATOMICAL_ENTITY, NUCLEUS),
(CELLULAR_ANATOMICAL_ENTITY, MEMBRANE),
(MEMBRANE, PLASMA_MEMBRANE),
],
),
]
for parent, child in expected_order:
self.assertLess(ordered.index(parent), ordered.index(child), f"{parent} -> {child}")
for starts, preds, expected_order in expected:
graph = oi.descendant_graph(starts, predicates=preds)
ordered = depth_first_ordering(graph)
if len(starts) == 1:
self.assertEqual(ordered[0], starts[0])
for parent, child in expected_order:
self.assertLess(ordered.index(parent), ordered.index(child), f"{parent} -> {child}")

0 comments on commit 90cb757

Please sign in to comment.