Skip to content

Commit fc2e92f

Browse files
authored
multidigraph_to_digraph transitive reduction option (#16)
Merges #16. Background information: https://twitter.com/larsjuhljensen/status/1450188835032375300 — ontologies such as GO can be reduced when collapsing multiple relationship types into a single-relationship-type DiGraph. Also improves multidigraph_to_digraph logging.
1 parent b7679f2 commit fc2e92f

File tree

3 files changed

+37
-3
lines changed

3 files changed

+37
-3
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ Counter({'is a': 71509,
124124
'regulates': 3216,
125125
'negatively regulates': 2768,
126126
'positively regulates': 2756})
127-
>>> go_digraph = multidigraph_to_digraph(go_multidigraph)
127+
>>> go_digraph = multidigraph_to_digraph(go_multidigraph, reduce=True)
128128
>>> go_nxo = NXOntology(go_digraph)
129129
>>> # Notice the similarity increases due to the full set of edges
130130
>>> round(go_nxo.similarity("GO:0042552", "GO:0022008").lin, 3)

nxontology/imports.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,10 @@ def pronto_to_multidigraph(
142142

143143

144144
def multidigraph_to_digraph(
145-
graph: nx.MultiDiGraph, rel_types: Optional[List[str]] = None, reverse: bool = True
145+
graph: nx.MultiDiGraph,
146+
rel_types: Optional[List[str]] = None,
147+
reverse: bool = True,
148+
reduce: bool = False,
146149
) -> nx.DiGraph:
147150
"""
148151
Convert a networkx MultiDiGraph to a DiGraph by aggregating edges across relationship types.
@@ -152,7 +155,13 @@ def multidigraph_to_digraph(
152155
153156
When rel_types is None, all relationship types are preserved. If rel_types is defined,
154157
then the MultiDiGraph is first filtered for edges with that key (relationship type).
158+
159+
If reduce is True, perform a transitive reduction on the DiGraph
160+
to produce a minimum equivalent graph that removes redundant relationships
161+
— i.e. those that are already captured by a more specific ancestral path.
162+
The default is reduce=False since the reduction can be a computationally expensive step.
155163
"""
164+
logging.info(f"Received MultiDiGraph with {graph.number_of_edges():,} edges.")
156165
if rel_types is not None:
157166
graph.remove_edges_from(
158167
[
@@ -161,11 +170,23 @@ def multidigraph_to_digraph(
161170
if key not in rel_types
162171
]
163172
)
173+
logging.info(
174+
f"Filtered MultiDiGraph to {graph.number_of_edges():,} edges of the following types: {rel_types}."
175+
)
164176
if reverse:
165177
graph = graph.reverse(copy=True)
166178
digraph = nx.DiGraph(graph)
179+
if reduce:
180+
n_edges_before = digraph.number_of_edges()
181+
digraph = nx.transitive_reduction(digraph)
182+
logging.info(
183+
f"Reduced DiGraph by removing {n_edges_before - digraph.number_of_edges():,} redundant edges."
184+
)
167185
for source, target in digraph.edges(data=False):
168186
digraph[source][target]["rel_types"] = sorted(graph[source][target])
187+
logging.info(
188+
f"Converted MultiDiGraph to DiGraph with {digraph.number_of_nodes():,} nodes and {digraph.number_of_edges():,} edges."
189+
)
169190
return digraph
170191

171192

@@ -179,6 +200,7 @@ def read_gene_ontology(
179200
"negatively regulates",
180201
"positively regulates",
181202
],
203+
reduce: bool = True,
182204
) -> NXOntology[str]:
183205
"""
184206
Load the Gene Ontology into NXOntology,
@@ -198,10 +220,14 @@ def read_gene_ontology(
198220
else:
199221
date.fromisoformat(release) # check that release is a valid date
200222
url = f"http://release.geneontology.org/{release}/ontology/{source_file}"
223+
logging.info(f"Loading Gene Ontology into Pronto from <{url}>.")
201224
go_pronto = Prontology(handle=url)
202225
go_multidigraph = pronto_to_multidigraph(go_pronto, default_rel_type="is a")
203226
go_digraph = multidigraph_to_digraph(
204-
go_multidigraph, rel_types=rel_types, reverse=True
227+
go_multidigraph,
228+
rel_types=rel_types,
229+
reverse=True,
230+
reduce=reduce,
205231
)
206232
go_nxo: NXOntology[str] = NXOntology(go_digraph)
207233
go_nxo.graph.graph["source_url"] = url

nxontology/tests/imports_test.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,11 @@ def test_read_gene_ontology():
9090
== "http://release.geneontology.org/2021-02-01/ontology/go-basic.json.gz"
9191
)
9292
assert "regulates" in nxo.graph["GO:0006310"]["GO:0000018"]["rel_types"]
93+
# Transitive reduction should remove this edge
94+
# from "defense response to insect" to "negative regulation of defense response to insect"
95+
# since it is redundant with a more specific ancestral path.
96+
# https://github.com/related-sciences/nxontology/pull/16
97+
assert not nxo.graph.has_edge("GO:0002213", "GO:1900366")
98+
# GO:0002213 --> GO:2000068 --> GO:1900366 is more specific
99+
assert nxo.graph.has_edge("GO:0002213", "GO:2000068")
100+
assert nxo.graph.has_edge("GO:2000068", "GO:1900366")

0 commit comments

Comments
 (0)