@@ -142,7 +142,10 @@ def pronto_to_multidigraph(
142142
143143
144144def multidigraph_to_digraph (
145- graph : nx .MultiDiGraph , rel_types : Optional [List [str ]] = None , reverse : bool = True
145+ graph : nx .MultiDiGraph ,
146+ rel_types : Optional [List [str ]] = None ,
147+ reverse : bool = True ,
148+ reduce : bool = False ,
146149) -> nx .DiGraph :
147150 """
148151 Convert a networkx MultiDiGraph to a DiGraph by aggregating edges across relationship types.
@@ -152,7 +155,13 @@ def multidigraph_to_digraph(
152155
153156 When rel_types is None, all relationship types are preserved. If rel_types is defined,
154157 then the MultiDiGraph is first filtered for edges with that key (relationship type).
158+
159+ If reduce is True, perform a transitive reduction on the DiGraph
160+ to produce a minimum equivalent graph that removes redundant relationships
161+ — i.e. those that are already captured by a more specific ancestral path.
162+ The default is reduce=False since the reduction can be a computationally expensive step.
155163 """
164+ logging .info (f"Received MultiDiGraph with { graph .number_of_edges ():,} edges." )
156165 if rel_types is not None :
157166 graph .remove_edges_from (
158167 [
@@ -161,11 +170,23 @@ def multidigraph_to_digraph(
161170 if key not in rel_types
162171 ]
163172 )
173+ logging .info (
174+ f"Filtered MultiDiGraph to { graph .number_of_edges ():,} edges of the following types: { rel_types } ."
175+ )
164176 if reverse :
165177 graph = graph .reverse (copy = True )
166178 digraph = nx .DiGraph (graph )
179+ if reduce :
180+ n_edges_before = digraph .number_of_edges ()
181+ digraph = nx .transitive_reduction (digraph )
182+ logging .info (
183+ f"Reduced DiGraph by removing { n_edges_before - digraph .number_of_edges ():,} redundant edges."
184+ )
167185 for source , target in digraph .edges (data = False ):
168186 digraph [source ][target ]["rel_types" ] = sorted (graph [source ][target ])
187+ logging .info (
188+ f"Converted MultiDiGraph to DiGraph with { digraph .number_of_nodes ():,} nodes and { digraph .number_of_edges ():,} edges."
189+ )
169190 return digraph
170191
171192
@@ -179,6 +200,7 @@ def read_gene_ontology(
179200 "negatively regulates" ,
180201 "positively regulates" ,
181202 ],
203+ reduce : bool = True ,
182204) -> NXOntology [str ]:
183205 """
184206 Load the Gene Ontology into NXOntology,
@@ -198,10 +220,14 @@ def read_gene_ontology(
198220 else :
199221 date .fromisoformat (release ) # check that release is a valid date
200222 url = f"http://release.geneontology.org/{ release } /ontology/{ source_file } "
223+ logging .info (f"Loading Gene Ontology into Pronto from <{ url } >." )
201224 go_pronto = Prontology (handle = url )
202225 go_multidigraph = pronto_to_multidigraph (go_pronto , default_rel_type = "is a" )
203226 go_digraph = multidigraph_to_digraph (
204- go_multidigraph , rel_types = rel_types , reverse = True
227+ go_multidigraph ,
228+ rel_types = rel_types ,
229+ reverse = True ,
230+ reduce = reduce ,
205231 )
206232 go_nxo : NXOntology [str ] = NXOntology (go_digraph )
207233 go_nxo .graph .graph ["source_url" ] = url
0 commit comments