Skip to content

Commit b395a8f

Browse files
committed
test(hop): add edge cases for ranges/labels
1 parent 9bfca35 commit b395a8f

File tree

2 files changed

+65
-14
lines changed

2 files changed

+65
-14
lines changed

graphistry/compute/hop.py

Lines changed: 10 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -443,28 +443,24 @@ def resolve_label_col(requested: Optional[str], df, default_base: str) -> Option
443443
candidate = f"{requested}_{counter}"
444444
return candidate
445445

446-
track_hops = (
447-
label_nodes is not None
448-
or label_edges is not None
449-
or label_seed
450-
or resolved_min_hops > 1
451-
or output_min is not None
452-
or output_max is not None
453-
)
446+
track_hops = any([
447+
label_nodes,
448+
label_edges,
449+
label_seed,
450+
resolved_min_hops > 1,
451+
output_min is not None,
452+
output_max is not None,
453+
])
454454
track_node_hops = track_hops or label_nodes is not None or label_seed
455455
track_edge_hops = track_hops or label_edges is not None
456456

457457
edge_hop_col = None
458458
node_hop_col = None
459459
if track_edge_hops:
460460
edge_hop_col = resolve_label_col(label_edges, edges_indexed, '_hop')
461-
if track_node_hops:
462-
node_hop_col = resolve_label_col(label_nodes, g2._nodes, '_hop')
463-
seen_edge_marker_col = None
464-
seen_node_marker_col = None
465-
if track_edge_hops:
466461
seen_edge_marker_col = generate_safe_column_name('__gfql_edge_seen__', edges_indexed, prefix='__seen_', suffix='__')
467462
if track_node_hops:
463+
node_hop_col = resolve_label_col(label_nodes, g2._nodes, '_hop')
468464
seen_node_marker_col = generate_safe_column_name('__gfql_node_seen__', g2._nodes, prefix='__seen_', suffix='__')
469465

470466
wave_front = starting_nodes[[g2._node]][:0]
@@ -502,7 +498,7 @@ def resolve_label_col(requested: Optional[str], df, default_base: str) -> Option
502498
if not to_fixed_point and resolved_max_hops is not None and current_hop >= resolved_max_hops:
503499
break
504500

505-
current_hop = current_hop + 1
501+
current_hop += 1
506502

507503
if debugging_hop and logger.isEnabledFor(logging.DEBUG):
508504
logger.debug('~~~~~~~~~~ LOOP STEP BEGIN ~~~~~~~~~~~')

graphistry/tests/test_compute_hops.py

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import pandas as pd
2+
import graphistry
23
from common import NoAuthTestCase
34
from functools import lru_cache
45

@@ -231,6 +232,60 @@ def test_hop_output_slice(self):
231232
assert set(zip(g2._edges['s'], g2._edges['d'])) == {('b', 'c')}
232233
assert set(g2._edges['edge_hop'].to_list()) == {2}
233234

235+
def test_hop_cycle_min_gt_one(self):
236+
# Cycle a->b->c->a: ensure min_hops > 1 does not loop forever and that each label keeps the earliest hop at which it was assigned
237+
edges = pd.DataFrame({'s': ['a', 'b', 'c'], 'd': ['b', 'c', 'a']})
238+
g = graphistry.edges(edges, 's', 'd').nodes(pd.DataFrame({'id': ['a', 'b', 'c']}), 'id')
239+
seeds = pd.DataFrame({g._node: ['a']})
240+
g2 = g.hop(seeds, min_hops=2, max_hops=3, label_nodes='hop', label_edges='edge_hop')
241+
assert set(zip(g2._edges['s'], g2._edges['d'])) == {('b', 'c'), ('c', 'a')}
242+
node_hops = dict(zip(g2._nodes[g._node], g2._nodes['hop']))
243+
assert node_hops['a'] == 3 # first return to seed at hop 3
244+
assert node_hops.get('c') == 2
245+
assert set(g2._edges['edge_hop']) == {2, 3}
246+
247+
def test_hop_undirected_min_gt_one(self):
248+
edges = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
249+
g = graphistry.edges(edges, 's', 'd').nodes(pd.DataFrame({'id': ['a', 'b', 'c']}), 'id')
250+
seeds = pd.DataFrame({g._node: ['a']})
251+
g2 = g.hop(seeds, direction='undirected', min_hops=2, max_hops=3, label_nodes='hop', label_edges='edge_hop')
252+
assert set(zip(g2._edges['s'], g2._edges['d'])) == {('b', 'c')}
253+
assert set(g2._edges['edge_hop']) == {2}
254+
node_hops = dict(zip(g2._nodes[g._node], g2._nodes['hop']))
255+
assert node_hops.get('c') == 2
256+
257+
def test_hop_label_collision_suffix(self):
258+
# A pre-existing 'hop' node column must be preserved; the new node label is written to a suffixed column ('hop_1')
259+
g = simple_chain_graph()
260+
seeds = pd.DataFrame({g._node: ['a']})
261+
g_existing = g.nodes(g._nodes.assign(hop='keep_me'))
262+
g2 = g_existing.hop(seeds, min_hops=1, max_hops=2, label_nodes='hop', label_edges='hop')
263+
assert 'hop' in g2._nodes.columns and 'hop_1' in g2._nodes.columns
264+
assert set(g2._edges.columns) & {'hop', 'hop_1'} == {'hop'} # edge label is not suffixed: edges carry 'hop' but not 'hop_1'
265+
assert 'keep_me' in set(g2._nodes['hop'])
266+
267+
def test_hop_seed_output_slice_with_label_seed(self):
268+
g = simple_chain_graph()
269+
seeds = pd.DataFrame({g._node: ['a']})
270+
g2 = g.hop(seeds, min_hops=1, max_hops=3, output_min=2, output_max=3, label_nodes='hop', label_seed=True, drop_outside=False)
271+
# With drop_outside=False, seeds are kept and labeled hop 0 even though they fall outside the output slice
272+
node_hops = dict(zip(g2._nodes[g._node], g2._nodes['hop']))
273+
assert node_hops['a'] == 0 and node_hops['c'] == 2 and node_hops['d'] == 3
274+
275+
def test_hop_call_path_new_params(self):
276+
g = simple_chain_graph()
277+
seeds = pd.DataFrame({g._node: ['a']})
278+
payload = {'type': 'Call', 'function': 'hop', 'params': {
279+
'nodes': seeds,
280+
'min_hops': 1,
281+
'max_hops': 2,
282+
'label_nodes': 'hop',
283+
'label_edges': 'edge_hop'
284+
}}
285+
g2 = g.gfql([payload])
286+
assert set(g2._nodes['hop']) == {1, 2}
287+
assert set(g2._edges['edge_hop']) == {1, 2}
288+
234289
class TestComputeHopMixinQuery(NoAuthTestCase):
235290

236291
def test_hop_source_query(self):

0 commit comments

Comments
 (0)