Skip to content

Commit f3114ac

Browse files
author
Jeny Sadadia
committed
Fix re-subscription logic in pipeline services
When the API helper functions `receive_event_node` or `receive_event_data` fail, the cause may be a problem extracting node data from the event rather than a problem with the subscription itself. Check the exception message before re-subscribing. Fixes: f04f6d0 ("(events): Add re-subscribe mechanism") Signed-off-by: Jeny Sadadia <[email protected]>
1 parent 7a007b4 commit f3114ac

File tree

4 files changed

+37
-31
lines changed

4 files changed

+37
-31
lines changed

src/monitor.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,15 @@ def _run(self, sub_id):
5959
try:
6060
event = self._api.receive_event(sub_id)
6161
except Exception as e:
62-
self.log.error(f"Error receiving event: {e}, re-subscribing in 10 seconds")
63-
time.sleep(10)
64-
sub_id = self._api.subscribe('node')
65-
subscribe_retries += 1
66-
if subscribe_retries > 3:
67-
self.log.error("Failed to re-subscribe to node events")
68-
return False
62+
self.log.error(f"Error receiving event: {e}")
63+
if "404 Client Error" in str(e):
64+
self.log.error(f"Error receiving event: {e}. Re-subscribing...")
65+
sub_id = self._setup(None)
66+
subscribe_retries += 1
67+
if subscribe_retries > 3:
68+
self.log.error("Failed to re-subscribe to node events")
69+
return False
70+
continue
6971
continue
7072
subscribe_retries = 0
7173
obj = event.data

src/scheduler.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -382,18 +382,19 @@ def _run(self, sub_id):
382382
try:
383383
event = self._api_helper.receive_event_data(sub_id, block=False)
384384
except Exception as e:
385-
self.log.error(f"Error receiving event: {e}, re-subscribing in 10 seconds")
386-
time.sleep(10)
387-
sub_id = self._api.subscribe('node')
388-
subscribe_retries += 1
389-
if subscribe_retries > 3:
390-
self.log.error("Failed to re-subscribe to node events")
391-
return False
385+
self.log.error(f"Error receiving event: {e}")
386+
if "404 Client Error" in str(e):
387+
self.log.error(f"Error receiving event: {e}. Re-subscribing...")
388+
sub_id = self._setup(None)
389+
subscribe_retries += 1
390+
if subscribe_retries > 3:
391+
self.log.error("Failed to re-subscribe to node events")
392+
return False
393+
continue
392394
continue
393395
if not event:
394396
# If we received a keep-alive event, just continue
395397
continue
396-
subscribe_retries = 0
397398
for job, runtime, platform, rules in self._sched.get_schedule(event):
398399
input_node = self._api.node.get(event['id'])
399400
jobfilter = event.get('jobfilter')

src/send_kcidb.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -702,13 +702,15 @@ def _run(self, context):
702702
try:
703703
node, is_hierarchy = self._api_helper.receive_event_node(context['sub_id'])
704704
except Exception as e:
705-
self.log.error(f"Error receiving event: {e}, re-subscribing in 10 seconds")
706-
time.sleep(10)
707-
context['sub_id'] = self._api_helper.subscribe_filters(self._filters, promiscuous=True)
708-
subscribe_retries += 1
709-
if subscribe_retries > 3:
710-
self.log.error("Failed to re-subscribe to node events")
711-
return False
705+
self.log.error(f"Error receiving event: {e}")
706+
if "404 Client Error" in str(e):
707+
self.log.error(f"Error receiving event: {e}. Re-subscribing...")
708+
context['sub_id'] = self._api_helper.subscribe_filters(self._filters, promiscuous=True)
709+
subscribe_retries += 1
710+
if subscribe_retries > 3:
711+
self.log.error("Failed to re-subscribe to node events")
712+
return False
713+
continue
712714
continue
713715
subscribe_retries = 0
714716
self.log.info(f"Processing event node: {node['id']}")

src/tarball.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -215,17 +215,18 @@ def _run(self, sub_id):
215215
try:
216216
checkout_node, _ = self._api_helper.receive_event_node(sub_id)
217217
except Exception as e:
218-
self.log.error(f"Error receiving event: {e}, re-subscribing in 10 seconds")
219-
time.sleep(10)
220-
# try to resubscribe
221-
sub_id = self._api_helper.subscribe_filters(self._filters)
222-
subscribe_retries += 1
223-
if subscribe_retries > 3:
224-
self.log.error("Failed to re-subscribe to checkout events")
225-
return False
218+
self.log.error(f"Error receiving event: {e}")
219+
if "404 Client Error" in str(e):
220+
self.log.error(f"Error receiving event: {e}. Re-subscribing...")
221+
sub_id = self._setup(None)
222+
subscribe_retries += 1
223+
if subscribe_retries > 3:
224+
self.log.error("Failed to re-subscribe to checkout events")
225+
return False
226+
continue
226227
continue
227-
subscribe_retries = 0
228228

229+
subscribe_retries = 0
229230
build_config = self._find_build_config(checkout_node)
230231
if build_config is None:
231232
continue

0 commit comments

Comments
 (0)