Skip to content

Commit d7ca19f

Browse files
authored
Clean up logging for receptor down & unregistered cases (ansible#15990)
* Clean up logging when receptor not running
* Make logging more concise for unregistered case
* Silence another unwanted traceback
* Silence a few more tracebacks
1 parent a655a3f commit d7ca19f

File tree

1 file changed

+22
-5
lines changed

1 file changed

+22
-5
lines changed

awx/main/tasks/system.py

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -594,8 +594,16 @@ def inspect_established_receptor_connections(mesh_status):
594594
def inspect_execution_and_hop_nodes(instance_list):
595595
with advisory_lock('inspect_execution_and_hop_nodes_lock', wait=False):
596596
node_lookup = {inst.hostname: inst for inst in instance_list}
597-
ctl = get_receptor_ctl()
598-
mesh_status = ctl.simple_command('status')
597+
try:
598+
ctl = get_receptor_ctl()
599+
except FileNotFoundError:
600+
logger.error('Receptor daemon not running, skipping execution node check')
601+
return
602+
try:
603+
mesh_status = ctl.simple_command('status')
604+
except ValueError as exc:
605+
logger.error(f'Error running receptorctl status command, error: {str(exc)}')
606+
return
599607

600608
inspect_established_receptor_connections(mesh_status)
601609

@@ -784,7 +792,8 @@ def _heartbeat_instance_management():
784792
logger.warning(f'Recreated instance record {this_inst.hostname} after unexpected removal')
785793
this_inst.local_health_check()
786794
else:
787-
raise RuntimeError("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID))
795+
logger.error("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID))
796+
return None, None, None
788797

789798
return this_inst, instance_list, lost_instances
790799

@@ -864,8 +873,16 @@ def awx_receptor_workunit_reaper():
864873
if not settings.RECEPTOR_RELEASE_WORK:
865874
return
866875
logger.debug("Checking for unreleased receptor work units")
867-
receptor_ctl = get_receptor_ctl()
868-
receptor_work_list = receptor_ctl.simple_command("work list")
876+
try:
877+
receptor_ctl = get_receptor_ctl()
878+
except FileNotFoundError:
879+
logger.info('Receptorctl sockfile not found for workunit reaper, doing nothing')
880+
return
881+
try:
882+
receptor_work_list = receptor_ctl.simple_command("work list")
883+
except ValueError as exc:
884+
logger.info(f'Error getting work list for workunit reaper, error: {str(exc)}')
885+
return
869886

870887
unit_ids = [id for id in receptor_work_list]
871888
jobs_with_unreleased_receptor_units = UnifiedJob.objects.filter(work_unit_id__in=unit_ids).exclude(status__in=ACTIVE_STATES)

0 commit comments

Comments
 (0)