@@ -594,8 +594,16 @@ def inspect_established_receptor_connections(mesh_status):
594594def inspect_execution_and_hop_nodes (instance_list ):
595595 with advisory_lock ('inspect_execution_and_hop_nodes_lock' , wait = False ):
596596 node_lookup = {inst .hostname : inst for inst in instance_list }
597- ctl = get_receptor_ctl ()
598- mesh_status = ctl .simple_command ('status' )
597+ try :
598+ ctl = get_receptor_ctl ()
599+ except FileNotFoundError :
600+ logger .error ('Receptor daemon not running, skipping execution node check' )
601+ return
602+ try :
603+ mesh_status = ctl .simple_command ('status' )
604+ except ValueError as exc :
605+ logger .error (f'Error running receptorctl status command, error: { str (exc )} ' )
606+ return
599607
600608 inspect_established_receptor_connections (mesh_status )
601609
@@ -784,7 +792,8 @@ def _heartbeat_instance_management():
784792 logger .warning (f'Recreated instance record { this_inst .hostname } after unexpected removal' )
785793 this_inst .local_health_check ()
786794 else :
787- raise RuntimeError ("Cluster Host Not Found: {}" .format (settings .CLUSTER_HOST_ID ))
795+ logger .error ("Cluster Host Not Found: {}" .format (settings .CLUSTER_HOST_ID ))
796+ return None , None , None
788797
789798 return this_inst , instance_list , lost_instances
790799
@@ -864,8 +873,16 @@ def awx_receptor_workunit_reaper():
864873 if not settings .RECEPTOR_RELEASE_WORK :
865874 return
866875 logger .debug ("Checking for unreleased receptor work units" )
867- receptor_ctl = get_receptor_ctl ()
868- receptor_work_list = receptor_ctl .simple_command ("work list" )
876+ try :
877+ receptor_ctl = get_receptor_ctl ()
878+ except FileNotFoundError :
879+ logger .info ('Receptorctl sockfile not found for workunit reaper, doing nothing' )
880+ return
881+ try :
882+ receptor_work_list = receptor_ctl .simple_command ("work list" )
883+ except ValueError as exc :
884+ logger .info (f'Error getting work list for workunit reaper, error: { str (exc )} ' )
885+ return
869886
870887 unit_ids = [id for id in receptor_work_list ]
871888 jobs_with_unreleased_receptor_units = UnifiedJob .objects .filter (work_unit_id__in = unit_ids ).exclude (status__in = ACTIVE_STATES )
0 commit comments