@@ -314,17 +314,15 @@ async def _monitor_openai_server(
314314 # If there are no running or pending requests, send a health check
315315 if running_requests == 0 and pending_requests == 0 :
316316 try :
317- # Send a health check with a 5 second timeout
318- timeout = float (
319- os .environ .get ("ART_SERVER_MONITOR_TIMEOUT" , 5.0 )
320- )
321- # Send a health check with a 5 second timeout
322- await openai_client .models .retrieve (
317+ # Send a health check with a short timeout
318+ await openai_client .completions .create (
323319 model = model_name ,
324- timeout = timeout ,
320+ prompt = "Hi" ,
321+ max_tokens = 1 ,
322+ timeout = float (
323+ os .environ .get ("ART_SERVER_MONITOR_TIMEOUT" , 5.0 )
324+ ),
325325 )
326- # get the completion response, exit the loop
327- break
328326 except Exception as e :
329327 # If the server is sleeping, a failed health check is okay
330328 if await self ._services [model_name ].vllm_engine_is_sleeping ():
@@ -490,9 +488,9 @@ async def _train_model(
490488 num_gradient_steps = int (
491489 result .pop ("num_gradient_steps" , estimated_gradient_steps )
492490 )
493- assert num_gradient_steps == estimated_gradient_steps , (
494- f" num_gradient_steps { num_gradient_steps } != estimated_gradient_steps { estimated_gradient_steps } "
495- )
491+ assert (
492+ num_gradient_steps == estimated_gradient_steps
493+ ), f"num_gradient_steps { num_gradient_steps } != estimated_gradient_steps { estimated_gradient_steps } "
496494 results .append (result )
497495 yield {** result , "num_gradient_steps" : num_gradient_steps }
498496 pbar .update (1 )
0 commit comments