1717from tests .utils .constants import FAULT_TOLERANCE_MODEL_NAME
1818from tests .utils .managed_process import ManagedProcess
1919from tests .utils .payloads import check_health_generate , check_models_api
20- from tests .utils .port_utils import get_free_port
20+ from tests .utils .port_utils import get_free_ports
2121
2222logger = logging .getLogger (__name__ )
2323
@@ -151,15 +151,17 @@ def test_request_cancellation_trtllm_aggregated(
151151 on the worker side in aggregated (prefill_and_decode) mode.
152152 """
153153
154+ # Allocate all ports upfront to avoid race conditions
155+ frontend_port , system_port = get_free_ports (2 )
156+
154157 # Step 1: Start the frontend
155- frontend_port = get_free_port ()
156158 with DynamoFrontendProcess (request , frontend_port ) as frontend :
157159 logger .info ("Frontend started successfully" )
158160
159161 # Step 2: Start an aggregated worker
160162 with DynamoWorkerProcess (
161163 request ,
162- system_port = get_free_port () ,
164+ system_port = system_port ,
163165 frontend_port = frontend_port ,
164166 mode = "prefill_and_decode" ,
165167 ) as worker :
@@ -234,15 +236,17 @@ def test_request_cancellation_trtllm_disagg_decode_cancel(
234236 on the decode worker side in a disaggregated setup.
235237 """
236238
239+ # Allocate all ports upfront to avoid race conditions
240+ frontend_port , prefill_system_port , decode_system_port = get_free_ports (3 )
241+
237242 # Step 1: Start the frontend
238- frontend_port = get_free_port ()
239243 with DynamoFrontendProcess (request , frontend_port ) as frontend :
240244 logger .info ("Frontend started successfully" )
241245
242246 # Step 2: Start the prefill worker
243247 with DynamoWorkerProcess (
244248 request ,
245- system_port = get_free_port () ,
249+ system_port = prefill_system_port ,
246250 frontend_port = frontend_port ,
247251 mode = "prefill" ,
248252 strategy = "decode_first" ,
@@ -252,7 +256,7 @@ def test_request_cancellation_trtllm_disagg_decode_cancel(
252256 # Step 3: Start the decode worker
253257 with DynamoWorkerProcess (
254258 request ,
255- system_port = get_free_port () ,
259+ system_port = decode_system_port ,
256260 frontend_port = frontend_port ,
257261 mode = "decode" ,
258262 strategy = "decode_first" ,
@@ -325,15 +329,17 @@ def test_request_cancellation_trtllm_disagg_prefill_cancel(
325329 Since the request is cancelled before prefill completes, the decode worker never receives it.
326330 """
327331
332+ # Allocate all ports upfront to avoid race conditions
333+ frontend_port , prefill_system_port , decode_system_port = get_free_ports (3 )
334+
328335 # Step 1: Start the frontend
329- frontend_port = get_free_port ()
330336 with DynamoFrontendProcess (request , frontend_port ) as frontend :
331337 logger .info ("Frontend started successfully" )
332338
333339 # Step 2: Start the prefill worker
334340 with DynamoWorkerProcess (
335341 request ,
336- system_port = get_free_port () ,
342+ system_port = prefill_system_port ,
337343 frontend_port = frontend_port ,
338344 mode = "prefill" ,
339345 strategy = "decode_first" ,
@@ -343,7 +349,7 @@ def test_request_cancellation_trtllm_disagg_prefill_cancel(
343349 # Step 3: Start the decode worker
344350 with DynamoWorkerProcess (
345351 request ,
346- system_port = get_free_port () ,
352+ system_port = decode_system_port ,
347353 frontend_port = frontend_port ,
348354 mode = "decode" ,
349355 strategy = "decode_first" ,
@@ -420,15 +426,17 @@ def test_request_cancellation_trtllm_prefill_first_prefill_cancel(
420426 on the prefill worker side in a disaggregated setup using prefill_first strategy.
421427 """
422428
429+ # Allocate all ports upfront to avoid race conditions
430+ frontend_port , decode_system_port , prefill_system_port = get_free_ports (3 )
431+
423432 # Step 1: Start the frontend
424- frontend_port = get_free_port ()
425433 with DynamoFrontendProcess (request , frontend_port ) as frontend :
426434 logger .info ("Frontend started successfully" )
427435
428436 # Step 2: Start the decode worker
429437 with DynamoWorkerProcess (
430438 request ,
431- system_port = get_free_port () ,
439+ system_port = decode_system_port ,
432440 frontend_port = frontend_port ,
433441 mode = "decode" ,
434442 strategy = "prefill_first" ,
@@ -438,7 +446,7 @@ def test_request_cancellation_trtllm_prefill_first_prefill_cancel(
438446 # Step 3: Start the prefill worker
439447 with DynamoWorkerProcess (
440448 request ,
441- system_port = get_free_port () ,
449+ system_port = prefill_system_port ,
442450 frontend_port = frontend_port ,
443451 mode = "prefill" ,
444452 strategy = "prefill_first" ,
@@ -502,15 +510,17 @@ def test_request_cancellation_trtllm_prefill_first_remote_decode_cancel(
502510 on both the prefill and decode workers in a disaggregated setup using prefill_first strategy.
503511 """
504512
513+ # Allocate all ports upfront to avoid race conditions
514+ frontend_port , decode_system_port , prefill_system_port = get_free_ports (3 )
515+
505516 # Step 1: Start the frontend
506- frontend_port = get_free_port ()
507517 with DynamoFrontendProcess (request , frontend_port ) as frontend :
508518 logger .info ("Frontend started successfully" )
509519
510520 # Step 2: Start the decode worker
511521 with DynamoWorkerProcess (
512522 request ,
513- system_port = get_free_port () ,
523+ system_port = decode_system_port ,
514524 frontend_port = frontend_port ,
515525 mode = "decode" ,
516526 strategy = "prefill_first" ,
@@ -520,7 +530,7 @@ def test_request_cancellation_trtllm_prefill_first_remote_decode_cancel(
520530 # Step 3: Start the prefill worker
521531 with DynamoWorkerProcess (
522532 request ,
523- system_port = get_free_port () ,
533+ system_port = prefill_system_port ,
524534 frontend_port = frontend_port ,
525535 mode = "prefill" ,
526536 strategy = "prefill_first" ,
0 commit comments