Skip to content

Commit 3d35006

Browse files
committed
Fix port allocation race conditions in cancellation tests
Signed-off-by: Keiven Chang <[email protected]>
1 parent 1c218db commit 3d35006

File tree

3 files changed: 51 additions (+51) and 30 deletions (-30).

tests/fault_tolerance/cancellation/test_sglang.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME
1818
from tests.utils.managed_process import ManagedProcess
1919
from tests.utils.payloads import check_health_generate, check_models_api
20-
from tests.utils.port_utils import get_free_port
20+
from tests.utils.port_utils import get_free_ports
2121

2222
logger = logging.getLogger(__name__)
2323

@@ -168,15 +168,18 @@ def test_request_cancellation_sglang_aggregated(
168168
See: https://github.com/sgl-project/sglang/issues/11139
169169
"""
170170
logger.info("Sanity check if latest test is getting executed")
171+
172+
# Allocate all ports upfront to avoid race conditions
173+
frontend_port, system_port = get_free_ports(2)
174+
171175
# Step 1: Start the frontend
172-
frontend_port = get_free_port()
173176
with DynamoFrontendProcess(request, frontend_port) as frontend:
174177
logger.info("Frontend started successfully")
175178

176179
# Step 2: Start an aggregated worker
177180
with DynamoWorkerProcess(
178181
request,
179-
system_port=get_free_port(),
182+
system_port=system_port,
180183
frontend_port=frontend_port,
181184
mode="agg",
182185
) as worker:
@@ -261,15 +264,17 @@ def test_request_cancellation_sglang_decode_cancel(
261264
Note: This test requires 2 GPUs to run decode and prefill workers on separate GPUs.
262265
"""
263266

267+
# Allocate all ports upfront to avoid race conditions
268+
frontend_port, decode_system_port, prefill_system_port = get_free_ports(3)
269+
264270
# Step 1: Start the frontend
265-
frontend_port = get_free_port()
266271
with DynamoFrontendProcess(request, frontend_port) as frontend:
267272
logger.info("Frontend started successfully")
268273

269274
# Step 2: Start the decode worker
270275
with DynamoWorkerProcess(
271276
request,
272-
system_port=get_free_port(),
277+
system_port=decode_system_port,
273278
frontend_port=frontend_port,
274279
mode="decode",
275280
) as decode_worker:
@@ -278,7 +283,7 @@ def test_request_cancellation_sglang_decode_cancel(
278283
# Step 3: Start the prefill worker
279284
with DynamoWorkerProcess(
280285
request,
281-
system_port=get_free_port(),
286+
system_port=prefill_system_port,
282287
frontend_port=frontend_port,
283288
mode="prefill",
284289
) as prefill_worker:

tests/fault_tolerance/cancellation/test_trtllm.py

Lines changed: 25 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME
1818
from tests.utils.managed_process import ManagedProcess
1919
from tests.utils.payloads import check_health_generate, check_models_api
20-
from tests.utils.port_utils import get_free_port
20+
from tests.utils.port_utils import get_free_ports
2121

2222
logger = logging.getLogger(__name__)
2323

@@ -151,15 +151,17 @@ def test_request_cancellation_trtllm_aggregated(
151151
on the worker side in aggregated (prefill_and_decode) mode.
152152
"""
153153

154+
# Allocate all ports upfront to avoid race conditions
155+
frontend_port, system_port = get_free_ports(2)
156+
154157
# Step 1: Start the frontend
155-
frontend_port = get_free_port()
156158
with DynamoFrontendProcess(request, frontend_port) as frontend:
157159
logger.info("Frontend started successfully")
158160

159161
# Step 2: Start an aggregated worker
160162
with DynamoWorkerProcess(
161163
request,
162-
system_port=get_free_port(),
164+
system_port=system_port,
163165
frontend_port=frontend_port,
164166
mode="prefill_and_decode",
165167
) as worker:
@@ -234,15 +236,17 @@ def test_request_cancellation_trtllm_disagg_decode_cancel(
234236
on the decode worker side in a disaggregated setup.
235237
"""
236238

239+
# Allocate all ports upfront to avoid race conditions
240+
frontend_port, prefill_system_port, decode_system_port = get_free_ports(3)
241+
237242
# Step 1: Start the frontend
238-
frontend_port = get_free_port()
239243
with DynamoFrontendProcess(request, frontend_port) as frontend:
240244
logger.info("Frontend started successfully")
241245

242246
# Step 2: Start the prefill worker
243247
with DynamoWorkerProcess(
244248
request,
245-
system_port=get_free_port(),
249+
system_port=prefill_system_port,
246250
frontend_port=frontend_port,
247251
mode="prefill",
248252
strategy="decode_first",
@@ -252,7 +256,7 @@ def test_request_cancellation_trtllm_disagg_decode_cancel(
252256
# Step 3: Start the decode worker
253257
with DynamoWorkerProcess(
254258
request,
255-
system_port=get_free_port(),
259+
system_port=decode_system_port,
256260
frontend_port=frontend_port,
257261
mode="decode",
258262
strategy="decode_first",
@@ -325,15 +329,17 @@ def test_request_cancellation_trtllm_disagg_prefill_cancel(
325329
Since the request is cancelled before prefill completes, the decode worker never receives it.
326330
"""
327331

332+
# Allocate all ports upfront to avoid race conditions
333+
frontend_port, prefill_system_port, decode_system_port = get_free_ports(3)
334+
328335
# Step 1: Start the frontend
329-
frontend_port = get_free_port()
330336
with DynamoFrontendProcess(request, frontend_port) as frontend:
331337
logger.info("Frontend started successfully")
332338

333339
# Step 2: Start the prefill worker
334340
with DynamoWorkerProcess(
335341
request,
336-
system_port=get_free_port(),
342+
system_port=prefill_system_port,
337343
frontend_port=frontend_port,
338344
mode="prefill",
339345
strategy="decode_first",
@@ -343,7 +349,7 @@ def test_request_cancellation_trtllm_disagg_prefill_cancel(
343349
# Step 3: Start the decode worker
344350
with DynamoWorkerProcess(
345351
request,
346-
system_port=get_free_port(),
352+
system_port=decode_system_port,
347353
frontend_port=frontend_port,
348354
mode="decode",
349355
strategy="decode_first",
@@ -420,15 +426,17 @@ def test_request_cancellation_trtllm_prefill_first_prefill_cancel(
420426
on the prefill worker side in a disaggregated setup using prefill_first strategy.
421427
"""
422428

429+
# Allocate all ports upfront to avoid race conditions
430+
frontend_port, decode_system_port, prefill_system_port = get_free_ports(3)
431+
423432
# Step 1: Start the frontend
424-
frontend_port = get_free_port()
425433
with DynamoFrontendProcess(request, frontend_port) as frontend:
426434
logger.info("Frontend started successfully")
427435

428436
# Step 2: Start the decode worker
429437
with DynamoWorkerProcess(
430438
request,
431-
system_port=get_free_port(),
439+
system_port=decode_system_port,
432440
frontend_port=frontend_port,
433441
mode="decode",
434442
strategy="prefill_first",
@@ -438,7 +446,7 @@ def test_request_cancellation_trtllm_prefill_first_prefill_cancel(
438446
# Step 3: Start the prefill worker
439447
with DynamoWorkerProcess(
440448
request,
441-
system_port=get_free_port(),
449+
system_port=prefill_system_port,
442450
frontend_port=frontend_port,
443451
mode="prefill",
444452
strategy="prefill_first",
@@ -502,15 +510,17 @@ def test_request_cancellation_trtllm_prefill_first_remote_decode_cancel(
502510
on both the prefill and decode workers in a disaggregated setup using prefill_first strategy.
503511
"""
504512

513+
# Allocate all ports upfront to avoid race conditions
514+
frontend_port, decode_system_port, prefill_system_port = get_free_ports(3)
515+
505516
# Step 1: Start the frontend
506-
frontend_port = get_free_port()
507517
with DynamoFrontendProcess(request, frontend_port) as frontend:
508518
logger.info("Frontend started successfully")
509519

510520
# Step 2: Start the decode worker
511521
with DynamoWorkerProcess(
512522
request,
513-
system_port=get_free_port(),
523+
system_port=decode_system_port,
514524
frontend_port=frontend_port,
515525
mode="decode",
516526
strategy="prefill_first",
@@ -520,7 +530,7 @@ def test_request_cancellation_trtllm_prefill_first_remote_decode_cancel(
520530
# Step 3: Start the prefill worker
521531
with DynamoWorkerProcess(
522532
request,
523-
system_port=get_free_port(),
533+
system_port=prefill_system_port,
524534
frontend_port=frontend_port,
525535
mode="prefill",
526536
strategy="prefill_first",

tests/fault_tolerance/cancellation/test_vllm.py

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME
1717
from tests.utils.managed_process import ManagedProcess
1818
from tests.utils.payloads import check_health_generate, check_models_api
19-
from tests.utils.port_utils import get_free_port
19+
from tests.utils.port_utils import get_free_ports
2020

2121
logger = logging.getLogger(__name__)
2222

@@ -139,14 +139,16 @@ def test_request_cancellation_vllm_aggregated(
139139
3. Chat completion request (streaming)
140140
"""
141141

142+
# Allocate all ports upfront to avoid race conditions
143+
frontend_port, system_port = get_free_ports(2)
144+
142145
# Step 1: Start the frontend
143-
frontend_port = get_free_port()
144146
with DynamoFrontendProcess(request, frontend_port) as frontend:
145147
logger.info("Frontend started successfully")
146148

147149
# Step 2: Start a single worker
148150
with DynamoWorkerProcess(
149-
request, system_port=get_free_port(), frontend_port=frontend_port
151+
request, system_port=system_port, frontend_port=frontend_port
150152
) as worker:
151153
logger.info(f"Worker PID: {worker.get_pid()}")
152154

@@ -216,15 +218,17 @@ def test_request_cancellation_vllm_decode_cancel(
216218
on the decode worker side in a disaggregated setup.
217219
"""
218220

221+
# Allocate all ports upfront to avoid race conditions
222+
frontend_port, prefill_system_port, decode_system_port = get_free_ports(3)
223+
219224
# Step 1: Start the frontend
220-
frontend_port = get_free_port()
221225
with DynamoFrontendProcess(request, frontend_port) as frontend:
222226
logger.info("Frontend started successfully")
223227

224228
# Step 2: Start the prefill worker
225229
with DynamoWorkerProcess(
226230
request,
227-
system_port=get_free_port(),
231+
system_port=prefill_system_port,
228232
frontend_port=frontend_port,
229233
is_prefill=True,
230234
) as prefill_worker:
@@ -233,7 +237,7 @@ def test_request_cancellation_vllm_decode_cancel(
233237
# Step 3: Start the decode worker
234238
with DynamoWorkerProcess(
235239
request,
236-
system_port=get_free_port(),
240+
system_port=decode_system_port,
237241
frontend_port=frontend_port,
238242
is_prefill=False,
239243
) as decode_worker:
@@ -302,15 +306,17 @@ def test_request_cancellation_vllm_remote_prefill_cancel(
302306
on both the decode and prefill workers in a disaggregated setup.
303307
"""
304308

309+
# Allocate all ports upfront to avoid race conditions
310+
frontend_port, prefill_system_port, decode_system_port = get_free_ports(3)
311+
305312
# Step 1: Start the frontend
306-
frontend_port = get_free_port()
307313
with DynamoFrontendProcess(request, frontend_port) as frontend:
308314
logger.info("Frontend started successfully")
309315

310316
# Step 2: Start the prefill worker
311317
with DynamoWorkerProcess(
312318
request,
313-
system_port=get_free_port(),
319+
system_port=prefill_system_port,
314320
frontend_port=frontend_port,
315321
is_prefill=True,
316322
) as prefill_worker:
@@ -319,7 +325,7 @@ def test_request_cancellation_vllm_remote_prefill_cancel(
319325
# Step 3: Start the decode worker
320326
with DynamoWorkerProcess(
321327
request,
322-
system_port=get_free_port(),
328+
system_port=decode_system_port,
323329
frontend_port=frontend_port,
324330
is_prefill=False,
325331
) as decode_worker:

0 commit comments

Comments
 (0)