Skip to content

Commit 69f3f7d

Browse files
committed
Use runtime_services_dynamic_ports for vLLM tests
Update vLLM cancellation and migration tests to use the runtime_services_dynamic_ports fixture for fully dynamic NATS/Etcd ports, enabling true parallel test execution.

- Restore max_tokens to 16384 in send_cancellable_request
- Add TODO to etcd_ha/test_sglang.py for future port update

Signed-off-by: Keiven Chang <[email protected]>
1 parent 1ab57ba commit 69f3f7d

File tree

5 files changed: 15 additions (+15) and 13 deletions (-13).

tests/fault_tolerance/cancellation/test_vllm.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def is_ready(self, response) -> bool:
148148

149149

150150
@pytest.mark.timeout(110) # 3x average
151-
def test_request_cancellation_vllm_aggregated(request, runtime_services):
151+
def test_request_cancellation_vllm_aggregated(request, runtime_services_dynamic_ports):
152152
"""
153153
End-to-end test for request cancellation functionality in aggregated mode.
154154
@@ -232,7 +232,7 @@ def test_request_cancellation_vllm_aggregated(request, runtime_services):
232232

233233
@pytest.mark.timeout(150) # 3x average
234234
def test_request_cancellation_vllm_decode_cancel(
235-
request, runtime_services, set_ucx_tls_no_mm
235+
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
236236
):
237237
"""
238238
End-to-end test for request cancellation during decode phase.
@@ -324,7 +324,7 @@ def test_request_cancellation_vllm_decode_cancel(
324324

325325
@pytest.mark.timeout(150) # 3x average
326326
def test_request_cancellation_vllm_prefill_cancel(
327-
request, runtime_services, set_ucx_tls_no_mm
327+
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
328328
):
329329
"""
330330
End-to-end test for request cancellation during prefill phase.

tests/fault_tolerance/cancellation/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,11 +268,11 @@ def send_cancellable_request(
268268
prompt += " Make sure it is" + " long" * 16000 + "!"
269269

270270
if request_type == "completion":
271-
return send_completion_request(prompt, 8192, frontend_port)
271+
return send_completion_request(prompt, 16384, frontend_port)
272272
elif request_type == "chat_completion":
273-
return send_chat_completion_request(prompt, 8192, frontend_port, stream=False)
273+
return send_chat_completion_request(prompt, 16384, frontend_port, stream=False)
274274
elif request_type == "chat_completion_stream":
275-
return send_chat_completion_request(prompt, 8192, frontend_port, stream=True)
275+
return send_chat_completion_request(prompt, 16384, frontend_port, stream=True)
276276
else:
277277
raise ValueError(f"Unknown request type: {request_type}")
278278

tests/fault_tolerance/etcd_ha/test_sglang.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33

4+
# TODO: Update to use dynamic port allocation (allocate_free_port) for parallel execution
5+
# Currently uses hardcoded ports: FRONTEND_PORT (8000), system ports (8081, 8082)
6+
# See tests/fault_tolerance/migration/test_sglang.py for dynamic port pattern
7+
48
import logging
59
import os
610
import shutil

tests/fault_tolerance/migration/test_vllm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def is_ready(self, response) -> bool:
128128

129129
@pytest.mark.timeout(290) # 3x average
130130
def test_request_migration_vllm_worker_failure(
131-
request, runtime_services, set_ucx_tls_no_mm
131+
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
132132
):
133133
"""
134134
End-to-end test for worker fault tolerance with migration support.
@@ -186,7 +186,7 @@ def test_request_migration_vllm_worker_failure(
186186

187187
@pytest.mark.timeout(280) # 3x average
188188
def test_request_migration_vllm_graceful_shutdown(
189-
request, runtime_services, set_ucx_tls_no_mm
189+
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
190190
):
191191
"""
192192
End-to-end test for worker fault tolerance with graceful shutdown and migration support.
@@ -248,7 +248,7 @@ def test_request_migration_vllm_graceful_shutdown(
248248

249249
@pytest.mark.timeout(150) # 3x average
250250
def test_no_request_migration_vllm_worker_failure(
251-
request, runtime_services, set_ucx_tls_no_mm
251+
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
252252
):
253253
"""
254254
End-to-end test for worker fault tolerance with migration disabled.
@@ -326,7 +326,7 @@ def test_no_request_migration_vllm_worker_failure(
326326

327327
@pytest.mark.timeout(140) # 3x average
328328
def test_no_request_migration_vllm_graceful_shutdown(
329-
request, runtime_services, set_ucx_tls_no_mm
329+
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm
330330
):
331331
"""
332332
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled.

tests/fault_tolerance/migration/utils.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def send_request():
109109

110110
def determine_request_receiving_worker(
111111
worker1: ManagedProcess, worker2: ManagedProcess, receiving_pattern: str
112-
) -> tuple[ManagedProcess, str]:
112+
) -> tuple:
113113
"""
114114
Determine which worker received the request using parallel polling.
115115
@@ -170,10 +170,8 @@ def poll_worker(worker: ManagedProcess, result_list: list[bool]):
170170
return worker2, "Worker 2"
171171
elif worker1_received and worker2_received:
172172
pytest.fail("Both workers received the request")
173-
raise AssertionError("Unreachable") # For mypy: pytest.fail() raises
174173
else:
175174
pytest.fail("Neither worker received the request")
176-
raise AssertionError("Unreachable") # For mypy: pytest.fail() raises
177175

178176

179177
def validate_completion_response(

0 commit comments

Comments
 (0)