Skip to content

Commit 4ca1679

Browse files
authored
test: Pre-download models before tests are run (#4811)
Signed-off-by: Jacky <[email protected]>
1 parent e6de33f commit 4ca1679

File tree

7 files changed

+42
-35
lines changed

7 files changed

+42
-35
lines changed

tests/conftest.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import os
1818
import shutil
1919
import tempfile
20+
import time
2021
from pathlib import Path
2122
from typing import Optional
2223

@@ -226,6 +227,26 @@ def pytest_collection_modifyitems(config, items):
226227
config.models_to_download = models_to_download
227228

228229

230+
def pytest_runtestloop(session):
    """Download models after collection but before any tests run.

    This hook runs after pytest_collection_modifyitems (so models are collected)
    but before any test execution, ensuring model downloads don't count against
    test timeouts.

    Nothing is downloaded when pytest was invoked with ``--collect-only``:
    no test will execute in that mode, so fetching models would only waste
    time and bandwidth.

    Args:
        session: The pytest session; ``session.config.models_to_download`` is
            read if present.

    Returns:
        Always None, so that pytest goes on to call the remaining
        ``pytest_runtestloop`` implementations (including its built-in one
        that actually runs the tests).
    """
    config = session.config

    # This hook is invoked before pytest's built-in loop implementation, which
    # is the one that short-circuits on --collect-only; check it here too.
    if getattr(getattr(config, "option", None), "collectonly", False):
        return None

    models = getattr(config, "models_to_download", None)
    if not models:
        return None

    logging.info(
        f"Downloading {len(models)} models before test execution\nModels: {models}"
    )

    # Monotonic clock: the measured duration is immune to wall-clock adjustments.
    start_time = time.monotonic()
    download_models(model_list=list(models))
    download_duration = time.monotonic() - start_time

    logging.info(f"Model download completed in {download_duration:.1f}s")
    return None
248+
249+
229250
class EtcdServer(ManagedProcess):
230251
def __init__(self, request, port=2379, timeout=300):
231252
port_string = str(port)

tests/fault_tolerance/cancellation/test_sglang.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,7 @@ def is_ready(self, response) -> bool:
161161
@pytest.mark.timeout(160) # 3x average
162162
@pytest.mark.gpu_1
163163
@pytest.mark.xfail(strict=False)
164-
def test_request_cancellation_sglang_aggregated(
165-
request, runtime_services, predownload_models
166-
):
164+
def test_request_cancellation_sglang_aggregated(request, runtime_services):
167165
"""
168166
End-to-end test for request cancellation functionality in aggregated mode.
169167
@@ -247,9 +245,7 @@ def test_request_cancellation_sglang_aggregated(
247245

248246
@pytest.mark.timeout(185) # 3x average
249247
@pytest.mark.gpu_2
250-
def test_request_cancellation_sglang_decode_cancel(
251-
request, runtime_services, predownload_models
252-
):
248+
def test_request_cancellation_sglang_decode_cancel(request, runtime_services):
253249
"""
254250
End-to-end test for request cancellation during decode phase.
255251

tests/fault_tolerance/cancellation/test_trtllm.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,7 @@ def is_ready(self, response) -> bool:
141141

142142

143143
@pytest.mark.timeout(140) # 3x average
144-
def test_request_cancellation_trtllm_aggregated(
145-
request, runtime_services, predownload_models
146-
):
144+
def test_request_cancellation_trtllm_aggregated(request, runtime_services):
147145
"""
148146
End-to-end test for request cancellation functionality in aggregated mode.
149147
@@ -215,9 +213,7 @@ def test_request_cancellation_trtllm_aggregated(
215213

216214

217215
@pytest.mark.timeout(350) # 3x average
218-
def test_request_cancellation_trtllm_decode_cancel(
219-
request, runtime_services, predownload_models
220-
):
216+
def test_request_cancellation_trtllm_decode_cancel(request, runtime_services):
221217
"""
222218
End-to-end test for request cancellation during decode phase with unified frontend.
223219
@@ -288,9 +284,7 @@ def test_request_cancellation_trtllm_decode_cancel(
288284

289285

290286
@pytest.mark.timeout(350) # 3x average
291-
def test_request_cancellation_trtllm_prefill_cancel(
292-
request, runtime_services, predownload_models
293-
):
287+
def test_request_cancellation_trtllm_prefill_cancel(request, runtime_services):
294288
"""
295289
End-to-end test for request cancellation during prefill phase with unified frontend.
296290
@@ -375,9 +369,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
375369
reason="May fail due to unknown reason with TRT-LLM or backend implementation",
376370
strict=False,
377371
)
378-
def test_request_cancellation_trtllm_kv_transfer_cancel(
379-
request, runtime_services, predownload_models
380-
):
372+
def test_request_cancellation_trtllm_kv_transfer_cancel(request, runtime_services):
381373
"""
382374
End-to-end test for request cancellation during prefill to decode KV transfer phase.
383375

tests/fault_tolerance/cancellation/test_vllm.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,7 @@ def is_ready(self, response) -> bool:
134134

135135

136136
@pytest.mark.timeout(110) # 3x average
137-
def test_request_cancellation_vllm_aggregated(
138-
request, runtime_services, predownload_models
139-
):
137+
def test_request_cancellation_vllm_aggregated(request, runtime_services):
140138
"""
141139
End-to-end test for request cancellation functionality in aggregated mode.
142140
@@ -209,7 +207,7 @@ def test_request_cancellation_vllm_aggregated(
209207

210208
@pytest.mark.timeout(150) # 3x average
211209
def test_request_cancellation_vllm_decode_cancel(
212-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
210+
request, runtime_services, set_ucx_tls_no_mm
213211
):
214212
"""
215213
End-to-end test for request cancellation during decode phase.
@@ -279,7 +277,7 @@ def test_request_cancellation_vllm_decode_cancel(
279277

280278
@pytest.mark.timeout(150) # 3x average
281279
def test_request_cancellation_vllm_prefill_cancel(
282-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
280+
request, runtime_services, set_ucx_tls_no_mm
283281
):
284282
"""
285283
End-to-end test for request cancellation during prefill phase.

tests/fault_tolerance/migration/test_sglang.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def is_ready(self, response) -> bool:
115115

116116
@pytest.mark.timeout(235) # 3x average
117117
def test_request_migration_sglang_worker_failure(
118-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
118+
request, runtime_services, set_ucx_tls_no_mm
119119
):
120120
"""
121121
End-to-end test for worker fault tolerance with migration support using SGLang.
@@ -159,7 +159,7 @@ def test_request_migration_sglang_worker_failure(
159159

160160
@pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented")
161161
def test_request_migration_sglang_graceful_shutdown(
162-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
162+
request, runtime_services, set_ucx_tls_no_mm
163163
):
164164
"""
165165
End-to-end test for worker fault tolerance with graceful shutdown and migration support using SGLang.
@@ -207,7 +207,7 @@ def test_request_migration_sglang_graceful_shutdown(
207207

208208
@pytest.mark.timeout(135) # 3x average
209209
def test_no_request_migration_sglang_worker_failure(
210-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
210+
request, runtime_services, set_ucx_tls_no_mm
211211
):
212212
"""
213213
End-to-end test for worker fault tolerance with migration disabled using SGLang.
@@ -267,7 +267,7 @@ def test_no_request_migration_sglang_worker_failure(
267267

268268
@pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented")
269269
def test_no_request_migration_sglang_graceful_shutdown(
270-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
270+
request, runtime_services, set_ucx_tls_no_mm
271271
):
272272
"""
273273
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using SGLang.

tests/fault_tolerance/migration/test_trtllm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def is_ready(self, response) -> bool:
111111

112112
@pytest.mark.timeout(290) # 3x average
113113
def test_request_migration_trtllm_worker_failure(
114-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
114+
request, runtime_services, set_ucx_tls_no_mm
115115
):
116116
"""
117117
End-to-end test for worker fault tolerance with migration support using TRT-LLM.
@@ -155,7 +155,7 @@ def test_request_migration_trtllm_worker_failure(
155155

156156
@pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented")
157157
def test_request_migration_trtllm_graceful_shutdown(
158-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
158+
request, runtime_services, set_ucx_tls_no_mm
159159
):
160160
"""
161161
End-to-end test for worker fault tolerance with graceful shutdown and migration support using TRT-LLM.
@@ -203,7 +203,7 @@ def test_request_migration_trtllm_graceful_shutdown(
203203

204204
@pytest.mark.timeout(185) # 3x average
205205
def test_no_request_migration_trtllm_worker_failure(
206-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
206+
request, runtime_services, set_ucx_tls_no_mm
207207
):
208208
"""
209209
End-to-end test for worker fault tolerance with migration disabled using TRT-LLM.
@@ -263,7 +263,7 @@ def test_no_request_migration_trtllm_worker_failure(
263263

264264
@pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented")
265265
def test_no_request_migration_trtllm_graceful_shutdown(
266-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
266+
request, runtime_services, set_ucx_tls_no_mm
267267
):
268268
"""
269269
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using TRT-LLM.

tests/fault_tolerance/migration/test_vllm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def is_ready(self, response) -> bool:
115115

116116
@pytest.mark.timeout(290) # 3x average
117117
def test_request_migration_vllm_worker_failure(
118-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
118+
request, runtime_services, set_ucx_tls_no_mm
119119
):
120120
"""
121121
End-to-end test for worker fault tolerance with migration support.
@@ -159,7 +159,7 @@ def test_request_migration_vllm_worker_failure(
159159

160160
@pytest.mark.timeout(280) # 3x average
161161
def test_request_migration_vllm_graceful_shutdown(
162-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
162+
request, runtime_services, set_ucx_tls_no_mm
163163
):
164164
"""
165165
End-to-end test for worker fault tolerance with graceful shutdown and migration support.
@@ -207,7 +207,7 @@ def test_request_migration_vllm_graceful_shutdown(
207207

208208
@pytest.mark.timeout(150) # 3x average
209209
def test_no_request_migration_vllm_worker_failure(
210-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
210+
request, runtime_services, set_ucx_tls_no_mm
211211
):
212212
"""
213213
End-to-end test for worker fault tolerance with migration disabled.
@@ -267,7 +267,7 @@ def test_no_request_migration_vllm_worker_failure(
267267

268268
@pytest.mark.timeout(140) # 3x average
269269
def test_no_request_migration_vllm_graceful_shutdown(
270-
request, runtime_services, predownload_models, set_ucx_tls_no_mm
270+
request, runtime_services, set_ucx_tls_no_mm
271271
):
272272
"""
273273
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled.

0 commit comments

Comments
 (0)