Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import os
import shutil
import tempfile
import time
from pathlib import Path
from typing import Optional

Expand Down Expand Up @@ -226,6 +227,26 @@ def pytest_collection_modifyitems(config, items):
config.models_to_download = models_to_download


def pytest_runtestloop(session):
    """Download models after collection but before any tests run.

    This hook runs after pytest_collection_modifyitems (so models are collected)
    but before any test execution, ensuring model downloads don't count against
    test timeouts.

    Nothing is downloaded when no models were collected, or when pytest was
    invoked with ``--collect-only``: pytest still calls this hook in that mode,
    but no test will execute, so the downloads would be wasted work.

    Args:
        session: The pytest session. ``session.config.models_to_download`` is
            read if it has been set.

    Returns:
        Always ``None``. ``pytest_runtestloop`` is a first-result hook, so
        returning ``None`` lets pytest fall through to its default run loop.
    """
    models = getattr(session.config, "models_to_download", None)
    if not models:
        return None

    # Collection-only runs never execute tests, so skip the (slow) downloads.
    if session.config.getoption("collectonly", default=False):
        return None

    logging.info(
        "Downloading %d models before test execution\nModels: %s",
        len(models),
        models,
    )
    # Monotonic clock: the measured duration is immune to system clock changes.
    start_time = time.monotonic()

    download_models(model_list=list(models))

    download_duration = time.monotonic() - start_time
    logging.info("Model download completed in %.1fs", download_duration)
    return None


class EtcdServer(ManagedProcess):
def __init__(self, request, port=2379, timeout=300):
port_string = str(port)
Expand Down
8 changes: 2 additions & 6 deletions tests/fault_tolerance/cancellation/test_sglang.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,7 @@ def is_ready(self, response) -> bool:
@pytest.mark.timeout(160) # 3x average
@pytest.mark.gpu_1
@pytest.mark.xfail(strict=False)
def test_request_cancellation_sglang_aggregated(
request, runtime_services, predownload_models
):
def test_request_cancellation_sglang_aggregated(request, runtime_services):
"""
End-to-end test for request cancellation functionality in aggregated mode.

Expand Down Expand Up @@ -247,9 +245,7 @@ def test_request_cancellation_sglang_aggregated(

@pytest.mark.timeout(185) # 3x average
@pytest.mark.gpu_2
def test_request_cancellation_sglang_decode_cancel(
request, runtime_services, predownload_models
):
def test_request_cancellation_sglang_decode_cancel(request, runtime_services):
"""
End-to-end test for request cancellation during decode phase.

Expand Down
16 changes: 4 additions & 12 deletions tests/fault_tolerance/cancellation/test_trtllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,7 @@ def is_ready(self, response) -> bool:


@pytest.mark.timeout(140) # 3x average
def test_request_cancellation_trtllm_aggregated(
request, runtime_services, predownload_models
):
def test_request_cancellation_trtllm_aggregated(request, runtime_services):
"""
End-to-end test for request cancellation functionality in aggregated mode.

Expand Down Expand Up @@ -215,9 +213,7 @@ def test_request_cancellation_trtllm_aggregated(


@pytest.mark.timeout(350) # 3x average
def test_request_cancellation_trtllm_decode_cancel(
request, runtime_services, predownload_models
):
def test_request_cancellation_trtllm_decode_cancel(request, runtime_services):
"""
End-to-end test for request cancellation during decode phase with unified frontend.

Expand Down Expand Up @@ -288,9 +284,7 @@ def test_request_cancellation_trtllm_decode_cancel(


@pytest.mark.timeout(350) # 3x average
def test_request_cancellation_trtllm_prefill_cancel(
request, runtime_services, predownload_models
):
def test_request_cancellation_trtllm_prefill_cancel(request, runtime_services):
"""
End-to-end test for request cancellation during prefill phase with unified frontend.

Expand Down Expand Up @@ -375,9 +369,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
reason="May fail due to unknown reason with TRT-LLM or backend implementation",
strict=False,
)
def test_request_cancellation_trtllm_kv_transfer_cancel(
request, runtime_services, predownload_models
):
def test_request_cancellation_trtllm_kv_transfer_cancel(request, runtime_services):
"""
End-to-end test for request cancellation during prefill to decode KV transfer phase.

Expand Down
8 changes: 3 additions & 5 deletions tests/fault_tolerance/cancellation/test_vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,7 @@ def is_ready(self, response) -> bool:


@pytest.mark.timeout(110) # 3x average
def test_request_cancellation_vllm_aggregated(
request, runtime_services, predownload_models
):
def test_request_cancellation_vllm_aggregated(request, runtime_services):
"""
End-to-end test for request cancellation functionality in aggregated mode.

Expand Down Expand Up @@ -209,7 +207,7 @@ def test_request_cancellation_vllm_aggregated(

@pytest.mark.timeout(150) # 3x average
def test_request_cancellation_vllm_decode_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for request cancellation during decode phase.
Expand Down Expand Up @@ -279,7 +277,7 @@ def test_request_cancellation_vllm_decode_cancel(

@pytest.mark.timeout(150) # 3x average
def test_request_cancellation_vllm_prefill_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for request cancellation during prefill phase.
Expand Down
8 changes: 4 additions & 4 deletions tests/fault_tolerance/migration/test_sglang.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def is_ready(self, response) -> bool:

@pytest.mark.timeout(235) # 3x average
def test_request_migration_sglang_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration support using SGLang.
Expand Down Expand Up @@ -159,7 +159,7 @@ def test_request_migration_sglang_worker_failure(

@pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented")
def test_request_migration_sglang_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support using SGLang.
Expand Down Expand Up @@ -207,7 +207,7 @@ def test_request_migration_sglang_graceful_shutdown(

@pytest.mark.timeout(135) # 3x average
def test_no_request_migration_sglang_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration disabled using SGLang.
Expand Down Expand Up @@ -267,7 +267,7 @@ def test_no_request_migration_sglang_worker_failure(

@pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented")
def test_no_request_migration_sglang_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using SGLang.
Expand Down
8 changes: 4 additions & 4 deletions tests/fault_tolerance/migration/test_trtllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def is_ready(self, response) -> bool:

@pytest.mark.timeout(290) # 3x average
def test_request_migration_trtllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration support using TRT-LLM.
Expand Down Expand Up @@ -155,7 +155,7 @@ def test_request_migration_trtllm_worker_failure(

@pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented")
def test_request_migration_trtllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support using TRT-LLM.
Expand Down Expand Up @@ -203,7 +203,7 @@ def test_request_migration_trtllm_graceful_shutdown(

@pytest.mark.timeout(185) # 3x average
def test_no_request_migration_trtllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration disabled using TRT-LLM.
Expand Down Expand Up @@ -263,7 +263,7 @@ def test_no_request_migration_trtllm_worker_failure(

@pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented")
def test_no_request_migration_trtllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled using TRT-LLM.
Expand Down
8 changes: 4 additions & 4 deletions tests/fault_tolerance/migration/test_vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def is_ready(self, response) -> bool:

@pytest.mark.timeout(290) # 3x average
def test_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration support.
Expand Down Expand Up @@ -159,7 +159,7 @@ def test_request_migration_vllm_worker_failure(

@pytest.mark.timeout(280) # 3x average
def test_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration support.
Expand Down Expand Up @@ -207,7 +207,7 @@ def test_request_migration_vllm_graceful_shutdown(

@pytest.mark.timeout(150) # 3x average
def test_no_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with migration disabled.
Expand Down Expand Up @@ -267,7 +267,7 @@ def test_no_request_migration_vllm_worker_failure(

@pytest.mark.timeout(140) # 3x average
def test_no_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
request, runtime_services, set_ucx_tls_no_mm
):
"""
End-to-end test for worker fault tolerance with graceful shutdown and migration disabled.
Expand Down
Loading