Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions lib/bindings/python/tests/cancellation/test_cancellation.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ async def client(runtime, namespace):

@pytest.mark.forked
@pytest.mark.asyncio
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
async def test_client_context_cancel(temp_file_store, server, client):
_, handler = server
context = Context()
Expand Down Expand Up @@ -198,6 +199,7 @@ async def test_client_context_cancel(temp_file_store, server, client):

@pytest.mark.forked
@pytest.mark.asyncio
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
async def test_client_loop_break(temp_file_store, server, client):
_, handler = server
stream = await client.generate("_generate_until_context_cancelled")
Expand Down Expand Up @@ -230,6 +232,7 @@ async def test_client_loop_break(temp_file_store, server, client):

@pytest.mark.forked
@pytest.mark.asyncio
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
async def test_server_context_cancel(temp_file_store, server, client):
_, handler = server
stream = await client.generate("_generate_and_cancel_context")
Expand All @@ -254,6 +257,7 @@ async def test_server_context_cancel(temp_file_store, server, client):

@pytest.mark.forked
@pytest.mark.asyncio
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
async def test_server_raise_cancelled(temp_file_store, server, client):
_, handler = server
stream = await client.generate("_generate_and_raise_cancelled")
Expand Down Expand Up @@ -282,6 +286,7 @@ async def test_server_raise_cancelled(temp_file_store, server, client):

@pytest.mark.forked
@pytest.mark.asyncio
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
async def test_client_context_already_cancelled(temp_file_store, server, client):
_, handler = server
context = Context()
Expand All @@ -304,6 +309,7 @@ async def test_client_context_already_cancelled(temp_file_store, server, client)

@pytest.mark.forked
@pytest.mark.asyncio
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
async def test_client_context_cancel_before_await_request(
temp_file_store, server, client
):
Expand Down
38 changes: 36 additions & 2 deletions lib/bindings/python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,34 @@ def temp_file_store():
yield tmpdir


@pytest.fixture
def store_kv(request):
"""
KV store for runtime. Defaults to "file".

To iterate over multiple stores in a test:
@pytest.mark.parametrize("store_kv", ["file", "etcd"], indirect=True)
async def test_example(runtime):
...
"""
return getattr(request, "param", "file")


@pytest.fixture
def request_plane(request):
"""
Request plane for runtime. Defaults to "nats".

To iterate over multiple transports in a test:
@pytest.mark.parametrize("request_plane", ["tcp", "nats"], indirect=True)
async def test_example(runtime):
...
"""
return getattr(request, "param", "nats")


@pytest.fixture(scope="function", autouse=False)
async def runtime(request):
async def runtime(request, store_kv, request_plane):
"""
Create a DistributedRuntime for testing.

Expand All @@ -413,6 +439,14 @@ async def runtime(request):

Without @pytest.mark.forked in isolated mode, you will get "Worker already initialized"
errors when multiple tests try to create runtimes in the same process.

The store_kv and request_plane can be customized by overriding their fixtures
or using @pytest.mark.parametrize with indirect=True:

@pytest.mark.forked
@pytest.mark.parametrize("store_kv", ["etcd"], indirect=True)
async def test_with_etcd(runtime):
...
"""
# Check if the test is marked with @pytest.mark.forked (only in isolated mode)
if ENABLE_ISOLATED_ETCD_AND_NATS:
Expand All @@ -435,6 +469,6 @@ async def test_my_test(runtime):
)

loop = asyncio.get_running_loop()
runtime = DistributedRuntime(loop, "file", "nats")
runtime = DistributedRuntime(loop, store_kv, request_plane)
yield runtime
runtime.shutdown()
47 changes: 44 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,11 +412,52 @@ def _create_server(self) -> ManagedProcess:
return server


@pytest.fixture
def store_kv(request):
"""
KV store for runtime. Defaults to "etcd".

To iterate over multiple stores in a test:
@pytest.mark.parametrize("store_kv", ["file", "etcd"], indirect=True)
def test_example(runtime_services):
...
"""
return getattr(request, "param", "etcd")


@pytest.fixture
def request_plane(request):
"""
Request plane for runtime. Defaults to "nats".

To iterate over multiple transports in a test:
@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
def test_example(runtime_services):
...
"""
return getattr(request, "param", "nats")


@pytest.fixture()
def runtime_services(request):
with NatsServer(request) as nats_process:
def runtime_services(request, store_kv, request_plane):
"""
Start runtime services (NATS and/or etcd) based on store_kv and request_plane.

- If store_kv != "etcd", etcd is not started (returns None)
- If request_plane != "nats", NATS is not started (returns None)
"""
if request_plane == "nats" and store_kv == "etcd":
with NatsServer(request) as nats_process:
with EtcdServer(request) as etcd_process:
yield nats_process, etcd_process
elif request_plane == "nats":
with NatsServer(request) as nats_process:
yield nats_process, None
elif store_kv == "etcd":
with EtcdServer(request) as etcd_process:
yield nats_process, etcd_process
yield None, etcd_process
else:
yield None, None


@pytest.fixture(scope="session")
Expand Down
5 changes: 4 additions & 1 deletion tests/fault_tolerance/cancellation/test_sglang.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
pytest.mark.sglang,
pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
pytest.mark.post_merge, # post_merge to pinpoint failure commit
]

Expand Down Expand Up @@ -89,8 +90,10 @@ def __init__(self, request, mode: str = "agg"):
else: # agg (aggregated mode)
port = "8081"

# Set debug logging environment
# Set environment variables
env = os.environ.copy()
env["DYN_REQUEST_PLANE"] = request.getfixturevalue("request_plane")

env["DYN_LOG"] = "debug"
# Disable canary health check - these tests expect full control over requests
# sent to the workers where canary health check intermittently sends dummy
Expand Down
5 changes: 4 additions & 1 deletion tests/fault_tolerance/cancellation/test_trtllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
pytest.mark.gpu_1,
pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
pytest.mark.post_merge, # post_merge to pinpoint failure commit
]

Expand Down Expand Up @@ -85,8 +86,10 @@ def __init__(self, request, mode: str = "prefill_and_decode"):
else: # prefill_and_decode
port = "8081"

# Set debug logging environment
# Set environment variables
env = os.environ.copy()
env["DYN_REQUEST_PLANE"] = request.getfixturevalue("request_plane")

env["DYN_LOG"] = "debug"
# Disable canary health check - these tests expect full control over requests
# sent to the workers where canary health check intermittently sends dummy
Expand Down
5 changes: 4 additions & 1 deletion tests/fault_tolerance/cancellation/test_vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
pytest.mark.gpu_1,
pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
pytest.mark.post_merge, # post_merge to pinpoint failure commit
]

Expand Down Expand Up @@ -65,8 +66,10 @@ def __init__(self, request, is_prefill: bool = False):
(f"http://localhost:{FRONTEND_PORT}/health", check_health_generate),
]

# Set debug logging environment
# Set environment variables
env = os.environ.copy()
env["DYN_REQUEST_PLANE"] = request.getfixturevalue("request_plane")

env["DYN_LOG"] = "debug"
# Disable canary health check - these tests expect full control over requests
# sent to the workers where canary health check intermittently sends dummy
Expand Down
2 changes: 1 addition & 1 deletion tests/fault_tolerance/cancellation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ class DynamoFrontendProcess(ManagedProcess):
def __init__(self, request):
command = ["python", "-m", "dynamo.frontend"]

# Set debug logging environment
env = os.environ.copy()
env["DYN_REQUEST_PLANE"] = request.getfixturevalue("request_plane")
env["DYN_LOG"] = "debug"
# Disable canary health check - these tests expect full control over requests
# sent to the workers where canary health check intermittently sends dummy
Expand Down
4 changes: 3 additions & 1 deletion tests/fault_tolerance/migration/test_sglang.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
pytest.mark.gpu_1,
pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
pytest.mark.post_merge, # post_merge to pinpoint failure commit
]

Expand Down Expand Up @@ -56,8 +57,9 @@ def __init__(self, request, worker_id: str, migration_limit: int = 3):
str(migration_limit),
]

# Set debug logging environment
# Set environment variables
env = os.environ.copy()
env["DYN_REQUEST_PLANE"] = request.getfixturevalue("request_plane")
env["DYN_LOG"] = "debug"
# Disable canary health check - these tests expect full control over requests
# sent to the workers where canary health check intermittently sends dummy
Expand Down
4 changes: 3 additions & 1 deletion tests/fault_tolerance/migration/test_trtllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
pytest.mark.gpu_1,
pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
pytest.mark.post_merge, # post_merge to pinpoint failure commit
]

Expand All @@ -54,8 +55,9 @@ def __init__(self, request, worker_id: str, migration_limit: int = 3):
str(migration_limit),
]

# Set debug logging environment
# Set environment variables
env = os.environ.copy()
env["DYN_REQUEST_PLANE"] = request.getfixturevalue("request_plane")
env["DYN_LOG"] = "debug"
# Disable canary health check - these tests expect full control over requests
# sent to the workers where canary health check intermittently sends dummy
Expand Down
5 changes: 4 additions & 1 deletion tests/fault_tolerance/migration/test_vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
pytest.mark.gpu_1,
pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
pytest.mark.post_merge, # post_merge to pinpoint failure commit
]

Expand All @@ -53,8 +54,10 @@ def __init__(self, request, worker_id: str, migration_limit: int = 3):
str(migration_limit),
]

# Set debug logging environment
# Set environment variables
env = os.environ.copy()
env["DYN_REQUEST_PLANE"] = request.getfixturevalue("request_plane")

env["DYN_VLLM_KV_EVENT_PORT"] = f"2008{worker_id[-1]}"
env["VLLM_NIXL_SIDE_CHANNEL_PORT"] = f"560{worker_id[-1]}"

Expand Down
3 changes: 2 additions & 1 deletion tests/fault_tolerance/migration/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@ class DynamoFrontendProcess(ManagedProcess):
def __init__(self, request):
command = ["python", "-m", "dynamo.frontend", "--router-mode", "round-robin"]

# Unset DYN_SYSTEM_PORT - frontend doesn't use system metrics server
env = os.environ.copy()
env["DYN_REQUEST_PLANE"] = request.getfixturevalue("request_plane")
# Disable canary health check - these tests expect full control over requests
# sent to the workers where canary health check intermittently sends dummy
# requests to workers interfering with the test process which may cause
# intermittent failures
env["DYN_HEALTH_CHECK_ENABLED"] = "false"
# Unset DYN_SYSTEM_PORT - frontend doesn't use system metrics server
env.pop("DYN_SYSTEM_PORT", None)

log_dir = f"{request.node.name}_frontend"
Expand Down
Loading