tmp: Mark cancellation/migration E2E tests that use multiple workers with the TCP request plane as allowed to fail

kthui · kthui · commit a6ae4906e577 · 2025-12-09T18:34:55.000-08:00
diff --git a/tests/fault_tolerance/cancellation/test_sglang.py b/tests/fault_tolerance/cancellation/test_sglang.py
@@ -25,7 +25,6 @@
     pytest.mark.sglang,
     pytest.mark.e2e,
     pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
-    pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
     pytest.mark.post_merge,  # post_merge to pinpoint failure commit
 ]
 
@@ -164,6 +163,7 @@ def is_ready(self, response) -> bool:
 @pytest.mark.timeout(160)  # 3x average
 @pytest.mark.gpu_1
 @pytest.mark.xfail(strict=False)
+@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
 def test_request_cancellation_sglang_aggregated(request, runtime_services):
     """
     End-to-end test for request cancellation functionality in aggregated mode.
@@ -248,6 +248,17 @@ def test_request_cancellation_sglang_aggregated(request, runtime_services):
 
 @pytest.mark.timeout(185)  # 3x average
 @pytest.mark.gpu_2
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_request_cancellation_sglang_decode_cancel(request, runtime_services):
     """
     End-to-end test for request cancellation during decode phase.
diff --git a/tests/fault_tolerance/cancellation/test_trtllm.py b/tests/fault_tolerance/cancellation/test_trtllm.py
@@ -26,7 +26,6 @@
     pytest.mark.gpu_1,
     pytest.mark.e2e,
     pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
-    pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
     pytest.mark.post_merge,  # post_merge to pinpoint failure commit
 ]
 
@@ -144,6 +143,7 @@ def is_ready(self, response) -> bool:
 
 
 @pytest.mark.timeout(140)  # 3x average
+@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
 def test_request_cancellation_trtllm_aggregated(request, runtime_services):
     """
     End-to-end test for request cancellation functionality in aggregated mode.
@@ -216,6 +216,17 @@ def test_request_cancellation_trtllm_aggregated(request, runtime_services):
 
 
 @pytest.mark.timeout(350)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_request_cancellation_trtllm_decode_cancel(request, runtime_services):
     """
     End-to-end test for request cancellation during decode phase with unified frontend.
@@ -287,6 +298,17 @@ def test_request_cancellation_trtllm_decode_cancel(request, runtime_services):
 
 
 @pytest.mark.timeout(350)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_request_cancellation_trtllm_prefill_cancel(request, runtime_services):
     """
     End-to-end test for request cancellation during prefill phase with unified frontend.
@@ -368,6 +390,7 @@ def test_request_cancellation_trtllm_prefill_cancel(request, runtime_services):
 
 
 @pytest.mark.timeout(350)  # 3x average
+@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
 @pytest.mark.xfail(
     reason="May fail due to unknown reason with TRT-LLM or backend implementation",
     strict=False,
diff --git a/tests/fault_tolerance/cancellation/test_vllm.py b/tests/fault_tolerance/cancellation/test_vllm.py
@@ -25,7 +25,6 @@
     pytest.mark.gpu_1,
     pytest.mark.e2e,
     pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
-    pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
     pytest.mark.post_merge,  # post_merge to pinpoint failure commit
 ]
 
@@ -137,6 +136,7 @@ def is_ready(self, response) -> bool:
 
 
 @pytest.mark.timeout(110)  # 3x average
+@pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True)
 def test_request_cancellation_vllm_aggregated(request, runtime_services):
     """
     End-to-end test for request cancellation functionality in aggregated mode.
@@ -209,6 +209,17 @@ def test_request_cancellation_vllm_aggregated(request, runtime_services):
 
 
 @pytest.mark.timeout(150)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_request_cancellation_vllm_decode_cancel(
     request, runtime_services, set_ucx_tls_no_mm
 ):
@@ -279,6 +290,17 @@ def test_request_cancellation_vllm_decode_cancel(
 
 
 @pytest.mark.timeout(150)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_request_cancellation_vllm_prefill_cancel(
     request, runtime_services, set_ucx_tls_no_mm
 ):
diff --git a/tests/fault_tolerance/migration/test_sglang.py b/tests/fault_tolerance/migration/test_sglang.py
@@ -28,7 +28,6 @@
     pytest.mark.gpu_1,
     pytest.mark.e2e,
     pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
-    pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
     pytest.mark.post_merge,  # post_merge to pinpoint failure commit
 ]
 
@@ -116,6 +115,17 @@ def is_ready(self, response) -> bool:
 
 
 @pytest.mark.timeout(235)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_request_migration_sglang_worker_failure(
     request, runtime_services, set_ucx_tls_no_mm
 ):
@@ -160,6 +170,17 @@ def test_request_migration_sglang_worker_failure(
 
 
 @pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented")
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_request_migration_sglang_graceful_shutdown(
     request, runtime_services, set_ucx_tls_no_mm
 ):
@@ -208,6 +229,17 @@ def test_request_migration_sglang_graceful_shutdown(
 
 
 @pytest.mark.timeout(135)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_no_request_migration_sglang_worker_failure(
     request, runtime_services, set_ucx_tls_no_mm
 ):
@@ -268,6 +300,17 @@ def test_no_request_migration_sglang_worker_failure(
 
 
 @pytest.mark.skip(reason="SGLang graceful shutdown not yet implemented")
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_no_request_migration_sglang_graceful_shutdown(
     request, runtime_services, set_ucx_tls_no_mm
 ):
diff --git a/tests/fault_tolerance/migration/test_trtllm.py b/tests/fault_tolerance/migration/test_trtllm.py
@@ -28,7 +28,6 @@
     pytest.mark.gpu_1,
     pytest.mark.e2e,
     pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
-    pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
     pytest.mark.post_merge,  # post_merge to pinpoint failure commit
 ]
 
@@ -112,6 +111,17 @@ def is_ready(self, response) -> bool:
 
 
 @pytest.mark.timeout(290)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_request_migration_trtllm_worker_failure(
     request, runtime_services, set_ucx_tls_no_mm
 ):
@@ -156,6 +166,17 @@ def test_request_migration_trtllm_worker_failure(
 
 
 @pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented")
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_request_migration_trtllm_graceful_shutdown(
     request, runtime_services, set_ucx_tls_no_mm
 ):
@@ -204,6 +225,17 @@ def test_request_migration_trtllm_graceful_shutdown(
 
 
 @pytest.mark.timeout(185)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_no_request_migration_trtllm_worker_failure(
     request, runtime_services, set_ucx_tls_no_mm
 ):
@@ -264,6 +296,17 @@ def test_no_request_migration_trtllm_worker_failure(
 
 
 @pytest.mark.skip(reason="TRT-LLM graceful shutdown not yet implemented")
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_no_request_migration_trtllm_graceful_shutdown(
     request, runtime_services, set_ucx_tls_no_mm
 ):
diff --git a/tests/fault_tolerance/migration/test_vllm.py b/tests/fault_tolerance/migration/test_vllm.py
@@ -28,7 +28,6 @@
     pytest.mark.gpu_1,
     pytest.mark.e2e,
     pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
-    pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
     pytest.mark.post_merge,  # post_merge to pinpoint failure commit
 ]
 
@@ -117,6 +116,17 @@ def is_ready(self, response) -> bool:
 
 
 @pytest.mark.timeout(290)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_request_migration_vllm_worker_failure(
     request, runtime_services, set_ucx_tls_no_mm
 ):
@@ -161,6 +171,17 @@ def test_request_migration_vllm_worker_failure(
 
 
 @pytest.mark.timeout(280)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_request_migration_vllm_graceful_shutdown(
     request, runtime_services, set_ucx_tls_no_mm
 ):
@@ -209,6 +230,17 @@ def test_request_migration_vllm_graceful_shutdown(
 
 
 @pytest.mark.timeout(150)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_no_request_migration_vllm_worker_failure(
     request, runtime_services, set_ucx_tls_no_mm
 ):
@@ -269,6 +301,17 @@ def test_no_request_migration_vllm_worker_failure(
 
 
 @pytest.mark.timeout(140)  # 3x average
+@pytest.mark.parametrize(
+    "request_plane",
+    [
+        "nats",
+        pytest.param(
+            "tcp",
+            marks=pytest.mark.xfail(reason="Multi-worker TCP unstable", strict=False),
+        ),
+    ],
+    indirect=True,
+)
 def test_no_request_migration_vllm_graceful_shutdown(
     request, runtime_services, set_ucx_tls_no_mm
 ):