22# SPDX-License-Identifier: Apache-2.0
33
44"""
5- Test Execution Times (Last Run: 2025-12-09 ):
5+ Test Execution Times (Last Run: 2025-12-13 ):
66- test_request_cancellation_trtllm_aggregated: ~45s (gpu_1)
7- - test_request_cancellation_trtllm_decode_cancel: ~115s (gpu_1)
8- - test_request_cancellation_trtllm_prefill_cancel: ~115s (gpu_1)
9- - test_request_cancellation_trtllm_kv_transfer_cancel: ~115s (gpu_1, xfail )
10- - Total: ~390s (0:06:30 )
7+ - test_request_cancellation_trtllm_decode_cancel: ~65s (gpu_1)
8+ - test_request_cancellation_trtllm_prefill_cancel: ~65s (gpu_1)
9+ - test_request_cancellation_trtllm_kv_transfer_cancel: ~65s (gpu_1, xfail)
10+ - Total: ~240s x2 request planes = ~480s (0:08:00 )
1111"""
1212
1313import logging
@@ -72,8 +72,6 @@ def __init__(
7272 FAULT_TOLERANCE_MODEL_NAME ,
7373 "--disaggregation-mode" ,
7474 mode ,
75- "--free-gpu-memory-fraction" ,
76- "0.45" ,
7775 "--max-seq-len" ,
7876 "16384" ,
7977 "--max-num-tokens" ,
@@ -83,8 +81,11 @@ def __init__(
8381 ]
8482 if mode != "prefill_and_decode" :
8583 with open ("test_request_cancellation_trtllm_config.yaml" , "w" ) as f :
86- f .write ("cache_transceiver_config:\n backend: DEFAULT\n " )
84+ f .write (
85+ "cache_transceiver_config:\n backend: DEFAULT\n max_tokens_in_buffer: 16384\n "
86+ )
8787 f .write ("disable_overlap_scheduler: true\n " )
88+ f .write ("kv_cache_config:\n max_tokens: 16384\n " )
8889 command += [
8990 "--extra-engine-args" ,
9091 "test_request_cancellation_trtllm_config.yaml" ,
@@ -164,7 +165,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
164165 return super ().__exit__ (exc_type , exc_val , exc_tb )
165166
166167
167- @pytest .mark .timeout (140 ) # 3x average
168+ @pytest .mark .timeout (135 ) # 3x average
168169def test_request_cancellation_trtllm_aggregated (
169170 request , runtime_services_dynamic_ports , predownload_models
170171):
@@ -251,7 +252,7 @@ def test_request_cancellation_trtllm_aggregated(
251252 logger .info (f"{ description } detected successfully" )
252253
253254
254- @pytest .mark .timeout (350 ) # 3x average
255+ @pytest .mark .timeout (195 ) # 3x average
255256def test_request_cancellation_trtllm_decode_cancel (
256257 request , runtime_services_dynamic_ports , predownload_models
257258):
@@ -335,7 +336,7 @@ def test_request_cancellation_trtllm_decode_cancel(
335336 )
336337
337338
338- @pytest .mark .timeout (350 ) # 3x average
339+ @pytest .mark .timeout (195 ) # 3x average
339340def test_request_cancellation_trtllm_prefill_cancel (
340341 request , runtime_services_dynamic_ports , predownload_models
341342):
@@ -427,11 +428,8 @@ def test_request_cancellation_trtllm_prefill_cancel(
427428 )
428429
429430
430- @pytest .mark .timeout (350 ) # 3x average
431- @pytest .mark .xfail (
432- reason = "May fail due to unknown reason with TRT-LLM or backend implementation" ,
433- strict = False ,
434- )
431+ @pytest .mark .xfail (reason = "Test fails only on CI" , strict = False )
432+ @pytest .mark .timeout (195 ) # 3x average
435433def test_request_cancellation_trtllm_kv_transfer_cancel (
436434 request , runtime_services_dynamic_ports , predownload_models
437435):
0 commit comments