We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e709de8 · commit c57ef93 · Copy full SHA for c57ef93
tests/fault_tolerance/cancellation/test_trtllm.py
@@ -72,6 +72,8 @@ def __init__(
72
FAULT_TOLERANCE_MODEL_NAME,
73
"--disaggregation-mode",
74
mode,
75
+ "--free-gpu-memory-fraction",
76
+ "0.2",
77
"--max-seq-len",
78
"16384",
79
"--max-num-tokens",
@@ -85,7 +87,7 @@ def __init__(
85
87
"cache_transceiver_config:\n backend: DEFAULT\n max_tokens_in_buffer: 16384\n"
86
88
)
89
f.write("disable_overlap_scheduler: true\n")
- f.write("kv_cache_config:\n max_tokens: 16384\n")
90
+ # f.write("kv_cache_config:\n max_tokens: 16384\n")
91
command += [
92
"--extra-engine-args",
93
"test_request_cancellation_trtllm_config.yaml",
0 commit comments