Skip to content

Commit d89df01

Browse files
committed
make warmup timeout configurable
1 parent 2bb0317 commit d89df01

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

python/sglang/srt/entrypoints/http_server.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1529,11 +1529,16 @@ def _execute_server_warmup(
15291529

15301530
try:
15311531
if server_args.disaggregation_mode == "null":
1532+
logger.info(f"Start of co-locate warmup ...")
15321533
res = requests.post(
15331534
url + request_name,
15341535
json=json_data,
15351536
headers=headers,
1536-
timeout=600,
1537+
timeout=(
1538+
server_args.warmup_timeout
1539+
if server_args.warmup_timeout is not None
1540+
else 300
1541+
),
15371542
)
15381543
assert res.status_code == 200, f"{res}"
15391544
_global_state.tokenizer_manager.server_status = ServerStatus.Up
@@ -1559,7 +1564,11 @@ def _execute_server_warmup(
15591564
url + request_name,
15601565
json=json_data,
15611566
headers=headers,
1562-
timeout=1800, # because of deep gemm precache is very long if not precache.
1567+
timeout=(
1568+
server_args.warmup_timeout
1569+
if server_args.warmup_timeout is not None
1570+
else 1800
1571+
), # because of deep gemm precache is very long if not precache.
15631572
)
15641573
if res.status_code == 200:
15651574
logger.info(

python/sglang/srt/server_args.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ class ServerArgs:
292292
constrained_json_disable_any_whitespace: bool = False
293293
watchdog_timeout: float = 300
294294
dist_timeout: Optional[int] = None # timeout for torch.distributed
295+
warmup_timeout: Optional[float] = None # timeout for warmup request
295296
download_dir: Optional[str] = None
296297
base_gpu_id: int = 0
297298
gpu_id_step: int = 1
@@ -2287,6 +2288,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
22872288
default=ServerArgs.dist_timeout,
22882289
help="Set timeout for torch.distributed initialization.",
22892290
)
2291+
parser.add_argument(
2292+
"--warmup-timeout",
2293+
type=float,
2294+
default=ServerArgs.warmup_timeout,
2295+
help="Set warmup timeout in seconds. If a warmup forward batch takes longer than this, the server will crash to prevent hanging. Recommend to increase warmup timeout to 1800 to accommodate some kernel JIT precache e.g. deep gemm",
2296+
)
22902297
parser.add_argument(
22912298
"--download-dir",
22922299
type=str,

0 commit comments

Comments
 (0)