Skip to content

Commit 01a634d

Browse files
authored
feat: SGLang FP8 improvements and vLLM benchmark enhancements (#4675)
1 parent c9fdc2e commit 01a634d

File tree

1 file changed

+30
-8
lines changed
  • examples/backends/sglang/slurm_jobs/scripts/vllm

1 file changed

+30
-8
lines changed

examples/backends/sglang/slurm_jobs/scripts/vllm/bench.sh

Lines changed: 30 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -27,19 +27,42 @@ chosen_req_rate=$8
27 27

28 28
echo "Config ${chosen_isl}; ${chosen_osl}; ${chosen_concurrencies[@]}; ${chosen_req_rate}"
29 29

30-
wait_for_model_timeout=1500 # 25 minutes
31-
wait_for_model_check_interval=5 # check interval -> 5s
32-
wait_for_model_report_interval=60 # wait_for_model report interval -> 60s
30+
wait_for_model_timeout=3000
31+
wait_for_model_check_interval=5
32+
wait_for_model_report_interval=60
33 33

34-
wait_for_model $head_node $head_port $n_prefill $n_decode $wait_for_model_check_interval $wait_for_model_timeout $wait_for_model_report_interval
34+
wait_for_model $head_node $head_port $n_prefill $n_decode \
35+
$wait_for_model_check_interval $wait_for_model_timeout $wait_for_model_report_interval
35 36

36 37
set -e
37-
# Warmup the model with a sweep of concurrencies
38+
39+
# Warmup defaults
38 40
warmup_isl=$chosen_isl
39 41
warmup_osl=$chosen_osl
40 42
warmup_req_rate=250
41-
warmup_concurrency_list=(1 4 8 32 64 128 256 512 1024 4096)
43+
warmup_concurrency_list=(1 4 8 32 64 128 256 512)
44+
45+
# Ensure all chosen concurrencies are in warmup list
46+
for c in "${chosen_concurrencies[@]}"; do
47+
found=false
48+
for w in "${warmup_concurrency_list[@]}"; do
49+
if [[ "$c" == "$w" ]]; then
50+
found=true
51+
break
52+
fi
53+
done
54+
if [[ "$found" == false ]]; then
55+
warmup_concurrency_list+=("$c")
56+
fi
57+
done
42 58

59+
# Optional: sort warmup list numerically
60+
IFS=$'\n' warmup_concurrency_list=($(sort -n <<<"${warmup_concurrency_list[*]}"))
61+
unset IFS
62+
63+
echo "Final warmup list: ${warmup_concurrency_list[@]}"
64+
65+
# Warmup
43 66
for warmup_concurrency in "${warmup_concurrency_list[@]}"
44 67
do
45 68
echo "Warming up model with concurrency $warmup_concurrency"
@@ -73,7 +96,7 @@ for concurrency in "${chosen_concurrencies[@]}"
73 96
do
74 97
num_prompts=$((concurrency * 5))
75 98
echo "Running benchmark with concurrency: $concurrency and num-prompts: $num_prompts, writing to file ${result_dir}"
76-
result_filename="isl_${chosen_isl}_osl_${chosen_osl}_concurrency_${concurrency}_req_rate_${chosen_req_rate}_ctx${prefill_gpus}_gen${decode_gpus}.json"
99+
result_filename="isl_${chosen_isl}_osl_${chosen_osl}_concurrency_${concurrency}_req_rate_${chosen_req_rate}_ctx_${prefill_gpus}_gen_${decode_gpus}_gpus_${total_gpus}.json"
77 100

78 101
set -x
79 102
echo "$(date '+%Y-%m-%d %H:%M:%S')"
@@ -98,4 +121,3 @@ do
98 121
echo "Completed benchmark with concurrency: $concurrency"
99 122
echo "-----------------------------------------"
100 123
done
101-
set +e

0 commit comments

Comments
 (0)