@@ -27,19 +27,42 @@ chosen_req_rate=$8
2727
# Log the benchmark configuration selected for this run.
# "[*]" joins the concurrency list into one word so echo receives a single,
# well-formed argument (the printed text is unchanged with the default IFS).
echo "Config ${chosen_isl}; ${chosen_osl}; ${chosen_concurrencies[*]}; ${chosen_req_rate}"

# Settings handed to wait_for_model, all in seconds.
wait_for_model_timeout=3000        # give up after 50 minutes
wait_for_model_check_interval=5    # poll every 5s
wait_for_model_report_interval=60  # report progress every 60s

# wait_for_model is defined outside this section; presumably it blocks until
# the prefill/decode workers behind head_node:head_port are up -- confirm.
# Every argument is quoted so that an empty or unset value is passed as an
# empty string instead of vanishing and shifting the remaining positional
# arguments.
wait_for_model "$head_node" "$head_port" "$n_prefill" "$n_decode" \
    "$wait_for_model_check_interval" \
    "$wait_for_model_timeout" \
    "$wait_for_model_report_interval"

# errexit is switched on only after the wait: a non-zero status from
# wait_for_model above does not abort the script, but any failing command
# from here on does.
set -e
# Warmup defaults: reuse the benchmark's input/output sequence lengths and
# use a fixed request rate.
warmup_isl=$chosen_isl
warmup_osl=$chosen_osl
warmup_req_rate=250
warmup_concurrency_list=(1 4 8 32 64 128 256 512)

#######################################
# Print the distinct arguments, one per line, in ascending numeric order.
# Duplicates are detected by exact string comparison; empty arguments are
# skipped.
# Arguments: concurrency values (any number, possibly none)
# Outputs:   the merged, sorted values on stdout; nothing when no values remain
# Returns:   0 on success, sort's status otherwise
#######################################
_merge_sorted_concurrencies() {
    local -A seen=()
    local -a merged=()
    local value

    for value in "$@"; do
        # An empty string is not a usable concurrency (and is not a valid
        # associative-array key), so drop it.
        [[ -n "$value" ]] || continue
        if [[ -z "${seen["$value"]+x}" ]]; then
            seen["$value"]=1
            merged+=("$value")
        fi
    done

    # Nothing to print; avoid emitting a lone blank line.
    (( ${#merged[@]} > 0 )) || return 0

    printf '%s\n' "${merged[@]}" | sort -n
}

# Make sure every chosen concurrency is also warmed up, then keep the list in
# ascending order. mapfile reads the result line by line, so IFS is never
# modified and no unquoted command substitution is word-split.
mapfile -t warmup_concurrency_list < <(
    _merge_sorted_concurrencies \
        "${warmup_concurrency_list[@]}" \
        "${chosen_concurrencies[@]}"
)

echo "Final warmup list: ${warmup_concurrency_list[*]}"
64+
65+ # Warmup
4366for warmup_concurrency in " ${warmup_concurrency_list[@]} "
4467do
4568 echo " Warming up model with concurrency $warmup_concurrency "
@@ -73,7 +96,7 @@ for concurrency in "${chosen_concurrencies[@]}"
7396do
7497 num_prompts=$(( concurrency * 5 ))
7598 echo " Running benchmark with concurrency: $concurrency and num-prompts: $num_prompts , writing to file ${result_dir} "
76- result_filename=" isl_${chosen_isl} _osl_${chosen_osl} _concurrency_${concurrency} _req_rate_${chosen_req_rate} _ctx ${prefill_gpus} _gen ${decode_gpus} .json"
99+ result_filename=" isl_${chosen_isl} _osl_${chosen_osl} _concurrency_${concurrency} _req_rate_${chosen_req_rate} _ctx_ ${prefill_gpus} _gen_ ${decode_gpus} _gpus_ ${total_gpus } .json"
77100
78101 set -x
79102 echo " $( date ' +%Y-%m-%d %H:%M:%S' ) "
98121 echo " Completed benchmark with concurrency: $concurrency "
99122 echo " -----------------------------------------"
100123done
101- set +e
0 commit comments