@@ -373,8 +373,8 @@ main() {
373373 fi
374374 ;;
375375 " tep" )
376- if [ $# -ne 15 ]; then
377- echo " Error: TEP mode requires 15 additional parameters (including mtp_mode)"
376+ if [ $# -ne 14 ]; then
377+ echo " Error: TEP mode requires 14 additional parameters (including mtp_mode)"
378378 usage
379379 fi
380380
@@ -384,22 +384,21 @@ main() {
384384 local ctx_enable_attention_dp=$6
385385 local gen_num=$7
386386 local gen_tp_size=$8
387- local gen_ep_size=$9
388- local gen_batch_size=${10}
389- local gen_max_num_tokens=${11}
390- local gen_gpu_memory_fraction=${12}
391- local gen_mtp_size=${13}
392- local gen_eplb_num_slots=${14}
393- local gen_concurrency_list=${15}
387+ local gen_batch_size=$9
388+ local gen_max_num_tokens=${10}
389+ local gen_gpu_memory_fraction=${11}
390+ local gen_mtp_size=${12}
391+ local gen_eplb_num_slots=${13}
392+ local gen_concurrency_list=${14}
394393
395- echo " Running TEP mode ($mtp_mode ) with ctx_num=$ctx_num , gen_num=$gen_num , gen_tp_size=$gen_tp_size , gen_ep_size=$gen_ep_size , gen_batch_size=$gen_batch_size , gen_max_num_tokens=$gen_max_num_tokens , gen_gpu_memory_fraction=$gen_gpu_memory_fraction , gen_mtp_size=$gen_mtp_size , gen_eplb_num_slots=$gen_eplb_num_slots , gen_concurrency_list=\" $gen_concurrency_list \" "
394+ echo " Running TEP mode ($mtp_mode ) with ctx_num=$ctx_num , gen_num=$gen_num , gen_tp_size=$gen_tp_size , gen_ep_size=$gen_tp_size , gen_batch_size=$gen_batch_size , gen_max_num_tokens=$gen_max_num_tokens , gen_gpu_memory_fraction=$gen_gpu_memory_fraction , gen_mtp_size=$gen_mtp_size , gen_eplb_num_slots=$gen_eplb_num_slots , gen_concurrency_list=\" $gen_concurrency_list \" "
396395
397396 # TEP mode: Use false to disable attention dp
398- run_single $ctx_num $ctx_tp_size $ctx_ep_size $ctx_enable_attention_dp $gen_num $gen_tp_size $gen_ep_size $gen_batch_size $gen_max_num_tokens false $gen_gpu_memory_fraction $gen_mtp_size $gen_eplb_num_slots " $gen_concurrency_list "
397+ run_single $ctx_num $ctx_tp_size $ctx_ep_size $ctx_enable_attention_dp $gen_num $gen_tp_size $gen_tp_size $gen_batch_size $gen_max_num_tokens false $gen_gpu_memory_fraction $gen_mtp_size $gen_eplb_num_slots " $gen_concurrency_list "
399398 ;;
400399 " dep" )
401- if [ $# -ne 15 ]; then
402- echo " Error: DEP mode requires 15 additional parameters (including mtp_mode)"
400+ if [ $# -ne 14 ]; then
401+ echo " Error: DEP mode requires 14 additional parameters (including mtp_mode)"
403402 usage
404403 fi
405404
@@ -409,17 +408,16 @@ main() {
409408 local ctx_enable_attention_dp=$6
410409 local gen_num=$7
411410 local gen_tp_size=$8
412- local gen_ep_size=$9
413- local gen_batch_size=${10}
414- local gen_max_num_tokens=${11}
415- local gen_gpu_memory_fraction=${12}
416- local gen_mtp_size=${13}
417- local gen_eplb_num_slots=${14}
418- local gen_concurrency_list=${15}
411+ local gen_batch_size=$9
412+ local gen_max_num_tokens=${10}
413+ local gen_gpu_memory_fraction=${11}
414+ local gen_mtp_size=${12}
415+ local gen_eplb_num_slots=${13}
416+ local gen_concurrency_list=${14}
419417
420- echo " Running DEP mode ($mtp_mode ) with ctx_num=$ctx_num , ctx_tp_size=$ctx_tp_size , ctx_enable_attention_dp=$ctx_enable_attention_dp , gen_num=$gen_num , gen_tp_size=$gen_tp_size , gen_ep_size=$gen_ep_size , gen_batch_size=$gen_batch_size , gen_max_num_tokens=$gen_max_num_tokens , gen_gpu_memory_fraction=$gen_gpu_memory_fraction , gen_mtp_size=$gen_mtp_size , gen_eplb_num_slots=$gen_eplb_num_slots , gen_concurrency_list=\" $gen_concurrency_list \" "
418+ echo " Running DEP mode ($mtp_mode ) with ctx_num=$ctx_num , ctx_tp_size=$ctx_tp_size , ctx_enable_attention_dp=$ctx_enable_attention_dp , gen_num=$gen_num , gen_tp_size=$gen_tp_size , gen_ep_size=$gen_tp_size , gen_batch_size=$gen_batch_size , gen_max_num_tokens=$gen_max_num_tokens , gen_gpu_memory_fraction=$gen_gpu_memory_fraction , gen_mtp_size=$gen_mtp_size , gen_eplb_num_slots=$gen_eplb_num_slots , gen_concurrency_list=\" $gen_concurrency_list \" "
421419
422- run_single $ctx_num $ctx_tp_size $ctx_ep_size $ctx_enable_attention_dp $gen_num $gen_tp_size $gen_ep_size $gen_batch_size $gen_max_num_tokens $gen_gpu_memory_fraction $gen_mtp_size $gen_eplb_num_slots " $gen_concurrency_list "
420+ run_single $ctx_num $ctx_tp_size $ctx_ep_size $ctx_enable_attention_dp $gen_num $gen_tp_size $gen_tp_size $gen_batch_size $gen_max_num_tokens $gen_gpu_memory_fraction $gen_mtp_size $gen_eplb_num_slots " $gen_concurrency_list "
423421 ;;
424422 * )
425423 echo " Error: Unknown mode '$mode '"
0 commit comments