@@ -102,171 +102,6 @@ run_single() {
102102 set +x
103103}
104104
105- # MTP0 Configuration (gen_mtp_size=0): preset sweeps whose 8th run_single argument is always 0 (presumably gen_mtp_size — confirm against run_single)
106- run_4_gpus_mtp0 () { # Prints a banner, then issues one fixed table of run_single calls chosen by comparing ISL with OSL; the third argument is 4 on every row.
107- echo " Running 4 GPUs MTP0 combinations..."
108- if (( ISL == OSL )) ; then # ISL/OSL are not set here; they are read from the caller's environment. This table is used when they are equal.
109- run_single 1 5 4 128 128 false " 0.9" 0 0 " 1 2 4 8 16 32 64 128 192" # last argument is one quoted, space-separated number list — presumably the concurrency sweep, TODO confirm
110- run_single 1 5 4 64 64 true " 0.85" 0 0 " 256 384"
111- run_single 1 4 4 128 128 true " 0.85" 0 0 " 512 768"
112- run_single 2 5 4 256 256 true " 0.85" 0 0 " 1024 1536"
113- run_single 1 2 4 512 512 true " 0.85" 0 0 " 2048 3072"
114- run_single 2 3 4 768 768 true " 0.85" 0 0 " 3072 4096"
115- else # table used when ISL and OSL differ
116- run_single 1 5 4 16 16 false " 0.9" 0 0 " 1 2 4 8 16 24"
117- run_single 1 4 4 32 32 false " 0.9" 0 0 " 32 48"
118- run_single 2 5 4 64 64 false " 0.9" 0 0 " 64 96"
119- run_single 1 2 4 128 128 false " 0.9" 0 0 " 128 192"
120- run_single 1 1 4 64 64 true " 0.8" 0 0 " 256 384"
121- run_single 3 2 4 128 128 true " 0.8" 0 0 " 512 768"
122- fi
123- }
124-
125- run_8_gpus_mtp0 () { # Prints a banner, then issues one fixed table of run_single calls chosen by comparing ISL with OSL; the third argument is 8 and the 8th argument is 0 on every row.
126- echo " Running 8 GPUs MTP0 combinations..."
127- if (( ISL == OSL )) ; then # ISL/OSL are not set here; they are read from the caller's environment. This table is used when they are equal.
128- run_single 1 4 8 128 128 false " 0.9" 0 0 " 1 2 4 8 16 32 64 128 192 256" # last argument is one quoted, space-separated number list — presumably the concurrency sweep, TODO confirm
129- run_single 1 4 8 32 32 true " 0.8" 0 0 " 256 384"
130- run_single 1 3 8 64 64 true " 0.8" 0 0 " 512 768"
131- run_single 1 2 8 128 128 true " 0.8" 0 0 " 1024 1536"
132- run_single 1 1 8 256 256 true " 0.8" 0 0 " 2048 3072"
133- run_single 1 1 8 512 512 true " 0.8" 0 0 " 4096 6144"
134- run_single 3 2 8 768 768 true " 0.8" 0 0 " 6144 8192"
135- run_single 3 2 8 1024 1024 true " 0.8" 0 0 " 8192 12288"
136- else # table used when ISL and OSL differ
137- run_single 1 4 8 16 16 false " 0.9" 0 0 " 1 2 4 8 16 24"
138- run_single 1 3 8 32 32 false " 0.9" 0 0 " 32 48"
139- run_single 1 2 8 64 64 false " 0.9" 0 0 " 64 96"
140- run_single 1 1 8 128 128 false " 0.9" 0 0 " 128 192"
141- run_single 3 2 8 32 32 true " 0.8" 0 0 " 256 384"
142- run_single 5 2 8 64 64 true " 0.8" 0 0 " 512 768"
143- run_single 4 1 8 128 128 true " 0.8" 0 0 " 1024 1536"
144- run_single 5 1 8 256 256 true " 0.8" 0 0 " 2048 3072"
145- fi
146- }
147-
148- run_16_gpus_mtp0 () { # Prints a banner, then issues one fixed table of run_single calls chosen by comparing ISL with OSL; the third argument is 16 and the 8th argument is 0 on every row.
149- echo " Running 16 GPUs MTP0 combinations..."
150- if (( ISL == OSL )) ; then # ISL/OSL are not set here; they are read from the caller's environment. This table is used when they are equal.
151- run_single 1 1 16 64 64 true " 0.75" 0 0 " 16 32 64 128 256 512 1024 1536" # 9th argument is 0 only on this row of the branch; the rest pass 256 or 288 (presumably an EPLB slot count — TODO confirm)
152- run_single 2 1 16 128 128 true " 0.75" 0 256 " 2048 3072"
153- run_single 2 1 16 256 256 true " 0.75" 0 256 " 4096 6144"
154- run_single 3 1 16 512 512 true " 0.75" 0 256 " 8192 12288"
155- run_single 3 1 16 768 768 true " 0.75" 0 256 " 12288 16384"
156- run_single 3 1 16 1024 1024 true " 0.75" 0 288 " 16384 20480"
157- else # table used when ISL and OSL differ; each trailing "# N" below equals the row's first argument + 4 (looks like a node total — TODO confirm the meaning)
158- run_single 1 1 16 8 8 true " 0.8" 0 0 " 16 32 64 128 192" # 5
159- run_single 2 1 16 16 16 true " 0.8" 0 0 " 256 384" # 6
160- run_single 3 1 16 32 32 true " 0.8" 0 0 " 512 768" # 7
161- run_single 6 1 16 64 64 true " 0.8" 0 0 " 1024 1536" # 10
162- run_single 8 1 16 128 128 true " 0.8" 0 256 " 2048 3072" # 12
163- run_single 10 1 16 256 256 true " 0.8" 0 256 " 4096 6144" # 14
164- fi
165- }
166-
167- run_32_gpus_mtp0 () { # Prints a banner, then issues one fixed table of run_single calls chosen by comparing ISL with OSL; the third argument is 32 and the 8th argument is 0 on every row.
168- echo " Running 32 GPUs MTP0 combinations..."
169- if (( ISL == OSL )) ; then # ISL/OSL are not set here; they are read from the caller's environment. This table is used when they are equal.
170- run_single 1 1 32 32 32 true " 0.7" 0 0 " 32 64 128 256 512 1024 1536" # 9th argument is 0 only on this row of the branch; the rest pass 256 or 288 (presumably an EPLB slot count — TODO confirm)
171- run_single 2 1 32 64 64 true " 0.7" 0 256 " 2048 3072"
172- run_single 3 1 32 128 128 true " 0.7" 0 288 " 4096 6144"
173- run_single 4 1 32 256 256 true " 0.7" 0 288 " 8192 12288"
174- run_single 5 1 32 512 512 true " 0.7" 0 288 " 16384 20480"
175- else # table used when ISL and OSL differ; each trailing "# N" below equals the row's first argument + 8 (looks like a node total — TODO confirm the meaning)
176- run_single 1 1 32 4 4 true " 0.7" 0 0 " 32 64 128 192" # 9
177- run_single 2 1 32 8 8 true " 0.7" 0 0 " 256 384" # 10
178- run_single 4 1 32 16 16 true " 0.7" 0 0 " 512 768" # 12
179- run_single 7 1 32 32 32 true " 0.7" 0 0 " 1024 1536" # 15
180- fi
181- }
182-
183- # MTP Configuration (gen_mtp_size=1,2,3): preset sweeps whose 8th run_single argument is 1, 2 or 3 (presumably gen_mtp_size — confirm against run_single)
184- run_4_gpus_mtp () { # Prints a banner, then issues one fixed table of run_single calls chosen by comparing ISL with OSL; the third argument is 4 on every row.
185- echo " Running 4 GPUs MTP combinations..."
186- if (( ISL == OSL )) ; then # ISL/OSL are not set here; they are read from the caller's environment. This table is used when they are equal.
187- run_single 1 5 4 32 128 false " 0.9" 3 0 " 1 2 4 8 16 32 48" # last argument is one quoted, space-separated number list — presumably the concurrency sweep, TODO confirm
188- run_single 1 5 4 32 128 true " 0.9" 3 0 " 64 128 192"
189- run_single 1 4 4 64 256 true " 0.9" 3 0 " 256 384"
190- run_single 1 3 4 128 512 true " 0.9" 3 0 " 512 768"
191- run_single 1 2 4 256 768 true " 0.9" 2 0 " 1024 1536"
192- run_single 2 3 4 512 1024 true " 0.9" 1 0 " 2048 3072"
193- run_single 1 1 4 768 1536 true " 0.9" 1 0 " 3072 4096"
194- else # table used when ISL and OSL differ
195- run_single 1 5 4 8 32 false " 0.9" 3 0 " 1 2 4 8 12"
196- run_single 1 4 4 16 64 false " 0.9" 3 0 " 16 24"
197- run_single 1 3 4 32 128 false " 0.9" 3 0 " 32 48"
198- run_single 2 3 4 16 64 true " 0.8" 3 0 " 64 96"
199- run_single 1 1 4 32 128 true " 0.8" 3 0 " 128 192"
200- run_single 2 1 4 64 256 true " 0.8" 2 0 " 256 384"
201- run_single 5 2 4 128 512 true " 0.8" 1 0 " 512 768"
202- fi
203- }
204-
205- run_8_gpus_mtp () { # Prints a banner, then issues one fixed table of run_single calls chosen by comparing ISL with OSL; the third argument is 8 and the 8th argument is 1, 2 or 3 on every row.
206- echo " Running 8 GPUs MTP combinations..."
207- if (( ISL == OSL )) ; then # ISL/OSL are not set here; they are read from the caller's environment. This table is used when they are equal.
208- run_single 1 4 8 32 128 false " 0.9" 3 0 " 1 2 4 8 16 32 48" # last argument is one quoted, space-separated number list — presumably the concurrency sweep, TODO confirm
209- run_single 1 4 8 16 64 true " 0.8" 3 0 " 64 128 192"
210- run_single 1 3 8 32 128 true " 0.8" 3 0 " 256 384"
211- run_single 1 2 8 64 256 true " 0.8" 3 0 " 512 768"
212- run_single 1 1 8 128 512 true " 0.8" 3 0 " 1024 1536"
213- run_single 1 1 8 256 512 true " 0.8" 1 0 " 2048 3072"
214- run_single 3 2 8 512 1024 true " 0.8" 1 0 " 4096 6144"
215- run_single 3 2 8 768 1536 true " 0.8" 1 0 " 6144 8192"
216- run_single 3 2 8 1024 2048 true " 0.8" 1 0 " 8192 12288"
217- else # table used when ISL and OSL differ
218- run_single 1 4 8 8 32 false " 0.9" 3 0 " 1 2 4 8 12"
219- run_single 1 3 8 16 64 false " 0.9" 3 0 " 16 24"
220- run_single 1 2 8 32 128 false " 0.9" 3 0 " 32 48"
221- run_single 1 1 8 8 32 true " 0.8" 3 0 " 64 96"
222- run_single 3 2 8 16 64 true " 0.8" 3 0 " 128 192"
223- run_single 5 2 8 32 128 true " 0.8" 3 0 " 256 384"
224- run_single 7 2 8 64 256 true " 0.8" 2 0 " 512 768"
225- run_single 5 1 8 128 256 true " 0.8" 1 0 " 1024 1536"
226- run_single 6 1 8 256 512 true " 0.8" 1 0 " 2048 3072"
227- fi
228- }
229-
230- run_16_gpus_mtp () { # Prints a banner, then issues one fixed table of run_single calls chosen by comparing ISL with OSL; the third argument is 16 on every row.
231- echo " Running 16 GPUs MTP combinations..."
232- if (( ISL == OSL )) ; then # ISL/OSL are not set here; they are read from the caller's environment. This table is used when they are equal.
233- run_single 1 1 16 32 128 true " 0.7" 3 0 " 16 32 64 128 256 512 768" # last argument is one quoted, space-separated number list — presumably the concurrency sweep, TODO confirm
234- run_single 1 1 16 64 256 true " 0.7" 3 256 " 1024 1536"
235- run_single 2 1 16 128 256 true " 0.7" 1 288 " 2048 3072"
236- run_single 2 1 16 256 512 true " 0.7" 1 288 " 4096 6144"
237- run_single 3 1 16 512 1024 true " 0.7" 1 288 " 8192 12288"
238- run_single 3 1 16 768 1536 true " 0.7" 1 288 " 12288 16384"
239- run_single 3 1 16 1024 1024 true " 0.75" 0 288 " 16384 20480" # NOTE(review): 8th argument is 0 and the fraction is 0.75 here, unlike every other row in this function (it matches the last equal-length row of run_16_gpus_mtp0) — confirm this is intentional
240- else # table used when ISL and OSL differ; each trailing "# N" below equals the row's first argument + 4, except the rows marked 13 and 15 (which would be 12 and 14) — NOTE(review): verify those two markers or their first arguments
241- run_single 1 1 16 4 16 true " 0.8" 3 0 " 16 32 64 96" # 5
242- run_single 2 1 16 8 32 true " 0.8" 3 0 " 128 192" # 6
243- run_single 4 1 16 16 64 true " 0.8" 3 0 " 256 384" # 8
244- run_single 6 1 16 32 128 true " 0.8" 3 0 " 512 768" # 10
245- run_single 8 1 16 64 256 true " 0.8" 2 256 " 1024 1536" # 13
246- run_single 10 1 16 128 256 true " 0.8" 1 256 " 2048 3072" # 15
247- run_single 12 1 16 256 512 true " 0.8" 1 256 " 4096 6144" # 16
248- fi
249-
250- }
251-
252- run_32_gpus_mtp () { # Prints a banner, then issues one fixed table of run_single calls chosen by comparing ISL with OSL; the third argument is 32 and the 8th argument is 1 or 3 on every row.
253- echo " Running 32 GPUs MTP combinations..."
254- if (( ISL == OSL )) ; then # ISL/OSL are not set here; they are read from the caller's environment. This table is used when they are equal.
255- run_single 1 1 32 16 64 true " 0.6" 3 0 " 32 64 128 256 512 768" # 9th argument is 0 only on this row of the branch; the rest pass 288 (presumably an EPLB slot count — TODO confirm)
256- run_single 2 1 32 32 128 true " 0.6" 3 288 " 1024 1536"
257- run_single 3 1 32 64 256 true " 0.6" 3 288 " 2048 3072"
258- run_single 3 1 32 128 256 true " 0.6" 1 288 " 4096 6144"
259- run_single 4 1 32 256 512 true " 0.6" 1 288 " 8192 12288"
260- run_single 5 1 32 512 1024 true " 0.6" 1 288 " 16384 20480"
261- else # table used when ISL and OSL differ; each trailing "# N" below equals the row's first argument + 8 (looks like a node total — TODO confirm the meaning)
262- run_single 1 1 32 1 4 true " 0.7" 3 0 " 32 48" # 9
263- run_single 2 1 32 2 8 true " 0.7" 3 0 " 64 96" # 10
264- run_single 3 1 32 4 16 true " 0.7" 3 0 " 128 192" # 11
265- run_single 5 1 32 8 32 true " 0.7" 3 0 " 256 384" # 13
266- run_single 8 1 32 16 64 true " 0.7" 3 256 " 512 768" # 16
267- fi
268- }
269-
270105# Main function
271106main () {
272107 local mtp_mode=$1
@@ -279,99 +114,6 @@ main() {
279114 fi
280115
281116 case $mode in
282- " all" )
283- echo " Running all GPU configurations for $mtp_mode mode..."
284- if [[ " $mtp_mode " == " mtp=off" ]]; then
285- run_4_gpus_mtp0
286- run_8_gpus_mtp0
287- run_16_gpus_mtp0
288- run_32_gpus_mtp0
289- else
290- run_4_gpus_mtp
291- run_8_gpus_mtp
292- run_16_gpus_mtp
293- run_32_gpus_mtp
294- fi
295- ;;
296- " pareto" )
297- # 1k/1k
298- export ISL=1024
299- export OSL=1024
300- export CACHE_TRANSCEIVER_MAX_NUM_TOKENS=4608
301-
302- if [[ " $mtp_mode " == " mtp=off" ]]; then
303- # 1k/1k mtp=off
304- run_single 1 4 8 128 128 false " 0.9" 0 0 " 1 2 4 8 16 32 64 141"
305- run_single 1 1 32 32 32 true " 0.7" 0 0 " 1075"
306- run_single 1 1 16 64 64 true " 0.75" 0 0 " 1075"
307- run_single 2 1 16 256 256 true " 0.75" 0 0 " 2048 4300"
308- run_single 1 1 8 512 512 true " 0.8" 0 0 " 4300"
309-
310- else
311- # 1k/1k mtp=on
312- run_single 1 4 8 32 128 false " 0.9" 3 0 " 1 2 4 8 16 36"
313- run_single 1 1 16 64 256 true " 0.7" 3 0 " 512 1075"
314- run_single 2 1 16 128 256 true " 0.7" 1 0 " 2150"
315- run_single 1 1 32 16 64 true " 0.6" 3 0 " 512"
316- run_single 1 1 8 256 512 true " 0.8" 1 0 " 2252"
317- fi
318-
319- # 8k/1k
320- export ISL=8192
321- export OSL=1024
322- export CACHE_TRANSCEIVER_MAX_NUM_TOKENS=8448
323-
324- if [[ " $mtp_mode " == " mtp=off" ]]; then
325- # 8k/1k mtp=off
326- run_single 1 3 8 32 32 false " 0.9" 0 0 " 1 2 4 8 16 34"
327- run_single 4 1 32 16 16 true " 0.7" 0 0 " 256 538"
328- run_single 7 1 32 32 32 true " 0.7" 0 0 " 1075" # remove if need 5 cofigs
329- run_single 6 1 16 64 64 true " 0.75" 0 0 " 1075"
330- run_single 8 1 16 128 128 true " 0.75" 0 0 " 2150"
331- run_single 5 1 8 256 256 true " 0.8" 0 0 " 2150"
332- else
333- # 8k/1k mtp=on
334- run_single 1 3 8 16 64 false " 0.9" 3 0 " 1 2 4 8 18"
335- run_single 5 1 32 8 32 true " 0.7" 3 0 " 128 269"
336- run_single 8 1 32 16 64 true " 0.7" 3 0 " 538"
337- run_single 6 1 16 32 128 true " 0.75" 3 0 " 538" # remove if need 5 configs
338- run_single 8 1 16 64 256 true " 0.75" 2 0 " 1075"
339- run_single 5 1 8 128 256 true " 0.8" 1 0 " 1075" # remove if need 5 configs
340- run_single 6 1 8 256 512 true " 0.8" 1 0 " 2150"
341- fi
342- ;;
343- " 4GPU" )
344- echo " Running 4 GPUs combinations for $mtp_mode mode..."
345- if [[ " $mtp_mode " == " mtp=off" ]]; then
346- run_4_gpus_mtp0
347- else
348- run_4_gpus_mtp
349- fi
350- ;;
351- " 8GPU" )
352- echo " Running 8 GPUs combinations for $mtp_mode mode..."
353- if [[ " $mtp_mode " == " mtp=off" ]]; then
354- run_8_gpus_mtp0
355- else
356- run_8_gpus_mtp
357- fi
358- ;;
359- " 16GPU" )
360- echo " Running 16 GPUs combinations for $mtp_mode mode..."
361- if [[ " $mtp_mode " == " mtp=off" ]]; then
362- run_16_gpus_mtp0
363- else
364- run_16_gpus_mtp
365- fi
366- ;;
367- " 32GPU" )
368- echo " Running 32 GPUs combinations for $mtp_mode mode..."
369- if [[ " $mtp_mode " == " mtp=off" ]]; then
370- run_32_gpus_mtp0
371- else
372- run_32_gpus_mtp
373- fi
374- ;;
375117 " tep" )
376118 if [ $# -ne 14 ]; then
377119 echo " Error: TEP mode requires 14 additional parameters (including mtp_mode)"
@@ -417,7 +159,7 @@ main() {
417159
418160 echo " Running DEP mode ($mtp_mode ) with ctx_num=$ctx_num , ctx_tp_size=$ctx_tp_size , ctx_enable_attention_dp=$ctx_enable_attention_dp , gen_num=$gen_num , gen_tp_size=$gen_tp_size , gen_ep_size=$gen_tp_size , gen_batch_size=$gen_batch_size , gen_max_num_tokens=$gen_max_num_tokens , gen_gpu_memory_fraction=$gen_gpu_memory_fraction , gen_mtp_size=$gen_mtp_size , gen_eplb_num_slots=$gen_eplb_num_slots , gen_concurrency_list=\" $gen_concurrency_list \" "
419161
420- run_single $ctx_num $ctx_tp_size $ctx_ep_size $ctx_enable_attention_dp $gen_num $gen_tp_size $gen_tp_size $gen_batch_size $gen_max_num_tokens $gen_gpu_memory_fraction $gen_mtp_size $gen_eplb_num_slots " $gen_concurrency_list "
162+ run_single $ctx_num $ctx_tp_size $ctx_ep_size $ctx_enable_attention_dp $gen_num $gen_tp_size $gen_tp_size $gen_batch_size $gen_max_num_tokens true $gen_gpu_memory_fraction $gen_mtp_size $gen_eplb_num_slots " $gen_concurrency_list "
421163 ;;
422164 " tp" )
423165 if [ $# -ne 14 ]; then
0 commit comments