Skip to content

Commit 5477ae5

Browse files
committed
More refactor
Signed-off-by: jthomson04 <[email protected]>
1 parent 1473f1a commit 5477ae5

File tree

1 file changed

+1
-259
lines changed

1 file changed

+1
-259
lines changed

components/backends/trtllm/performance_sweeps/submit_disagg.sh

Lines changed: 1 addition & 259 deletions
Original file line numberDiff line numberDiff line change
@@ -102,171 +102,6 @@ run_single() {
102102
set +x
103103
}
104104

105-
# MTP0 Configuration (gen_mtp_size=0)
106-
run_4_gpus_mtp0() {
107-
echo "Running 4 GPUs MTP0 combinations..."
108-
if (( ISL == OSL )); then
109-
run_single 1 5 4 128 128 false "0.9" 0 0 "1 2 4 8 16 32 64 128 192"
110-
run_single 1 5 4 64 64 true "0.85" 0 0 "256 384"
111-
run_single 1 4 4 128 128 true "0.85" 0 0 "512 768"
112-
run_single 2 5 4 256 256 true "0.85" 0 0 "1024 1536"
113-
run_single 1 2 4 512 512 true "0.85" 0 0 "2048 3072"
114-
run_single 2 3 4 768 768 true "0.85" 0 0 "3072 4096"
115-
else
116-
run_single 1 5 4 16 16 false "0.9" 0 0 "1 2 4 8 16 24"
117-
run_single 1 4 4 32 32 false "0.9" 0 0 "32 48"
118-
run_single 2 5 4 64 64 false "0.9" 0 0 "64 96"
119-
run_single 1 2 4 128 128 false "0.9" 0 0 "128 192"
120-
run_single 1 1 4 64 64 true "0.8" 0 0 "256 384"
121-
run_single 3 2 4 128 128 true "0.8" 0 0 "512 768"
122-
fi
123-
}
124-
125-
run_8_gpus_mtp0() {
126-
echo "Running 8 GPUs MTP0 combinations..."
127-
if (( ISL == OSL )); then
128-
run_single 1 4 8 128 128 false "0.9" 0 0 "1 2 4 8 16 32 64 128 192 256"
129-
run_single 1 4 8 32 32 true "0.8" 0 0 "256 384"
130-
run_single 1 3 8 64 64 true "0.8" 0 0 "512 768"
131-
run_single 1 2 8 128 128 true "0.8" 0 0 "1024 1536"
132-
run_single 1 1 8 256 256 true "0.8" 0 0 "2048 3072"
133-
run_single 1 1 8 512 512 true "0.8" 0 0 "4096 6144"
134-
run_single 3 2 8 768 768 true "0.8" 0 0 "6144 8192"
135-
run_single 3 2 8 1024 1024 true "0.8" 0 0 "8192 12288"
136-
else
137-
run_single 1 4 8 16 16 false "0.9" 0 0 "1 2 4 8 16 24"
138-
run_single 1 3 8 32 32 false "0.9" 0 0 "32 48"
139-
run_single 1 2 8 64 64 false "0.9" 0 0 "64 96"
140-
run_single 1 1 8 128 128 false "0.9" 0 0 "128 192"
141-
run_single 3 2 8 32 32 true "0.8" 0 0 "256 384"
142-
run_single 5 2 8 64 64 true "0.8" 0 0 "512 768"
143-
run_single 4 1 8 128 128 true "0.8" 0 0 "1024 1536"
144-
run_single 5 1 8 256 256 true "0.8" 0 0 "2048 3072"
145-
fi
146-
}
147-
148-
run_16_gpus_mtp0() {
149-
echo "Running 16 GPUs MTP0 combinations..."
150-
if (( ISL == OSL )); then
151-
run_single 1 1 16 64 64 true "0.75" 0 0 "16 32 64 128 256 512 1024 1536"
152-
run_single 2 1 16 128 128 true "0.75" 0 256 "2048 3072"
153-
run_single 2 1 16 256 256 true "0.75" 0 256 "4096 6144"
154-
run_single 3 1 16 512 512 true "0.75" 0 256 "8192 12288"
155-
run_single 3 1 16 768 768 true "0.75" 0 256 "12288 16384"
156-
run_single 3 1 16 1024 1024 true "0.75" 0 288 "16384 20480"
157-
else
158-
run_single 1 1 16 8 8 true "0.8" 0 0 "16 32 64 128 192" # 5
159-
run_single 2 1 16 16 16 true "0.8" 0 0 "256 384" # 6
160-
run_single 3 1 16 32 32 true "0.8" 0 0 "512 768" # 7
161-
run_single 6 1 16 64 64 true "0.8" 0 0 "1024 1536" # 10
162-
run_single 8 1 16 128 128 true "0.8" 0 256 "2048 3072" # 12
163-
run_single 10 1 16 256 256 true "0.8" 0 256 "4096 6144" # 14
164-
fi
165-
}
166-
167-
run_32_gpus_mtp0() {
168-
echo "Running 32 GPUs MTP0 combinations..."
169-
if (( ISL == OSL )); then
170-
run_single 1 1 32 32 32 true "0.7" 0 0 "32 64 128 256 512 1024 1536"
171-
run_single 2 1 32 64 64 true "0.7" 0 256 "2048 3072"
172-
run_single 3 1 32 128 128 true "0.7" 0 288 "4096 6144"
173-
run_single 4 1 32 256 256 true "0.7" 0 288 "8192 12288"
174-
run_single 5 1 32 512 512 true "0.7" 0 288 "16384 20480"
175-
else
176-
run_single 1 1 32 4 4 true "0.7" 0 0 "32 64 128 192" # 9
177-
run_single 2 1 32 8 8 true "0.7" 0 0 "256 384" # 10
178-
run_single 4 1 32 16 16 true "0.7" 0 0 "512 768" # 12
179-
run_single 7 1 32 32 32 true "0.7" 0 0 "1024 1536" # 15
180-
fi
181-
}
182-
183-
# MTP Configuration (gen_mtp_size=1,2,3)
184-
run_4_gpus_mtp() {
185-
echo "Running 4 GPUs MTP combinations..."
186-
if (( ISL == OSL )); then
187-
run_single 1 5 4 32 128 false "0.9" 3 0 "1 2 4 8 16 32 48"
188-
run_single 1 5 4 32 128 true "0.9" 3 0 "64 128 192"
189-
run_single 1 4 4 64 256 true "0.9" 3 0 "256 384"
190-
run_single 1 3 4 128 512 true "0.9" 3 0 "512 768"
191-
run_single 1 2 4 256 768 true "0.9" 2 0 "1024 1536"
192-
run_single 2 3 4 512 1024 true "0.9" 1 0 "2048 3072"
193-
run_single 1 1 4 768 1536 true "0.9" 1 0 "3072 4096"
194-
else
195-
run_single 1 5 4 8 32 false "0.9" 3 0 "1 2 4 8 12"
196-
run_single 1 4 4 16 64 false "0.9" 3 0 "16 24"
197-
run_single 1 3 4 32 128 false "0.9" 3 0 "32 48"
198-
run_single 2 3 4 16 64 true "0.8" 3 0 "64 96"
199-
run_single 1 1 4 32 128 true "0.8" 3 0 "128 192"
200-
run_single 2 1 4 64 256 true "0.8" 2 0 "256 384"
201-
run_single 5 2 4 128 512 true "0.8" 1 0 "512 768"
202-
fi
203-
}
204-
205-
run_8_gpus_mtp() {
206-
echo "Running 8 GPUs MTP combinations..."
207-
if (( ISL == OSL )); then
208-
run_single 1 4 8 32 128 false "0.9" 3 0 "1 2 4 8 16 32 48"
209-
run_single 1 4 8 16 64 true "0.8" 3 0 "64 128 192"
210-
run_single 1 3 8 32 128 true "0.8" 3 0 "256 384"
211-
run_single 1 2 8 64 256 true "0.8" 3 0 "512 768"
212-
run_single 1 1 8 128 512 true "0.8" 3 0 "1024 1536"
213-
run_single 1 1 8 256 512 true "0.8" 1 0 "2048 3072"
214-
run_single 3 2 8 512 1024 true "0.8" 1 0 "4096 6144"
215-
run_single 3 2 8 768 1536 true "0.8" 1 0 "6144 8192"
216-
run_single 3 2 8 1024 2048 true "0.8" 1 0 "8192 12288"
217-
else
218-
run_single 1 4 8 8 32 false "0.9" 3 0 "1 2 4 8 12"
219-
run_single 1 3 8 16 64 false "0.9" 3 0 "16 24"
220-
run_single 1 2 8 32 128 false "0.9" 3 0 "32 48"
221-
run_single 1 1 8 8 32 true "0.8" 3 0 "64 96"
222-
run_single 3 2 8 16 64 true "0.8" 3 0 "128 192"
223-
run_single 5 2 8 32 128 true "0.8" 3 0 "256 384"
224-
run_single 7 2 8 64 256 true "0.8" 2 0 "512 768"
225-
run_single 5 1 8 128 256 true "0.8" 1 0 "1024 1536"
226-
run_single 6 1 8 256 512 true "0.8" 1 0 "2048 3072"
227-
fi
228-
}
229-
230-
run_16_gpus_mtp() {
231-
echo "Running 16 GPUs MTP combinations..."
232-
if (( ISL == OSL )); then
233-
run_single 1 1 16 32 128 true "0.7" 3 0 "16 32 64 128 256 512 768"
234-
run_single 1 1 16 64 256 true "0.7" 3 256 "1024 1536"
235-
run_single 2 1 16 128 256 true "0.7" 1 288 "2048 3072"
236-
run_single 2 1 16 256 512 true "0.7" 1 288 "4096 6144"
237-
run_single 3 1 16 512 1024 true "0.7" 1 288 "8192 12288"
238-
run_single 3 1 16 768 1536 true "0.7" 1 288 "12288 16384"
239-
run_single 3 1 16 1024 1024 true "0.75" 0 288 "16384 20480"
240-
else
241-
run_single 1 1 16 4 16 true "0.8" 3 0 "16 32 64 96" # 5
242-
run_single 2 1 16 8 32 true "0.8" 3 0 "128 192" # 6
243-
run_single 4 1 16 16 64 true "0.8" 3 0 "256 384" # 8
244-
run_single 6 1 16 32 128 true "0.8" 3 0 "512 768" # 10
245-
run_single 8 1 16 64 256 true "0.8" 2 256 "1024 1536" # 13
246-
run_single 10 1 16 128 256 true "0.8" 1 256 "2048 3072" # 15
247-
run_single 12 1 16 256 512 true "0.8" 1 256 "4096 6144" # 16
248-
fi
249-
250-
}
251-
252-
run_32_gpus_mtp() {
253-
echo "Running 32 GPUs MTP combinations..."
254-
if (( ISL == OSL )); then
255-
run_single 1 1 32 16 64 true "0.6" 3 0 "32 64 128 256 512 768"
256-
run_single 2 1 32 32 128 true "0.6" 3 288 "1024 1536"
257-
run_single 3 1 32 64 256 true "0.6" 3 288 "2048 3072"
258-
run_single 3 1 32 128 256 true "0.6" 1 288 "4096 6144"
259-
run_single 4 1 32 256 512 true "0.6" 1 288 "8192 12288"
260-
run_single 5 1 32 512 1024 true "0.6" 1 288 "16384 20480"
261-
else
262-
run_single 1 1 32 1 4 true "0.7" 3 0 "32 48" # 9
263-
run_single 2 1 32 2 8 true "0.7" 3 0 "64 96" # 10
264-
run_single 3 1 32 4 16 true "0.7" 3 0 "128 192" # 11
265-
run_single 5 1 32 8 32 true "0.7" 3 0 "256 384" # 13
266-
run_single 8 1 32 16 64 true "0.7" 3 256 "512 768" # 16
267-
fi
268-
}
269-
270105
# Main function
271106
main() {
272107
local mtp_mode=$1
@@ -279,99 +114,6 @@ main() {
279114
fi
280115

281116
case $mode in
282-
"all")
283-
echo "Running all GPU configurations for $mtp_mode mode..."
284-
if [[ "$mtp_mode" == "mtp=off" ]]; then
285-
run_4_gpus_mtp0
286-
run_8_gpus_mtp0
287-
run_16_gpus_mtp0
288-
run_32_gpus_mtp0
289-
else
290-
run_4_gpus_mtp
291-
run_8_gpus_mtp
292-
run_16_gpus_mtp
293-
run_32_gpus_mtp
294-
fi
295-
;;
296-
"pareto")
297-
# 1k/1k
298-
export ISL=1024
299-
export OSL=1024
300-
export CACHE_TRANSCEIVER_MAX_NUM_TOKENS=4608
301-
302-
if [[ "$mtp_mode" == "mtp=off" ]]; then
303-
# 1k/1k mtp=off
304-
run_single 1 4 8 128 128 false "0.9" 0 0 "1 2 4 8 16 32 64 141"
305-
run_single 1 1 32 32 32 true "0.7" 0 0 "1075"
306-
run_single 1 1 16 64 64 true "0.75" 0 0 "1075"
307-
run_single 2 1 16 256 256 true "0.75" 0 0 "2048 4300"
308-
run_single 1 1 8 512 512 true "0.8" 0 0 "4300"
309-
310-
else
311-
# 1k/1k mtp=on
312-
run_single 1 4 8 32 128 false "0.9" 3 0 "1 2 4 8 16 36"
313-
run_single 1 1 16 64 256 true "0.7" 3 0 "512 1075"
314-
run_single 2 1 16 128 256 true "0.7" 1 0 "2150"
315-
run_single 1 1 32 16 64 true "0.6" 3 0 "512"
316-
run_single 1 1 8 256 512 true "0.8" 1 0 "2252"
317-
fi
318-
319-
# 8k/1k
320-
export ISL=8192
321-
export OSL=1024
322-
export CACHE_TRANSCEIVER_MAX_NUM_TOKENS=8448
323-
324-
if [[ "$mtp_mode" == "mtp=off" ]]; then
325-
# 8k/1k mtp=off
326-
run_single 1 3 8 32 32 false "0.9" 0 0 "1 2 4 8 16 34"
327-
run_single 4 1 32 16 16 true "0.7" 0 0 "256 538"
328-
run_single 7 1 32 32 32 true "0.7" 0 0 "1075" # remove if need 5 configs
329-
run_single 6 1 16 64 64 true "0.75" 0 0 "1075"
330-
run_single 8 1 16 128 128 true "0.75" 0 0 "2150"
331-
run_single 5 1 8 256 256 true "0.8" 0 0 "2150"
332-
else
333-
# 8k/1k mtp=on
334-
run_single 1 3 8 16 64 false "0.9" 3 0 "1 2 4 8 18"
335-
run_single 5 1 32 8 32 true "0.7" 3 0 "128 269"
336-
run_single 8 1 32 16 64 true "0.7" 3 0 "538"
337-
run_single 6 1 16 32 128 true "0.75" 3 0 "538" # remove if need 5 configs
338-
run_single 8 1 16 64 256 true "0.75" 2 0 "1075"
339-
run_single 5 1 8 128 256 true "0.8" 1 0 "1075" # remove if need 5 configs
340-
run_single 6 1 8 256 512 true "0.8" 1 0 "2150"
341-
fi
342-
;;
343-
"4GPU")
344-
echo "Running 4 GPUs combinations for $mtp_mode mode..."
345-
if [[ "$mtp_mode" == "mtp=off" ]]; then
346-
run_4_gpus_mtp0
347-
else
348-
run_4_gpus_mtp
349-
fi
350-
;;
351-
"8GPU")
352-
echo "Running 8 GPUs combinations for $mtp_mode mode..."
353-
if [[ "$mtp_mode" == "mtp=off" ]]; then
354-
run_8_gpus_mtp0
355-
else
356-
run_8_gpus_mtp
357-
fi
358-
;;
359-
"16GPU")
360-
echo "Running 16 GPUs combinations for $mtp_mode mode..."
361-
if [[ "$mtp_mode" == "mtp=off" ]]; then
362-
run_16_gpus_mtp0
363-
else
364-
run_16_gpus_mtp
365-
fi
366-
;;
367-
"32GPU")
368-
echo "Running 32 GPUs combinations for $mtp_mode mode..."
369-
if [[ "$mtp_mode" == "mtp=off" ]]; then
370-
run_32_gpus_mtp0
371-
else
372-
run_32_gpus_mtp
373-
fi
374-
;;
375117
"tep")
376118
if [ $# -ne 14 ]; then
377119
echo "Error: TEP mode requires 14 additional parameters (including mtp_mode)"
@@ -417,7 +159,7 @@ main() {
417159

418160
echo "Running DEP mode ($mtp_mode) with ctx_num=$ctx_num, ctx_tp_size=$ctx_tp_size, ctx_enable_attention_dp=$ctx_enable_attention_dp, gen_num=$gen_num, gen_tp_size=$gen_tp_size, gen_ep_size=$gen_tp_size, gen_batch_size=$gen_batch_size, gen_max_num_tokens=$gen_max_num_tokens, gen_gpu_memory_fraction=$gen_gpu_memory_fraction, gen_mtp_size=$gen_mtp_size, gen_eplb_num_slots=$gen_eplb_num_slots, gen_concurrency_list=\"$gen_concurrency_list\""
419161

420-
run_single $ctx_num $ctx_tp_size $ctx_ep_size $ctx_enable_attention_dp $gen_num $gen_tp_size $gen_tp_size $gen_batch_size $gen_max_num_tokens $gen_gpu_memory_fraction $gen_mtp_size $gen_eplb_num_slots "$gen_concurrency_list"
162+
run_single $ctx_num $ctx_tp_size $ctx_ep_size $ctx_enable_attention_dp $gen_num $gen_tp_size $gen_tp_size $gen_batch_size $gen_max_num_tokens true $gen_gpu_memory_fraction $gen_mtp_size $gen_eplb_num_slots "$gen_concurrency_list"
421163
;;
422164
"tp")
423165
if [ $# -ne 14 ]; then

0 commit comments

Comments
 (0)