Skip to content

Commit 4481091

Browse files
committed
fix bench script + align concurrencies
Signed-off-by: jthomson04 <[email protected]>
1 parent e41b80d commit 4481091

File tree

1 file changed

+2
-24
lines changed
  • components/backends/trtllm/performance_sweeps/scripts

1 file changed

+2
-24
lines changed

components/backends/trtllm/performance_sweeps/scripts/bench.sh

Lines changed: 2 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -178,8 +178,6 @@ fi
178178
mkdir -p ${log_path}/results
179179
echo "Starting benchmark..."
180180
for concurrency in ${concurrency_list}; do
181-
original_concurrency=${concurrency}
182-
concurrency=$((concurrency * num_gen_servers))
183181
num_prompts=$((concurrency * multi_round))
184182
echo "Benchmarking with concurrency ${concurrency} ... ${num_prompts} prompts"
185183
mkdir -p ${log_path}/concurrency_${concurrency}
@@ -204,7 +202,7 @@ for concurrency in ${concurrency_list}; do
204202
--port ${port} \
205203
--save-result \
206204
--result-dir "${log_path}/results" \
207-
--result-filename "results_concurrency_${original_concurrency}_gpus_${total_gpus}_ctx_${prefill_gpus}_gen_${decode_gpus}.json"
205+
--result-filename "results_concurrency_${concurrency}_gpus_${total_gpus}_ctx_${prefill_gpus}_gen_${decode_gpus}.json"
208206
else
209207
aiperf profile \
210208
--model ${model} \
@@ -226,32 +224,12 @@ for concurrency in ${concurrency_list}; do
226224
--warmup-request-count $(($concurrency*2)) \
227225
--num-dataset-entries ${num_prompts} \
228226
--random-seed 100 \
229-
--artifact-dir "${log_path}/results/concurrency_${original_concurrency}" \
227+
--artifact-dir "${log_path}/results/concurrency_${concurrency}" \
230228
--ui simple \
231229
-v \
232230
-H 'Authorization: Bearer NOT USED' \
233231
-H 'Accept: text/event-stream'
234232
fi
235-
python3 ${SCRIPTS_DIR}/scripts/bench/benchmark_serving.py \
236-
--served-model-name ${model} \
237-
--model ${model_path} \
238-
--dataset-name random \
239-
--num-prompts "$num_prompts" \
240-
--random-input-len ${isl} \
241-
--random-output-len ${osl} \
242-
--random-range-ratio 0.8 \
243-
--use-chat-template \
244-
--ignore-eos \
245-
--use-chat-template \
246-
--backend "dynamo" \
247-
--endpoint "/v1/completions" \
248-
--percentile-metrics ttft,tpot,itl,e2el \
249-
--max-concurrency "$concurrency" \
250-
--host ${hostname} \
251-
--port ${port} \
252-
--save-result \
253-
--result-dir "${log_path}/results" \
254-
--result-filename "results_concurrency_${original_concurrency}_gpus_${total_gpus}_ctx_${prefill_gpus}_gen_${decode_gpus}.json"
255233

256234
echo "Benchmark with concurrency ${concurrency} done"
257235
done

0 commit comments

Comments
 (0)