178178mkdir -p ${log_path} /results
179179echo " Starting benchmark..."
180180for concurrency in ${concurrency_list} ; do
181- original_concurrency=${concurrency}
182- concurrency=$(( concurrency * num_gen_servers))
183181 num_prompts=$(( concurrency * multi_round))
184182 echo " Benchmarking with concurrency ${concurrency} ... ${num_prompts} prompts"
185183 mkdir -p ${log_path} /concurrency_${concurrency}
@@ -204,7 +202,7 @@ for concurrency in ${concurrency_list}; do
204202 --port ${port} \
205203 --save-result \
206204 --result-dir " ${log_path} /results" \
207- --result-filename " results_concurrency_${original_concurrency } _gpus_${total_gpus} _ctx_${prefill_gpus} _gen_${decode_gpus} .json"
205+ --result-filename " results_concurrency_${concurrency } _gpus_${total_gpus} _ctx_${prefill_gpus} _gen_${decode_gpus} .json"
208206 else
209207 aiperf profile \
210208 --model ${model} \
@@ -226,32 +224,12 @@ for concurrency in ${concurrency_list}; do
226224 --warmup-request-count $(( $concurrency * 2 )) \
227225 --num-dataset-entries ${num_prompts} \
228226 --random-seed 100 \
229- --artifact-dir " ${log_path} /results/concurrency_${original_concurrency } " \
227+ --artifact-dir " ${log_path} /results/concurrency_${concurrency } " \
230228 --ui simple \
231229 -v \
232230 -H ' Authorization: Bearer NOT USED' \
233231 -H ' Accept: text/event-stream'
234232 fi
235- python3 ${SCRIPTS_DIR} /scripts/bench/benchmark_serving.py \
236- --served-model-name ${model} \
237- --model ${model_path} \
238- --dataset-name random \
239- --num-prompts " $num_prompts " \
240- --random-input-len ${isl} \
241- --random-output-len ${osl} \
242- --random-range-ratio 0.8 \
243- --use-chat-template \
244- --ignore-eos \
245- --use-chat-template \
246- --backend " dynamo" \
247- --endpoint " /v1/completions" \
248- --percentile-metrics ttft,tpot,itl,e2el \
249- --max-concurrency " $concurrency " \
250- --host ${hostname} \
251- --port ${port} \
252- --save-result \
253- --result-dir " ${log_path} /results" \
254- --result-filename " results_concurrency_${original_concurrency} _gpus_${total_gpus} _ctx_${prefill_gpus} _gen_${decode_gpus} .json"
255233
256234 echo " Benchmark with concurrency ${concurrency} done"
257235done
0 commit comments