@@ -322,6 +322,10 @@ void PrefillRpcServer::pollRemoteOutput(PrefillGenerateContext& prefill_context)
322322 }
323323 int64_t cost_time_us = currentTimeUs () - prefill_context.request_begin_time_us ;
324324 for (size_t i = 0 ; i < response.generate_outputs_size (); i++) {
325+ auto decode_total_reuse_len = response.generate_outputs (i).aux_info ().total_reuse_len ();
326+ auto decode_local_reuse_len = response.generate_outputs (i).aux_info ().local_reuse_len ();
327+ auto decode_remote_reuse_len = response.generate_outputs (i).aux_info ().remote_reuse_len ();
328+
325329 response.mutable_generate_outputs (i)->mutable_aux_info ()->set_first_token_cost_time_us (first_token_rt_us);
326330 response.mutable_generate_outputs (i)->mutable_aux_info ()->set_cost_time_us (cost_time_us);
327331
@@ -337,11 +341,11 @@ void PrefillRpcServer::pollRemoteOutput(PrefillGenerateContext& prefill_context)
337341 prefill_remote_reuse_len);
338342
339343 response.mutable_generate_outputs (i)->mutable_aux_info ()->set_decode_total_reuse_len (
340- response. generate_outputs (i). aux_info (). total_reuse_len () );
344+ decode_total_reuse_len );
341345 response.mutable_generate_outputs (i)->mutable_aux_info ()->set_decode_local_reuse_len (
342- response. generate_outputs (i). aux_info (). local_reuse_len () );
346+ decode_local_reuse_len );
343347 response.mutable_generate_outputs (i)->mutable_aux_info ()->set_decode_remote_reuse_len (
344- response. generate_outputs (i). aux_info (). remote_reuse_len () );
348+ decode_remote_reuse_len );
345349 }
346350 if (!prefill_context.rpc_context .writer ->Write (response)) {
347351 RTP_LLM_LOG_WARNING (" request [%ld] write outputs pb failed" , request_id);
0 commit comments