@@ -11,8 +11,8 @@ use async_nats::client::{
1111} ;
1212
1313use crate :: {
14- model_card:: ModelDeploymentCard ,
15- protocols:: common:: llm_backend:: { LLMEngineOutput , PreprocessedRequest } ,
14+ model_card:: ModelDeploymentCard , preprocessor :: BackendOutput ,
15+ protocols:: common:: llm_backend:: PreprocessedRequest ,
1616} ;
1717
1818use dynamo_runtime:: {
@@ -44,16 +44,16 @@ impl Migration {
4444impl
4545 Operator <
4646 SingleIn < PreprocessedRequest > ,
47- ManyOut < Annotated < LLMEngineOutput > > ,
47+ ManyOut < Annotated < BackendOutput > > ,
4848 SingleIn < PreprocessedRequest > ,
49- ManyOut < Annotated < LLMEngineOutput > > ,
49+ ManyOut < Annotated < BackendOutput > > ,
5050 > for Migration
5151{
5252 async fn generate (
5353 & self ,
5454 request : SingleIn < PreprocessedRequest > ,
55- next : ServerStreamingEngine < PreprocessedRequest , Annotated < LLMEngineOutput > > ,
56- ) -> Result < ManyOut < Annotated < LLMEngineOutput > > > {
55+ next : ServerStreamingEngine < PreprocessedRequest , Annotated < BackendOutput > > ,
56+ ) -> Result < ManyOut < Annotated < BackendOutput > > > {
5757 let ( preprocessed_request, context) = request. transfer ( ( ) ) ;
5858 let engine_ctx = context. context ( ) ;
5959 let engine_ctx_ = engine_ctx. clone ( ) ;
7373struct RetryManager {
7474 context : Arc < dyn AsyncEngineContext > ,
7575 request : PreprocessedRequest ,
76- next_generate : ServerStreamingEngine < PreprocessedRequest , Annotated < LLMEngineOutput > > ,
77- next_stream : Option < ManyOut < Annotated < LLMEngineOutput > > > ,
76+ next_generate : ServerStreamingEngine < PreprocessedRequest , Annotated < BackendOutput > > ,
77+ next_stream : Option < ManyOut < Annotated < BackendOutput > > > ,
7878 retries_left : u32 ,
7979}
8080
8181impl RetryManager {
8282 pub async fn build (
8383 context : Arc < dyn AsyncEngineContext > ,
8484 preprocessed_request : PreprocessedRequest ,
85- next : ServerStreamingEngine < PreprocessedRequest , Annotated < LLMEngineOutput > > ,
85+ next : ServerStreamingEngine < PreprocessedRequest , Annotated < BackendOutput > > ,
8686 retries_left : u32 ,
8787 ) -> Result < Self > {
8888 let mut slf = Self {
@@ -96,7 +96,7 @@ impl RetryManager {
9696 Ok ( slf)
9797 }
9898
99- pub async fn next ( & mut self ) -> Option < Annotated < LLMEngineOutput > > {
99+ pub async fn next ( & mut self ) -> Option < Annotated < BackendOutput > > {
100100 loop {
101101 let response_stream = match self . next_stream . as_mut ( ) {
102102 Some ( stream) => stream,
@@ -128,7 +128,7 @@ impl RetryManager {
128128 }
129129
130130 async fn new_stream ( & mut self ) -> Result < ( ) > {
131- let mut response_stream: Option < Result < ManyOut < Annotated < LLMEngineOutput > > > > = None ;
131+ let mut response_stream: Option < Result < ManyOut < Annotated < BackendOutput > > > > = None ;
132132 while self . retries_left > 0 {
133133 self . retries_left -= 1 ;
134134 let request = Context :: with_id ( self . request . clone ( ) , self . context . id ( ) . to_string ( ) ) ;
@@ -162,7 +162,7 @@ impl RetryManager {
162162 }
163163 }
164164
165- fn track_response ( & mut self , response : & Annotated < LLMEngineOutput > ) {
165+ fn track_response ( & mut self , response : & Annotated < BackendOutput > ) {
166166 if self . retries_left == 0 {
167167 return ;
168168 }
@@ -207,18 +207,17 @@ mod tests {
207207 }
208208
209209 // Helper to create a mock backend output carrying a single token id
210- fn create_mock_output ( token_id : u32 ) -> Annotated < LLMEngineOutput > {
211- Annotated :: from_data ( LLMEngineOutput {
210+ fn create_mock_output ( token_id : u32 ) -> Annotated < BackendOutput > {
211+ Annotated :: from_data ( BackendOutput {
212212 token_ids : vec ! [ token_id] ,
213- tokens : None ,
214- text : Some ( format ! ( "token_{}" , token_id ) ) ,
213+ tokens : vec ! [ ] ,
214+ text : Some ( format ! ( "token_{token_id}" ) ) ,
215215 cum_log_probs : None ,
216216 log_probs : None ,
217217 top_logprobs : None ,
218218 finish_reason : None ,
219219 index : None ,
220220 disaggregated_params : None ,
221- extra_args : None ,
222221 completion_usage : None ,
223222 } )
224223 }
@@ -267,16 +266,13 @@ mod tests {
267266
268267 #[ async_trait]
269268 impl
270- AsyncEngine <
271- SingleIn < PreprocessedRequest > ,
272- ManyOut < Annotated < LLMEngineOutput > > ,
273- anyhow:: Error ,
274- > for MockEngine
269+ AsyncEngine < SingleIn < PreprocessedRequest > , ManyOut < Annotated < BackendOutput > > , anyhow:: Error >
270+ for MockEngine
275271 {
276272 async fn generate (
277273 & self ,
278274 request : SingleIn < PreprocessedRequest > ,
279- ) -> Result < ManyOut < Annotated < LLMEngineOutput > > > {
275+ ) -> Result < ManyOut < Annotated < BackendOutput > > > {
280276 let call_num = self . call_count . fetch_add ( 1 , Ordering :: SeqCst ) ;
281277 let ( preprocessed_request, context) = request. transfer ( ( ) ) ;
282278
@@ -457,7 +453,7 @@ mod tests {
457453 & self ,
458454 start : usize ,
459455 end : usize ,
460- ) -> Result < ManyOut < Annotated < LLMEngineOutput > > > {
456+ ) -> Result < ManyOut < Annotated < BackendOutput > > > {
461457 let ( tx, rx) = mpsc:: channel ( 1 ) ;
462458 let token_offset = self . token_offset ;
463459
@@ -494,7 +490,7 @@ mod tests {
494490 100 ,
495491 context_id. clone ( ) ,
496492 ) ) ;
497- let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < LLMEngineOutput > > =
493+ let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < BackendOutput > > =
498494 mock_engine;
499495
500496 let ctx = Arc :: new ( Controller :: new ( context_id. clone ( ) ) ) ;
@@ -533,7 +529,7 @@ mod tests {
533529 100 ,
534530 context_id. clone ( ) ,
535531 ) ) ;
536- let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < LLMEngineOutput > > =
532+ let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < BackendOutput > > =
537533 mock_engine;
538534
539535 let ctx = Arc :: new ( Controller :: new ( context_id. clone ( ) ) ) ;
@@ -573,7 +569,7 @@ mod tests {
573569 100 ,
574570 context_id. clone ( ) ,
575571 ) ) ;
576- let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < LLMEngineOutput > > =
572+ let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < BackendOutput > > =
577573 mock_engine;
578574
579575 let ctx = Arc :: new ( Controller :: new ( context_id. clone ( ) ) ) ;
@@ -613,7 +609,7 @@ mod tests {
613609 100 ,
614610 context_id. clone ( ) ,
615611 ) ) ;
616- let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < LLMEngineOutput > > =
612+ let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < BackendOutput > > =
617613 mock_engine;
618614
619615 // Should fail to build due to initial stream creation failure after exhausting all 3 retries
@@ -641,7 +637,7 @@ mod tests {
641637 100 ,
642638 context_id. clone ( ) ,
643639 ) ) ;
644- let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < LLMEngineOutput > > =
640+ let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < BackendOutput > > =
645641 mock_engine;
646642
647643 let ctx = Arc :: new ( Controller :: new ( context_id. clone ( ) ) ) ;
@@ -690,7 +686,7 @@ mod tests {
690686 100 ,
691687 context_id. clone ( ) ,
692688 ) ) ;
693- let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < LLMEngineOutput > > =
689+ let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < BackendOutput > > =
694690 mock_engine;
695691
696692 let ctx = Arc :: new ( Controller :: new ( context_id. clone ( ) ) ) ;
@@ -739,7 +735,7 @@ mod tests {
739735 100 ,
740736 context_id. clone ( ) ,
741737 ) ) ;
742- let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < LLMEngineOutput > > =
738+ let next_generate: ServerStreamingEngine < PreprocessedRequest , Annotated < BackendOutput > > =
743739 mock_engine;
744740
745741 let ctx = Arc :: new ( Controller :: new ( context_id. clone ( ) ) ) ;
0 commit comments