Skip to content

Commit 111c681

Browse files
authored
fix: Send last token batch when finish_reason is set (#3531)
1 parent 03bdced commit 111c681

File tree

1 file changed

+11
-13
lines changed

1 file changed

+11
-13
lines changed

components/src/dynamo/sglang/request_handlers/llm/decode_handler.py

Lines changed: 11 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -188,26 +188,24 @@ async def _process_token_stream(
188188
189189
Yields:
190190
Dict with token_ids and optional finish_reason.
191-
192-
Raises:
193-
ValueError: If response missing output_ids.
194191
"""
195192
num_output_tokens_so_far = 0
196193

197194
async for res in stream_source:
195+
out = {}
198196
finish_reason = res["meta_info"]["finish_reason"]
199197
if finish_reason:
200-
out = {"token_ids": [], "finish_reason": finish_reason["type"]}
201-
else:
202-
try:
203-
next_total_toks = len(res["output_ids"])
204-
except KeyError:
205-
raise ValueError(
206-
f"Missing 'output_ids' in response. Response keys: {list(res.keys())}"
207-
)
208-
out = {"token_ids": res["output_ids"][num_output_tokens_so_far:]}
209-
num_output_tokens_so_far = next_total_toks
198+
out["finish_reason"] = finish_reason["type"]
199+
200+
output_ids = res.get("output_ids", [])
201+
# If request is not finished yet, but there are no outputs, return an error.
202+
if not output_ids and not finish_reason:
203+
yield {"finish_reason": "error", "token_ids": []}
204+
break
210205

206+
next_total_toks = len(output_ids)
207+
out["token_ids"] = output_ids[num_output_tokens_so_far:]
208+
num_output_tokens_so_far = next_total_toks
211209
yield out
212210

213211
async def _process_text_stream(

0 commit comments

Comments
 (0)