Skip to content

Commit 133893a

Browse files
Use perf_counter() instead of time.time() for the first-token timestamp + formatting
Signed-off-by: Zhongxuan Wang <[email protected]>
1 parent 32f0528 commit 133893a

File tree

3 files changed

+20
-15
lines changed

3 files changed

+20
-15
lines changed

components/src/dynamo/vllm/handlers.py

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -633,10 +633,10 @@ async def generate_tokens(
633633
out = {"token_ids": output.token_ids[num_output_tokens_so_far:]}
634634
if output.finish_reason:
635635
out["finish_reason"] = output.finish_reason
636-
out["completion_usage"] = (
637-
BaseWorkerHandler._build_completion_usage(
638-
request_output=res,
639-
)
636+
out[
637+
"completion_usage"
638+
] = BaseWorkerHandler._build_completion_usage(
639+
request_output=res,
640640
)
641641
# Log completion with LoRA info (debug level to avoid log spam)
642642
if lora_request:
@@ -805,7 +805,9 @@ async def generate(self, request, context):
805805
):
806806
# Capture first token timing
807807
if include_timing and not first_token_sent:
808-
first_token_time = time.time()
808+
first_token_time = decode_start_seconds + (
809+
time.perf_counter() - decode_start_perf_counter
810+
)
809811
timing_metrics["decode_first_token_seconds"] = first_token_time
810812
# In aggregated mode, prefill finishes when first token is generated
811813
if prefill_result is None:
@@ -966,14 +968,15 @@ async def generate(self, request, context):
966968
disaggregated_params: Optional[Dict[str, Any]] = {}
967969

968970
if res.kv_transfer_params:
969-
disaggregated_params["kv_transfer_params"] = (
970-
res.kv_transfer_params
971-
)
971+
disaggregated_params[
972+
"kv_transfer_params"
973+
] = res.kv_transfer_params
972974

973975
if include_timing and timing_metrics:
974-
timing_metrics["prefill_end_seconds"] = (
975-
prefill_start_seconds
976-
+ (time.perf_counter() - prefill_start_perf_counter)
976+
timing_metrics[
977+
"prefill_end_seconds"
978+
] = prefill_start_seconds + (
979+
time.perf_counter() - prefill_start_perf_counter
977980
)
978981
disaggregated_params["timing_metrics"] = timing_metrics
979982

components/src/dynamo/vllm/multimodal_handlers/worker_handler.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -227,9 +227,9 @@ async def generate(self, request: vLLMMultimodalRequest, context):
227227
# Update the prompt token id in the decode request to the one
228228
# in response, which has image templated filled in. So that
229229
# the decode worker will fetch correct amount of KV blocks.
230-
decode_request.engine_prompt["prompt_token_ids"] = (
231-
prefill_response.prompt_token_ids
232-
)
230+
decode_request.engine_prompt[
231+
"prompt_token_ids"
232+
] = prefill_response.prompt_token_ids
233233
logger.debug(
234234
f"Prefill response kv_transfer_params: {prefill_response.kv_transfer_params}"
235235
)

components/src/dynamo/vllm/tests/test_vllm_observability_fields.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,9 @@ class TestShouldIncludeTimingMetrics:
3535

3636
def test_returns_true_with_multiple_observability_fields(self):
3737
"""Timing metrics should be included when explicitly requested."""
38-
request = {"observability_fields": ["worker_id", "timing_metrics", "other_field"]}
38+
request = {
39+
"observability_fields": ["worker_id", "timing_metrics", "other_field"]
40+
}
3941
assert _request_contains_timing_metrics(request) is True
4042

4143
def test_returns_false_when_observability_fields_is_none(self):

0 commit comments

Comments
 (0)