Skip to content

Commit bd4366d

Browse files
authored
fix: Fix warning messages with healthcheck (#4793)
Signed-off-by: jthomson04 <[email protected]>
1 parent 3e4b480 commit bd4366d

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

components/src/dynamo/vllm/health_check.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,15 +67,14 @@ def __init__(self, engine_client=None):
6767
self.default_payload = {
6868
"token_ids": [bos_token_id],
6969
"sampling_options": {
70-
"max_tokens": 1,
7170
"temperature": 0.0,
7271
},
7372
"stop_conditions": {
73+
"max_tokens": 1,
7474
"stop": None,
7575
"stop_token_ids": None,
7676
"include_stop_str_in_output": False,
7777
"ignore_eos": False,
78-
"min_tokens": 0,
7978
},
8079
}
8180
super().__init__()

lib/runtime/src/health_check.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,11 @@ impl HealthCheckManager {
303303
false
304304
};
305305

306+
tokio::spawn(async move {
307+
// We need to consume the rest of the stream to avoid warnings on the frontend.
308+
response_stream.for_each(|_| async {}).await;
309+
});
310+
306311
// Update health status based on response
307312
system_health.lock().set_endpoint_health_status(
308313
&endpoint_subject_owned,

0 commit comments

Comments
 (0)