Skip to content

Commit e5837a4

Browse files
committed
bring back positive threshold check
Signed-off-by: PeaBrane <[email protected]>
1 parent 324668c commit e5837a4

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

lib/llm/src/discovery/worker_monitor.rs

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,19 +57,23 @@ impl WorkerLoadState {
5757
// Check if ALL dp_ranks are busy
5858
all_dp_ranks.iter().all(|&dp_rank| {
5959
// First check: prefill tokens threshold
60+
// Skip if max_tokens is 0 (no capacity means threshold check is meaningless)
6061
if let (Some(&active_tokens), Some(&max_tokens)) = (
6162
self.active_prefill_tokens.get(&dp_rank),
6263
self.max_num_batch_tokens.get(&dp_rank),
63-
) && (active_tokens as f64) > (tokens_threshold * max_tokens as f64)
64+
) && max_tokens > 0
65+
&& (active_tokens as f64) > (tokens_threshold * max_tokens as f64)
6466
{
6567
return true; // This dp_rank is busy due to tokens
6668
}
6769

6870
// Second check: blocks threshold
71+
// Skip if total_blocks is 0 (no capacity means threshold check is meaningless)
6972
if let (Some(&active_blocks), Some(&total_blocks)) = (
7073
self.kv_active_blocks.get(&dp_rank),
7174
self.kv_total_blocks.get(&dp_rank),
72-
) && (active_blocks as f64) > (blocks_threshold * total_blocks as f64)
75+
) && total_blocks > 0
76+
&& (active_blocks as f64) > (blocks_threshold * total_blocks as f64)
7377
{
7478
return true; // This dp_rank is busy due to blocks
7579
}

0 commit comments

Comments
 (0)