@@ -303,18 +303,20 @@ impl KvScheduler {
303303 break ;
304304 }
305305 _ = interval. tick( ) => {
306- // Query active blocks and tokens from all workers
307- let active_blocks = slots_metrics. active_blocks( ) . await ;
306+ // Query active tokens and blocks from all workers
307+ // Note: active_tokens is always tracked, but active_blocks may be empty
308+ // if --no-track-active-blocks is set
308309 let active_tokens = slots_metrics. active_tokens( ) . await ;
310+ let active_blocks = slots_metrics. active_blocks( ) . await ;
309311
310- // Publish ActiveLoad for each worker/dp_rank
311- for ( worker, blocks ) in active_blocks . iter( ) {
312- let tokens = active_tokens . get( worker) . copied( ) ;
312+ // Publish ActiveLoad for each worker/dp_rank (iterate over tokens since always tracked)
313+ for ( worker, tokens ) in active_tokens . iter( ) {
314+ let blocks = active_blocks . get( worker) . copied( ) ;
313315 let active_load = ActiveLoad {
314316 worker_id: worker. worker_id,
315317 dp_rank: worker. dp_rank,
316- kv_active_blocks: Some ( * blocks as u64 ) ,
317- active_prefill_tokens: tokens . map ( |t| t as u64 ) ,
318+ kv_active_blocks: blocks. map ( |b| b as u64 ) ,
319+ active_prefill_tokens: Some ( * tokens as u64 ) ,
318320 } ;
319321
320322 if let Err ( e) = ns_metrics. publish( KV_METRICS_SUBJECT , & active_load) . await {
0 commit comments