2020from kvbm ._core import v2 as _v2
2121from kvbm .v2 .vllm import KvbmVllmConfig
2222
23+ from ..sched_output import process_scheduler_output
24+ from .worker import NovaPeerMetadata
25+
2326KvbmRuntime = _v2 .KvbmRuntime
2427ConnectorLeader = _v2 .ConnectorLeader
2528KvbmRequest = _v2 .KvbmRequest
2629
27- # TODO: Re-enable when v2 connector bindings are updated
28- # These classes need to be updated for v2 API changes in kvbm crate
29- # KvbmRequest = _v2.KvbmRequest
30- # RustKvConnectorLeader = _v2.KvConnectorLeader
31- # RustSchedulerOutput = _v2.RustSchedulerOutput
32-
33- # Import the handshake metadata type from worker module
34- from .worker import NovaPeerMetadata
35- from ..sched_output import process_scheduler_output
3630
3731if TYPE_CHECKING :
3832 from vllm .config import VllmConfig
4135 )
4236 from vllm .v1 .core .kv_cache_manager import KVCacheBlocks , KVCacheConfig
4337 from vllm .v1 .core .sched .output import SchedulerOutput
44- from vllm .v1 .request import Request
4538 from vllm .v1 .outputs import KVConnectorOutput
39+ from vllm .v1 .request import Request
4640
4741
4842class SchedulerConnectorLeader :
@@ -59,7 +53,11 @@ class SchedulerConnectorLeader:
5953 """
6054
6155 def __init__ (
62- self , vllm_config : VllmConfig , kvbm_config : KvbmVllmConfig , kv_cache_config : KVCacheConfig , ** kwargs
56+ self ,
57+ vllm_config : VllmConfig ,
58+ kvbm_config : KvbmVllmConfig ,
59+ kv_cache_config : KVCacheConfig ,
60+ ** kwargs ,
6361 ):
6462 """Initialize the scheduler connector leader."""
6563 print ("[KVBM DEBUG] SchedulerConnectorLeader.__init__ START" , flush = True )
@@ -90,7 +88,9 @@ def get_num_new_matched_tokens(
9088 num_computed_tokens : int ,
9189 ) -> tuple [Optional [int ], bool ]:
9290 self ._create_slot (request )
93- return self .leader .get_num_new_matched_tokens (request .request_id , num_computed_tokens )
91+ return self .leader .get_num_new_matched_tokens (
92+ request .request_id , num_computed_tokens
93+ )
9494
9595 def update_state_after_alloc (
9696 self , request : "Request" , blocks : "KVCacheBlocks" , num_external_tokens : int
@@ -101,7 +101,9 @@ def update_state_after_alloc(
101101 This should never be called with num_external_tokens > 0.
102102 """
103103 block_ids = [int (block_id ) for block_id in blocks .get_block_ids ()[0 ]]
104- self .leader .update_state_after_alloc (request .request_id , block_ids , num_external_tokens )
104+ self .leader .update_state_after_alloc (
105+ request .request_id , block_ids , num_external_tokens
106+ )
105107
106108 def build_connector_meta (self , scheduler_output : "SchedulerOutput" ) -> bytes :
107109 """
@@ -136,12 +138,19 @@ def request_finished(
136138
def update_connector_output(self, connector_output: KVConnectorOutput) -> None:
    """Forward the finished send/receive sets to the leader binding.

    Args:
        connector_output: Object exposing ``finished_sending`` and
            ``finished_recving``; either attribute may be ``None``.
    """
    # Convert None to empty sets for Rust binding compatibility
    finished_sending = (
        connector_output.finished_sending
        if connector_output.finished_sending is not None
        else set()
    )
    finished_recving = (
        connector_output.finished_recving
        if connector_output.finished_recving is not None
        else set()
    )
    self.leader.update_connector_output(finished_sending, finished_recving)
142152
def get_finished_count(self) -> Optional[int]:
    """Return ``None``; this implementation reports no finished count."""
    return None
146155
147156 def set_xfer_handshake_metadata (
@@ -193,7 +202,7 @@ def set_xfer_handshake_metadata(
193202 def _create_slot (self , request : "Request" ) -> None :
194203 if self .leader .has_slot (request .request_id ):
195204 return
196-
205+
197206 if bool (getattr (request , "mm_features" , None )) or bool (
198207 getattr (request , "mm_positions" , None )
199208 ):
@@ -207,7 +216,7 @@ def _create_slot(self, request: "Request") -> None:
207216 else :
208217 # Single-sequence case: already flat
209218 all_token_ids = [int (token ) for token in request .all_token_ids ]
210-
219+
211220 kv_request = KvbmRequest (
212221 request_id = request .request_id ,
213222 tokens = all_token_ids ,
@@ -219,5 +228,5 @@ def _create_slot(self, request: "Request") -> None:
219228 else None ,
220229 max_tokens = request .max_tokens ,
221230 )
222-
231+
223232 self .leader .create_slot (kv_request )
0 commit comments