Skip to content

Commit a1c3f4e

Browse files
committed
kvbm: offload+onboard appear to be working correctly
Signed-off-by: Ryan Olson <[email protected]>
1 parent 27207c9 commit a1c3f4e

File tree

28 files changed

+1564
-343
lines changed

28 files changed

+1564
-343
lines changed

.sandbox/launch_vllm_with_connector.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ source "$VENV_PATH/bin/activate"
4343

4444
# Ensure our kvbm package is in the path
4545
export PYTHONPATH="$REPO_ROOT/lib/bindings/kvbm/python:$PYTHONPATH"
46+
export VLLM_SERVER_DEV_MODE=1
4647

4748
# =============================================================================
4849
# Rust Logging Configuration

.sandbox/rebuild.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ echo "Build mode: dev (default)"
4545
echo ""
4646

4747
# Build with maturin develop
48-
maturin develop
48+
maturin develop --release
4949

5050
echo ""
5151
echo "✅ Build complete!"

.sandbox/test_cmpl_1.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@ curl -X POST http://127.0.0.1:8000/v1/completions \
88
-d '{
99
"model": "gpt2",
1010
"prompt": "what is a dynamo? and how did it signify the start of the industrial revolution?",
11-
"max_tokens": 16,
11+
"max_tokens": 32,
1212
"temperature": 0
1313
}'

lib/bindings/kvbm/Cargo.lock

Lines changed: 8 additions & 154 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/bindings/kvbm/python/kvbm/v2/vllm/schedulers/worker.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def clear_connector_metadata(self) -> None:
177177
"""
178178
Clear connector metadata by delegating to the wrapped worker.
179179
"""
180-
pass
180+
self.worker.clear_connector_metadata()
181181

182182
def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
183183
"""
@@ -222,6 +222,7 @@ def get_finished(
222222
Returns:
223223
(None, None): No finished sends/receives
224224
"""
225+
print(f"SchedulerConnectorWorker.get_finished called with {len(finished_req_ids)} finished requests")
225226
return self.worker.get_finished()
226227

227228
def get_block_ids_with_load_errors(self) -> set[int]:

lib/kvbm-config/src/offload.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ impl Default for PresenceLfuFilterConfig {
9797
pub struct TierOffloadConfig {
9898
/// Ordered list of policies to apply (implicit AND).
9999
///
100-
/// If empty, defaults to pass-all behavior.
100+
/// If empty, tier-specific defaults are applied by the engine.
101101
/// Policies are evaluated in order; a block must pass all to be transferred.
102102
#[serde(default)]
103103
pub policies: Vec<PolicyType>,
@@ -140,6 +140,7 @@ mod tests {
140140
#[test]
141141
fn test_default_config() {
142142
let config = OffloadConfig::default();
143+
// Empty policies - engine applies tier-specific defaults
143144
assert!(config.g1_to_g2.policies.is_empty());
144145
assert!(config.g2_to_g3.policies.is_empty());
145146
assert_eq!(config.g2_to_g3.presence_lfu.min_lfu_count, 8);

lib/kvbm/src/v2/distributed/leader/instance.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,14 @@ impl InstanceLeader {
314314
&self.registry
315315
}
316316

317+
/// Get a reference to the Nova instance.
318+
///
319+
/// This provides access to the Nova distributed system for features
320+
/// like event coordination and cross-instance communication.
321+
pub fn nova(&self) -> &Arc<Nova> {
322+
&self.nova
323+
}
324+
317325
/// Get the tokio runtime handle from Nova.
318326
///
319327
/// This handle should be used for spawning background tasks that need to

0 commit comments

Comments
 (0)