ai-dynamo
diff --git a/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions b/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎lib/bindings/kvbm/Cargo.lock‎
Lines changed: 1 addition & 0 deletions b/‎lib/bindings/kvbm/Cargo.lock‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎lib/kvbm-config/src/lib.rs‎
Lines changed: 9 additions & 0 deletions b/‎lib/kvbm-config/src/lib.rs‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎lib/kvbm-config/src/offload.rs‎
Lines changed: 217 additions & 0 deletions b/‎lib/kvbm-config/src/offload.rs‎
Lines changed: 217 additions & 0 deletions
diff --git a/‎lib/kvbm/Cargo.toml‎
Lines changed: 1 addition & 0 deletions b/‎lib/kvbm/Cargo.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎lib/kvbm/src/v2/distributed/leader/instance.rs‎
Lines changed: 21 additions & 0 deletions b/‎lib/kvbm/src/v2/distributed/leader/instance.rs‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎lib/kvbm/src/v2/distributed/worker/nova/client.rs‎
Lines changed: 3 additions & 4 deletions b/‎lib/kvbm/src/v2/distributed/worker/nova/client.rs‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎lib/kvbm/src/v2/distributed/worker/nova/mod.rs‎
Lines changed: 32 additions & 0 deletions b/‎lib/kvbm/src/v2/distributed/worker/nova/mod.rs‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎lib/kvbm/src/v2/distributed/worker/nova/service.rs‎
Lines changed: 4 additions & 2 deletions b/‎lib/kvbm/src/v2/distributed/worker/nova/service.rs‎
Lines changed: 4 additions & 2 deletions
@@ -10,6 +10,7 @@ mod cache;
 mod discovery;
 mod nixl;
 mod nova;
+mod offload;
 mod rayon;
 mod tokio;
 
@@ -19,6 +20,9 @@ pub use discovery::{
 };
 pub use nixl::NixlConfig;
 pub use nova::{NovaBackendConfig, NovaConfig};
+pub use offload::{
+    OffloadConfig, PolicyType, PresenceFilterConfig, PresenceLfuFilterConfig, TierOffloadConfig,
+};
 pub use rayon::RayonConfig;
 pub use tokio::TokioConfig;
 
@@ -69,6 +73,11 @@ pub struct KvbmConfig {
     #[validate(nested)]
     #[serde(default)]
     pub cache: CacheConfig,
+
+    /// Offload policy configuration (G1→G2, G2→G3 transitions).
+    #[validate(nested)]
+    #[serde(default)]
+    pub offload: OffloadConfig,
 }
 
 impl KvbmConfig {
 
@@ -0,0 +1,217 @@
+// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! Offload policy configuration for KVBM.
+//!
+//! Defines configuration for offload policies that control which blocks
+//! are transferred between storage tiers (G1→G2, G2→G3).
+//!
+//! # Policy Types
+//!
+//! - `pass_all`: No filtering, all blocks pass
+//! - `presence`: Skip blocks already present in destination tier
+//! - `presence_lfu`: Presence check + LFU count threshold
+//!
+//! # Configuration
+//!
+//! Policies are configured per tier transition. Multiple policies in the
+//! `policies` list are applied in order with implicit AND logic (all must pass).
+//!
+//! ## JSON Example
+//!
+//! ```json
+//! {
+//!   "offload": {
+//!     "g1_to_g2": {
+//!       "policies": ["presence"],
+//!       "presence": {}
+//!     },
+//!     "g2_to_g3": {
+//!       "policies": ["presence_lfu"],
+//!       "presence_lfu": { "min_lfu_count": 8 }
+//!     }
+//!   }
+//! }
+//! ```
+
+use serde::{Deserialize, Serialize};
+use validator::Validate;
+
+/// Identifier of an offload policy as it appears in a tier's `policies` list.
+///
+/// Variants (de)serialize in snake_case; each names a policy implementation in the kvbm crate.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum PolicyType {
+    /// `"pass_all"`: PassAllPolicy - no filtering, all blocks pass.
+    PassAll,
+    /// `"presence"`: PresenceFilter - skip blocks already in the destination tier.
+    Presence,
+    /// `"presence_lfu"`: PresenceAndLFUFilter - presence check + LFU threshold.
+    PresenceLfu,
+}
+
+/// Settings for the presence filter (the `presence` policy).
+///
+/// Currently has no parameters: the struct exists for future extensibility
+/// and to keep the per-policy configuration pattern consistent.
+#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
+pub struct PresenceFilterConfig {}
+
+/// Default LFU count threshold (8); used by the serde field default and the `Default` impl.
+fn default_min_lfu_count() -> u32 {
+    8
+}
+
+/// Configuration for the presence + LFU (Least Frequently Used) filter.
+///
+/// Pairs the presence check with an LFU count threshold: only blocks whose
+/// access count is above the threshold are offloaded.
+#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
+pub struct PresenceLfuFilterConfig {
+    /// LFU count threshold for offload; validation rejects values below 1.
+    ///
+    /// Blocks must have been accessed more than this many times to be considered
+    /// for offload, so rarely-used blocks are not offloaded.
+    /// NOTE(review): "more than" vs. the `min_` name - confirm `>` vs `>=` in the filter.
+    /// Default: 8 (applied when the key is omitted).
+    #[serde(default = "default_min_lfu_count")]
+    #[validate(range(min = 1))]
+    pub min_lfu_count: u32,
+}
+
+impl Default for PresenceLfuFilterConfig {
+    /// Builds the config with `min_lfu_count` set to the same value serde falls back to.
+    fn default() -> Self {
+        let min_lfu_count = default_min_lfu_count();
+        Self { min_lfu_count }
+    }
+}
+
+/// Offload settings for one tier transition (e.g., G1→G2, G2→G3).
+///
+/// Lists the policies applied when blocks are offloaded between the two tiers
+/// and carries the parameters of each configurable policy. Policies combine with
+/// implicit AND logic - a block must pass ALL policies to be transferred.
+#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
+pub struct TierOffloadConfig {
+    /// Ordered list of policies to apply (implicit AND).
+    ///
+    /// If empty (which is also the value used when the key is omitted), defaults to pass-all.
+    /// Policies are evaluated in list order; a block must pass all to be transferred.
+    #[serde(default)]
+    pub policies: Vec<PolicyType>,
+
+    /// Parameters for the presence filter.
+    ///
+    /// Used when "presence" is in the policies list; takes its default when omitted.
+    #[serde(default)]
+    #[validate(nested)]
+    pub presence: PresenceFilterConfig,
+
+    /// Parameters for the presence + LFU filter.
+    ///
+    /// Used when "presence_lfu" is in the policies list; takes its default when omitted.
+    #[serde(default)]
+    #[validate(nested)]
+    pub presence_lfu: PresenceLfuFilterConfig,
+}
+
+/// Top-level offload configuration.
+///
+/// Holds one `TierOffloadConfig` per tier transition; each falls back to its default if omitted.
+#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
+pub struct OffloadConfig {
+    /// Offload policies for the G1 (GPU) → G2 (Host) transition.
+    #[serde(default)]
+    #[validate(nested)]
+    pub g1_to_g2: TierOffloadConfig,
+
+    /// Offload policies for the G2 (Host) → G3 (Disk) transition.
+    #[serde(default)]
+    #[validate(nested)]
+    pub g2_to_g3: TierOffloadConfig,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_default_config() {
+        let config = OffloadConfig::default();
+        assert!(config.g1_to_g2.policies.is_empty());
+        assert!(config.g2_to_g3.policies.is_empty());
+        assert_eq!(config.g2_to_g3.presence_lfu.min_lfu_count, 8);
+    }
+
+    #[test]
+    fn test_policy_type_serde() {
+        let json = r#"["pass_all", "presence", "presence_lfu"]"#;
+        let policies: Vec<PolicyType> = serde_json::from_str(json).unwrap();
+        assert_eq!(policies.len(), 3);
+        assert_eq!(policies[0], PolicyType::PassAll);
+        assert_eq!(policies[1], PolicyType::Presence);
+        assert_eq!(policies[2], PolicyType::PresenceLfu);
+
+        // Roundtrip: compare parsed values, so the check does not depend on JSON whitespace.
+        let serialized = serde_json::to_string(&policies).unwrap();
+        let roundtrip: Vec<PolicyType> = serde_json::from_str(&serialized).unwrap();
+        assert_eq!(policies, roundtrip);
+    }
+
+    #[test]
+    fn test_tier_config_serde() {
+        let json = r#"{
+            "policies": ["presence_lfu"],
+            "presence_lfu": { "min_lfu_count": 16 }
+        }"#;
+
+        let config: TierOffloadConfig = serde_json::from_str(json).unwrap();
+        assert_eq!(config.policies.len(), 1);
+        assert_eq!(config.policies[0], PolicyType::PresenceLfu);
+        assert_eq!(config.presence_lfu.min_lfu_count, 16);
+    }
+
+    #[test]
+    fn test_offload_config_serde() {
+        let json = r#"{
+            "g1_to_g2": {
+                "policies": ["presence"]
+            },
+            "g2_to_g3": {
+                "policies": ["presence_lfu"],
+                "presence_lfu": { "min_lfu_count": 4 }
+            }
+        }"#;
+
+        let config: OffloadConfig = serde_json::from_str(json).unwrap();
+        assert_eq!(config.g1_to_g2.policies, vec![PolicyType::Presence]);
+        assert_eq!(config.g2_to_g3.policies, vec![PolicyType::PresenceLfu]);
+        assert_eq!(config.g2_to_g3.presence_lfu.min_lfu_count, 4);
+    }
+
+    #[test]
+    fn test_default_lfu_threshold() {
+        let json = r#"{"policies": ["presence_lfu"]}"#;
+        let config: TierOffloadConfig = serde_json::from_str(json).unwrap();
+        // `presence_lfu` is omitted, so the threshold falls back to the default of 8.
+        assert_eq!(config.presence_lfu.min_lfu_count, 8);
+    }
+
+    #[test]
+    fn test_validation() {
+        assert!(OffloadConfig::default().validate().is_ok());
+        // `min_lfu_count` carries `range(min = 1)`: 1 is accepted, 0 must be rejected.
+        let with_min_lfu = |min_lfu_count: u32| OffloadConfig {
+            g2_to_g3: TierOffloadConfig {
+                policies: vec![PolicyType::PresenceLfu],
+                presence_lfu: PresenceLfuFilterConfig { min_lfu_count },
+                ..Default::default()
+            },
+            ..Default::default()
+        };
+        assert!(with_min_lfu(1).validate().is_ok());
+        assert!(with_min_lfu(0).validate().is_err());
+    }
+}
@@ -36,6 +36,7 @@ aligned-vec = "0.6.4"
 bincode = { version = "2.0.1", features = ["serde", "derive"] }
 blake3 = { version = "1" }
 bytes = "1.10"
+crossbeam-queue = "0.3"
 derive-getters = "0.5"
 figment = { version = "0.10", features = ["env"] }
 lru = "0.16"
 
@@ -309,6 +309,27 @@ impl InstanceLeader {
         self.g3_manager.as_ref()
     }
 
+    /// Borrow the block registry held by this leader.
+    pub fn registry(&self) -> &BlockRegistry {
+        &self.registry
+    }
+
+    /// Get the tokio runtime handle from Nova.
+    ///
+    /// Returns an owned clone of `self.nova.runtime()`. Use it to spawn background tasks
+    /// that need to run on the KVBM runtime's executor (e.g., offload engine pipelines).
+    pub fn runtime(&self) -> tokio::runtime::Handle {
+        self.nova.runtime().clone()
+    }
+
+    /// Report whether a parallel_worker is configured (i.e. the field is `Some`).
+    ///
+    /// The parallel_worker is required for local transfer operations
+    /// (e.g., offloading blocks between tiers).
+    pub fn has_parallel_worker(&self) -> bool {
+        self.parallel_worker.is_some()
+    }
+
     /// Add a remote leader to the search list.
     ///
     /// Remote leaders are queried during `find_matches_with_options` when
 
@@ -28,8 +28,7 @@ impl WorkerTransfers for NovaWorkerClient {
     ) -> Result<TransferCompleteNotification> {
         // Create a single local event for this operation
         let event = self.nova.events().new_event()?;
-        let handle = event.handle();
-        let awaiter = self.nova.events().awaiter(handle)?;
+        let awaiter = self.nova.events().awaiter(event.handle())?;
 
         // Convert to serializable options
         // TODO: Extract bounce buffer handle if present in options.bounce_buffer
@@ -53,13 +52,13 @@ impl WorkerTransfers for NovaWorkerClient {
 
         // Spawn a task for the remote instance
         let nova = self.nova.clone();
-        let bytes = bytes.clone();
         let remote_instance = self.remote;
 
+        // Use unary (not am_sync) to wait for transfer completion
         self.nova.tracker().spawn_on(
             async move {
                 let result = nova
-                    .am_sync("kvbm.worker.local_transfer")?
+                    .unary("kvbm.worker.local_transfer")?
                     .raw_payload(bytes)
                     .instance(remote_instance)
                     .send()
 
@@ -1,6 +1,38 @@
 // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
+//! Nova-based RPC implementation for distributed worker communication.
+//!
+//! # RPC Pattern Guidelines
+//!
+//! This module uses only two Nova RPC patterns:
+//!
+//! 1. **`am_send` (fire-and-forget)**: Use when no response is needed.
+//!    - Client sends message and returns immediately
+//!    - Handler processes asynchronously, no response sent back
+//!    - Use `NovaHandler::am_handler` or `am_handler_async`
+//!
+//! 2. **`unary` (request-response)**: Use when waiting for completion.
+//!    - Client sends request and awaits response
+//!    - Handler returns `Ok(Some(Bytes))` or `Ok(None)` which is sent back
+//!    - Use `NovaHandler::unary_handler` or `unary_handler_async`
+//!
+//! # Why Not `am_sync`?
+//!
+//! We avoid `am_sync` because it was observed not to reliably receive
+//! completion signals when paired with `am_handler_async`. While `am_sync`
+//! should theoretically behave like `unary` (both await completion), in
+//! practice pairing an `am_sync` client with an `am_handler_async` handler
+//! caused indefinite blocking during RDMA transfer tests.
+//!
+//! The root cause appears to be a mismatch in how responses are routed:
+//! - `am_handler_async` returns `Result<()>` - the return value is NOT sent back
+//! - `unary_handler_async` returns `Result<Option<Bytes>>` - the return value IS sent back
+//!
+//! Until the `am_sync` completion path is validated, prefer the simpler and
+//! more predictable patterns: `am_send` for fire-and-forget, `unary` for
+//! request-response.
+
 mod client;
 mod service;
 
 
@@ -60,7 +60,8 @@ impl NovaWorkerService {
     fn register_local_transfer_handler(&self) -> Result<()> {
         let worker = self.worker.clone();
 
-        let handler = NovaHandler::am_handler_async("kvbm.worker.local_transfer", move |ctx| {
+        // Use unary_handler_async for explicit response (client waits for transfer completion)
+        let handler = NovaHandler::unary_handler_async("kvbm.worker.local_transfer", move |ctx| {
             let worker = worker.clone();
 
             async move {
@@ -85,7 +86,8 @@ impl NovaWorkerService {
                 // Await the transfer completion
                 notification.await?;
 
-                Ok(())
+                // Return empty response to signal success
+                Ok(Some(Bytes::new()))
             }
         })
         .build();