
Commit 2806586

Merge branch 'release/0.7.1' into dtokarev/0.7.0.post1-vllm-0.12.0

2 parents: eead28e + ce602cb

95 files changed: +1855, −471 lines


Cargo.lock

Lines changed: 12 additions & 11 deletions (generated lockfile; diff not rendered)

Cargo.toml

Lines changed: 7 additions & 7 deletions

@@ -33,7 +33,7 @@ default-members = [
 resolver = "3"
 
 [workspace.package]
-version = "0.7.0-post1"
+version = "0.7.1"
 edition = "2024"
 description = "Dynamo Inference Framework"
 authors = ["NVIDIA Inc. <[email protected]>"]
@@ -44,15 +44,15 @@ keywords = ["llm", "genai", "inference", "nvidia", "distributed"]
 
 [workspace.dependencies]
 # Local crates
-dynamo-runtime = { path = "lib/runtime", version = "0.7.0-post1" }
-dynamo-llm = { path = "lib/llm", version = "0.7.0-post1" }
-dynamo-config = { path = "lib/config", version = "0.7.0-post1" }
-dynamo-tokens = { path = "lib/tokens", version = "0.7.0-post1" }
-dynamo-async-openai = { path = "lib/async-openai", version = "0.7.0-post1", features = [
+dynamo-runtime = { path = "lib/runtime", version = "0.7.1" }
+dynamo-llm = { path = "lib/llm", version = "0.7.1" }
+dynamo-config = { path = "lib/config", version = "0.7.1" }
+dynamo-tokens = { path = "lib/tokens", version = "0.7.1" }
+dynamo-async-openai = { path = "lib/async-openai", version = "0.7.1", features = [
     "byot",
     "rustls",
 ] }
-dynamo-parsers = { path = "lib/parsers", version = "0.7.0-post1" }
+dynamo-parsers = { path = "lib/parsers", version = "0.7.1" }
 
 # External dependencies
 anyhow = { version = "1" }

benchmarks/incluster/benchmark_job.yaml

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@ spec:
       containers:
         - name: benchmark-runner
           # TODO: update to latest public image in next release
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0.post1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.1
           securityContext:
             allowPrivilegeEscalation: false
             capabilities:

benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml

Lines changed: 2 additions & 2 deletions

@@ -12,7 +12,7 @@ spec:
 
   # ProfilingConfig maps directly to the profile_sla.py config format
   profilingConfig:
-    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0.post1"
+    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.1"
     config:
       # Sweep/profiling configuration
       sweep:
@@ -31,7 +31,7 @@ spec:
 
   # Deployment overrides for the auto-created DGD
   deploymentOverrides:
-    workersImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0.post1"
+    workersImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.7.1"
 
   # Automatically create DynamoGraphDeployment after profiling
   autoApply: true

benchmarks/profiler/deploy/profile_sla_dgdr.yaml

Lines changed: 2 additions & 2 deletions

@@ -12,7 +12,7 @@ spec:
 
   # ProfilingConfig maps directly to the profile_sla.py config format
   profilingConfig:
-    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0.post1"
+    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.1"
     config:
       # Sweep/profiling configuration
      sweep:
@@ -28,7 +28,7 @@ spec:
 
   # Deployment overrides for the auto-created DGD
   deploymentOverrides:
-    workersImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0.post1"
+    workersImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.1"
 
   # Automatically create DynamoGraphDeployment after profiling
   autoApply: true

components/src/dynamo/sglang/request_handlers/multimodal/encode_worker_handler.py

Lines changed: 1 addition & 2 deletions

@@ -159,7 +159,7 @@ async def generate(
         # Create descriptor for the multimodal data
         descriptor = connect.Descriptor(precomputed_embeddings)
 
-        with self._connector.create_readable(descriptor) as readable:
+        with await self._connector.create_readable(descriptor) as readable:
             request.serialized_request = readable.metadata()
 
         logger.debug(f"Request: {request.model_dump_json()}")
@@ -184,6 +184,5 @@ async def async_init(self, runtime: DistributedRuntime):
         # Create and initialize a dynamo connector for this worker.
         # We'll needs this to move data between this worker and remote workers efficiently.
         self._connector = connect.Connector()
-        await self._connector.initialize()
 
         logger.info("Startup completed.")

components/src/dynamo/sglang/request_handlers/multimodal/worker_handler.py

Lines changed: 0 additions & 2 deletions

@@ -77,7 +77,6 @@ def __init__(self):
     async def initialize(self):
         """Initialize the connector for embeddings processing"""
         self._connector = connect.Connector()
-        await self._connector.initialize()
 
     async def process_embeddings(self, request: SglangMultimodalRequest):
         """Process embeddings from serialized request"""
@@ -103,7 +102,6 @@ async def process_embeddings(self, request: SglangMultimodalRequest):
                 "Connector is None - this should not happen after initialization"
             )
            self._connector = connect.Connector()
-            await self._connector.initialize()
 
        read_op = await self._connector.begin_read(
            request.serialized_request, descriptor
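
On the consuming side, process_embeddings keeps the same two-step shape: wrap a locally allocated destination tensor in a Descriptor, then begin_read() it against the serialized metadata produced by the encode worker. A hedged read-side sketch; the torch allocation, the embeddings_shape field (borrowed from the vLLM handler later in this commit), and the wait_for_completion() call are assumptions rather than lines from this diff.

import torch

from dynamo import nixl_connect as connect  # import path assumed


async def receive_embeddings(connector, request):
    # Allocate a local buffer to receive the embeddings into. Shape and dtype
    # are placeholders; the real handler derives them from the request.
    local = torch.empty(tuple(request.embeddings_shape), dtype=torch.float16)
    descriptor = connect.Descriptor(local)

    # Pair the remote worker's serialized metadata with the local descriptor,
    # mirroring the begin_read() call in process_embeddings above.
    read_op = await connector.begin_read(request.serialized_request, descriptor)
    await read_op.wait_for_completion()  # completion method name assumed
    return local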

components/src/dynamo/trtllm/encode_helper.py

Lines changed: 1 addition & 1 deletion

@@ -241,7 +241,7 @@ async def process_embedding_request(
 
     # Create readable operation with main embeddings tensor (works for both formats)
     descriptor = nixl_connect.Descriptor(encodings)
-    with connector.create_readable(descriptor) as readable_op:
+    with await connector.create_readable(descriptor) as readable_op:
        # Get the metadata for the readable operation
        op_metadata = readable_op.metadata()
 
components/src/dynamo/trtllm/main.py

Lines changed: 0 additions & 1 deletion

@@ -276,7 +276,6 @@ async def init(runtime: DistributedRuntime, config: Config):
     connector = None
     logging.info("Initializing NIXL Connect.")
     connector = nixl_connect.Connector()
-    await connector.initialize()
 
     dump_config(
         config.dump_config_to, {"engine_args": engine_args, "dynamo_args": config}
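
For the TensorRT-LLM worker the change is purely at startup: the connector is ready as soon as it is constructed, so init() no longer awaits initialize() before handing it to the handlers. A small sketch of that simplified startup; the init_nixl_connector helper name is illustrative and not part of this commit.

import logging

from dynamo import nixl_connect  # import path assumed


def init_nixl_connector():
    # As of this commit the Connector needs no awaited initialize();
    # construct it and pass it straight to the encode/decode handlers.
    logging.info("Initializing NIXL Connect.")
    return nixl_connect.Connector()

Since nothing is awaited here any more, this helper does not even need to be a coroutine; in the real worker it simply lives inside the larger async init() shown in the hunk above.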

components/src/dynamo/vllm/multimodal_handlers/encode_worker_handler.py

Lines changed: 1 addition & 2 deletions

@@ -69,7 +69,6 @@ async def async_init(self, runtime: DistributedRuntime):
         # Create and initialize a dynamo connector for this worker.
         # We'll needs this to move data between this worker and remote workers efficiently.
         self._connector = connect.Connector()
-        await self._connector.initialize()
         logger.info("Encode worker startup completed.")
 
     async def generate(
@@ -130,7 +129,7 @@ async def generate(
         request.embeddings_shape = tuple(embeddings.shape)
         descriptor = connect.Descriptor(embeddings_cpu)
 
-        with self._connector.create_readable(descriptor) as readable:
+        with await self._connector.create_readable(descriptor) as readable:
             request.serialized_request = readable.metadata()
             # Clear the image URL as hint that the image is passed as embeddings.
             request.multimodal_input.image_url = None
