
Commit 2806586

Merge branch 'release/0.7.1' into dtokarev/0.7.0.post1-vllm-0.12.0

2 parents: eead28e + ce602cb

95 files changed: +1855, −471 lines


Cargo.lock

Lines changed: 12 additions & 11 deletions (generated lockfile; diff not rendered)

Cargo.toml

Lines changed: 7 additions & 7 deletions

@@ -33,7 +33,7 @@ default-members = [
 resolver = "3"
 
 [workspace.package]
-version = "0.7.0-post1"
+version = "0.7.1"
 edition = "2024"
 description = "Dynamo Inference Framework"
 authors = ["NVIDIA Inc. <[email protected]>"]
@@ -44,15 +44,15 @@ keywords = ["llm", "genai", "inference", "nvidia", "distributed"]
 
 [workspace.dependencies]
 # Local crates
-dynamo-runtime = { path = "lib/runtime", version = "0.7.0-post1" }
-dynamo-llm = { path = "lib/llm", version = "0.7.0-post1" }
-dynamo-config = { path = "lib/config", version = "0.7.0-post1" }
-dynamo-tokens = { path = "lib/tokens", version = "0.7.0-post1" }
-dynamo-async-openai = { path = "lib/async-openai", version = "0.7.0-post1", features = [
+dynamo-runtime = { path = "lib/runtime", version = "0.7.1" }
+dynamo-llm = { path = "lib/llm", version = "0.7.1" }
+dynamo-config = { path = "lib/config", version = "0.7.1" }
+dynamo-tokens = { path = "lib/tokens", version = "0.7.1" }
+dynamo-async-openai = { path = "lib/async-openai", version = "0.7.1", features = [
     "byot",
     "rustls",
 ] }
-dynamo-parsers = { path = "lib/parsers", version = "0.7.0-post1" }
+dynamo-parsers = { path = "lib/parsers", version = "0.7.1" }
 
 # External dependencies
 anyhow = { version = "1" }

benchmarks/incluster/benchmark_job.yaml

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@ spec:
       containers:
         - name: benchmark-runner
           # TODO: update to latest public image in next release
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0.post1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.1
           securityContext:
             allowPrivilegeEscalation: false
             capabilities:

benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml

Lines changed: 2 additions & 2 deletions

@@ -12,7 +12,7 @@ spec:
 
   # ProfilingConfig maps directly to the profile_sla.py config format
   profilingConfig:
-    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0.post1"
+    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.1"
     config:
       # Sweep/profiling configuration
       sweep:
@@ -31,7 +31,7 @@ spec:
 
   # Deployment overrides for the auto-created DGD
   deploymentOverrides:
-    workersImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0.post1"
+    workersImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.7.1"
 
   # Automatically create DynamoGraphDeployment after profiling
   autoApply: true

benchmarks/profiler/deploy/profile_sla_dgdr.yaml

Lines changed: 2 additions & 2 deletions

@@ -12,7 +12,7 @@ spec:
 
   # ProfilingConfig maps directly to the profile_sla.py config format
   profilingConfig:
-    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0.post1"
+    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.1"
     config:
       # Sweep/profiling configuration
      sweep:
@@ -28,7 +28,7 @@ spec:
 
   # Deployment overrides for the auto-created DGD
   deploymentOverrides:
-    workersImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0.post1"
+    workersImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.1"
 
   # Automatically create DynamoGraphDeployment after profiling
   autoApply: true

components/src/dynamo/sglang/request_handlers/multimodal/encode_worker_handler.py

Lines changed: 1 addition & 2 deletions

@@ -159,7 +159,7 @@ async def generate(
         # Create descriptor for the multimodal data
         descriptor = connect.Descriptor(precomputed_embeddings)
 
-        with self._connector.create_readable(descriptor) as readable:
+        with await self._connector.create_readable(descriptor) as readable:
             request.serialized_request = readable.metadata()
 
         logger.debug(f"Request: {request.model_dump_json()}")
@@ -184,6 +184,5 @@ async def async_init(self, runtime: DistributedRuntime):
         # Create and initialize a dynamo connector for this worker.
         # We'll needs this to move data between this worker and remote workers efficiently.
         self._connector = connect.Connector()
-        await self._connector.initialize()
 
         logger.info("Startup completed.")

components/src/dynamo/sglang/request_handlers/multimodal/worker_handler.py

Lines changed: 0 additions & 2 deletions

@@ -77,7 +77,6 @@ def __init__(self):
     async def initialize(self):
         """Initialize the connector for embeddings processing"""
         self._connector = connect.Connector()
-        await self._connector.initialize()
 
     async def process_embeddings(self, request: SglangMultimodalRequest):
         """Process embeddings from serialized request"""
@@ -103,7 +102,6 @@ async def process_embeddings(self, request: SglangMultimodalRequest):
                 "Connector is None - this should not happen after initialization"
             )
            self._connector = connect.Connector()
-            await self._connector.initialize()
 
        read_op = await self._connector.begin_read(
            request.serialized_request, descriptor
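
On the consuming side, process_embeddings keeps the same two-step shape: wrap a locally allocated destination tensor in a Descriptor, then begin_read() it against the serialized metadata produced by the encode worker. A hedged read-side sketch; the torch allocation, the embeddings_shape field (borrowed from the vLLM handler later in this commit), and the wait_for_completion() call are assumptions rather than lines from this diff.

import torch

from dynamo import nixl_connect as connect  # import path assumed


async def receive_embeddings(connector, request):
    # Allocate a local buffer to receive the embeddings into. Shape and dtype
    # are placeholders; the real handler derives them from the request.
    local = torch.empty(tuple(request.embeddings_shape), dtype=torch.float16)
    descriptor = connect.Descriptor(local)

    # Pair the remote worker's serialized metadata with the local descriptor,
    # mirroring the begin_read() call in process_embeddings above.
    read_op = await connector.begin_read(request.serialized_request, descriptor)
    await read_op.wait_for_completion()  # completion method name assumed
    return local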

components/src/dynamo/trtllm/encode_helper.py

Lines changed: 1 addition & 1 deletion

@@ -241,7 +241,7 @@ async def process_embedding_request(
 
     # Create readable operation with main embeddings tensor (works for both formats)
     descriptor = nixl_connect.Descriptor(encodings)
-    with connector.create_readable(descriptor) as readable_op:
+    with await connector.create_readable(descriptor) as readable_op:
        # Get the metadata for the readable operation
        op_metadata = readable_op.metadata()
 
components/src/dynamo/trtllm/main.py

Lines changed: 0 additions & 1 deletion

@@ -276,7 +276,6 @@ async def init(runtime: DistributedRuntime, config: Config):
     connector = None
     logging.info("Initializing NIXL Connect.")
     connector = nixl_connect.Connector()
-    await connector.initialize()
 
     dump_config(
         config.dump_config_to, {"engine_args": engine_args, "dynamo_args": config}
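
For the TensorRT-LLM worker the change is purely at startup: the connector is ready as soon as it is constructed, so init() no longer awaits initialize() before handing it to the handlers. A small sketch of that simplified startup; the init_nixl_connector helper name is illustrative and not part of this commit.

import logging

from dynamo import nixl_connect  # import path assumed


def init_nixl_connector():
    # As of this commit the Connector needs no awaited initialize();
    # construct it and pass it straight to the encode/decode handlers.
    logging.info("Initializing NIXL Connect.")
    return nixl_connect.Connector()

Since nothing is awaited here any more, this helper does not even need to be a coroutine; in the real worker it simply lives inside the larger async init() shown in the hunk above.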

components/src/dynamo/vllm/multimodal_handlers/encode_worker_handler.py

Lines changed: 1 addition & 2 deletions

@@ -69,7 +69,6 @@ async def async_init(self, runtime: DistributedRuntime):
         # Create and initialize a dynamo connector for this worker.
         # We'll needs this to move data between this worker and remote workers efficiently.
         self._connector = connect.Connector()
-        await self._connector.initialize()
         logger.info("Encode worker startup completed.")
 
     async def generate(
@@ -130,7 +129,7 @@ async def generate(
         request.embeddings_shape = tuple(embeddings.shape)
         descriptor = connect.Descriptor(embeddings_cpu)
 
-        with self._connector.create_readable(descriptor) as readable:
+        with await self._connector.create_readable(descriptor) as readable:
             request.serialized_request = readable.metadata()
             # Clear the image URL as hint that the image is passed as embeddings.
             request.multimodal_input.image_url = None
