Skip to content

Commit d68b1eb

Browse files
authored
Merge branch 'main' into bis/dep-681-add-agg-lora-tests
2 parents 05d0db4 + c9e445a commit d68b1eb

File tree

25 files changed

+1362
-388
lines changed

25 files changed

+1362
-388
lines changed

.github/actions/docker-build/action.yml

Lines changed: 26 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,6 @@ inputs:
1616
image_tag:
1717
description: 'Custom image tag (optional, defaults to framework:latest)'
1818
required: false
19-
ngc_ci_access_token:
20-
description: 'NGC CI Access Token'
21-
required: false
2219
ci_token:
2320
description: 'CI Token'
2421
required: false
@@ -67,20 +64,14 @@ runs:
6764
- name: Set up Docker Buildx
6865
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 #v3.11.1
6966
with:
70-
driver: docker
67+
driver: docker-container
7168
# Enable BuildKit for enhanced metadata
7269
buildkitd-flags: --debug
73-
- name: Login to ECR
70+
- name: Cleanup
71+
if: always()
7472
shell: bash
75-
env:
76-
ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
7773
run: |
78-
aws ecr get-login-password --region ${{ inputs.aws_default_region }} | docker login --username AWS --password-stdin ${ECR_HOSTNAME}
79-
- name: Login to NGC
80-
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
81-
uses: ./.github/actions/docker-login
82-
with:
83-
ngc_ci_access_token: ${{ inputs.ngc_ci_access_token }}
74+
docker system prune -af
8475
- name: Build image
8576
id: build
8677
shell: bash
@@ -91,9 +82,12 @@ runs:
9182
AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }}
9283
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }}
9384
PLATFORM: ${{ inputs.platform }}
85+
ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
9486
GITHUB_RUN_ID: ${{ github.run_id }}
9587
GITHUB_JOB: ${{ github.job }}
88+
GITHUB_REF_NAME: ${{ github.ref_name }}
9689
run: |
90+
set -x
9791
# Determine image tag
9892
if [ -n "${{ inputs.image_tag }}" ]; then
9993
IMAGE_TAG="${{ inputs.image_tag }}"
@@ -113,18 +107,28 @@ runs:
113107
echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}"
114108
115109
# Collect optional overrides provided by the workflow
110+
# Set base cache args (inline --cache-to plus registry --cache-from); add a registry --cache-to only when building main
116111
EXTRA_ARGS=""
112+
EXTRA_ARGS="--cache-to type=inline "
113+
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-${PLATFORM##*/}-cache "
114+
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-${PLATFORM##*/} "
115+
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
116+
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-${PLATFORM##*/}-cache,mode=max "
117+
fi
118+
119+
echo "$EXTRA_ARGS"
120+
# Collect optional overrides provided by the workflow
117121
if [ -n "${{ inputs.base_image_tag }}" ]; then
118-
EXTRA_ARGS+=" --base-image-tag ${{ inputs.base_image_tag }}"
122+
EXTRA_ARGS+="--base-image-tag ${{ inputs.base_image_tag }} "
119123
fi
120124
if [ -n "${{ inputs.runtime_image_tag }}" ]; then
121-
EXTRA_ARGS+=" --build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }}"
125+
EXTRA_ARGS+="--build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }} "
122126
fi
123127
if [ -n "${{ inputs.cuda_version }}" ]; then
124-
EXTRA_ARGS+=" --build-arg CUDA_VERSION=${{ inputs.cuda_version }}"
128+
EXTRA_ARGS+="--build-arg CUDA_VERSION=${{ inputs.cuda_version }} "
125129
fi
126130
if [ -n "${{ inputs.torch_backend }}" ]; then
127-
EXTRA_ARGS+=" --build-arg TORCH_BACKEND=${{ inputs.torch_backend }}"
131+
EXTRA_ARGS+="--build-arg TORCH_BACKEND=${{ inputs.torch_backend }} "
128132
fi
129133
if [ -n "${{ inputs.dynamo_base_image }}" ]; then
130134
EXTRA_ARGS+=" --dynamo-base-image ${{ inputs.dynamo_base_image }}"
@@ -250,8 +254,7 @@ runs:
250254
chmod +x .github/scripts/parse_buildkit_output.py
251255
252256
# Check for build logs and build stage arguments dynamically
253-
BASE_BUILD_LOG="build-logs/base-image-build.log"
254-
FRAMEWORK_BUILD_LOG="build-logs/framework-${FRAMEWORK_LOWER}-build.log"
257+
BUILD_LOG="build-logs/single-stage-build.log"
255258
256259
# Path to container metadata created in previous step
257260
CONTAINER_METADATA="build-metrics/metrics-${{ inputs.framework }}-${PLATFORM_ARCH}-${WORKFLOW_ID}-${JOB_ID}.json"
@@ -264,18 +267,11 @@ runs:
264267
# Build stage arguments dynamically based on which logs exist
265268
STAGE_ARGS=()
266269
267-
if [ -f "$BASE_BUILD_LOG" ]; then
268-
echo " ✓ Found base image log: ${BASE_BUILD_LOG}"
269-
STAGE_ARGS+=("base:${BASE_BUILD_LOG}")
270-
else
271-
echo " ℹ️ No base image log found"
272-
fi
273-
274-
if [ -f "$FRAMEWORK_BUILD_LOG" ]; then
275-
echo " ✓ Found framework log: ${FRAMEWORK_BUILD_LOG}"
276-
STAGE_ARGS+=("runtime:${FRAMEWORK_BUILD_LOG}")
270+
if [ -f "$BUILD_LOG" ]; then
271+
echo " ✓ Found base image log: ${BUILD_LOG}"
272+
STAGE_ARGS+=("runtime:${BUILD_LOG}")
277273
else
278-
echo " ℹ️ No framework log found"
274+
echo " ℹ️ No image log found"
279275
fi
280276
281277
# Check for any additional stage logs (e.g., build-logs/stage3-*.log)

.github/actions/docker-tag-push/action.yml

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ inputs:
88
push_tags:
99
description: 'Target Name:Tag (newline-separated list for multiple tags)'
1010
required: true
11+
# There isn't a clean way to have an additional tag that is conditional
12+
# Adding this to handle this use-case (we want multiple tags for main builds)
13+
conditional_tag:
14+
description: 'Optional tag for conditionals'
15+
required: false
1116
aws_push:
1217
description: 'Push to AWS Boolean'
1318
required: false
@@ -22,21 +27,9 @@ inputs:
2227
aws_default_region:
2328
description: 'AWS Default Region'
2429
required: false
25-
aws_access_key_id:
26-
description: 'AWS Access Key ID'
27-
required: false
28-
aws_secret_access_key:
29-
description: 'AWS Secret Access Key'
30-
required: false
3130
azure_acr_hostname:
3231
description: 'Azure ACR hostname'
3332
required: false
34-
azure_acr_user:
35-
description: 'Azure ACR user'
36-
required: false
37-
azure_acr_password:
38-
description: 'Azure ACR password'
39-
required: false
4033

4134
outputs:
4235
image_tags:
@@ -48,16 +41,20 @@ runs:
4841
steps:
4942
- name: Set up Docker Buildx
5043
uses: docker/setup-buildx-action@v3
51-
5244
- name: ECR Tag and Push
5345
shell: bash
5446
if: ${{ inputs.aws_push == 'true' }}
5547
env:
5648
LOCAL_IMAGE: ${{ inputs.local_image }}
5749
PUSH_TAGS: ${{ inputs.push_tags }}
50+
CONDITIONAL_TAG: ${{ inputs.conditional_tag }}
5851
ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
5952
run: |
6053
set -euo pipefail
54+
if [[ ${CONDITIONAL_TAG} != '' ]]; then
55+
docker tag ${LOCAL_IMAGE} ${ECR_HOSTNAME}/${CONDITIONAL_TAG}
56+
docker push ${ECR_HOSTNAME}/${CONDITIONAL_TAG}
57+
fi
6158
while IFS= read -r TAG; do
6259
if [ -z "$TAG" ]; then
6360
continue
@@ -66,7 +63,6 @@ runs:
6663
docker tag "${LOCAL_IMAGE}" "${ECR_HOSTNAME}/${TAG}"
6764
docker push "${ECR_HOSTNAME}/${TAG}"
6865
done <<< "$PUSH_TAGS"
69-
7066
- name: ACR Tag and Push
7167
shell: bash
7268
if: ${{ inputs.azure_push == 'true' }}

.github/workflows/container-validation-backends.yml

Lines changed: 35 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,10 @@ jobs:
6969
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
7070
- name: Set up Docker Buildx
7171
uses: docker/setup-buildx-action@v3
72-
with:
73-
driver: docker
74-
- name: Login to ECR
72+
- name: Docker Login
7573
uses: ./.github/actions/docker-login
7674
with:
75+
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
7776
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
7877
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
7978
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
@@ -93,7 +92,6 @@ jobs:
9392
run: |
9493
cd deploy/cloud/operator
9594
docker build --target tester --progress=plain --build-arg DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/ .
96-
9795
- name: Set up Go
9896
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
9997
with:
@@ -125,11 +123,7 @@ jobs:
125123
push_tags: ai-dynamo/dynamo:${{ github.sha }}-operator-${{ matrix.platform.arch }}
126124
aws_push: 'false'
127125
azure_push: 'true'
128-
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
129-
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
130126
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
131-
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
132-
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
133127

134128
vllm:
135129
needs: changed-files
@@ -149,6 +143,15 @@ jobs:
149143
echo ${K8S_NODE_NAME}
150144
- name: Checkout code
151145
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
146+
- name: Docker Login
147+
uses: ./.github/actions/docker-login
148+
with:
149+
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
150+
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
151+
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
152+
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
153+
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
154+
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
152155
- name: Build Container
153156
id: build-image
154157
uses: ./.github/actions/docker-build
@@ -160,7 +163,6 @@ jobs:
160163
runtime_image_tag: ${{ matrix.platform.arch == 'arm64' && '12.9.0-runtime-ubuntu24.04' || '' }}
161164
cuda_version: ${{ matrix.platform.arch == 'arm64' && '129' || '' }}
162165
torch_backend: ${{ matrix.platform.arch == 'arm64' && 'cu129' || '' }}
163-
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
164166
ci_token: ${{ secrets.CI_TOKEN }}
165167
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
166168
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
@@ -179,15 +181,12 @@ jobs:
179181
with:
180182
local_image: ${{ steps.build-image.outputs.image_tag }}
181183
push_tags: ai-dynamo/dynamo:${{ github.sha }}-vllm-${{ matrix.platform.arch }}
182-
# OPS-1145: Switch aws_push to true
183-
aws_push: 'false'
184+
conditional_tag: ${{ github.ref_name == 'main' && format('ai-dynamo/dynamo:main-vllm-{0}', matrix.platform.arch) || '' }}
185+
aws_push: 'true'
184186
azure_push: 'true'
185187
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
186188
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
187189
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
188-
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
189-
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
190-
191190
- name: Run tests
192191
if: ${{ matrix.platform.arch != 'arm64' }}
193192
uses: ./.github/actions/pytest
@@ -216,43 +215,39 @@ jobs:
216215
echo ${K8S_NODE_NAME}
217216
- name: Checkout repository
218217
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
219-
218+
- name: Docker Login
219+
uses: ./.github/actions/docker-login
220+
with:
221+
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
222+
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
223+
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
224+
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
225+
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
226+
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
220227
- name: Build Container
221228
id: build-image
222229
uses: ./.github/actions/docker-build
223230
with:
224231
framework: sglang
225232
target: runtime
226233
platform: 'linux/${{ matrix.platform.arch }}'
227-
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
228234
ci_token: ${{ secrets.CI_TOKEN }}
229235
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
230236
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
231237
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
232238
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
233239
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
234-
235-
- name: Login to Container Registries
236-
uses: ./.github/actions/docker-login
237-
with:
238-
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
239-
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
240-
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
241-
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
242240
- name: Docker Tag and Push
243241
uses: ./.github/actions/docker-tag-push
244242
with:
245243
local_image: ${{ steps.build-image.outputs.image_tag }}
246244
push_tags: ai-dynamo/dynamo:${{ github.sha }}-sglang-${{ matrix.platform.arch }}
247-
# OPS-1145: Switch aws_push to true
248-
aws_push: 'false'
245+
conditional_tag: ${{ github.ref_name == 'main' && format('ai-dynamo/dynamo:main-sglang-{0}', matrix.platform.arch) || '' }}
246+
aws_push: 'true'
249247
azure_push: 'true'
250248
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
251249
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
252250
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
253-
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
254-
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
255-
256251
- name: Run tests
257252
if: ${{ matrix.platform.arch != 'arm64' }}
258253
uses: ./.github/actions/pytest
@@ -281,43 +276,39 @@ jobs:
281276
echo ${K8S_NODE_NAME}
282277
- name: Checkout code
283278
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
284-
279+
- name: Docker Login
280+
uses: ./.github/actions/docker-login
281+
with:
282+
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
283+
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
284+
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
285+
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
286+
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
287+
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
285288
- name: Build Container
286289
id: build-image
287290
uses: ./.github/actions/docker-build
288291
with:
289292
framework: trtllm
290293
target: runtime
291294
platform: 'linux/${{ matrix.platform.arch }}'
292-
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
293295
ci_token: ${{ secrets.CI_TOKEN }}
294296
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
295297
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
296298
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
297299
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
298300
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
299-
300-
- name: Login to Container Registries
301-
uses: ./.github/actions/docker-login
302-
with:
303-
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
304-
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
305-
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
306-
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
307301
- name: Docker Tag and Push
308302
uses: ./.github/actions/docker-tag-push
309303
with:
310304
local_image: ${{ steps.build-image.outputs.image_tag }}
311305
push_tags: ai-dynamo/dynamo:${{ github.sha }}-trtllm-${{ matrix.platform.arch }}
312-
# OPS-1145: Switch aws_push to true
313-
aws_push: 'false'
306+
conditional_tag: ${{ github.ref_name == 'main' && format('ai-dynamo/dynamo:main-trtllm-{0}', matrix.platform.arch) || '' }}
307+
aws_push: 'true'
314308
azure_push: 'true'
315309
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
316310
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
317311
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
318-
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
319-
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
320-
321312
- name: Run tests
322313
if: ${{ matrix.platform.arch != 'arm64' }}
323314
uses: ./.github/actions/pytest

components/src/dynamo/sglang/request_handlers/multimodal/encode_worker_handler.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ async def generate(
159159
# Create descriptor for the multimodal data
160160
descriptor = connect.Descriptor(precomputed_embeddings)
161161

162-
with self._connector.create_readable(descriptor) as readable:
162+
with await self._connector.create_readable(descriptor) as readable:
163163
request.serialized_request = readable.metadata()
164164

165165
logger.debug(f"Request: {request.model_dump_json()}")
@@ -184,6 +184,5 @@ async def async_init(self, runtime: DistributedRuntime):
184184
# Create and initialize a dynamo connector for this worker.
185185
# We'll need this to move data between this worker and remote workers efficiently.
186186
self._connector = connect.Connector()
187-
await self._connector.initialize()
188187

189188
logger.info("Startup completed.")

components/src/dynamo/sglang/request_handlers/multimodal/worker_handler.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ def __init__(self):
7777
async def initialize(self):
7878
"""Initialize the connector for embeddings processing"""
7979
self._connector = connect.Connector()
80-
await self._connector.initialize()
8180

8281
async def process_embeddings(self, request: SglangMultimodalRequest):
8382
"""Process embeddings from serialized request"""
@@ -103,7 +102,6 @@ async def process_embeddings(self, request: SglangMultimodalRequest):
103102
"Connector is None - this should not happen after initialization"
104103
)
105104
self._connector = connect.Connector()
106-
await self._connector.initialize()
107105

108106
read_op = await self._connector.begin_read(
109107
request.serialized_request, descriptor

0 commit comments

Comments
 (0)