Skip to content

Commit f22e215

Browse files
authored
Merge branch 'main' into bis/dep-546-lora-aware-kv-routing-support
2 parents 657702a + 1e37c10 commit f22e215

File tree

23 files changed

+744
-321
lines changed

23 files changed

+744
-321
lines changed

components/src/dynamo/planner/utils/planner_core.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
PrefillInterpolator,
2525
)
2626
from dynamo.planner.utils.pre_swept_results_utils import PreSweptResultsHelper
27-
from dynamo.planner.utils.prometheus import PrometheusAPIClient
27+
from dynamo.planner.utils.prometheus import MetricSource, PrometheusAPIClient
2828
from dynamo.planner.utils.trace_data_extractor import extract_metrics_from_mooncake
2929
from dynamo.runtime import DistributedRuntime
3030
from dynamo.runtime.logging import configure_dynamo_logging
@@ -150,9 +150,20 @@ def __init__(
150150
else:
151151
raise ValueError(f"Invalid environment: {args.environment}")
152152

153+
# Use backend metrics for vLLM (queries vllm:* metrics directly from workers)
154+
# Use frontend metrics for other backends (queries dynamo_frontend_* metrics)
155+
metric_source = (
156+
MetricSource.VLLM
157+
if args.backend.lower() == "vllm"
158+
else MetricSource.FRONTEND
159+
)
160+
logger.info(
161+
f"Initializing Prometheus client with metric_source='{metric_source}' for backend '{args.backend}'"
162+
)
153163
self.prometheus_api_client = PrometheusAPIClient(
154164
args.metric_pulling_prometheus_endpoint,
155165
args.namespace,
166+
metric_source=metric_source,
156167
)
157168

158169
self.num_req_predictor = LOAD_PREDICTORS[args.load_predictor](

components/src/dynamo/planner/utils/prometheus.py

Lines changed: 221 additions & 46 deletions
Large diffs are not rendered by default.

components/src/dynamo/vllm/handlers.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from vllm.inputs import TokensPrompt
1313
from vllm.lora.request import LoRARequest
1414
from vllm.outputs import RequestOutput
15-
from vllm.sampling_params import SamplingParams
15+
from vllm.sampling_params import SamplingParams, StructuredOutputsParams
1616
from vllm.v1.engine.exceptions import EngineDeadError
1717

1818
from dynamo.llm import (
@@ -82,8 +82,22 @@ def build_sampling_params(
8282
sampling_params = SamplingParams(**default_sampling_params)
8383
sampling_params.detokenize = False
8484

85-
# Apply sampling_options
85+
# Handle guided_decoding - convert to StructuredOutputsParams
86+
guided_decoding = request["sampling_options"].get("guided_decoding")
87+
if guided_decoding is not None and isinstance(guided_decoding, dict):
88+
sampling_params.structured_outputs = StructuredOutputsParams(
89+
json=guided_decoding.get("json"),
90+
regex=guided_decoding.get("regex"),
91+
choice=guided_decoding.get("choice"),
92+
grammar=guided_decoding.get("grammar"),
93+
whitespace_pattern=guided_decoding.get("whitespace_pattern"),
94+
)
95+
96+
# Apply remaining sampling_options
8697
for key, value in request["sampling_options"].items():
98+
# Skip guided_decoding - already handled above
99+
if key == "guided_decoding":
100+
continue
87101
if value is not None and hasattr(sampling_params, key):
88102
setattr(sampling_params, key, value)
89103

components/src/dynamo/vllm/health_check.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,15 +67,14 @@ def __init__(self, engine_client=None):
6767
self.default_payload = {
6868
"token_ids": [bos_token_id],
6969
"sampling_options": {
70-
"max_tokens": 1,
7170
"temperature": 0.0,
7271
},
7372
"stop_conditions": {
73+
"max_tokens": 1,
7474
"stop": None,
7575
"stop_token_ids": None,
7676
"include_stop_str_in_output": False,
7777
"ignore_eos": False,
78-
"min_tokens": 0,
7978
},
8079
}
8180
super().__init__()

container/Dockerfile.local_dev

Lines changed: 33 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,16 @@
1414
ARG DEV_BASE=""
1515
FROM ${DEV_BASE} AS local-dev
1616

17-
# Don't want dynamo to be editable, just change uid and gid.
18-
ENV USERNAME=dynamo
19-
ARG USER_UID
20-
ARG USER_GID
21-
ARG WORKSPACE_DIR=/workspace
22-
23-
ARG DYNAMO_COMMIT_SHA
24-
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
25-
26-
ARG ARCH
17+
# Switch to root for package installation (dev stage ends as dynamo user)
18+
USER root
19+
# Reset SHELL to non-login bash (dev stage uses login shell)
20+
SHELL ["/bin/bash", "-c"]
2721

2822
# Update package lists and install developer utilities. Some of these may exist in the base image,
2923
# but to ensure consistency across all dev images, we explicitly list all required dev tools here.
3024
RUN apt-get update && apt-get install -y \
3125
# Development utilities
32-
curl wget git vim nano \
26+
curl wget git vim nano less \
3327
# System utilities
3428
htop nvtop tmux screen \
3529
# Network utilities
@@ -47,20 +41,6 @@ RUN apt-get update && apt-get install -y \
4741
# Shell utilities
4842
zsh fish bash-completion
4943

50-
# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
51-
# Configure user with sudo access for Dev Container workflows
52-
RUN apt-get install -y sudo gnupg2 gnupg1 \
53-
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
54-
&& chmod 0440 /etc/sudoers.d/$USERNAME \
55-
&& mkdir -p /home/$USERNAME \
56-
# Handle GID conflicts: if target GID exists and it's not our group, remove it
57-
&& (getent group $USER_GID | grep -v "^$USERNAME:" && groupdel $(getent group $USER_GID | cut -d: -f1) || true) \
58-
# Create group if it doesn't exist, otherwise modify existing group
59-
&& (getent group $USERNAME > /dev/null 2>&1 && groupmod -g $USER_GID $USERNAME || groupadd -g $USER_GID $USERNAME) \
60-
&& usermod -u $USER_UID -g $USER_GID $USERNAME \
61-
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \
62-
&& chsh -s /bin/bash $USERNAME
63-
6444
# Install awk separately with fault tolerance
6545
# awk is a virtual package with multiple implementations (gawk, mawk, original-awk).
6646
# Separated because TensorRT-LLM builds failed on awk package conflicts.
@@ -71,12 +51,35 @@ RUN (apt-get install -y gawk || \
7151
echo "Warning: Could not install any awk implementation") && \
7252
(which awk && echo "awk successfully installed: $(which awk)" || echo "awk not available")
7353

54+
55+
# Don't want dynamo to be editable, just change uid and gid.
56+
ENV USERNAME=dynamo
57+
ARG USER_UID
58+
ARG USER_GID
59+
ARG WORKSPACE_DIR=/workspace
60+
ARG ARCH=amd64
61+
7462
# Add NVIDIA devtools repository and install development tools
7563
RUN wget -qO - https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH}/nvidia.pub | gpg --dearmor -o /etc/apt/keyrings/nvidia-devtools.gpg && \
7664
echo "deb [signed-by=/etc/apt/keyrings/nvidia-devtools.gpg] https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH} /" | tee /etc/apt/sources.list.d/nvidia-devtools.list && \
7765
apt-get update && \
7866
apt-get install -y nsight-systems-2025.5.1
7967

68+
# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
69+
# Configure user with sudo access for Dev Container workflows
70+
RUN apt-get install -y sudo gnupg2 gnupg1 \
71+
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
72+
&& chmod 0440 /etc/sudoers.d/$USERNAME \
73+
&& mkdir -p /home/$USERNAME \
74+
# Handle GID conflicts: if target GID exists and it's not our group, remove it
75+
&& (getent group $USER_GID | grep -v "^$USERNAME:" && groupdel $(getent group $USER_GID | cut -d: -f1) || true) \
76+
# Create group if it doesn't exist, otherwise modify existing group
77+
&& (getent group $USERNAME > /dev/null 2>&1 && groupmod -g $USER_GID $USERNAME || groupadd -g $USER_GID $USERNAME) \
78+
&& usermod -u $USER_UID -g $USER_GID -G 0 $USERNAME \
79+
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \
80+
&& chsh -s /bin/bash $USERNAME
81+
82+
8083
# Clean up package lists at the end
8184
RUN rm -rf /var/lib/apt/lists/*
8285

@@ -87,45 +90,26 @@ ENV WORKSPACE_DIR=${WORKSPACE_DIR}
8790
# Path configuration notes:
8891
# - DYNAMO_HOME: Main project directory (workspace mount point)
8992
# - CARGO_TARGET_DIR: Build artifacts in workspace/target for persistence
90-
# - CARGO_HOME: Must be in $HOME/.cargo (not workspace) because:
91-
# * Workspace gets mounted to different paths where cargo binaries may not exist
92-
# * Contains critical cargo binaries and registry that need consistent paths
93-
# - RUSTUP_HOME: Must be in $HOME/.rustup (not workspace) because:
94-
# * Contains rust toolchain binaries that must be at expected system paths
95-
# * Workspace mount point would break rustup's toolchain resolution
9693
# - PATH: Includes cargo binaries for rust tool access
9794
ENV HOME=/home/$USERNAME
9895
ENV DYNAMO_HOME=${WORKSPACE_DIR}
9996
ENV CARGO_TARGET_DIR=${WORKSPACE_DIR}/target
100-
ENV CARGO_HOME=${HOME}/.cargo
101-
ENV RUSTUP_HOME=${HOME}/.rustup
97+
# NOTE: CARGO_HOME and RUSTUP_HOME are already inherited from dev stage (Dockerfile.sglang|trtllm|vllm)
10298
ENV PATH=${CARGO_HOME}/bin:$PATH
10399

104-
# Copy Rust toolchain from system directories to user home directories with proper ownership
105-
RUN rsync -a --chown=$USER_UID:$USER_GID /usr/local/rustup/ $RUSTUP_HOME/
106-
107-
RUN rsync -a --chown=$USER_UID:$USER_GID /usr/local/cargo/ $CARGO_HOME/
108-
109-
# Copy virtual environment with proper ownership using rsync instead of chown.
110-
# Why rsync instead of chown -R:
111-
# chown -R is extremely slow in Docker containers, especially on large directory trees
112-
# like Python virtual environments with thousands of files. This is a well-documented
113-
# Docker performance issue. rsync --chown is 3-4x faster as it sets ownership during copy.
114-
RUN rsync -a --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV}/ /tmp/venv-temp/ && \
115-
rm -rf ${VIRTUAL_ENV} && \
116-
mv /tmp/venv-temp ${VIRTUAL_ENV}
117-
118-
# At this point, we are executing as the ubuntu user
100+
# Switch to dynamo user (dev stage has umask 002, so files should already be group-writable)
119101
USER $USERNAME
120102
WORKDIR $HOME
121103

122104
# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
123105
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \
124106
&& mkdir -p $HOME/.commandhistory \
107+
&& chmod g+w $HOME/.commandhistory \
125108
&& touch $HOME/.commandhistory/.bash_history \
126109
&& echo "$SNIPPET" >> "$HOME/.bashrc"
127110

128-
RUN mkdir -p /home/$USERNAME/.cache/
111+
RUN mkdir -p /home/$USERNAME/.cache/ \
112+
&& chmod g+w /home/$USERNAME/.cache/
129113

130114
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
131115
CMD []

container/Dockerfile.sglang

Lines changed: 52 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
11
# syntax=docker/dockerfile:1.10.0
22
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Throughout this file, we make certain paths group-writable because this allows
6+
# both the dynamo user (UID 1000) and Dev Container users (UID != 1000) to work
7+
# properly without needing slow chown -R operations (which can add 2-10 extra
8+
# minutes).
9+
#
10+
# DEVELOPMENT PATHS THAT MUST BE GROUP-WRITABLE (for non-virtualenv containers):
11+
# /workspace - Users create/modify project files
12+
# /home/dynamo - Users create config/cache files
13+
# /home/dynamo/.local - SGLang uses $HOME/.local/lib/python3.10/site-packages for pip install
14+
#
15+
# HOW TO ACHIEVE GROUP-WRITABLE PERMISSIONS:
16+
# 1. SHELL + /etc/profile.d - Login shell sources umask 002 globally for all RUN commands (775/664)
17+
# 2. COPY --chmod=775 - Sets permissions on copied children (not destination)
18+
# 3. chmod g+w (no -R) - Fixes destination dirs only (milliseconds vs minutes)
419

520
# This section contains build arguments that are common and shared with
621
# the plain Dockerfile, so they should NOT have a default. The source of truth is from build.sh.
@@ -190,18 +205,25 @@ RUN git clone --depth 1 --branch ${GDRCOPY_COMMIT} https://github.com/NVIDIA/gdr
190205
# Fix DeepEP IBGDA symlink
191206
RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so
192207

193-
# Create dynamo user EARLY - before copying files, with group 0 for OpenShift compatibility
208+
# Create dynamo user with group 0 for OpenShift compatibility
194209
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
195210
&& useradd -m -s /bin/bash -g 0 dynamo \
196211
&& [ `id -u dynamo` -eq 1000 ] \
197212
&& mkdir -p /workspace /home/dynamo/.cache /opt/dynamo \
198-
&& chown -R dynamo: /sgl-workspace /workspace /home/dynamo /opt/dynamo \
199-
&& chmod -R g+w /sgl-workspace /workspace /home/dynamo/.cache /opt/dynamo
213+
# Non-recursive chown - only the directories themselves, not contents
214+
&& chown dynamo:0 /sgl-workspace /workspace /home/dynamo /home/dynamo/.cache /opt/dynamo \
215+
# No chmod needed: umask 002 handles new files, COPY --chmod handles copied content
216+
# Set umask globally for all subsequent RUN commands (must be done as root before USER dynamo)
217+
# NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
218+
&& mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
200219

201220
USER dynamo
202221
ENV HOME=/home/dynamo
222+
# This picks up the umask 002 from the /etc/profile.d/00-umask.sh file for subsequent RUN commands
223+
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
203224

204-
# Install SGLang (requires CUDA 12.8.1 or 12.9.1)
225+
# Install SGLang (requires CUDA 12.8.1 or 12.9.1). Note that when the system-wide site-packages directory is not writable,
226+
# pip installs the packages into ~/.local/lib/python<version>/site-packages instead.
205227
RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptools==80.9.0 wheel==0.45.1 html5lib==1.1 six==1.17.0 \
206228
&& git clone --depth 1 --branch v${SGLANG_COMMIT} https://github.com/sgl-project/sglang.git \
207229
&& cd sglang \
@@ -279,8 +301,10 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
279301
NVSHMEM_DIR=${NVSHMEM_DIR} TORCH_CUDA_ARCH_LIST="9.0;10.0" pip install --no-build-isolation .
280302

281303
# Copy rust installation from dynamo_base to avoid duplication efforts
282-
COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
283-
COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo
304+
# Pattern: COPY --chmod=775 <path>; RUN chmod g+w <path> because COPY --chmod only affects <path>/*, not <path>
305+
COPY --from=dynamo_base --chown=dynamo:0 --chmod=775 /usr/local/rustup /usr/local/rustup
306+
COPY --from=dynamo_base --chown=dynamo:0 --chmod=775 /usr/local/cargo /usr/local/cargo
307+
RUN chmod g+w /usr/local/rustup /usr/local/cargo
284308

285309
ENV RUSTUP_HOME=/usr/local/rustup \
286310
CARGO_HOME=/usr/local/cargo \
@@ -341,8 +365,9 @@ COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
341365
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${HOME}/.local/bin:$PATH
342366

343367
# Install Dynamo wheels from dynamo_base wheelhouse
344-
COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/
345-
COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
368+
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
369+
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /opt/dynamo/benchmarks/
370+
COPY --chmod=775 --chown=dynamo:0 --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
346371
RUN python3 -m pip install \
347372
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
348373
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
@@ -361,29 +386,34 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
361386
--requirement /tmp/requirements.test.txt
362387

363388
## Copy attribution files and launch banner with correct ownership
364-
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
389+
COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
390+
391+
# Copy tests, benchmarks, deploy and components for CI with correct ownership
392+
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
393+
COPY --chmod=775 --chown=dynamo:0 pyproject.toml /workspace/
394+
COPY --chmod=775 --chown=dynamo:0 tests /workspace/tests
395+
COPY --chmod=775 --chown=dynamo:0 examples /workspace/examples
396+
COPY --chmod=775 --chown=dynamo:0 benchmarks /workspace/benchmarks
397+
COPY --chmod=775 --chown=dynamo:0 deploy /workspace/deploy
398+
COPY --chmod=775 --chown=dynamo:0 components/ /workspace/components/
399+
COPY --chmod=775 --chown=dynamo:0 recipes/ /workspace/recipes/
365400

366401
# Setup launch banner in common directory accessible to all users
367402
RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
368403
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
369404

370405
# Setup environment for all users
371406
USER root
372-
RUN chmod 755 /opt/dynamo/.launch_screen && \
407+
# Fix directory permissions: COPY --chmod only affects contents, not the directory itself
408+
RUN chmod g+w /workspace /workspace/* /opt/dynamo /opt/dynamo/* && \
409+
chown dynamo:0 /workspace /opt/dynamo/ && \
410+
chmod 755 /opt/dynamo/.launch_screen && \
373411
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
374412
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
375413

376414
USER dynamo
377415

378416
# Copy tests, benchmarks, deploy and components for CI with correct ownership
379-
COPY --chown=dynamo: pyproject.toml /workspace/
380-
COPY --chown=dynamo: tests /workspace/tests
381-
COPY --chown=dynamo: examples /workspace/examples
382-
COPY --chown=dynamo: benchmarks /workspace/benchmarks
383-
COPY --chown=dynamo: deploy /workspace/deploy
384-
COPY --chown=dynamo: components/ /workspace/components/
385-
COPY --chown=dynamo: recipes/ /workspace/recipes/
386-
387417
ARG DYNAMO_COMMIT_SHA
388418
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
389419

@@ -420,6 +450,8 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv \
420450
PATH="/opt/dynamo/venv/bin:${PATH}"
421451

422452
USER root
453+
# venv permissions are handled by umask 002 set earlier
454+
423455
# Install development tools and utilities
424456
RUN apt-get update -y && \
425457
apt-get install -y --no-install-recommends \
@@ -478,7 +510,7 @@ RUN curl --retry 3 --retry-delay 2 -LSso /usr/local/bin/clang-format https://git
478510
&& rm -rf clangd_18.1.3 clangd.zip
479511

480512
# Editable install of dynamo
481-
COPY README.md hatch_build.py /workspace/
513+
COPY --chmod=664 pyproject.toml README.md hatch_build.py /workspace/
482514
RUN python3 -m pip install --no-deps -e .
483515

484516
# Install Python development packages
@@ -496,4 +528,4 @@ RUN python3 -m pip install --no-cache-dir \
496528
tabulate
497529

498530
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
499-
CMD []
531+
CMD []

0 commit comments

Comments
 (0)