4 changes: 2 additions & 2 deletions Cargo.lock

(Generated file; diff not rendered by default.)

2 changes: 1 addition & 1 deletion benchmarks/pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
     "pydantic>=2",
     "tabulate",
     "types-tabulate",
-    # Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.2.0rc2/rc3 (==4.56.0), SGLang 0.5.6 (==4.57.1)
+    # Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.2.0rc5 (==4.56.0), SGLang 0.5.6 (==4.57.1)
     "transformers>=4.56.0,<=4.57.1",
     "pytest-mypy",
 ]
9 changes: 0 additions & 9 deletions components/src/dynamo/trtllm/main.py
@@ -22,7 +22,6 @@
 import uvloop
 from prometheus_client import REGISTRY
 from tensorrt_llm.llmapi import (
-    BuildConfig,
     CapacitySchedulerPolicy,
     DynamicBatchConfig,
     KvCacheConfig,
@@ -162,13 +161,6 @@ async def init(runtime: DistributedRuntime, config: Config):
     else:
         gpus_per_node = config.gpus_per_node

-    build_config = BuildConfig(
-        max_batch_size=config.max_batch_size,
-        max_num_tokens=config.max_num_tokens,
-        max_beam_width=config.max_beam_width,
-        max_seq_len=config.max_seq_len,
-    )
-
     kv_cache_config = KvCacheConfig(
         free_gpu_memory_fraction=config.free_gpu_memory_fraction
     )
@@ -190,7 +182,6 @@ async def init(runtime: DistributedRuntime, config: Config):
         "pipeline_parallel_size": config.pipeline_parallel_size,
         "moe_expert_parallel_size": config.expert_parallel_size,
         "backend": Backend.PYTORCH,
-        "build_config": build_config,
         "kv_cache_config": kv_cache_config,
         "gpus_per_node": gpus_per_node,
         "max_num_tokens": config.max_num_tokens,
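With `BuildConfig` gone from both the import and the args dict, the engine limits no longer pass through a build config; `max_num_tokens` is still supplied explicitly above, and the remaining limits presumably flow through the PyTorch backend's own arguments. A quick sanity check that the removal left no stale references — a sketch, run from the repo root:

```bash
# Expect no matches once BuildConfig is fully removed from the TRT-LLM component
grep -rn "BuildConfig" components/src/dynamo/trtllm/ \
  && echo "stale BuildConfig references remain" \
  || echo "clean"
```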
47 changes: 44 additions & 3 deletions container/Dockerfile
@@ -153,7 +153,14 @@ RUN yum groupinstall -y 'Development Tools' && \
     libibumad \
     libibumad-devel \
     librdmacm-devel \
-    numactl-devel
+    numactl-devel \
+    # Hardware Locality (hwloc) - required for NIXL libfabric plugin topology awareness
+    hwloc \
+    hwloc-devel \
+    # Build tools for libfabric (will build from source for newer version)
+    autoconf \
+    automake \
+    libtool

 # Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
 RUN set -eux; \
@@ -245,7 +252,37 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
     ldconfig

-# build and install nixl
+# Build and install libfabric from source (minimum v2.3.0 required for NIXL)
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
+    LIBFABRIC_VERSION="v2.3.0" && \
+    wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
+        "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" \
+        -O /tmp/libfabric.tar.bz2 && \
+    cd /tmp && \
+    tar xjf libfabric.tar.bz2 && \
+    cd libfabric-* && \
+    ./autogen.sh && \
+    ./configure --prefix="/usr/local/libfabric" \
+        --disable-verbs \
+        --disable-psm3 \
+        --disable-opx \
+        --disable-usnic \
+        --disable-rstream \
+        --enable-efa \
+        --with-cuda=/usr/local/cuda \
+        --enable-cuda-dlopen \
+        --with-gdrcopy=/usr/local \
+        --enable-gdrcopy-dlopen && \
+    make -j$(nproc) && \
+    make install && \
+    /tmp/use-sccache.sh show-stats "libfabric" && \
+    echo "/usr/local/libfabric/lib" > /etc/ld.so.conf.d/libfabric.conf && \
+    ldconfig && \
+    cd / && rm -rf /tmp/libfabric*
+
+# build and install nixl with UCX and libfabric backends
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
     export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
@@ -255,6 +292,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" && \
     cd nixl && \
     mkdir build && \
+    export PKG_CONFIG_PATH="/usr/local/libfabric/lib/pkgconfig:/usr/lib64/pkgconfig:/usr/share/pkgconfig:${PKG_CONFIG_PATH}" && \
     meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
         -Dcudapath_lib="/usr/local/cuda/lib64" \
         -Dcudapath_inc="/usr/local/cuda/include" \
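The exported `PKG_CONFIG_PATH` is what lets meson's dependency lookup find the freshly built libfabric ahead of any system copy — libfabric installs a `libfabric.pc` under its prefix. A minimal sketch of the resolution the build relies on:

```bash
export PKG_CONFIG_PATH="/usr/local/libfabric/lib/pkgconfig:${PKG_CONFIG_PATH}"
pkg-config --modversion libfabric   # expect 2.3.0
pkg-config --libs libfabric         # e.g. -L/usr/local/libfabric/lib -lfabric
```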
@@ -267,7 +305,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
 ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
     NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
     NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
+ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/libfabric/lib:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}

 RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
     echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
@@ -367,6 +405,9 @@ RUN apt-get update -y \
     protobuf-compiler \
     # sudo for dev stage
     sudo \
+    # hwloc - required for NIXL libfabric plugin
+    libhwloc15 \
+    libhwloc-dev \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/* \
     # Add sudo privileges to dynamo user
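A quick smoke test for the resulting image — `fi_info` ships with libfabric, so the build can be verified without NIXL in the loop. The image tag below is a placeholder, and the `efa` provider only initializes on EFA-capable hosts:

```bash
# Run inside the built image (tag is hypothetical)
docker run --rm dynamo-base:latest bash -c '
  /usr/local/libfabric/bin/fi_info --version &&  # should report libfabric 2.3.0
  /usr/local/libfabric/bin/fi_info -l            # lists compiled-in providers (tcp, shm, efa, ...)
'
```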
56 changes: 50 additions & 6 deletions container/Dockerfile.trtllm
@@ -178,7 +178,14 @@ RUN yum groupinstall -y 'Development Tools' && \
     libibumad \
     libibumad-devel \
     librdmacm-devel \
-    numactl-devel
+    numactl-devel \
+    # Hardware Locality (hwloc) - required for NIXL libfabric plugin topology awareness
+    hwloc \
+    hwloc-devel \
+    # Build tools for libfabric (will build from source for newer version)
+    autoconf \
+    automake \
+    libtool

 # Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
 RUN set -eux; \
@@ -270,7 +277,37 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
     ldconfig

-# build and install nixl
+# Build and install libfabric from source (minimum v2.3.0 required for NIXL)
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
+    LIBFABRIC_VERSION="v2.3.0" && \
+    wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
+        "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" \
+        -O /tmp/libfabric.tar.bz2 && \
+    cd /tmp && \
+    tar xjf libfabric.tar.bz2 && \
+    cd libfabric-* && \
+    ./autogen.sh && \
+    ./configure --prefix="/usr/local/libfabric" \
+        --disable-verbs \
+        --disable-psm3 \
+        --disable-opx \
+        --disable-usnic \
+        --disable-rstream \
+        --enable-efa \
+        --with-cuda=/usr/local/cuda \
+        --enable-cuda-dlopen \
+        --with-gdrcopy=/usr/local \
+        --enable-gdrcopy-dlopen && \
+    make -j$(nproc) && \
+    make install && \
+    /tmp/use-sccache.sh show-stats "libfabric" && \
+    echo "/usr/local/libfabric/lib" > /etc/ld.so.conf.d/libfabric.conf && \
+    ldconfig && \
+    cd / && rm -rf /tmp/libfabric*
+
+# build and install nixl with UCX and libfabric backends
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
     export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
@@ -280,6 +317,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" && \
     cd nixl && \
     mkdir build && \
+    export PKG_CONFIG_PATH="/usr/local/libfabric/lib/pkgconfig:/usr/lib64/pkgconfig:/usr/share/pkgconfig:${PKG_CONFIG_PATH}" && \
     meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
         -Dcudapath_lib="/usr/local/cuda/lib64" \
         -Dcudapath_inc="/usr/local/cuda/include" \
@@ -292,7 +330,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
 ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
     NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
     NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
+ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/libfabric/lib:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}

 RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
     echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
@@ -314,6 +352,7 @@ COPY components/ /opt/dynamo/components/

 # Build dynamo wheels
 ARG ENABLE_KVBM
+ARG USE_SCCACHE
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
     export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
@@ -453,7 +492,7 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
     sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
     bash /tmp/install_tensorrt.sh && \
     # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
-    # TRTLLM 1.2.0rc2 has issues installing from pypi with uv, installing from direct wheel link works best
+    # TRTLLM 1.2.0rc5 has issues installing from pypi with uv, installing from direct wheel link works best
     # explicitly installing triton 3.5.0 as trtllm only lists triton as dependency on x64_64 for some reason
     if echo "${TENSORRTLLM_PIP_WHEEL}" | grep -q '^tensorrt-llm=='; then \
         TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -E 's/tensorrt-llm==([0-9a-zA-Z.+-]+).*/\1/'); \
@@ -589,6 +628,9 @@ RUN if [ ${ARCH_ALT} = "x86_64" ]; then \
     libnuma1 \
     librdmacm1 \
     rdma-core \
+    # Hardware locality (hwloc) - required for libfabric NIXL backend
+    libhwloc15 \
+    libhwloc-dev \
     # OpenMPI dependencies
     openssh-client \
     openssh-server \
@@ -616,10 +658,11 @@ COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libgomp.so* /usr/lib/${ARCH
 # Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
 COPY --chmod=775 --chown=dynamo:0 --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}

-# Copy UCX from framework image as plugin for NIXL
-# Copy NIXL source from framework image
+# Copy UCX and libfabric from wheel_builder as plugins for NIXL
+# Copy NIXL from wheel_builder
 # Copy dynamo wheels for gitlab artifacts (read-only, no group-write needed)
 COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx /usr/local/ucx
+COPY --chown=dynamo: --from=wheel_builder /usr/local/libfabric /usr/local/libfabric
 COPY --chown=dynamo: --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
 COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
 COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
@@ -628,6 +671,7 @@ COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/pyt

 ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
 ENV PATH="/usr/local/ucx/bin:${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
 ENV LD_LIBRARY_PATH=\
+/usr/local/libfabric/lib:\
 $NIXL_LIB_DIR:\
 $NIXL_PLUGIN_DIR:\
 /usr/local/ucx/lib:\
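Since the runtime stage copies libfabric out of `wheel_builder` rather than rebuilding it, it is worth confirming that the loader and the NIXL plugin directory agree inside the final image. A sketch — the image tag and the plugin filename pattern are assumptions (NIXL names its backend plugins per transport):

```bash
docker run --rm dynamo-trtllm:latest bash -c '
  ldconfig -p | grep -E "libfabric|hwloc" &&  # loader can resolve the copied libs
  ls "${NIXL_PLUGIN_DIR}" | grep -i fabric    # libfabric backend plugin was built
'
```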
4 changes: 2 additions & 2 deletions container/build.sh
@@ -89,7 +89,7 @@ DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
 # TensorRT-LLM commit to use for building the trtllm wheel if not provided.
 # Important Note: This commit is not used in our CI pipeline. See the CI
 # variables to learn how to run a pipeline with a specific commit.
-DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="31116825b39f4e6a6a1e127001f5204b73d1dc32" # 1.2.0rc2
+DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="e4c707845ff58fcc0b1d87afb4dd0e64885c780a" # 1.2.0rc5
 TRTLLM_COMMIT=""
 TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
 TRTLLM_GIT_URL=""
@@ -98,7 +98,7 @@ TRTLLM_GIT_URL=""
 DEFAULT_TENSORRTLLM_INDEX_URL="https://pypi.nvidia.com/"
 # TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
 # Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
-DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc3"
+DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc5"
 TENSORRTLLM_PIP_WHEEL=""

 VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
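Both defaults are only fallbacks for local builds (CI pins its own commit, per the comment above). A hypothetical invocation showing each path — these flags appear elsewhere in this repo's docs:

```bash
# Build against the new default wheel (tensorrt-llm==1.2.0rc5)
./container/build.sh --framework trtllm

# Or build the wheel from a specific TensorRT-LLM commit instead
./container/build.sh --framework trtllm \
  --tensorrtllm-git-url https://github.com/NVIDIA/TensorRT-LLM.git \
  --tensorrtllm-commit e4c707845ff58fcc0b1d87afb4dd0e64885c780a
```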
2 changes: 1 addition & 1 deletion container/deps/requirements.txt
@@ -52,7 +52,7 @@ tensorboard==2.19.0
 tensorboardX==2.6.2.2
 # Transformers version constraint for container builds
 # - vLLM 0.11.0: >=4.55.2, vLLM 0.11.2: >=4.56.0,<5
-# - TensorRT-LLM 1.2.0rc2/rc3: ==4.56.0
+# - TensorRT-LLM 1.2.0rc5: ==4.56.0
 # - SGLang 0.5.6: ==4.57.1
 # Using >=4.56.0 and <=4.57.1 to satisfy all frameworks
 transformers>=4.56.0,<=4.57.1
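A quick way to confirm the shared pin still satisfies every framework once an environment is built — a sketch; `pip check` validates installed packages against each package's declared constraints:

```bash
python -c "import transformers; print(transformers.__version__)"  # expect 4.56.0 <= v <= 4.57.1
pip check  # reports any installed framework whose transformers requirement is violated
```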
80 changes: 0 additions & 80 deletions container/deps/trtllm/install_nixl.sh

This file was deleted.

9 changes: 0 additions & 9 deletions docs/backends/trtllm/multimodal_support.md
@@ -96,15 +96,6 @@ To deploy `Llama-4-Maverick-17B-128E-Instruct` in disaggregated mode, you will n

 For high-performance multimodal inference, Dynamo supports pre-computed embeddings with an **Encode-Prefill-Decode (EPD)** flow using **NIXL (RDMA)** for zero-copy tensor transfer.

-### Enabling the Feature
-
-This is an experimental feature that requires using a specific TensorRT-LLM commit.
-To enable it build the dynamo container with the `--tensorrtllm-commit` flag:
-
-```bash
-./container/build.sh --framework trtllm --tensorrtllm-git-url https://github.com/NVIDIA/TensorRT-LLM.git --tensorrtllm-commit v1.2.0rc3
-```
-
 ### Supported File Types

 - `.pt` - PyTorch tensor files
Expand Down