diff --git a/Cargo.lock b/Cargo.lock
index 174759fa26..cba8d9f485 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2663,7 +2663,7 @@ dependencies = [
  "bytes",
  "candle-core 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "chrono",
- "clap 4.5.52",
+ "clap 4.5.53",
  "criterion 0.3.6",
  "cudarc",
  "dashmap 5.5.3",
@@ -4065,8 +4065,8 @@ checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97"
 dependencies = [
  "dirs",
  "futures",
- "indicatif 0.17.11",
  "http 1.4.0",
+ "indicatif 0.17.11",
  "libc",
  "log",
  "num_cpus",
diff --git a/benchmarks/pyproject.toml b/benchmarks/pyproject.toml
index 26431311ec..f8d98d5b17 100644
--- a/benchmarks/pyproject.toml
+++ b/benchmarks/pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
     "pydantic>=2",
     "tabulate",
     "types-tabulate",
-    # Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.2.0rc2/rc3 (==4.56.0), SGLang 0.5.6 (==4.57.1)
+    # Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.2.0rc5 (==4.56.0), SGLang 0.5.6 (==4.57.1)
     "transformers>=4.56.0,<=4.57.1",
     "pytest-mypy",
 ]
diff --git a/components/src/dynamo/trtllm/main.py b/components/src/dynamo/trtllm/main.py
index 52b043cf09..80238cb4f0 100644
--- a/components/src/dynamo/trtllm/main.py
+++ b/components/src/dynamo/trtllm/main.py
@@ -22,7 +22,6 @@ import uvloop

 from prometheus_client import REGISTRY
 from tensorrt_llm.llmapi import (
-    BuildConfig,
     CapacitySchedulerPolicy,
     DynamicBatchConfig,
     KvCacheConfig,
@@ -162,13 +161,6 @@ async def init(runtime: DistributedRuntime, config: Config):
     else:
         gpus_per_node = config.gpus_per_node

-    build_config = BuildConfig(
-        max_batch_size=config.max_batch_size,
-        max_num_tokens=config.max_num_tokens,
-        max_beam_width=config.max_beam_width,
-        max_seq_len=config.max_seq_len,
-    )
-
     kv_cache_config = KvCacheConfig(
         free_gpu_memory_fraction=config.free_gpu_memory_fraction
     )
@@ -190,7 +182,6 @@ async def init(runtime: DistributedRuntime, config: Config):
         "pipeline_parallel_size": config.pipeline_parallel_size,
         "moe_expert_parallel_size": config.expert_parallel_size,
         "backend": Backend.PYTORCH,
-        "build_config": build_config,
         "kv_cache_config": kv_cache_config,
         "gpus_per_node": gpus_per_node,
         "max_num_tokens": config.max_num_tokens,
diff --git a/container/Dockerfile b/container/Dockerfile
index f0492abcbf..d080db8831 100644
--- a/container/Dockerfile
+++ b/container/Dockerfile
@@ -153,7 +153,14 @@ RUN yum groupinstall -y 'Development Tools' && \
         libibumad \
         libibumad-devel \
         librdmacm-devel \
-        numactl-devel
+        numactl-devel \
+        # Hardware Locality (hwloc) - required for NIXL libfabric plugin topology awareness
+        hwloc \
+        hwloc-devel \
+        # Build tools for libfabric (will build from source for newer version)
+        autoconf \
+        automake \
+        libtool

 # Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
 RUN set -eux; \
@@ -245,7 +252,37 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
     ldconfig

-# build and install nixl
+# Build and install libfabric from source (minimum v2.3.0 required for NIXL)
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
+    LIBFABRIC_VERSION="v2.3.0" && \
+    wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
+        "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" \
+        -O /tmp/libfabric.tar.bz2 && \
+    cd /tmp && \
+    tar xjf libfabric.tar.bz2 && \
+    cd libfabric-* && \
+    ./autogen.sh && \
+    ./configure --prefix="/usr/local/libfabric" \
+        --disable-verbs \
+        --disable-psm3 \
+        --disable-opx \
+        --disable-usnic \
+        --disable-rstream \
+        --enable-efa \
+        --with-cuda=/usr/local/cuda \
+        --enable-cuda-dlopen \
+        --with-gdrcopy=/usr/local \
+        --enable-gdrcopy-dlopen && \
+    make -j$(nproc) && \
+    make install && \
+    /tmp/use-sccache.sh show-stats "libfabric" && \
+    echo "/usr/local/libfabric/lib" > /etc/ld.so.conf.d/libfabric.conf && \
+    ldconfig && \
+    cd / && rm -rf /tmp/libfabric*
+
+# build and install nixl with UCX and libfabric backends
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
     export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
@@ -255,6 +292,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" && \
     cd nixl && \
     mkdir build && \
+    export PKG_CONFIG_PATH="/usr/local/libfabric/lib/pkgconfig:/usr/lib64/pkgconfig:/usr/share/pkgconfig:${PKG_CONFIG_PATH}" && \
     meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
         -Dcudapath_lib="/usr/local/cuda/lib64" \
         -Dcudapath_inc="/usr/local/cuda/include" \
@@ -267,7 +305,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
 ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
     NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
     NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
+ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/libfabric/lib:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}

 RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
     echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
@@ -367,6 +405,9 @@ RUN apt-get update -y \
         protobuf-compiler \
         # sudo for dev stage
         sudo \
+        # hwloc - required for NIXL libfabric plugin
+        libhwloc15 \
+        libhwloc-dev \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/* \
     # Add sudo privileges to dynamo user
diff --git a/container/Dockerfile.trtllm b/container/Dockerfile.trtllm
index 62b92db358..52c5c7483c 100644
--- a/container/Dockerfile.trtllm
+++ b/container/Dockerfile.trtllm
@@ -178,7 +178,14 @@ RUN yum groupinstall -y 'Development Tools' && \
         libibumad \
         libibumad-devel \
         librdmacm-devel \
-        numactl-devel
+        numactl-devel \
+        # Hardware Locality (hwloc) - required for NIXL libfabric plugin topology awareness
+        hwloc \
+        hwloc-devel \
+        # Build tools for libfabric (will build from source for newer version)
+        autoconf \
+        automake \
+        libtool

 # Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
 RUN set -eux; \
@@ -270,7 +277,37 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
     ldconfig

-# build and install nixl
+# Build and install libfabric from source (minimum v2.3.0 required for NIXL)
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
+    LIBFABRIC_VERSION="v2.3.0" && \
+    wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
+        "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" \
+        -O /tmp/libfabric.tar.bz2 && \
+    cd /tmp && \
+    tar xjf libfabric.tar.bz2 && \
+    cd libfabric-* && \
+    ./autogen.sh && \
+    ./configure --prefix="/usr/local/libfabric" \
+        --disable-verbs \
+        --disable-psm3 \
+        --disable-opx \
+        --disable-usnic \
+        --disable-rstream \
+        --enable-efa \
+        --with-cuda=/usr/local/cuda \
+        --enable-cuda-dlopen \
+        --with-gdrcopy=/usr/local \
+        --enable-gdrcopy-dlopen && \
+    make -j$(nproc) && \
+    make install && \
+    /tmp/use-sccache.sh show-stats "libfabric" && \
+    echo "/usr/local/libfabric/lib" > /etc/ld.so.conf.d/libfabric.conf && \
+    ldconfig && \
+    cd / && rm -rf /tmp/libfabric*
+
+# build and install nixl with UCX and libfabric backends
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
     export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
@@ -280,6 +317,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" && \
     cd nixl && \
     mkdir build && \
+    export PKG_CONFIG_PATH="/usr/local/libfabric/lib/pkgconfig:/usr/lib64/pkgconfig:/usr/share/pkgconfig:${PKG_CONFIG_PATH}" && \
     meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
         -Dcudapath_lib="/usr/local/cuda/lib64" \
         -Dcudapath_inc="/usr/local/cuda/include" \
@@ -292,7 +330,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
 ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
     NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
     NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
+ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/libfabric/lib:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}

 RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
     echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
@@ -314,6 +352,7 @@ COPY components/ /opt/dynamo/components/

 # Build dynamo wheels
 ARG ENABLE_KVBM
+ARG USE_SCCACHE
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
     export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
@@ -453,7 +492,7 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
         sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
         bash /tmp/install_tensorrt.sh && \
         # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
-        # TRTLLM 1.2.0rc2 has issues installing from pypi with uv, installing from direct wheel link works best
+        # TRTLLM 1.2.0rc5 has issues installing from pypi with uv, installing from direct wheel link works best
         # explicitly installing triton 3.5.0 as trtllm only lists triton as dependency on x64_64 for some reason
         if echo "${TENSORRTLLM_PIP_WHEEL}" | grep -q '^tensorrt-llm=='; then \
             TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -E 's/tensorrt-llm==([0-9a-zA-Z.+-]+).*/\1/'); \
@@ -589,6 +628,9 @@ RUN if [ ${ARCH_ALT} = "x86_64" ]; then \
         libnuma1 \
         librdmacm1 \
         rdma-core \
+        # Hardware locality (hwloc) - required for libfabric NIXL backend
+        libhwloc15 \
+        libhwloc-dev \
         # OpenMPI dependencies
         openssh-client \
         openssh-server \
@@ -616,10 +658,11 @@ COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libgomp.so* /usr/lib/${ARCH
 # Pattern: COPY --chmod=775 ; chmod g+w done later as root because COPY --chmod only affects /*, not
 COPY --chmod=775 --chown=dynamo:0 --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}

-# Copy UCX from framework image as plugin for NIXL
-# Copy NIXL source from framework image
+# Copy UCX and libfabric from wheel_builder as plugins for NIXL
+# Copy NIXL from wheel_builder
 # Copy dynamo wheels for gitlab artifacts (read-only, no group-write needed)
 COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx /usr/local/ucx
+COPY --chown=dynamo: --from=wheel_builder /usr/local/libfabric /usr/local/libfabric
 COPY --chown=dynamo: --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
 COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
 COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
@@ -628,6 +671,7 @@ COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/pyt
 ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
 ENV PATH="/usr/local/ucx/bin:${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
 ENV LD_LIBRARY_PATH=\
+/usr/local/libfabric/lib:\
 $NIXL_LIB_DIR:\
 $NIXL_PLUGIN_DIR:\
 /usr/local/ucx/lib:\
diff --git a/container/build.sh b/container/build.sh
index 87e43eae24..06f634b186 100755
--- a/container/build.sh
+++ b/container/build.sh
@@ -89,7 +89,7 @@ DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
 # TensorRT-LLM commit to use for building the trtllm wheel if not provided.
 # Important Note: This commit is not used in our CI pipeline. See the CI
 # variables to learn how to run a pipeline with a specific commit.
-DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="31116825b39f4e6a6a1e127001f5204b73d1dc32" # 1.2.0rc2
+DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="e4c707845ff58fcc0b1d87afb4dd0e64885c780a" # 1.2.0rc5
 TRTLLM_COMMIT=""
 TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
 TRTLLM_GIT_URL=""
@@ -98,7 +98,7 @@ TRTLLM_GIT_URL=""
 DEFAULT_TENSORRTLLM_INDEX_URL="https://pypi.nvidia.com/"
 # TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
 # Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
-DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc3"
+DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc5"
 TENSORRTLLM_PIP_WHEEL=""

 VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
diff --git a/container/deps/requirements.txt b/container/deps/requirements.txt
index a62be36588..13a087f5c0 100644
--- a/container/deps/requirements.txt
+++ b/container/deps/requirements.txt
@@ -52,7 +52,7 @@ tensorboard==2.19.0
 tensorboardX==2.6.2.2
 # Transformers version constraint for container builds
 # - vLLM 0.11.0: >=4.55.2, vLLM 0.11.2: >=4.56.0,<5
-# - TensorRT-LLM 1.2.0rc2/rc3: ==4.56.0
+# - TensorRT-LLM 1.2.0rc5: ==4.56.0
 # - SGLang 0.5.6: ==4.57.1
 # Using >=4.56.0 and <=4.57.1 to satisfy all frameworks
 transformers>=4.56.0,<=4.57.1
diff --git a/container/deps/trtllm/install_nixl.sh b/container/deps/trtllm/install_nixl.sh
deleted file mode 100755
index 4a0741fa94..0000000000
--- a/container/deps/trtllm/install_nixl.sh
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/bin/bash -e
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Install NIXL for TensorRT-LLM.
-# This script is an adapted version of the NIXL install script from the TensorRT-LLM repository.
-# The original script is located at:
-# https://github.com/NVIDIA/TensorRT-LLM/blob/main/docker/common/install_nixl.sh
-
-set -ex
-
-GITHUB_URL="https://github.com"
-
-UCX_VERSION="v1.19.1"
-UCX_INSTALL_PATH="/usr/local/ucx/"
-CUDA_PATH="/usr/local/cuda"
-
-NIXL_COMMIT="97c9b5b48e2ed3f1f2539c461c4971a7db8b1197"
-
-UCX_REPO="https://github.com/openucx/ucx.git"
-NIXL_REPO="https://github.com/ai-dynamo/nixl.git"
-
-
-
-
-if [ ! -d ${UCX_INSTALL_PATH} ]; then
-    git clone --depth 1 -b ${UCX_VERSION} ${UCX_REPO}
-    cd ucx
-    ./autogen.sh
-    ./contrib/configure-release \
-        --prefix=${UCX_INSTALL_PATH} \
-        --enable-shared \
-        --disable-static \
-        --disable-doxygen-doc \
-        --enable-optimizations \
-        --enable-cma \
-        --enable-devel-headers \
-        --with-cuda=${CUDA_PATH} \
-        --with-verbs \
-        --with-dm \
-        --enable-mt
-    make install -j$(nproc)
-    cd ..
-    rm -rf ucx # Remove UCX source to save space
-    echo "export LD_LIBRARY_PATH=${UCX_INSTALL_PATH}/lib:\$LD_LIBRARY_PATH" >> "${ENV}"
-fi
-
-ARCH_NAME="x86_64-linux-gnu"
-if [ "$(uname -m)" != "amd64" ] && [ "$(uname -m)" != "x86_64" ]; then
-    ARCH_NAME="aarch64-linux-gnu"
-    EXTRA_NIXL_ARGS="-Ddisable_gds_backend=true"
-fi
-
-if [ $ARCH_NAME != "x86_64-linux-gnu" ]; then
-    echo "The NIXL backend is temporarily unavailable on the aarch64 platform. Exiting script."
-    exit 0
-fi
-
-pip3 install --no-cache-dir meson ninja pybind11
-git clone ${NIXL_REPO} nixl
-cd nixl
-git checkout ${NIXL_COMMIT}
-meson setup builddir -Ducx_path=${UCX_INSTALL_PATH} -Dstatic_plugins=UCX -Dbuildtype=release ${EXTRA_NIXL_ARGS}
-cd builddir && ninja install
-cd ../..
-rm -rf nixl* # Remove NIXL source tree to save space
-
-echo "export LD_LIBRARY_PATH=/opt/nvidia/nvda_nixl/lib/${ARCH_NAME}:/opt/nvidia/nvda_nixl/lib64:\$LD_LIBRARY_PATH" >> "${ENV}"
diff --git a/docs/backends/trtllm/multimodal_support.md b/docs/backends/trtllm/multimodal_support.md
index 876bdb21a0..cc58f924b9 100644
--- a/docs/backends/trtllm/multimodal_support.md
+++ b/docs/backends/trtllm/multimodal_support.md
@@ -96,15 +96,6 @@ To deploy `Llama-4-Maverick-17B-128E-Instruct` in disaggregated mode, you will n

 For high-performance multimodal inference, Dynamo supports pre-computed embeddings with an **Encode-Prefill-Decode (EPD)** flow using **NIXL (RDMA)** for zero-copy tensor transfer.

-### Enabling the Feature
-
-This is an experimental feature that requires using a specific TensorRT-LLM commit.
-To enable it build the dynamo container with the `--tensorrtllm-commit` flag:
-
-```bash
-./container/build.sh --framework trtllm --tensorrtllm-git-url https://github.com/NVIDIA/TensorRT-LLM.git --tensorrtllm-commit v1.2.0rc3
-```
-
 ### Supported File Types

 - `.pt` - PyTorch tensor files
diff --git a/docs/reference/support-matrix.md b/docs/reference/support-matrix.md
index 2efb446874..e6c862b8b4 100644
--- a/docs/reference/support-matrix.md
+++ b/docs/reference/support-matrix.md
@@ -58,12 +58,12 @@ If you are using a **GPU**, the following GPU models and architectures are suppo

 ### Build Dependency

-| **Build Dependency** | **Version as of Dynamo v0.7.0** |
-| :------------------- | :------------------------------------------------------------------------------- |
-| **SGLang**           | 0.5.3.post4                     |
-| **TensorRT-LLM**     | 1.2.0rc2                        |
-| **vLLM**             | 0.11.0                          |
-| **NIXL**             | 0.7.1                           |
+| **Build Dependency** | **Version as of Dynamo v0.7.0** |
+| :------------------- | :------------------------------ |
+| **SGLang**           | 0.5.3.post4                     |
+| **TensorRT-LLM**     | 1.2.0rc5                        |
+| **vLLM**             | 0.11.0                          |
+| **NIXL**             | 0.7.1                           |


 > [!Important]
diff --git a/pyproject.toml b/pyproject.toml
index bcd8ad87dd..2100d47951 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,7 +50,7 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"

 [project.optional-dependencies]
 trtllm =[
     "uvloop",
-    "tensorrt-llm==1.2.0rc3",
+    "tensorrt-llm==1.2.0rc5",
 ]
 vllm = [