Skip to content

Commit 5250303

Browse files
chore: TRTLLM 1.2.0rc4 (#4836)
Signed-off-by: Dmitry Tokarev <[email protected]>
1 parent 00f8615 commit 5250303

File tree

8 files changed

+13
-30
lines changed

8 files changed

+13
-30
lines changed

benchmarks/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ dependencies = [
4646
"pydantic>=2",
4747
"tabulate",
4848
"types-tabulate",
49-
# Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.2.0rc2/rc3 (==4.56.0), SGLang 0.5.6 (==4.57.1)
49+
# Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.2.0rc5 (==4.56.0), SGLang 0.5.6 (==4.57.1)
5050
"transformers>=4.56.0,<=4.57.1",
5151
"pytest-mypy",
5252
]

components/src/dynamo/trtllm/main.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import uvloop
2323
from prometheus_client import REGISTRY
2424
from tensorrt_llm.llmapi import (
25-
BuildConfig,
2625
CapacitySchedulerPolicy,
2726
DynamicBatchConfig,
2827
KvCacheConfig,
@@ -162,13 +161,6 @@ async def init(runtime: DistributedRuntime, config: Config):
162161
else:
163162
gpus_per_node = config.gpus_per_node
164163

165-
build_config = BuildConfig(
166-
max_batch_size=config.max_batch_size,
167-
max_num_tokens=config.max_num_tokens,
168-
max_beam_width=config.max_beam_width,
169-
max_seq_len=config.max_seq_len,
170-
)
171-
172164
kv_cache_config = KvCacheConfig(
173165
free_gpu_memory_fraction=config.free_gpu_memory_fraction
174166
)
@@ -190,7 +182,6 @@ async def init(runtime: DistributedRuntime, config: Config):
190182
"pipeline_parallel_size": config.pipeline_parallel_size,
191183
"moe_expert_parallel_size": config.expert_parallel_size,
192184
"backend": Backend.PYTORCH,
193-
"build_config": build_config,
194185
"kv_cache_config": kv_cache_config,
195186
"gpus_per_node": gpus_per_node,
196187
"max_num_tokens": config.max_num_tokens,

container/Dockerfile.trtllm

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ COPY components/ /opt/dynamo/components/
314314

315315
# Build dynamo wheels
316316
ARG ENABLE_KVBM
317+
ARG USE_SCCACHE
317318
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
318319
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
319320
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
@@ -453,7 +454,7 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
453454
sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
454455
bash /tmp/install_tensorrt.sh && \
455456
# Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
456-
# TRTLLM 1.2.0rc2 has issues installing from pypi with uv, installing from direct wheel link works best
457+
# TRTLLM 1.2.0rc5 has issues installing from pypi with uv, installing from direct wheel link works best
457458
# explicitly installing triton 3.5.0 as trtllm only lists triton as dependency on x86_64 for some reason
458459
if echo "${TENSORRTLLM_PIP_WHEEL}" | grep -q '^tensorrt-llm=='; then \
459460
TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -E 's/tensorrt-llm==([0-9a-zA-Z.+-]+).*/\1/'); \

container/build.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
8989
# TensorRT-LLM commit to use for building the trtllm wheel if not provided.
9090
# Important Note: This commit is not used in our CI pipeline. See the CI
9191
# variables to learn how to run a pipeline with a specific commit.
92-
DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="31116825b39f4e6a6a1e127001f5204b73d1dc32" # 1.2.0rc2
92+
DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="e4c707845ff58fcc0b1d87afb4dd0e64885c780a" # 1.2.0rc5
9393
TRTLLM_COMMIT=""
9494
TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
9595
TRTLLM_GIT_URL=""
@@ -98,7 +98,7 @@ TRTLLM_GIT_URL=""
9898
DEFAULT_TENSORRTLLM_INDEX_URL="https://pypi.nvidia.com/"
9999
# TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
100100
# Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
101-
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc3"
101+
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc5"
102102
TENSORRTLLM_PIP_WHEEL=""
103103

104104
VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"

container/deps/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ tensorboard==2.19.0
5252
tensorboardX==2.6.2.2
5353
# Transformers version constraint for container builds
5454
# - vLLM 0.11.0: >=4.55.2, vLLM 0.11.2: >=4.56.0,<5
55-
# - TensorRT-LLM 1.2.0rc2/rc3: ==4.56.0
55+
# - TensorRT-LLM 1.2.0rc5: ==4.56.0
5656
# - SGLang 0.5.6: ==4.57.1
5757
# Using >=4.56.0 and <=4.57.1 to satisfy all frameworks
5858
transformers>=4.56.0,<=4.57.1

docs/backends/trtllm/multimodal_support.md

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,6 @@ To deploy `Llama-4-Maverick-17B-128E-Instruct` in disaggregated mode, you will n
9696

9797
For high-performance multimodal inference, Dynamo supports pre-computed embeddings with an **Encode-Prefill-Decode (EPD)** flow using **NIXL (RDMA)** for zero-copy tensor transfer.
9898

99-
### Enabling the Feature
100-
101-
This is an experimental feature that requires using a specific TensorRT-LLM commit.
102-
To enable it build the dynamo container with the `--tensorrtllm-commit` flag:
103-
104-
```bash
105-
./container/build.sh --framework trtllm --tensorrtllm-git-url https://github.com/NVIDIA/TensorRT-LLM.git --tensorrtllm-commit v1.2.0rc3
106-
```
107-
10899
### Supported File Types
109100

110101
- `.pt` - PyTorch tensor files

docs/reference/support-matrix.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,12 @@ If you are using a **GPU**, the following GPU models and architectures are suppo
5858

5959
### Build Dependency
6060

61-
| **Build Dependency** | **Version as of Dynamo v0.7.0** |
62-
| :------------------- | :------------------------------------------------------------------------------- |
63-
| **SGLang** | 0.5.3.post4 |
64-
| **TensorRT-LLM** | 1.2.0rc2 |
65-
| **vLLM** | 0.11.0 |
66-
| **NIXL** | 0.7.1 |
61+
| **Build Dependency** | **Version as of Dynamo v0.7.0** |
62+
| :------------------- | :------------------------------ |
63+
| **SGLang** | 0.5.3.post4 |
64+
| **TensorRT-LLM** | 1.2.0rc5 |
65+
| **vLLM** | 0.11.0 |
66+
| **NIXL** | 0.7.1 |
6767

6868

6969
> [!Important]

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
5050
[project.optional-dependencies]
5151
trtllm =[
5252
"uvloop",
53-
"tensorrt-llm==1.2.0rc3",
53+
"tensorrt-llm==1.2.0rc5",
5454
]
5555

5656
vllm = [

0 commit comments

Comments
 (0)