@@ -49,8 +49,8 @@ and running the CLI from within the latest corresponding `tritonserver`
 container image, which should have all necessary system dependencies installed.
 
 For vLLM and TRT-LLM, you can use their respective images:
-- `nvcr.io/nvidia/tritonserver:25.01-vllm-python-py3`
-- `nvcr.io/nvidia/tritonserver:25.01-trtllm-python-py3`
+- `nvcr.io/nvidia/tritonserver:25.02-vllm-python-py3`
+- `nvcr.io/nvidia/tritonserver:25.02-trtllm-python-py3`
 
 If you decide to run the CLI on the host or in a custom image, please
 see this list of [additional dependencies](#additional-dependencies-for-custom-environments)
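For instance, a minimal sketch of that workflow, reusing the image tag above and the pinned install command shown later in this README; the `--gpus all` flag is an assumption here, and the fuller `docker run` examples below add the required volume mounts:

```bash
# Start an interactive shell in the vLLM container (GPU flag assumed)
docker run -ti --gpus all \
  --shm-size=1g --ulimit memlock=-1 \
  nvcr.io/nvidia/tritonserver:25.02-vllm-python-py3

# Then, inside the container, install a pinned Triton CLI release
pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.3
```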
@@ -65,6 +65,7 @@ matrix below:
 
 | Triton CLI Version | TRT-LLM Version | Triton Container Tag |
 |:------------------:|:---------------:|:--------------------:|
+| 0.1.3              | v0.17.0.post1   | 25.02                |
 | 0.1.2              | v0.17.0.post1   | 25.01                |
 | 0.1.1              | v0.14.0         | 24.10                |
 | 0.1.0              | v0.13.0         | 24.09                |
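As a sketch of how to apply the matrix, pin both sides of a row together; the example below pairs CLI 0.1.1 with the 24.10 TRT-LLM image, assuming the container naming pattern above also holds for the older tags:

```bash
# Triton CLI 0.1.1 pairs with TRT-LLM v0.14.0, shipped in the 24.10 container
pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.1
docker pull nvcr.io/nvidia/tritonserver:24.10-trtllm-python-py3
```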
@@ -88,7 +89,7 @@ It is also possible to install from a specific branch name, a commit hash
 or a tag name. For example, to install `triton_cli` with a specific tag:
 
 ```bash
-GIT_REF="0.1.2"
+GIT_REF="0.1.3"
 pip install git+https://github.com/triton-inference-server/triton_cli.git@${GIT_REF}
 ```
 
@@ -123,7 +124,7 @@ triton -h
 triton import -m gpt2
 
 # Start server pointing at the default model repository
-triton start --image nvcr.io/nvidia/tritonserver:25.01-vllm-python-py3
+triton start --image nvcr.io/nvidia/tritonserver:25.02-vllm-python-py3
 
 # Infer with CLI
 triton infer -m gpt2 --prompt "machine learning is"
@@ -209,10 +210,10 @@ docker run -ti \
   --shm-size=1g --ulimit memlock=-1 \
   -v ${HOME}/models:/root/models \
   -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-  nvcr.io/nvidia/tritonserver:25.01-vllm-python-py3
+  nvcr.io/nvidia/tritonserver:25.02-vllm-python-py3
 
 # Install the Triton CLI
-pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.2
+pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.3
 
 # Authenticate with huggingface for restricted models like Llama-2 and Llama-3
 huggingface-cli login
@@ -277,7 +278,7 @@ docker run -ti \
   nvcr.io/nvidia/tritonserver:25.01-trtllm-python-py3
 
 # Install the Triton CLI
-pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.2
+pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.3
 
 # Authenticate with huggingface for restricted models like Llama-2 and Llama-3
 huggingface-cli login
@@ -331,10 +332,10 @@ docker run -ti \
   -v /tmp:/tmp \
   -v ${HOME}/models:/root/models \
   -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-  nvcr.io/nvidia/tritonserver:25.01-trtllm-python-py3
+  nvcr.io/nvidia/tritonserver:25.02-trtllm-python-py3
 
 # Install the Triton CLI
-pip install git+https://github.com/triton-inference-server/triton_cli.git@main
+pip install git+https://github.com/triton-inference-server/triton_cli.git@0.1.3
 
 # Authenticate with huggingface for restricted models like Llama-2 and Llama-3
 huggingface-cli login