Skip to content

Commit 9fb5f03

Browse files
biswapanda and PeaBrane authored
fix: pin sniffio dependency (#4665)
Signed-off-by: PeaBrane <[email protected]> Co-authored-by: PeaBrane <[email protected]>
1 parent fb4432e commit 9fb5f03

File tree

7 files changed

+17
-12
lines changed

7 files changed

+17
-12
lines changed

.github/workflows/container-validation-dynamo.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ jobs:
6565
docker compose down
6666
- name: Run pytest (parallel tests with xdist)
6767
env:
68-
PYTEST_MARKS: "pre_merge and parallel and not (vllm or trtllm or sglang or k8s)"
68+
PYTEST_MARKS: "pre_merge and parallel"
6969
run: |
7070
docker run -w /workspace \
7171
--name ${{ env.CONTAINER_ID }}_pytest_parallel \
@@ -77,7 +77,7 @@ jobs:
7777
docker cp ${{ env.CONTAINER_ID }}_pytest_parallel:/workspace/${{ env.PYTEST_PARALLEL_XML_FILE }} . || echo "No parallel test report found"
7878
- name: Run pytest (sequential tests)
7979
env:
80-
PYTEST_MARKS: "(pre_merge and not parallel and not (vllm or trtllm or sglang or k8s)) or mypy"
80+
PYTEST_MARKS: "(pre_merge and not parallel) or mypy"
8181
run: |
8282
docker run -w /workspace \
8383
--name ${{ env.CONTAINER_ID }}_pytest \

container/Dockerfile.vllm

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,9 @@ WORKDIR /workspace
174174
ENV DYNAMO_HOME=/opt/dynamo
175175
ENV VIRTUAL_ENV=/opt/dynamo/venv
176176
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
177+
# Set CUDA_DEVICE_ORDER to ensure CUDA logical device IDs match NVML physical device IDs
178+
# This fixes NVML InvalidArgument errors when CUDA_VISIBLE_DEVICES is set
179+
ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
177180

178181
ARG ARCH_ALT
179182
ARG PYTHON_VERSION

container/deps/requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ PyYAML==6.0.3
4343
scikit-learn==1.7.2
4444
scipy<1.14.0 # Upper bound for pmdarima compatibility
4545
sentencepiece==0.2.1
46+
# Required by kr8s
47+
# https://github.com/kr8s-org/kr8s/blob/750022c3ebbb7988cddb5a979aca2ee8074a1069/examples/kubectl-ng/uv.lock#L988
48+
sniffio==1.3.1
4649
tensorboard==2.19.0
4750
tensorboardX==2.6.2.2
4851
# Transformers version constraint for container builds

tests/planner/unit/test_prometheus.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def test_get_average_metric_none_result():
140140
mock_query.return_value = None
141141

142142
result = client._get_average_metric(
143-
metric_name="test_metric",
143+
full_metric_name="test_metric",
144144
interval="60s",
145145
operation_name="test operation",
146146
model_name="test_model",
@@ -157,7 +157,7 @@ def test_get_average_metric_empty_result():
157157
mock_query.return_value = []
158158

159159
result = client._get_average_metric(
160-
metric_name="test_metric",
160+
full_metric_name="test_metric",
161161
interval="60s",
162162
operation_name="test operation",
163163
model_name="test_model",
@@ -175,7 +175,7 @@ def test_get_average_metric_no_matching_containers(mock_prometheus_result):
175175
mock_query.return_value = [mock_prometheus_result[0]]
176176

177177
result = client._get_average_metric(
178-
metric_name="test_metric",
178+
full_metric_name="test_metric",
179179
interval="60s",
180180
operation_name="test operation",
181181
model_name="target_model",
@@ -193,7 +193,7 @@ def test_get_average_metric_one_matching_container(mock_prometheus_result):
193193
mock_query.return_value = mock_prometheus_result[:2]
194194

195195
result = client._get_average_metric(
196-
metric_name="test_metric",
196+
full_metric_name="test_metric",
197197
interval="60s",
198198
operation_name="test operation",
199199
model_name="target_model",
@@ -227,7 +227,7 @@ def test_get_average_metric_with_validation_error():
227227
mock_query.return_value = mock_result
228228

229229
result = client._get_average_metric(
230-
metric_name="test_metric",
230+
full_metric_name="test_metric",
231231
interval="60s",
232232
operation_name="test operation",
233233
model_name="target_model",
@@ -245,7 +245,7 @@ def test_get_average_metric_multiple_matching_containers(mock_prometheus_result)
245245
mock_query.return_value = mock_prometheus_result[1:]
246246

247247
result = client._get_average_metric(
248-
metric_name="test_metric",
248+
full_metric_name="test_metric",
249249
interval="60s",
250250
operation_name="test operation",
251251
model_name="target_model",

tests/serve/test_sglang.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class SGLangConfig(EngineConfig):
4444
name="aggregated",
4545
directory=sglang_dir,
4646
script_name="agg.sh",
47-
marks=[pytest.mark.gpu_1, pytest.mark.pre_merge],
47+
marks=[pytest.mark.gpu_1],
4848
model="Qwen/Qwen3-0.6B",
4949
env={},
5050
models_port=8000,

tests/serve/test_trtllm.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class TRTLLMConfig(EngineConfig):
4040
name="aggregated",
4141
directory=trtllm_dir,
4242
script_name="agg_metrics.sh",
43-
marks=[pytest.mark.gpu_1, pytest.mark.trtllm, pytest.mark.pre_merge],
43+
marks=[pytest.mark.gpu_1, pytest.mark.trtllm],
4444
model="Qwen/Qwen3-0.6B",
4545
models_port=8000,
4646
request_payloads=[
@@ -140,7 +140,6 @@ def test_deployment(trtllm_config_test, request, runtime_services, predownload_m
140140
# TODO make this a normal guy
141141
@pytest.mark.e2e
142142
@pytest.mark.gpu_1
143-
@pytest.mark.pre_merge
144143
@pytest.mark.trtllm
145144
def test_chat_only_aggregated_with_test_logits_processor(
146145
request, runtime_services, predownload_models, monkeypatch

tests/serve/test_vllm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class VLLMConfig(EngineConfig):
4343
name="aggregated",
4444
directory=vllm_dir,
4545
script_name="agg.sh",
46-
marks=[pytest.mark.gpu_1, pytest.mark.pre_merge],
46+
marks=[pytest.mark.gpu_1],
4747
model="Qwen/Qwen3-0.6B",
4848
request_payloads=[
4949
chat_payload_default(),

0 commit comments

Comments (0)