ai-dynamo
diff --git a/.github/workflows/container-validation-dynamo.yml b/.github/workflows/container-validation-dynamo.yml
Lines changed: 1 addition & 1 deletion
diff --git a/components/src/dynamo/frontend/main.py b/components/src/dynamo/frontend/main.py
Lines changed: 6 additions & 2 deletions
diff --git a/components/src/dynamo/sglang/publisher.py b/components/src/dynamo/sglang/publisher.py
Lines changed: 1 addition & 1 deletion
diff --git a/examples/backends/sglang/launch/disagg.sh b/examples/backends/sglang/launch/disagg.sh
Lines changed: 2 additions & 2 deletions
diff --git a/examples/backends/vllm/launch/disagg_same_gpu.sh b/examples/backends/vllm/launch/disagg_same_gpu.sh
Lines changed: 2 additions & 2 deletions
diff --git a/lib/runtime/src/config.rs b/lib/runtime/src/config.rs
Lines changed: 1 addition & 0 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 2 additions & 1 deletion
diff --git a/tests/conftest.py b/tests/conftest.py
Lines changed: 100 additions & 17 deletions
@@ -70,7 +70,7 @@ jobs:
           docker run -w /workspace \
             --name ${{ env.CONTAINER_ID }}_pytest_parallel \
             ${{ steps.define_image_tag.outputs.image_tag }} \
-            bash -c "pytest --basetemp=/tmp/pytest-parallel --junitxml=${{ env.PYTEST_PARALLEL_XML_FILE }} -n 4 -m \"${{ env.PYTEST_MARKS }}\""
+            bash -c "pytest --basetemp=/tmp/pytest-parallel --junitxml=${{ env.PYTEST_PARALLEL_XML_FILE }} -n auto -m \"${{ env.PYTEST_MARKS }}\""
       - name: Copy parallel test report from Container
         if: always()
         run: |
 
@@ -265,15 +265,19 @@ async def async_main():
     flags = parse_args()
     dump_config(flags.dump_config_to, flags)
 
-    # Warn if DYN_SYSTEM_PORT is set (frontend doesn't use system metrics server)
+    # Warn and unset DYN_SYSTEM_PORT if set (frontend doesn't use system metrics server)
+    # The frontend creates a DRT but should NOT start a system metrics server
+    # Only backend workers should set DYN_SYSTEM_PORT
     if os.environ.get("DYN_SYSTEM_PORT"):
         logger.warning(
             "=" * 80 + "\n"
             "WARNING: DYN_SYSTEM_PORT is set but NOT used by the frontend!\n"
             "The frontend does not expose a system metrics server.\n"
             "Only backend workers should set DYN_SYSTEM_PORT.\n"
-            "Use --http-port to configure the frontend HTTP API port.\n" + "=" * 80
+            "Unsetting DYN_SYSTEM_PORT to prevent DRT from starting system server.\n"
+            + "=" * 80
         )
+        os.environ.pop("DYN_SYSTEM_PORT", None)
 
     # Configure Dynamo frontend HTTP service metrics prefix
     if flags.metrics_prefix is not None:
 
@@ -228,7 +228,7 @@ def setup_prometheus_registry(
     SGLang uses multiprocess architecture where metrics are stored in shared memory.
     MultiProcessCollector aggregates metrics from all worker processes. The Prometheus
     registry collects sglang:* metrics which are exposed via the metrics server endpoint
-    (set DYN_SYSTEM_PORT to a positive value to enable, e.g., DYN_SYSTEM_PORT=8081).
+    (typically port 8081) when DYN_SYSTEM_PORT is set to a positive value.
 
     Args:
         engine: The SGLang engine instance.
 
@@ -50,7 +50,7 @@ python3 -m dynamo.frontend &
 DYNAMO_PID=$!
 
 # run prefill worker
-OTEL_SERVICE_NAME=dynamo-worker-prefill DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT_PREFILL:-8081} \
+OTEL_SERVICE_NAME=dynamo-worker-prefill DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT1:-8081} \
 python3 -m dynamo.sglang \
   --model-path Qwen/Qwen3-0.6B \
   --served-model-name Qwen/Qwen3-0.6B \
@@ -65,7 +65,7 @@ python3 -m dynamo.sglang \
 PREFILL_PID=$!
 
 # run decode worker
-OTEL_SERVICE_NAME=dynamo-worker-decode DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT_DECODE:-8082} \
+OTEL_SERVICE_NAME=dynamo-worker-decode DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT2:-8082} \
 CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
   --model-path Qwen/Qwen3-0.6B \
   --served-model-name Qwen/Qwen3-0.6B \
 
@@ -48,7 +48,7 @@ DYNAMO_PID=$!
 
 # run decode worker with metrics on port 8081
 # --enforce-eager is added for quick deployment. for production use, need to remove this flag
-DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
+DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT1:-8081} \
 CUDA_VISIBLE_DEVICES=0 \
 python3 -m dynamo.vllm \
   --model Qwen/Qwen3-0.6B \
@@ -66,7 +66,7 @@ echo "Waiting for decode worker to initialize..."
 sleep 10
 
 # run prefill worker with metrics on port 8082 (foreground)
-DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT_PREFILL:-8082} \
+DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT2:-8082} \
 DYN_VLLM_KV_EVENT_PORT=20081 \
 VLLM_NIXL_SIDE_CHANNEL_PORT=20097 \
 CUDA_VISIBLE_DEVICES=0 \
 
@@ -101,6 +101,7 @@ pub struct RuntimeConfig {
     /// Set to 0 to bind to a random available port
     /// Set to a positive port number (e.g. 8081) to bind to a specific port
     /// Set this at runtime with environment variable DYN_SYSTEM_PORT
+    /// TODO: Change type from i16 to u16 to support full port range (0-65535)
     #[builder(default = "DEFAULT_SYSTEM_PORT")]
     #[builder_field_attr(serde(skip_serializing_if = "Option::is_none"))]
     pub system_port: i16,
 
@@ -220,7 +220,8 @@ markers = [
     "model: model id used by a test or parameter",
     "custom_build: marks tests that require custom builds or special setup (e.g., MoE models)",
     "k8s: marks tests as requiring Kubernetes",
-    "fault_tolerance: marks tests as fault tolerance tests"
+    "fault_tolerance: marks tests as fault tolerance tests",
+    "requires_hf_token: marks tests that require HuggingFace authentication token for gated models"
 ]
 
 # Linting/formatting
 
@@ -1,17 +1,5 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 
 import logging
 import os
@@ -25,6 +13,14 @@
 
 from tests.utils.constants import TEST_MODELS
 from tests.utils.managed_process import ManagedProcess
+from tests.utils.port_utils import (
+    allocate_free_port,
+    allocate_free_ports,
+    free_port,
+    free_ports,
+)
+
+_logger = logging.getLogger(__name__)
 
 
 def pytest_configure(config):
@@ -227,44 +223,124 @@ def pytest_collection_modifyitems(config, items):
 
 
 class EtcdServer(ManagedProcess):
+    """Test-managed etcd process.
+
+    With ``port=None`` a client port and a peer port are obtained from
+    ``allocate_free_ports`` and ``terminate_existing`` is turned off; with an
+    explicit ``port`` no peer options are passed and ``terminate_existing``
+    is enabled. The chosen ports are kept on ``self.port`` / ``self.peer_port``.
+    """
+
-    def __init__(self, request, port=2379, timeout=300):
+    def __init__(self, request, port=None, timeout=300):
+        # Allocate free ports if not specified
+        use_random_port = port is None
+        if use_random_port:
+            # Need two ports: client port and peer port for parallel execution
+            # Start from 2380 (etcd default 2379 + 1)
+            port, peer_port = allocate_free_ports(2, 2380)
+        else:
+            peer_port = None
+
+        self.port = port
+        self.peer_port = peer_port  # Store for cleanup
         port_string = str(port)
         etcd_env = os.environ.copy()
         etcd_env["ALLOW_NONE_AUTHENTICATION"] = "yes"
         data_dir = tempfile.mkdtemp(prefix="etcd_")
+
         command = [
             "etcd",
             "--listen-client-urls",
             f"http://0.0.0.0:{port_string}",
             "--advertise-client-urls",
             f"http://0.0.0.0:{port_string}",
-            "--data-dir",
-            data_dir,
         ]
+
+        # Add peer port configuration only for random ports (parallel execution)
+        if peer_port is not None:
+            peer_port_string = str(peer_port)
+            command.extend(
+                [
+                    "--listen-peer-urls",
+                    f"http://0.0.0.0:{peer_port_string}",
+                    "--initial-advertise-peer-urls",
+                    f"http://localhost:{peer_port_string}",
+                    "--initial-cluster",
+                    f"default=http://localhost:{peer_port_string}",
+                ]
+            )
+
+        command.extend(
+            [
+                "--data-dir",
+                data_dir,
+            ]
+        )
         super().__init__(
             env=etcd_env,
             command=command,
             timeout=timeout,
             display_output=False,
+            terminate_existing=not use_random_port,  # Disabled for parallel test execution with random ports
             health_check_ports=[port],
             data_dir=data_dir,
             log_dir=request.node.name,
         )
 
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Release allocated ports, then run the parent ``__exit__``.
+
+        Port release is best-effort: a failure is logged and never prevents
+        the parent ``__exit__`` from running.
+
+        Returns:
+            Whatever ``ManagedProcess.__exit__`` returns.
+        """
+        # Release allocated ports BEFORE calling parent __exit__
+        ports_to_release = [
+            candidate
+            for candidate in (
+                getattr(self, "port", None),
+                getattr(self, "peer_port", None),
+            )
+            if candidate is not None
+        ]
+        try:
+            if ports_to_release:
+                free_ports(ports_to_release)
+        except Exception:
+            # Best-effort cleanup: record the failure instead of hiding it
+            _logger.warning(
+                "Failed to release etcd ports %s", ports_to_release, exc_info=True
+            )
+        finally:
+            # Always call parent __exit__ to terminate the process. The result is
+            # returned after the finally block because a `return` inside `finally`
+            # would swallow a BaseException (e.g. KeyboardInterrupt) raised above.
+            suppress = super().__exit__(exc_type, exc_val, exc_tb)
+        return suppress
+
+
 
 class NatsServer(ManagedProcess):
+    """Test-managed nats-server process.
+
+    With ``port=None`` a port is obtained from ``allocate_free_port`` and
+    ``terminate_existing`` is turned off; with an explicit ``port`` it is
+    enabled. The chosen port is passed via ``-p`` and kept on ``self.port``.
+    """
+
-    def __init__(self, request, port=4222, timeout=300):
+    def __init__(self, request, port=None, timeout=300):
+        # Allocate a free port if not specified
+        use_random_port = port is None
+        if use_random_port:
+            # Start from 4223 (nats-server default 4222 + 1)
+            port = allocate_free_port(4223)
+
+        self.port = port
         data_dir = tempfile.mkdtemp(prefix="nats_")
-        command = ["nats-server", "-js", "--trace", "--store_dir", data_dir]
+        command = [
+            "nats-server",
+            "-js",
+            "--trace",
+            "--store_dir",
+            data_dir,
+            "-p",
+            str(port),
+        ]
         super().__init__(
             command=command,
             timeout=timeout,
             display_output=False,
+            terminate_existing=not use_random_port,  # Disabled for parallel test execution with random ports
             data_dir=data_dir,
             health_check_ports=[port],
             log_dir=request.node.name,
         )
 
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Release the allocated port, then run the parent ``__exit__``.
+
+        Port release is best-effort: a failure is logged and never prevents
+        the parent ``__exit__`` from running.
+
+        Returns:
+            Whatever ``ManagedProcess.__exit__`` returns.
+        """
+        # Release allocated port BEFORE calling parent __exit__
+        port_to_release = getattr(self, "port", None)
+        try:
+            if port_to_release is not None:
+                free_port(port_to_release)
+        except Exception:
+            # Best-effort cleanup: record the failure instead of hiding it
+            _logger.warning(
+                "Failed to release NATS port %s", port_to_release, exc_info=True
+            )
+        finally:
+            # Always call parent __exit__ to terminate the process. The result is
+            # returned after the finally block because a `return` inside `finally`
+            # would swallow a BaseException (e.g. KeyboardInterrupt) raised above.
+            suppress = super().__exit__(exc_type, exc_val, exc_tb)
+        return suppress
+
+
 
 class SharedManagedProcess:
     """Base class for ManagedProcess with file-based reference counting for multi-process sharing."""
@@ -393,6 +469,13 @@ def _create_server(self) -> ManagedProcess:
 
 @pytest.fixture()
 def runtime_services(request):
+    """Start NATS and Etcd servers for a test and yield both.
+
+    Both servers are created without an explicit port, so each one uses a
+    dynamically allocated port.
+
+    Yields:
+        A tuple of (nats_process, etcd_process) where each has a .port attribute.
+        Tests should set NATS_SERVER and ETCD_ENDPOINTS environment variables in
+        their subprocess environments using these ports.
+    """
+    # Port cleanup is handled in the NatsServer and EtcdServer __exit__ methods
     with NatsServer(request) as nats_process:
         with EtcdServer(request) as etcd_process:
             yield nats_process, etcd_process
Original file line number	Diff line number	Diff line change
`@@ -220,7 +220,8 @@ markers = [`
`220`	`220`	`"model: model id used by a test or parameter",`
`221`	`221`	`"custom_build: marks tests that require custom builds or special setup (e.g., MoE models)",`
`222`	`222`	`"k8s: marks tests as requiring Kubernetes",`
`223`		`- "fault_tolerance: marks tests as fault tolerance tests"`
	`223`	`+ "fault_tolerance: marks tests as fault tolerance tests",`
	`224`	`+ "requires_hf_token: marks tests that require HuggingFace authentication token for gated models"`
`224`	`225`	`]`
`225`	`226`
`226`	`227`	`# Linting/formatting`