11# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22# SPDX-License-Identifier: Apache-2.0
3- #
4- # Licensed under the Apache License, Version 2.0 (the "License");
5- # you may not use this file except in compliance with the License.
6- # You may obtain a copy of the License at
7- #
8- # http://www.apache.org/licenses/LICENSE-2.0
9- #
10- # Unless required by applicable law or agreed to in writing, software
11- # distributed under the License is distributed on an "AS IS" BASIS,
12- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13- # See the License for the specific language governing permissions and
14- # limitations under the License.
153
164import logging
175import os
2614
2715from tests .utils .constants import TEST_MODELS
2816from tests .utils .managed_process import ManagedProcess
17+ from tests .utils .port_utils import (
18+ allocate_port ,
19+ allocate_ports ,
20+ deallocate_port ,
21+ deallocate_ports ,
22+ )
23+
24+ _logger = logging .getLogger (__name__ )
2925
3026
3127def pytest_configure (config ):
@@ -249,43 +245,118 @@ def pytest_runtestloop(session):
249245
class EtcdServer(ManagedProcess):
    """Managed ``etcd`` process for tests.

    With the default ``port`` the server listens on that fixed client port
    and asks ``ManagedProcess`` to terminate existing instances. With
    ``port=0`` a client port and a peer port are reserved through
    ``allocate_ports`` so several servers can run side by side; those ports
    are handed back once the process has been shut down.

    Attributes:
        port: Client port the server listens on.
        peer_port: Peer port, or ``None`` when a fixed client port is used.
        use_random_port: ``True`` when the ports were allocated here and
            therefore must be released on exit.
    """

    def __init__(self, request, port=2379, timeout=300):
        """Build the etcd command line and hand it to ``ManagedProcess``.

        Args:
            request: pytest request object; ``request.node.name`` is used as
                the log directory.
            port: Client port. ``0`` means "allocate free ports dynamically".
            timeout: Passed through to ``ManagedProcess``.
        """
        use_random_port = port == 0
        if use_random_port:
            # Need two ports: client port and peer port for parallel execution.
            # Start from 2380 (etcd default 2379 + 1).
            port, peer_port = allocate_ports(2, 2380)
        else:
            peer_port = None

        # Set before super().__init__ so cleanup can always see them.
        self.port = port
        self.peer_port = peer_port
        self.use_random_port = use_random_port

        port_string = str(port)
        etcd_env = os.environ.copy()
        etcd_env["ALLOW_NONE_AUTHENTICATION"] = "yes"
        data_dir = tempfile.mkdtemp(prefix="etcd_")

        command = [
            "etcd",
            "--listen-client-urls",
            f"http://0.0.0.0:{port_string}",
            "--advertise-client-urls",
            f"http://0.0.0.0:{port_string}",
        ]

        # Add peer port configuration only for random ports (parallel execution).
        if peer_port is not None:
            peer_port_string = str(peer_port)
            command.extend(
                [
                    "--listen-peer-urls",
                    f"http://0.0.0.0:{peer_port_string}",
                    "--initial-advertise-peer-urls",
                    f"http://localhost:{peer_port_string}",
                    "--initial-cluster",
                    f"default=http://localhost:{peer_port_string}",
                ]
            )

        command.extend(["--data-dir", data_dir])

        try:
            super().__init__(
                env=etcd_env,
                command=command,
                timeout=timeout,
                display_output=False,
                # Disabled for parallel test execution with random ports.
                terminate_existing=not use_random_port,
                health_check_ports=[port],
                data_dir=data_dir,
                log_dir=request.node.name,
            )
        except Exception:
            # __exit__ will never run for a half-constructed server, so give
            # the reserved ports back here before propagating the error.
            self._release_ports()
            raise

    def _release_ports(self):
        """Return dynamically allocated ports to the pool (best effort)."""
        # Only deallocate ports that were dynamically allocated (not default ports).
        if not self.use_random_port:
            return
        ports_to_release = [self.port]
        if self.peer_port is not None:
            ports_to_release.append(self.peer_port)
        try:
            deallocate_ports(ports_to_release)
        except Exception as e:
            # Best effort: a failed release must not mask the test outcome.
            _logger.warning("Failed to release EtcdServer port: %s", e)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Run the base-class exit, then release any allocated ports.

        The ports are released only after ``super().__exit__`` has returned
        or raised, so they are not handed to another test while this server
        may still be bound to them.

        Returns:
            Whatever ``ManagedProcess.__exit__`` returns.
        """
        try:
            return super().__exit__(exc_type, exc_val, exc_tb)
        finally:
            self._release_ports()
317+
275318
class NatsServer(ManagedProcess):
    """Managed ``nats-server`` process (JetStream enabled) for tests.

    With the default ``port`` the server listens on that fixed port and asks
    ``ManagedProcess`` to terminate existing instances. With ``port=0`` a
    port is reserved through ``allocate_port`` so several servers can run
    side by side; the port is handed back once the process has been shut
    down.

    Attributes:
        port: Port the server listens on.
        use_random_port: ``True`` when the port was allocated here and
            therefore must be released on exit.
    """

    def __init__(self, request, port=4222, timeout=300):
        """Build the nats-server command line and hand it to ``ManagedProcess``.

        Args:
            request: pytest request object; ``request.node.name`` is used as
                the log directory.
            port: Listen port. ``0`` means "allocate a free port dynamically".
            timeout: Passed through to ``ManagedProcess``.
        """
        use_random_port = port == 0
        if use_random_port:
            # Start from 4223 (nats-server default 4222 + 1).
            port = allocate_port(4223)

        # Set before super().__init__ so cleanup can always see them.
        self.port = port
        self.use_random_port = use_random_port

        data_dir = tempfile.mkdtemp(prefix="nats_")
        command = [
            "nats-server",
            "-js",
            "--trace",
            "--store_dir",
            data_dir,
            "-p",
            str(port),
        ]

        try:
            super().__init__(
                command=command,
                timeout=timeout,
                display_output=False,
                # Disabled for parallel test execution with random ports.
                terminate_existing=not use_random_port,
                data_dir=data_dir,
                health_check_ports=[port],
                log_dir=request.node.name,
            )
        except Exception:
            # __exit__ will never run for a half-constructed server, so give
            # the reserved port back here before propagating the error.
            self._release_port()
            raise

    def _release_port(self):
        """Return a dynamically allocated port to the pool (best effort)."""
        # Only deallocate ports that were dynamically allocated (not default ports).
        if not self.use_random_port:
            return
        try:
            deallocate_port(self.port)
        except Exception as e:
            # Best effort: a failed release must not mask the test outcome.
            _logger.warning("Failed to release NatsServer port: %s", e)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Run the base-class exit, then release the allocated port.

        The port is released only after ``super().__exit__`` has returned or
        raised, so it is not handed to another test while this server may
        still be bound to it.

        Returns:
            Whatever ``ManagedProcess.__exit__`` returns.
        """
        try:
            return super().__exit__(exc_type, exc_val, exc_tb)
        finally:
            self._release_port()
359+
289360
290361class SharedManagedProcess :
291362 """Base class for ManagedProcess with file-based reference counting for multi-process sharing."""
@@ -445,7 +516,10 @@ def runtime_services(request, store_kv, request_plane):
445516
446517 - If store_kv != "etcd", etcd is not started (returns None)
447518 - If request_plane != "nats", NATS is not started (returns None)
519+
520+ Returns a tuple of (nats_process, etcd_process) where each has a .port attribute.
448521 """
522+ # Port cleanup is now handled in NatsServer and EtcdServer __exit__ methods
449523 if request_plane == "nats" and store_kv == "etcd" :
450524 with NatsServer (request ) as nats_process :
451525 with EtcdServer (request ) as etcd_process :
@@ -460,6 +534,49 @@ def runtime_services(request, store_kv, request_plane):
460534 yield None , None
461535
462536
@pytest.fixture()
def runtime_services_dynamic_ports(request, store_kv, request_plane):
    """Provide NATS and Etcd servers with truly dynamic ports per test.

    The servers are started with ``port=0`` so each one allocates its own
    port. While the test runs, ``NATS_SERVER`` and ``ETCD_ENDPOINTS`` point
    at the started servers so that Dynamo processes can find them; on
    teardown each variable is restored to the value it had before the
    fixture ran (or removed if it was unset).

    - If store_kv != "etcd", etcd is not started (returns None)
    - If request_plane != "nats", NATS is not started (returns None)

    Yields:
        A tuple ``(nats_process, etcd_process)``; each started server has a
        ``.port`` attribute, each server that was not started is ``None``.
    """
    # Function-scope import: the block needs nothing new at module level.
    from contextlib import ExitStack

    want_nats = request_plane == "nats"
    want_etcd = store_kv == "etcd"

    nats_process = None
    etcd_process = None

    # Port cleanup is handled in the NatsServer and EtcdServer __exit__
    # methods. ExitStack unwinds in reverse order, so etcd (entered last)
    # is stopped before NATS, exactly as nested ``with`` blocks would do.
    with ExitStack() as servers:
        if want_nats:
            nats_process = servers.enter_context(NatsServer(request, port=0))
        if want_etcd:
            etcd_process = servers.enter_context(EtcdServer(request, port=0))

        # Environment variables for the Rust/Python runtime to use. Only the
        # variables for servers that were actually started are touched.
        endpoints = {}
        if want_nats:
            endpoints["NATS_SERVER"] = f"nats://localhost:{nats_process.port}"
        if want_etcd:
            endpoints["ETCD_ENDPOINTS"] = f"http://localhost:{etcd_process.port}"

        # Remember prior values so teardown restores rather than deletes them.
        previous = {name: os.environ.get(name) for name in endpoints}
        os.environ.update(endpoints)
        try:
            yield nats_process, etcd_process
        finally:
            # No test should rely on these variables afterwards, but put the
            # environment back exactly as it was found.
            for name, old_value in previous.items():
                if old_value is None:
                    os.environ.pop(name, None)
                else:
                    os.environ[name] = old_value
578+
579+
463580@pytest .fixture (scope = "session" )
464581def runtime_services_session (request , tmp_path_factory ):
465582 """Session-scoped fixture that provides shared NATS and etcd instances for all tests.
0 commit comments