1818import json
1919import logging
2020import os
21+ import signal
2122import subprocess
2223import time
2324from pathlib import Path
2425from typing import Any , Dict , List , Optional , Tuple
2526
2627import requests
28+ from kr8s .objects import Pod
2729
2830from tests .utils .managed_deployment import ManagedDeployment
2931
@@ -44,7 +46,7 @@ def get_frontend_port(
4446 deployment_spec : Any ,
4547 pod_ports : Dict [str , Any ],
4648 logger : logging .Logger ,
47- ) -> Tuple [Optional [str ], Optional [int ], Optional [str ]]:
49+ ) -> Tuple [Optional [str ], Optional [int ], Optional [Pod ]]:
4850 """
4951 Select a frontend pod using round-robin and setup port forwarding.
5052
@@ -60,7 +62,7 @@ def get_frontend_port(
6062 Returns:
6163 Tuple of (pod_name, local_port, pod_instance) or (None, None, None) if failed
6264 """
63- pods = managed_deployment .get_pods (managed_deployment .frontend_service_name )
65+ pods = managed_deployment .get_pods ([ managed_deployment .frontend_service_name ] )
6466
6567 port = 0
6668 pod_name = None
@@ -270,6 +272,7 @@ def run_aiperf(
270272 logger : logging .Logger ,
271273 max_retries : int = 1 ,
272274 retry_delay : float = 1 ,
275+ continuous_load : bool = False ,
273276) -> bool :
274277 """
275278 Execute AI-Perf with specified parameters.
@@ -280,13 +283,14 @@ def run_aiperf(
280283 model: Model name
281284 pod_name: Selected pod name for logging
282285 port: Local port number
283- requests_per_client: Number of requests to send
286+ requests_per_client: Number of requests to send (used if continuous load not enabled)
284287 input_token_length: Input token count
285288 output_token_length: Output token count
286289 output_dir: Directory for AI-Perf artifacts
287290 logger: Logger instance
288291 max_retries: Maximum number of retry attempts (default: 1)
289292 retry_delay: Delay in seconds between retries (default: 1)
293+ continuous_load: If True, use continuous load instead of fixed request count
290294
291295 Returns:
292296 True if successful, False otherwise
@@ -315,8 +319,6 @@ def run_aiperf(
315319 # Enable streaming for TTFT and ITL metrics
316320 "--streaming" ,
317321 # Request parameters
318- "--request-count" ,
319- str (requests_per_client ), # Required: how many requests
320322 "--concurrency" ,
321323 "1" , # Optional: we set to 1 for sequential
322324 # Token configuration
@@ -338,8 +340,13 @@ def run_aiperf(
338340 "100" , # For reproducible results
339341 ]
340342
341- # Calculate timeout (same as legacy would for all requests)
342- timeout = max (requests_per_client * 2 + 60 , 300 ) # At least 5 minutes
343+ if continuous_load :
344+ cmd .extend (["--benchmark-duration" , "1800" ]) # 30 minutes for continuous load
345+ logger .info ("Using continuous load with duration: 30 minutes" )
346+ timeout = 1860 # 31 minutes default for duration-based tests (30 minutes + 1 minute buffer)
347+ else :
348+ cmd .extend (["--request-count" , str (requests_per_client )])
349+ timeout = max (requests_per_client * 2 + 60 , 300 ) # At least 5 minutes
343350
344351 # Log execution
345352 logger .info (f"Starting AI-Perf for Pod { pod_name } Local Port { port } " )
@@ -354,15 +361,19 @@ def run_aiperf(
354361 logger .info (f"Command: { ' ' .join (cmd )} " )
355362
356363 # Retry logic for fault tolerance - retry FULL request count until success
357-
358- max_attempts = max_retries if max_retries > 0 else 1
364+ # Note: For continuous load, we only run once and expect SIGINT to stop it
365+ max_attempts = 1 if continuous_load else ( max_retries if max_retries > 0 else 1 )
359366 success = False
360- all_results = []
361367
362368 for attempt in range (max_attempts ):
363- logger .info (
364- f"AI-Perf attempt { attempt + 1 } /{ max_attempts } with { requests_per_client } requests"
365- )
369+ if continuous_load :
370+ logger .info (
371+ "AI-Perf continuous load (will run until interrupted by SIGINT)"
372+ )
373+ else :
374+ logger .info (
375+ f"AI-Perf attempt { attempt + 1 } /{ max_attempts } with { requests_per_client } requests"
376+ )
366377
367378 # Update output directory for this attempt
368379 attempt_dir = output_dir / f"attempt_{ attempt } "
@@ -374,13 +385,7 @@ def run_aiperf(
374385 cmd_attempt [artifact_dir_idx ] = str (attempt_dir )
375386
376387 try :
377- result = subprocess .run (
378- cmd_attempt ,
379- capture_output = True ,
380- text = True ,
381- timeout = timeout ,
382- stdin = subprocess .DEVNULL , # Prevent stdin reading which can cause process suspension
383- )
388+ result = run_aiperf_with_signal_handling (cmd_attempt , logger , timeout )
384389
385390 # Save logs for this attempt
386391 with open (attempt_dir / "genai_perf.log" , "w" ) as f :
@@ -389,15 +394,6 @@ def run_aiperf(
389394 f .write ("\n \n === STDERR ===\n " )
390395 f .write (result .stderr )
391396
392- all_results .append (
393- {
394- "attempt" : attempt + 1 ,
395- "returncode" : result .returncode ,
396- "stdout" : result .stdout ,
397- "stderr" : result .stderr ,
398- }
399- )
400-
401397 if result .returncode == 0 :
402398 # AI-Perf returns 0 even if all requests failed, so we need to check the output
403399 json_path = attempt_dir / "profile_export_aiperf.json"
@@ -412,6 +408,19 @@ def run_aiperf(
412408 )
413409 if success :
414410 break # Success - exit the retry loop
411+ ## TODO: bug with aiperf git+https://github.com/ai-dynamo/aiperf.git@4d3fa29403c8f75da22a14f1f7b3aeb27db9288f
412+ ## where sending a SIGINT on Mac can sometimes yield a return code of -9 (killed by SIGKILL, signal 9) which results in profile_export_aiperf.json not being created
413+ elif result .returncode == - 9 and continuous_load :
414+ logger .warning (
415+ f"""
416+ Attempt { attempt + 1 } failed with return code { result .returncode }
417+ This is a known bug with aiperf on Mac where sending a SIGINT can sometimes have an error code of -9 (SIGABRT)
418+ which results in profile_export_aiperf.json not being created
419+ """
420+ )
421+ logger .debug (
422+ f"Stderr: { result .stderr [:500 ] if result .stderr else 'No stderr' } "
423+ )
415424 else :
416425 logger .warning (
417426 f"Attempt { attempt + 1 } failed with return code { result .returncode } "
@@ -421,22 +430,84 @@ def run_aiperf(
421430 )
422431 except Exception as e :
423432 logger .error (f"Error in attempt { attempt + 1 } : { str (e )} " )
424- all_results .append ({"attempt" : attempt + 1 , "error" : str (e )})
425433
426- # Sleep before next attempt (if not the last attempt)
427- if not success and attempt < max_attempts - 1 :
434+ # Sleep before next attempt (if not the last attempt and not continuous load )
435+ if not success and attempt < max_attempts - 1 and not continuous_load :
428436 time .sleep (retry_delay )
429437
430- if success :
438+ if success and not continuous_load :
431439 logger .info (
432440 f"AI-Perf successfully completed all { requests_per_client } requests for { pod_name } "
433441 )
442+ elif success and continuous_load :
443+ logger .info (
444+ f"AI-Perf sustained continuous load for { pod_name } and exited successfully"
445+ )
434446 else :
435447 logger .error (f"AI-Perf failed all { max_attempts } attempts for { pod_name } " )
436448
437449 return success
438450
439451
# TODO: use file redirection and wait() instead of pipes and communicate
def run_aiperf_with_signal_handling(
    cmd_attempt: List[str],
    logger: logging.Logger,
    timeout: int,
) -> subprocess.CompletedProcess:
    """
    Run aiperf with signal handling for graceful shutdown.

    Handles SIGINT and SIGTERM forwarding and timeout when running with
    subprocess.Popen. This ensures that Ctrl-C (SIGINT) and graceful
    termination signals (SIGTERM) are properly forwarded to the subprocess
    so it can clean up gracefully and write results files.

    Args:
        cmd_attempt: Full aiperf command line to execute.
        logger: Logger instance for progress/diagnostic messages.
        timeout: Maximum seconds to wait before force-killing the subprocess.

    Returns:
        subprocess.CompletedProcess with the captured stdout/stderr and
        return code (negative return code means killed by that signal).
    """
    proc = subprocess.Popen(
        cmd_attempt,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        stdin=subprocess.DEVNULL,  # Prevent stdin reading which can cause process suspension
    )

    def signal_handler(signum, frame):
        signal_names = {
            signal.SIGINT: "SIGINT",
            signal.SIGTERM: "SIGTERM",
        }
        signal_name = signal_names.get(signum, f"signal {signum}")
        logger.info(f"Received {signal_name}, forwarding to aiperf subprocess")
        try:
            proc.send_signal(signum)
        except ProcessLookupError:
            pass  # Process already terminated

    # Save the previously-installed handlers so they can be restored on exit.
    # Bug fix: the original left our handlers (which close over this proc)
    # installed permanently, clobbering the caller's signal handling.
    prev_sigint = signal.signal(signal.SIGINT, signal_handler)
    prev_sigterm = signal.signal(signal.SIGTERM, signal_handler)

    try:
        try:
            stdout, stderr = proc.communicate(timeout=timeout)
        except subprocess.TimeoutExpired:
            logger.warning(f"AI-Perf subprocess timed out after {timeout}s")
            proc.kill()
            stdout, stderr = proc.communicate()
        except KeyboardInterrupt:
            logger.info("Received KeyboardInterrupt, sending SIGINT to aiperf subprocess")
            try:
                # Bug fix: guard against the child having already exited,
                # matching the ProcessLookupError handling in signal_handler.
                proc.send_signal(signal.SIGINT)
            except ProcessLookupError:
                pass
            try:
                stdout, stderr = proc.communicate(timeout=30)  # Give it time to clean up
            except subprocess.TimeoutExpired:
                logger.warning("Subprocess didn't terminate gracefully, killing it")
                proc.kill()
                stdout, stderr = proc.communicate()
    finally:
        # Always restore the caller's signal disposition.
        signal.signal(signal.SIGINT, prev_sigint)
        signal.signal(signal.SIGTERM, prev_sigterm)

    return subprocess.CompletedProcess(cmd_attempt, proc.returncode, stdout, stderr)
509+
510+
440511def log_summary_metrics (
441512 output_dir : Path , logger : logging .Logger , pod_name : str , port : int
442513) -> None :
@@ -513,6 +584,7 @@ def client(
513584 output_token_length : int ,
514585 max_retries : int ,
515586 retry_delay : float = 1 ,
587+ continuous_load : bool = False ,
516588):
517589 """
518590 Generate load using AI-Perf for fault tolerance testing.
@@ -527,11 +599,12 @@ def client(
527599 model: Model name
528600 log_dir: Directory for output logs and AI-Perf artifacts
529601 index: Client index used for round-robin pod selection
530- requests_per_client: Number of requests to generate
602+ requests_per_client: Number of requests to generate (used if continuous load not enabled)
531603 input_token_length: Number of input tokens per request
532604 output_token_length: Number of output tokens per request
533605 max_retries: Maximum retry attempts for AI-Perf execution
534606 retry_delay: Delay in seconds between retry attempts
607+ continuous_load: If True, use continuous load instead of fixed request count
535608 """
536609 logger = logging .getLogger (f"CLIENT: { index } " )
537610 logging .getLogger ("httpx" ).setLevel (logging .WARNING )
@@ -578,6 +651,7 @@ def client(
578651 logger = logger ,
579652 max_retries = max_retries ,
580653 retry_delay = retry_delay ,
654+ continuous_load = continuous_load ,
581655 )
582656
583657 if not success :
0 commit comments