Skip to content

Commit 3de77cd

Browse files
committed
revert to request.node.name for log_dir
1 parent 9b268c9 commit 3de77cd

File tree

3 files changed

+16
-20
lines changed

3 files changed

+16
-20
lines changed

tests/conftest.py

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@
1919
import tempfile
2020
from pathlib import Path
2121
from typing import Optional
22-
from datetime import datetime
2322

2423
import pytest
2524
from filelock import FileLock
@@ -192,14 +191,10 @@ def predownload_tokenizers(pytestconfig):
192191

193192
@pytest.fixture(autouse=True)
194193
def logger(request):
195-
timestamp = datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
196-
log_dir = f"{request.node.name}_{timestamp}"
197-
request.node.log_dir = log_dir
198-
log_path = os.path.join(log_dir, "test.log.txt")
199-
194+
log_path = os.path.join(request.node.name, "test.log.txt")
200195
logger = logging.getLogger()
201-
shutil.rmtree(log_dir, ignore_errors=True)
202-
os.makedirs(log_dir, exist_ok=True)
196+
shutil.rmtree(request.node.name, ignore_errors=True)
197+
os.makedirs(request.node.name, exist_ok=True)
203198
handler = logging.FileHandler(log_path, mode="w")
204199
formatter = logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT)
205200
handler.setFormatter(formatter)

tests/fault_tolerance/deploy/client.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -448,6 +448,7 @@ def run_aiperf(
448448
return success
449449

450450

451+
# TODO: use file redirection and wait() instead of pipes and communicate
451452
def run_aiperf_with_signal_handling(
452453
cmd_attempt: List[str],
453454
logger: logging.Logger,

tests/fault_tolerance/deploy/test_deployment.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@
88
import re
99
import signal
1010
from contextlib import contextmanager
11-
from typing import Any
1211
from multiprocessing.context import SpawnProcess
12+
from typing import Any
1313

1414
import pytest
1515

@@ -191,7 +191,6 @@ def _clients(
191191
logger.debug(f"{proc} joined")
192192

193193

194-
195194
def _terminate_client_processes(
196195
client_procs: list[SpawnProcess],
197196
logger: logging.Logger,
@@ -233,7 +232,9 @@ async def _inject_failures(
233232

234233
logger.info(f"Injecting failure for: {failure}")
235234

236-
affected_pods[failure.get_failure_key()] = await failure.execute(deployment, logger)
235+
affected_pods[failure.get_failure_key()] = await failure.execute(
236+
deployment, logger
237+
)
237238

238239
return affected_pods
239240

@@ -263,26 +264,23 @@ def validation_context(request, scenario): # noqa: F811
263264

264265
yield context # Test receives this and populates it
265266

266-
# Get log_dir from request.node if available (set by test), otherwise use node.name
267-
base_log_dir = getattr(request.node, "log_dir", request.node.name)
268-
269267
# Determine log paths based on whether this is a mixed token test
270268
log_paths = []
271269
test_name = request.node.name
272270
logger = logging.getLogger(test_name)
273271

274272
if hasattr(scenario.load, "mixed_token_test") and scenario.load.mixed_token_test:
275273
# For mixed token tests, we have separate overflow and recovery directories
276-
overflow_dir = f"{base_log_dir}{OVERFLOW_SUFFIX}"
277-
recovery_dir = f"{base_log_dir}{RECOVERY_SUFFIX}"
274+
overflow_dir = f"{request.node.name}{OVERFLOW_SUFFIX}"
275+
recovery_dir = f"{request.node.name}{RECOVERY_SUFFIX}"
278276
log_paths = [overflow_dir, recovery_dir]
279277

280278
logging.info("Mixed token test detected. Looking for results in:")
281279
logging.info(f" - Overflow phase: {overflow_dir}")
282280
logging.info(f" - Recovery phase: {recovery_dir}")
283281
else:
284282
# Standard test with single directory
285-
log_paths = [base_log_dir]
283+
log_paths = [request.node.name]
286284

287285
# Use factory to auto-detect and parse results
288286
try:
@@ -495,7 +493,7 @@ async def test_fault_scenario(
495493

496494
async with ManagedDeployment(
497495
namespace=namespace,
498-
log_dir=request.node.log_dir,
496+
log_dir=request.node.name,
499497
deployment_spec=scenario.deployment,
500498
skip_service_restart=skip_service_restart,
501499
) as deployment:
@@ -505,14 +503,16 @@ async def test_fault_scenario(
505503

506504
with _clients(
507505
logger,
508-
request.node.log_dir,
506+
request.node.name,
509507
scenario.deployment,
510508
namespace,
511509
model,
512510
scenario.load, # Pass entire Load config object
513511
) as client_procs:
514512
# Inject failures and capture which pods were affected
515-
affected_pods = await _inject_failures(scenario.failures, logger, deployment)
513+
affected_pods = await _inject_failures(
514+
scenario.failures, logger, deployment
515+
)
516516
logger.info(f"Affected pods during test: {affected_pods}")
517517

518518
if scenario.load.continuous_load:

0 commit comments

Comments
 (0)