From eb0b70e94e9e550538b26e88c913d7c521524cb8 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 13 Feb 2026 20:25:18 +0000 Subject: [PATCH 1/3] DAOS-18593 test: replace sleep with retry in rebuild/interactive.py Replace arbitrary sleep with a retry on expected DER_NONEXIST. Test-repeat: 10 Test-tag: RbldInteractive Skip-unit-tests: true Skip-fault-injection-test: true Signed-off-by: Dalton Bohning --- src/tests/ftest/rebuild/interactive.py | 29 ++++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/tests/ftest/rebuild/interactive.py b/src/tests/ftest/rebuild/interactive.py index 5dc968650a4..181a00391e6 100644 --- a/src/tests/ftest/rebuild/interactive.py +++ b/src/tests/ftest/rebuild/interactive.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -8,6 +8,7 @@ from apricot import TestWithServers from data_utils import assert_val_in_list +from exception_utils import CommandFailure from ior_utils import get_ior from job_manager_utils import get_job_manager @@ -74,10 +75,6 @@ def __run_rebuild_interactive(self, pool, cont_ior, ior, - 'dmg pool reintegrate' - 'dmg system reintegrate' """ - # Time to wait between rebuild start and manual stop. - # If we stop too early rebuild might not have started yet. - # Ideally, if we could poll the "actual" rebuild status this would not be necessary. - secs_between_rebuild_start_and_manual_stop = 4 ior_flags_read = self.params.get('flags_read', '/run/ior/*') ior_ppn = self.params.get('ppn', '/run/ior/*') @@ -100,8 +97,15 @@ def __run_rebuild_interactive(self, pool, cont_ior, ior, pool.wait_for_rebuild_to_start(interval=1) self.log_step(f'{exclude_method} - Manually stop rebuild') - time.sleep(secs_between_rebuild_start_and_manual_stop) - pool.rebuild_stop() + for i in range(3): + try: + pool.rebuild_stop() + break + except CommandFailure as error: + if i == 2 or 'DER_NONEXIST' not in str(error): + raise + self.log.info('Assuming rebuild is not started yet. Retrying in 3 seconds...') + time.sleep(3) self.log_step(f'{exclude_method} - Wait for rebuild to stop') pool.wait_for_rebuild_to_stop(interval=3) @@ -145,8 +149,15 @@ def __run_rebuild_interactive(self, pool, cont_ior, ior, pool.wait_for_rebuild_to_start(interval=1) self.log_step(f'{reint_method} - Manually stop rebuild') - time.sleep(secs_between_rebuild_start_and_manual_stop) - pool.rebuild_stop() + for i in range(3): + try: + pool.rebuild_stop() + break + except CommandFailure as error: + if i == 2 or 'DER_NONEXIST' not in str(error): + raise + self.log.info('Assuming rebuild is not started yet. Retrying in 3 seconds...') + time.sleep(3) self.log_step(f'{reint_method} - Wait for rebuild to stop') pool.wait_for_rebuild_to_stop(interval=3) From 7318e844d226478d431a206db9528371e3b179d6 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 17 Feb 2026 21:29:24 +0000 Subject: [PATCH 2/3] remove start detection to test DER_NONEXIST handling Test-repeat: 10 Test-tag: RbldInteractive Skip-unit-tests: true Skip-fault-injection-test: true Signed-off-by: Dalton Bohning --- src/tests/ftest/rebuild/interactive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/rebuild/interactive.py b/src/tests/ftest/rebuild/interactive.py index 181a00391e6..15f19e15114 100644 --- a/src/tests/ftest/rebuild/interactive.py +++ b/src/tests/ftest/rebuild/interactive.py @@ -94,7 +94,7 @@ def __run_rebuild_interactive(self, pool, cont_ior, ior, self.fail(f'Unsupported exclude_method: {exclude_method}') self.log_step(f'{exclude_method} - Wait for rebuild to start') - pool.wait_for_rebuild_to_start(interval=1) + # pool.wait_for_rebuild_to_start(interval=1) self.log_step(f'{exclude_method} - Manually stop rebuild') for i in range(3): @@ -146,7 +146,7 @@ def __run_rebuild_interactive(self, pool, cont_ior, ior, self.fail(f'Unsupported reint_method: {reint_method}') self.log_step(f'{reint_method} - Wait for rebuild to start') - pool.wait_for_rebuild_to_start(interval=1) + # pool.wait_for_rebuild_to_start(interval=1) self.log_step(f'{reint_method} - Manually stop rebuild') for i in range(3): From 862b1001892f3a24eb5a84936817b7fba3425fc1 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Wed, 18 Feb 2026 16:55:15 +0000 Subject: [PATCH 3/3] Revert "remove start detection to test DER_NONEXIST handling" This reverts commit 7318e844d226478d431a206db9528371e3b179d6. Test-repeat: 10 Test-tag: RbldInteractive Skip-unit-tests: true Skip-fault-injection-test: true Signed-off-by: Dalton Bohning --- src/tests/ftest/rebuild/interactive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/rebuild/interactive.py b/src/tests/ftest/rebuild/interactive.py index 15f19e15114..181a00391e6 100644 --- a/src/tests/ftest/rebuild/interactive.py +++ b/src/tests/ftest/rebuild/interactive.py @@ -94,7 +94,7 @@ def __run_rebuild_interactive(self, pool, cont_ior, ior, self.fail(f'Unsupported exclude_method: {exclude_method}') self.log_step(f'{exclude_method} - Wait for rebuild to start') - # pool.wait_for_rebuild_to_start(interval=1) + pool.wait_for_rebuild_to_start(interval=1) self.log_step(f'{exclude_method} - Manually stop rebuild') for i in range(3): @@ -146,7 +146,7 @@ def __run_rebuild_interactive(self, pool, cont_ior, ior, self.fail(f'Unsupported reint_method: {reint_method}') self.log_step(f'{reint_method} - Wait for rebuild to start') - # pool.wait_for_rebuild_to_start(interval=1) + pool.wait_for_rebuild_to_start(interval=1) self.log_step(f'{reint_method} - Manually stop rebuild') for i in range(3):