@@ -20,8 +20,61 @@ import {
2020 targetEpoch ,
2121 testName ,
2222 workerNames ,
23+ type ChaosPreset ,
2324} from "./config" ;
2425
/**
 * Checks that every container is reachable, applies one round of randomized
 * network-disturbance rules to each of them right away, and schedules the
 * same round to repeat.
 *
 * @param allNodes - Containers to disturb; each must yield a truthy `ip`.
 * @param preset - Min/max bounds for delay, jitter and loss, plus the
 *   `interval` handed to `setInterval` as the repeat period.
 * @returns The handle returned by `setInterval`; stopping it is left to the
 *   caller.
 * @throws Error when reading a container's `ip` throws or yields a falsy
 *   value.
 */
const startChaos = (
  allNodes: DockerContainer[],
  preset: ChaosPreset,
): NodeJS.Timeout => {
  console.log(`[chaos] Initialized ${allNodes.length} Docker containers`);

  // Fails if the container has no usable IP. Whatever goes wrong inside the
  // `try` (including the "no IP address" error raised there) is replaced by
  // the single "not running" error below.
  // NOTE(review): the underlying error is discarded here, so the original
  // failure reason never reaches the caller — confirm that is acceptable.
  const assertRunning = (container: DockerContainer): void => {
    try {
      const address = container.ip;
      if (!address) {
        throw new Error(`Container ${container.name} has no IP address`);
      }
    } catch {
      throw new Error(
        `Docker container ${container.name} is not running. Network chaos requires local multinode setup (./dev/up).`,
      );
    }
  };

  for (const container of allNodes) {
    assertRunning(container);
  }
  console.log("[chaos] All Docker containers validated");

  // Picks a point between `low` and `high` at a random fraction.
  const between = (low: number, high: number): number =>
    low + Math.random() * (high - low);

  // Draws fresh delay/jitter/loss values for one container and applies them.
  // Delay and jitter are rounded down to whole numbers; loss is left
  // fractional and is only applied on roughly half of the rounds.
  const disturb = (container: DockerContainer): void => {
    const delay = Math.floor(between(preset.delayMin, preset.delayMax));
    const jitter = Math.floor(between(preset.jitterMin, preset.jitterMax));
    const loss = between(preset.lossMin, preset.lossMax);

    try {
      container.addJitter(delay, jitter);
      const withLoss = Math.random() < 0.5;
      if (withLoss) {
        container.addLoss(loss);
      }
    } catch (err) {
      // A failure on one container is logged and does not stop the round.
      console.warn(`[chaos] Error applying netem on ${container.name}:`, err);
    }
  };

  // One full round over every container.
  const applyChaos = (): void => {
    console.log(
      "[chaos] Applying jitter, delay, and drop rules to all nodes...",
    );
    for (const container of allNodes) {
      disturb(container);
    }
  };

  // First round runs synchronously; later rounds run on the timer.
  applyChaos();
  const timer = setInterval(applyChaos, preset.interval);
  return timer;
};
77+
2578describe ( testName , ( ) => {
2679 setupDurationTracking ( { testName } ) ;
2780
@@ -91,79 +144,29 @@ describe(testName, () => {
91144
92145 // Initialize Docker containers for multinode setup
93146 allNodes = multinodeContainers . map ( ( name ) => new DockerContainer ( name ) ) ;
94- console . log ( `[chaos] Initialized ${ allNodes . length } Docker containers` ) ;
95-
96- // Validate containers are running
97- for ( const node of allNodes ) {
98- try {
99- // Test if container exists by trying to get its IP
100- if ( ! node . ip ) {
101- throw new Error ( `Container ${ node . name } has no IP address` ) ;
102- }
103- } catch ( _err ) {
104- throw new Error (
105- `Docker container ${ node . name } is not running. Network chaos requires local multinode setup (./dev/up).` ,
106- ) ;
107- }
108- }
109- console . log ( "[chaos] All Docker containers validated" ) ;
110-
111147 const preset = chaosPresets [ chaosConfig . level ] ;
112-
113- // Function to apply chaos to all nodes
114- const applyChaos = ( ) => {
115- console . log (
116- "[chaos] Applying jitter, delay, and drop rules to all nodes..." ,
117- ) ;
118- for ( const node of allNodes ) {
119- const delay = Math . floor (
120- preset . delayMin +
121- Math . random ( ) * ( preset . delayMax - preset . delayMin ) ,
122- ) ;
123- const jitter = Math . floor (
124- preset . jitterMin +
125- Math . random ( ) * ( preset . jitterMax - preset . jitterMin ) ,
126- ) ;
127- const loss =
128- preset . lossMin +
129- Math . random ( ) * ( preset . lossMax - preset . lossMin ) ;
130-
131- try {
132- node . addJitter ( delay , jitter ) ;
133- if ( Math . random ( ) < 0.5 ) node . addLoss ( loss ) ;
134- } catch ( err ) {
135- console . warn (
136- `[chaos] Error applying netem on ${ node . name } :` ,
137- err ,
138- ) ;
139- }
140- }
141- } ;
142-
143- // Apply chaos immediately
144- applyChaos ( ) ;
145-
146148 // Then set interval for continued chaos
147- chaosInterval = setInterval ( applyChaos , preset . interval ) ;
149+ chaosInterval = startChaos ( allNodes , preset ) ;
148150 console . log ( `[chaos] Started chaos interval (${ preset . interval } ms)` ) ;
151+ }
149152
150- // Start periodic verification during chaos
151- const verifyLoop = ( ) => {
152- verifyInterval = setInterval ( ( ) => {
153- void ( async ( ) => {
154- try {
155- console . log ( "[verify] Checking forks under chaos" ) ;
156- await workers . checkForks ( ) ;
157- } catch ( e ) {
158- console . warn ( "[verify] Skipping check due to exception:" , e ) ;
159- }
160- } ) ( ) ;
161- } , 10 * 1000 ) ;
162- } ;
// Start periodic verification: every 10 seconds run a fork check in the
// background and store the timer handle in `verifyInterval`.
const verifyLoop = () => {
  const periodMs = 10 * 1000;

  // One verification pass. Any exception is logged and then rethrown.
  // NOTE(review): this promise is never awaited (callers `void` it), so the
  // rethrow surfaces as an unhandled rejection rather than at a caller, while
  // the log text says the check is being skipped — confirm which is intended.
  const checkOnce = async (): Promise<void> => {
    try {
      console.log("[verify] Checking forks under chaos");
      await workers.checkForks();
    } catch (e) {
      console.warn("[verify] Skipping check due to exception:", e);
      throw e;
    }
  };

  verifyInterval = setInterval(() => {
    void checkOnce();
  }, periodMs);
};
163167
164- verifyLoop ( ) ;
165- console . log ( "[chaos] Started verification interval (10000ms)" ) ;
166- }
168+ verifyLoop ( ) ;
169+ console . log ( "Started verification interval (10000ms)" ) ;
167170
168171 // Create groups
169172 const groupOperationPromises = Array . from (
@@ -228,7 +231,13 @@ describe(testName, () => {
228231 ) ;
229232
230233 await Promise . all ( groupOperationPromises ) ;
234+ await workers . checkForks ( ) ;
235+ } catch ( e ) {
236+ console . error ( "Error during fork testing:" , e ) ;
231237 } finally {
238+ if ( verifyInterval ) {
239+ clearInterval ( verifyInterval ) ;
240+ }
232241 // Clean up chaos if it was enabled
233242 if ( chaosConfig . enabled ) {
234243 console . log ( "[chaos] Cleaning up network chaos..." ) ;
@@ -237,9 +246,6 @@ describe(testName, () => {
237246 if ( chaosInterval ) {
238247 clearInterval ( chaosInterval ) ;
239248 }
240- if ( verifyInterval ) {
241- clearInterval ( verifyInterval ) ;
242- }
243249
244250 // Clear network rules
245251 for ( const node of allNodes ) {
@@ -253,10 +259,6 @@ describe(testName, () => {
253259 }
254260 }
255261
256- // Cooldown period to allow in-flight messages to be processed
257- console . log ( "[chaos] Waiting 5s cooldown before final validation" ) ;
258- await new Promise ( ( r ) => setTimeout ( r , 5000 ) ) ;
259-
260262 console . log ( "[chaos] Cleanup complete" ) ;
261263 }
262264 }
0 commit comments