Skip to content

Commit 2804af1

Browse files
committed
Set back to 20 epochs
1 parent a34011f commit 2804af1

File tree

8 files changed

+106
-108
lines changed

8 files changed

+106
-108
lines changed

forks/cli.ts

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import {
88
epochRotationOperations,
99
groupCount,
1010
installationCount,
11-
network,
1211
NODE_VERSION,
1312
otherOperations,
1413
parallelOperations,
@@ -82,15 +81,15 @@ function runForkTest(options: ForkOptions): void {
8281

8382
try {
8483
execSync(command, {
85-
stdio: "ignore",
84+
stdio: "pipe",
8685
env: {
8786
...process.env,
8887
CHAOS_ENABLED: options.chaosEnabled ? "true" : "false",
8988
CHAOS_LEVEL: options.chaosLevel,
9089
},
9190
});
92-
} catch {
93-
console.log("Error running fork test");
91+
} catch (e) {
92+
console.error("Error running fork test", e);
9493
// Test may fail if forks are detected, that's expected
9594
// We'll analyze the logs afterward
9695
}
@@ -111,7 +110,7 @@ function logForkMatrixParameters(options: ForkOptions): void {
111110
);
112111
console.info(`otherOperations: ${JSON.stringify(otherOperations)}`);
113112
console.info(`targetEpoch: ${targetEpoch}`);
114-
console.info(`network: ${network || "undefined"}`);
113+
console.info(`network: ${options.env || "undefined"}`);
115114
console.info(`randomInboxIdsCount: ${randomInboxIdsCount}`);
116115
console.info(`installationCount: ${installationCount}`);
117116
console.info(`testName: ${testName}`);
@@ -192,15 +191,6 @@ async function runForkDetection(options: ForkOptions): Promise<void> {
192191
stats.runsWithoutForks++;
193192
console.info(`Run ${i}/${options.count}: ⚪ No forks`);
194193
}
195-
196-
// Clean up empty cleaned directory if it exists
197-
const logsDir = path.join(process.cwd(), "logs", "cleaned");
198-
if (fs.existsSync(logsDir)) {
199-
const files = fs.readdirSync(logsDir);
200-
if (files.length === 0) {
201-
fs.rmdirSync(logsDir);
202-
}
203-
}
204194
}
205195

206196
// Display final statistics

forks/config.ts

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ export const epochRotationOperations = {
2121
removeMember: true, // removes a random member from the group
2222
};
2323
export const otherOperations = {
24-
createInstallation: true, // creates a new installation for a random worker
24+
createInstallation: false, // creates a new installation for a random worker
2525
sendMessage: true, // sends a message to the group
2626
};
27-
export const targetEpoch = 150n; // The target epoch to stop the test (epochs are when performing forks to the group)
27+
export const targetEpoch = 30n; // The target epoch to stop the test (epochs are when performing forks to the group)
2828
export const network = process.env.XMTP_ENV; // Network environment setting
2929
export const randomInboxIdsCount = 10; // How many inboxIds to use randomly in the add/remove operations
3030
export const installationCount = 2; // How many installations to use randomly in the createInstallation operations
@@ -63,12 +63,12 @@ export const chaosPresets: Record<ChaosLevel, ChaosPreset> = {
6363
interval: 10000, // 10 seconds
6464
},
6565
high: {
66-
delayMin: 100,
66+
delayMin: 0,
6767
delayMax: 500,
68-
jitterMin: 0,
69-
jitterMax: 100,
68+
jitterMin: 50,
69+
jitterMax: 200,
7070
lossMin: 0,
71-
lossMax: 5,
71+
lossMax: 10,
7272
interval: 10000, // 10 seconds
7373
},
7474
};
@@ -90,4 +90,6 @@ export const multinodeContainers = [
9090
"multinode-node2-1",
9191
"multinode-node3-1",
9292
"multinode-node4-1",
93+
// Include the MLS validation service to add some additional chaos
94+
"multinode-validation-1",
9395
];

forks/constants.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export const forkDetectedString = "[FORK DETECTED]";

forks/forks.test.ts

Lines changed: 77 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,61 @@ import {
2020
targetEpoch,
2121
testName,
2222
workerNames,
23+
type ChaosPreset,
2324
} from "./config";
2425

26+
const startChaos = (
27+
allNodes: DockerContainer[],
28+
preset: ChaosPreset,
29+
): NodeJS.Timeout => {
30+
console.log(`[chaos] Initialized ${allNodes.length} Docker containers`);
31+
32+
// Validate containers are running
33+
for (const node of allNodes) {
34+
try {
35+
// Test if container exists by trying to get its IP
36+
if (!node.ip) {
37+
throw new Error(`Container ${node.name} has no IP address`);
38+
}
39+
} catch {
40+
throw new Error(
41+
`Docker container ${node.name} is not running. Network chaos requires local multinode setup (./dev/up).`,
42+
);
43+
}
44+
}
45+
console.log("[chaos] All Docker containers validated");
46+
47+
// Function to apply chaos to all nodes
48+
const applyChaos = () => {
49+
console.log(
50+
"[chaos] Applying jitter, delay, and drop rules to all nodes...",
51+
);
52+
for (const node of allNodes) {
53+
const delay = Math.floor(
54+
preset.delayMin + Math.random() * (preset.delayMax - preset.delayMin),
55+
);
56+
const jitter = Math.floor(
57+
preset.jitterMin +
58+
Math.random() * (preset.jitterMax - preset.jitterMin),
59+
);
60+
const loss =
61+
preset.lossMin + Math.random() * (preset.lossMax - preset.lossMin);
62+
63+
try {
64+
node.addJitter(delay, jitter);
65+
if (Math.random() < 0.5) node.addLoss(loss);
66+
} catch (err) {
67+
console.warn(`[chaos] Error applying netem on ${node.name}:`, err);
68+
}
69+
}
70+
};
71+
72+
// Apply chaos immediately
73+
applyChaos();
74+
75+
return setInterval(applyChaos, preset.interval);
76+
};
77+
2578
describe(testName, () => {
2679
setupDurationTracking({ testName });
2780

@@ -91,79 +144,29 @@ describe(testName, () => {
91144

92145
// Initialize Docker containers for multinode setup
93146
allNodes = multinodeContainers.map((name) => new DockerContainer(name));
94-
console.log(`[chaos] Initialized ${allNodes.length} Docker containers`);
95-
96-
// Validate containers are running
97-
for (const node of allNodes) {
98-
try {
99-
// Test if container exists by trying to get its IP
100-
if (!node.ip) {
101-
throw new Error(`Container ${node.name} has no IP address`);
102-
}
103-
} catch (_err) {
104-
throw new Error(
105-
`Docker container ${node.name} is not running. Network chaos requires local multinode setup (./dev/up).`,
106-
);
107-
}
108-
}
109-
console.log("[chaos] All Docker containers validated");
110-
111147
const preset = chaosPresets[chaosConfig.level];
112-
113-
// Function to apply chaos to all nodes
114-
const applyChaos = () => {
115-
console.log(
116-
"[chaos] Applying jitter, delay, and drop rules to all nodes...",
117-
);
118-
for (const node of allNodes) {
119-
const delay = Math.floor(
120-
preset.delayMin +
121-
Math.random() * (preset.delayMax - preset.delayMin),
122-
);
123-
const jitter = Math.floor(
124-
preset.jitterMin +
125-
Math.random() * (preset.jitterMax - preset.jitterMin),
126-
);
127-
const loss =
128-
preset.lossMin +
129-
Math.random() * (preset.lossMax - preset.lossMin);
130-
131-
try {
132-
node.addJitter(delay, jitter);
133-
if (Math.random() < 0.5) node.addLoss(loss);
134-
} catch (err) {
135-
console.warn(
136-
`[chaos] Error applying netem on ${node.name}:`,
137-
err,
138-
);
139-
}
140-
}
141-
};
142-
143-
// Apply chaos immediately
144-
applyChaos();
145-
146148
// Then set interval for continued chaos
147-
chaosInterval = setInterval(applyChaos, preset.interval);
149+
chaosInterval = startChaos(allNodes, preset);
148150
console.log(`[chaos] Started chaos interval (${preset.interval}ms)`);
151+
}
149152

150-
// Start periodic verification during chaos
151-
const verifyLoop = () => {
152-
verifyInterval = setInterval(() => {
153-
void (async () => {
154-
try {
155-
console.log("[verify] Checking forks under chaos");
156-
await workers.checkForks();
157-
} catch (e) {
158-
console.warn("[verify] Skipping check due to exception:", e);
159-
}
160-
})();
161-
}, 10 * 1000);
162-
};
153+
// Start periodic verification during chaos
154+
const verifyLoop = () => {
155+
verifyInterval = setInterval(() => {
156+
void (async () => {
157+
try {
158+
console.log("[verify] Checking forks under chaos");
159+
await workers.checkForks();
160+
} catch (e) {
161+
console.warn("[verify] Skipping check due to exception:", e);
162+
throw e;
163+
}
164+
})();
165+
}, 10 * 1000);
166+
};
163167

164-
verifyLoop();
165-
console.log("[chaos] Started verification interval (10000ms)");
166-
}
168+
verifyLoop();
169+
console.log("Started verification interval (10000ms)");
167170

168171
// Create groups
169172
const groupOperationPromises = Array.from(
@@ -228,7 +231,13 @@ describe(testName, () => {
228231
);
229232

230233
await Promise.all(groupOperationPromises);
234+
await workers.checkForks();
235+
} catch (e) {
236+
console.error("Error during fork testing:", e);
231237
} finally {
238+
if (verifyInterval) {
239+
clearInterval(verifyInterval);
240+
}
232241
// Clean up chaos if it was enabled
233242
if (chaosConfig.enabled) {
234243
console.log("[chaos] Cleaning up network chaos...");
@@ -237,9 +246,6 @@ describe(testName, () => {
237246
if (chaosInterval) {
238247
clearInterval(chaosInterval);
239248
}
240-
if (verifyInterval) {
241-
clearInterval(verifyInterval);
242-
}
243249

244250
// Clear network rules
245251
for (const node of allNodes) {
@@ -253,10 +259,6 @@ describe(testName, () => {
253259
}
254260
}
255261

256-
// Cooldown period to allow in-flight messages to be processed
257-
console.log("[chaos] Waiting 5s cooldown before final validation");
258-
await new Promise((r) => setTimeout(r, 5000));
259-
260262
console.log("[chaos] Cleanup complete");
261263
}
262264
}

helpers/analyzer.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import fs from "fs";
22
import path from "path";
3+
import { forkDetectedString } from "forks/constants";
34
import { processLogFile, stripAnsi } from "./logger";
45

56
// Known test issues for tracking
@@ -180,11 +181,6 @@ export async function cleanForksLogs(
180181
const logsDir = path.join(process.cwd(), "logs");
181182
const outputDir = path.join(logsDir, "cleaned");
182183

183-
if (!fs.existsSync(logsDir)) {
184-
console.debug("No logs directory found");
185-
return;
186-
}
187-
188184
if (!fs.existsSync(outputDir)) {
189185
await fs.promises.mkdir(outputDir, { recursive: true });
190186
}
@@ -214,7 +210,7 @@ export async function cleanForksLogs(
214210
// Check if the file contains fork-related content
215211
const containsForkContent = await fileContainsString(
216212
rawFilePath,
217-
"may have forked",
213+
forkDetectedString,
218214
);
219215

220216
// Always preserve raw logs for debugging/analysis

helpers/logger.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import fs from "fs";
22
import path from "path";
33
import winston from "winston";
44
import "dotenv/config";
5+
import { forkDetectedString } from "forks/constants";
56

67
// Consolidated ANSI escape code regex
78
// eslint-disable-next-line no-control-regex
@@ -44,7 +45,7 @@ export async function processLogFile(
4445

4546
let buffer = "";
4647
let foundForkLine = false;
47-
const targetString = "may be fork";
48+
const targetString = forkDetectedString;
4849

4950
readStream.on("data", (chunk: string | Buffer) => {
5051
if (foundForkLine) {

network-stability/netem.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ export function applyLatency(
44
container: DockerContainer,
55
latencyMs: number,
66
): void {
7-
console.log(`[netem] Clearing existing qdisc on ${container.veth}`);
87
container.sh(`sudo tc qdisc del dev ${container.veth} root`, true);
98
container.sh(
109
`sudo tc qdisc add dev ${container.veth} root netem delay ${latencyMs}ms`,
@@ -30,7 +29,7 @@ export function applyLoss(container: DockerContainer, percent: number): void {
3029
}
3130

3231
export function clear(container: DockerContainer): void {
33-
console.log(`[netem] Clearing latency from ${container.veth}`);
32+
// console.log(`[netem] Clearing latency from ${container.veth}`);
3433
container.sh(`sudo tc qdisc del dev ${container.veth} root`, true);
3534
}
3635

workers/manager.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
type Group,
1313
type XmtpEnv,
1414
} from "@helpers/versions";
15+
import { forkDetectedString } from "forks/constants";
1516
import { generatePrivateKey, privateKeyToAccount } from "viem/accounts";
1617
import {
1718
installationThreshold,
@@ -154,8 +155,10 @@ export class WorkerManager implements IWorkerManager {
154155
await Promise.all(
155156
groups.flat().map(async (g) => {
156157
const debugInfo = await g.debugInfo();
157-
if (debugInfo.maybeForked) {
158-
throw new Error(`Stopping test, group id ${g.id} may have forked`);
158+
if (debugInfo.maybeForked || debugInfo.isCommitLogForked) {
159+
throw new Error(
160+
`${forkDetectedString} Stopping test, group id ${g.id} may have forked`,
161+
);
159162
}
160163
}),
161164
);
@@ -179,8 +182,8 @@ export class WorkerManager implements IWorkerManager {
179182
for (const member of members)
180183
totalGroupInstallations += member.installationIds.length;
181184

182-
if (debugInfo.maybeForked) {
183-
const logMessage = `Fork detected, group id ${groupId} may have forked, epoch ${debugInfo.epoch} for worker ${worker.name}`;
185+
if (debugInfo.maybeForked || debugInfo.isCommitLogForked) {
186+
const logMessage = `${forkDetectedString}. Group id ${groupId} may have forked, epoch ${debugInfo.epoch} for worker ${worker.name}`;
184187
console.error(logMessage);
185188
throw new Error(logMessage);
186189
}
@@ -268,6 +271,10 @@ export class WorkerManager implements IWorkerManager {
268271
await group.addMembers(extraMembers);
269272
}
270273

274+
for (const member of memberList) {
275+
await group.addAdmin(member);
276+
}
277+
271278
return group as Group;
272279
}
273280

0 commit comments

Comments
 (0)