Skip to content

Commit 7afb4cb

Browse files
committed
[INF-3641] Include container failure reason in error messages
1 parent 16276d2 commit 7afb4cb

File tree

3 files changed

+137
-1
lines changed

3 files changed

+137
-1
lines changed

lib/renderer/states/tasks-failed.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,20 @@ module.exports = function (deployment) {
66
let extraMsg = "\nFailure Reasons\n";
77
deployment.tasksFailedFull.forEach((task) => {
88
extraMsg += `\nTask: ${task.taskArn}\nReason: ${task.stoppedReason}\n`
9+
10+
// The above stopped reason is often generic for the whole task, and doesn't
11+
// give a lot of context about why an individual container stopped. So let's
12+
// dig into the individual containers and print their own stopped reasons as well.
13+
task.containers.forEach((container) => {
14+
if (container.exitCode !== 0) {
15+
extraMsg += ` Container: ${container.name} (${container.lastStatus})\n`
16+
extraMsg += ` Exit code: ${container.exitCode}\n`
17+
18+
if (container.reason) {
19+
extraMsg += ` Reason: ${container.reason}\n`
20+
}
21+
}
22+
})
923
});
1024

1125
return {

test/fixtures/container-failed-deployment.json

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
{
2+
"tasks": [
3+
{
4+
"attachments": [],
5+
"attributes": [
6+
{
7+
"name": "ecs.cpu-architecture",
8+
"value": "x86_64"
9+
}
10+
],
11+
"availabilityZone": "us-east-1f",
12+
"clusterArn": "arn:aws:ecs:us-east-1:123456789012:cluster/test-cluster",
13+
"connectivity": "CONNECTED",
14+
"connectivityAt": "2025-11-25T13:41:24.739000+11:00",
15+
"containerInstanceArn": "arn:aws:ecs:us-east-1:123456789012:container-instance/test-cluster/07b64a68b8744baca8162efc012c59e6",
16+
"containers": [
17+
{
18+
"containerArn": "arn:aws:ecs:us-east-1:123456789012:container/test-cluster/6dc5a8ce009c4521bf210cdb350f49b1/b8e6303b-6c49-4a20-852e-aed35d92f925",
19+
"taskArn": "arn:aws:ecs:us-east-1:123456789012:task/test-cluster/6dc5a8ce009c4521bf210cdb350f49b1",
20+
"name": "pause",
21+
"image": "public.ecr.aws/eks-distro/kubernetes/pause:v1.34.1-eks-1-34-10",
22+
"imageDigest": "sha256:6bbea9dfb3660ecef21013757406d54c0b2daa5287d39eb85068cf4ab312584a",
23+
"runtimeId": "54794508ae8a4bbd68927982ef52fbc3d3963e45934e1537642a28fa0ee53edd",
24+
"lastStatus": "STOPPED",
25+
"exitCode": 0,
26+
"networkBindings": [
27+
{
28+
"bindIP": "0.0.0.0",
29+
"containerPort": 81,
30+
"hostPort": 33151,
31+
"protocol": "tcp"
32+
}
33+
],
34+
"networkInterfaces": [],
35+
"healthStatus": "UNKNOWN",
36+
"cpu": "128",
37+
"memory": "128"
38+
},
39+
{
40+
"containerArn": "arn:aws:ecs:us-east-1:123456789012:container/test-cluster/6dc5a8ce009c4521bf210cdb350f49b1/f1762959-4457-4683-989a-66ba59cbc7d5",
41+
"taskArn": "arn:aws:ecs:us-east-1:123456789012:task/test-cluster/6dc5a8ce009c4521bf210cdb350f49b1",
42+
"name": "app",
43+
"image": "test-account/test-image:latest",
44+
"imageDigest": "sha256:a5ffe28da196f01d2798c450204877b1c99f85d806e6eb43ecd83838dd174421",
45+
"runtimeId": "2f25baf30e622f3c64bf42fb9b92de19c1ed26e2dce68b1adb2d78dc68bd64e4",
46+
"lastStatus": "STOPPED",
47+
"exitCode": 137,
48+
"reason": "OutOfMemoryError: Container killed due to memory usage",
49+
"networkBindings": [
50+
{
51+
"bindIP": "0.0.0.0",
52+
"containerPort": 80,
53+
"hostPort": 33150,
54+
"protocol": "tcp"
55+
}
56+
],
57+
"networkInterfaces": [],
58+
"healthStatus": "UNKNOWN",
59+
"cpu": "1024",
60+
"memory": "1024",
61+
"memoryReservation": "1024"
62+
}
63+
],
64+
"cpu": "1152",
65+
"createdAt": "2025-11-25T13:41:24.739000+11:00",
66+
"desiredStatus": "STOPPED",
67+
"enableExecuteCommand": false,
68+
"executionStoppedAt": "2025-11-25T13:41:41.278000+11:00",
69+
"group": "service:test-service",
70+
"healthStatus": "UNKNOWN",
71+
"lastStatus": "STOPPED",
72+
"launchType": "EC2",
73+
"memory": "1152",
74+
"overrides": {
75+
"containerOverrides": [
76+
{
77+
"name": "app"
78+
},
79+
{
80+
"name": "pause"
81+
}
82+
],
83+
"inferenceAcceleratorOverrides": []
84+
},
85+
"pullStartedAt": "2025-11-25T13:41:26.908000+11:00",
86+
"pullStoppedAt": "2025-11-25T13:41:27.432000+11:00",
87+
"startedAt": "2025-11-25T13:41:26.952000+11:00",
88+
"startedBy": "ecs-svc/6026894300699578750",
89+
"stopCode": "EssentialContainerExited",
90+
"stoppedAt": "2025-11-25T13:41:41.530000+11:00",
91+
"stoppedReason": "Essential container in task exited",
92+
"stoppingAt": "2025-11-25T13:41:41.530000+11:00",
93+
"tags": [],
94+
"taskArn": "arn:aws:ecs:us-east-1:123456789012:task/test-cluster/6dc5a8ce009c4521bf210cdb350f49b1",
95+
"taskDefinitionArn": "arn:aws:ecs:us-east-1:123456789012:task-definition/test-service:6",
96+
"version": 4
97+
}
98+
],
99+
"failures": []
100+
}

test/renderer/states/tasks-failed.js

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ const EventEmitter = require('events');
66

77
const Deployment = require('../../../lib/deployment');
88
const TasksFailedRenderState = require('../../../lib/renderer/states/tasks-failed');
9+
const containerFailedDeployment = require('../../fixtures/container-failed-deployment.json');
910

1011
describe('Renderer:State:TasksFailed', function () {
1112
it('should include TaskDefinitionArn in done message', function () {
@@ -19,10 +20,31 @@ describe('Renderer:State:TasksFailed', function () {
1920
deployment.tasksFailed.push('arn:task:1');
2021
deployment.tasksFailedFull.push({
2122
taskArn: taskArn,
22-
stoppedReason: stopReason
23+
stoppedReason: stopReason,
24+
containers: []
2325
});
2426

2527
let stateInfo = TasksFailedRenderState(deployment, bufferStream);
2628
expect(stateInfo.extra).to.contain(`Task: ${taskArn}\nReason: ${stopReason}`);
2729
});
30+
31+
it('should include container exit codes and reasons', function () {
32+
let taskDefinitionArn = 'arn:taskdefinition:1';
33+
let service = new EventEmitter();
34+
let bufferStream = new streamBuffers.WritableStreamBuffer();
35+
let deployment = new Deployment({ service: service, taskDefinitionArn: taskDefinitionArn });
36+
37+
const failedTask = containerFailedDeployment.tasks[0];
38+
deployment.tasksFailed.push(failedTask.taskArn);
39+
deployment.tasksFailedFull.push(failedTask);
40+
41+
let stateInfo = TasksFailedRenderState(deployment, bufferStream);
42+
43+
const appContainer = failedTask.containers.find(c => c.name === 'app');
44+
45+
expect(stateInfo.extra).to.contain(`Task: ${failedTask.taskArn}\nReason: ${failedTask.stoppedReason}`);
46+
expect(stateInfo.extra).to.contain(`Container: ${appContainer.name} (${appContainer.lastStatus})`);
47+
expect(stateInfo.extra).to.contain(`Exit code: ${appContainer.exitCode}`);
48+
expect(stateInfo.extra).to.contain(`Reason: ${appContainer.reason}`);
49+
});
2850
});

0 commit comments

Comments
 (0)