Skip to content

Commit 95a012d

Browse files
committed
feat(eksapi): enhance error messages for cfn failures
1 parent 270c7b8 commit 95a012d

File tree

3 files changed

+53
-2
lines changed

3 files changed

+53
-2
lines changed

internal/deployers/eksapi/infra.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424

2525
"github.com/aws/aws-k8s-tester/internal/deployers/eksapi/templates"
2626
"github.com/aws/aws-k8s-tester/internal/metrics"
27+
"github.com/aws/aws-k8s-tester/internal/util"
2728
)
2829

2930
const (
@@ -521,7 +522,7 @@ func (m *InfrastructureManager) createCloudWatchInfrastructureStack(clusterName
521522
},
522523
})
523524
if err != nil {
524-
return "", fmt.Errorf("failed to create CloudWatch infrastructure stack: %w", err)
525+
return "", util.WrapCFNStackFailure(context.TODO(), m.clients.CFN(), fmt.Errorf("failed to create CloudWatch infrastructure stack: %w", err), stackName)
525526
}
526527

527528
klog.Infof("waiting for CloudWatch infrastructure stack to be created: %s", *out.StackId)

internal/deployers/eksapi/node.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
2929

3030
"github.com/aws/aws-k8s-tester/internal/deployers/eksapi/templates"
31+
"github.com/aws/aws-k8s-tester/internal/util"
3132
apierrors "k8s.io/apimachinery/pkg/api/errors"
3233
)
3334

@@ -494,7 +495,7 @@ func (m *nodeManager) createUnmanagedNodegroup(infra *Infrastructure, cluster *C
494495
}
495496
out, err := m.clients.CFN().CreateStack(context.TODO(), &input)
496497
if err != nil {
497-
return err
498+
return util.WrapCFNStackFailure(context.TODO(), m.clients.CFN(), err, stackName)
498499
}
499500
klog.Infof("waiting for unmanaged nodegroup stack to be created: %s", aws.ToString(out.StackId))
500501
err = cloudformation.NewStackCreateCompleteWaiter(m.clients.CFN()).

internal/util/cloudformation.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package util
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"strings"
7+
8+
"github.com/aws/aws-sdk-go-v2/aws"
9+
"github.com/aws/aws-sdk-go-v2/service/cloudformation"
10+
types "github.com/aws/aws-sdk-go-v2/service/cloudformation/types"
11+
)
12+
13+
// TODO: implement AWS client wrappers, and incorporate this into the cfn:CreateStack call
14+
func WrapCFNStackFailure(ctx context.Context, cfnClient *cloudformation.Client, createStackErr error, stackName string) error {
15+
if createStackErr == nil {
16+
return createStackErr
17+
}
18+
resourceByFailureMode := make(map[string][]string)
19+
eventsPaginator := cloudformation.NewDescribeStackEventsPaginator(cfnClient, &cloudformation.DescribeStackEventsInput{
20+
StackName: &stackName,
21+
})
22+
for eventsPaginator.HasMorePages() {
23+
page, err := eventsPaginator.NextPage(ctx)
24+
if err != nil {
25+
return createStackErr
26+
}
27+
for _, event := range page.StackEvents {
28+
if event.ResourceStatus == types.ResourceStatusCreateFailed {
29+
if _, ok := resourceByFailureMode[aws.ToString(event.ResourceStatusReason)]; !ok {
30+
resourceByFailureMode[aws.ToString(event.ResourceStatusReason)] = []string{}
31+
}
32+
resourceByFailureMode[aws.ToString(event.ResourceStatusReason)] = append(resourceByFailureMode[aws.ToString(event.ResourceStatusReason)], aws.ToString(event.LogicalResourceId))
33+
}
34+
}
35+
}
36+
nonCancellationFailure := len(resourceByFailureMode) > 1
37+
var enhancedDetails []string
38+
for reason, resources := range resourceByFailureMode {
39+
if nonCancellationFailure && reason == "Resource creation cancelled" {
40+
// Ignore resource cancellation errors if there's another failure reported, those failures
41+
// would just be a consequence of that failure. If all the failures are resource cancellation,
42+
// then there was likely a user initiated delete of the whole stack based on a timeout
43+
// waiting for one of the resources to create
44+
continue
45+
}
46+
enhancedDetails = append(enhancedDetails, fmt.Sprintf("%s: %s", strings.Join(resources, ","), reason))
47+
}
48+
return fmt.Errorf("%w: %s", createStackErr, strings.Join(enhancedDetails, "--"))
49+
}

0 commit comments

Comments
 (0)