diff --git a/Makefile b/Makefile
index 4280c97b1..39dff1fb5 100644
--- a/Makefile
+++ b/Makefile
@@ -9,3 +9,6 @@ update-deps:
 		"$$SCRIPT" ; \
 	done
 
+.PHONY: test-integration
+test-integration: ## Run unit and integration tests
+	go test -v -tags=integration ./...
diff --git a/internal/deployers/eksapi/ami_resolver.go b/internal/deployers/eksapi/ami_resolver.go
new file mode 100644
index 000000000..61d6e2ff4
--- /dev/null
+++ b/internal/deployers/eksapi/ami_resolver.go
@@ -0,0 +1,165 @@
+package eksapi
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/service/ec2"
+	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
+	"github.com/aws/aws-sdk-go-v2/service/ssm"
+	"k8s.io/klog/v2"
+)
+
+// NewAMIResolver returns an amiResolver that uses the given AWS clients.
+func NewAMIResolver(awsClients *awsClients) *amiResolver {
+	return &amiResolver{
+		clients: awsClients,
+	}
+}
+
+// amiResolver resolves an AMI ID from SSM parameters, using EC2 instance type
+// metadata to pick the architecture and accelerator variant.
+type amiResolver struct {
+	clients *awsClients
+}
+
+// Resolve returns an AMI ID matching opts.UserDataFormat. It returns an error
+// for bootstrap.sh (not handled yet) and for unknown formats.
+func (r *amiResolver) Resolve(ctx context.Context, opts *deployerOptions) (string, error) {
+	switch opts.UserDataFormat {
+	case UserDataBootstrapSh:
+		// TODO: AL2 is not a high priority, skipping for now.
+		return "", fmt.Errorf("%s is not handled", opts.UserDataFormat)
+	case UserDataNodeadm:
+		return r.ResolveAL2023(ctx, opts)
+	case UserDataBottlerocket:
+		return r.ResolveBottlerocket(ctx, opts)
+	default:
+		return "", fmt.Errorf("unhandled userdata format: %s", opts.UserDataFormat)
+	}
+}
+
+// ResolveAL2023 returns the AMI ID from the EKS optimized AMI SSM parameter for
+// Amazon Linux 2023. The architecture and the standard/neuron/nvidia variant
+// are derived from the first instance type in opts.
+func (r *amiResolver) ResolveAL2023(ctx context.Context, opts *deployerOptions) (string, error) {
+	instanceTypeInfo, err := r.describeInstanceType(ctx, opts)
+	if err != nil {
+		return "", err
+	}
+	arch, err := r.resolveArch(instanceTypeInfo)
+	if err != nil {
+		return "", err
+	}
+
+	variant := "standard"
+	switch {
+	case instanceTypeInfo.NeuronInfo != nil && len(instanceTypeInfo.NeuronInfo.NeuronDevices) > 0:
+		variant = "neuron"
+	case hasNvidiaGPU(instanceTypeInfo):
+		variant = "nvidia"
+	}
+
+	name := fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/%s/%s/recommended/image_id", opts.KubernetesVersion, arch, variant)
+	return r.getParameterValue(ctx, name)
+}
+
+// ResolveBottlerocket returns the AMI ID from the Bottlerocket SSM parameter for
+// the Kubernetes version in opts. The architecture and the "-nvidia" flavor
+// suffix are derived from the first instance type in opts.
+func (r *amiResolver) ResolveBottlerocket(ctx context.Context, opts *deployerOptions) (string, error) {
+	instanceTypeInfo, err := r.describeInstanceType(ctx, opts)
+	if err != nil {
+		return "", err
+	}
+	arch, err := r.resolveArch(instanceTypeInfo)
+	if err != nil {
+		return "", err
+	}
+
+	// TODO: enable fips
+	flavorSuffix := ""
+	if hasNvidiaGPU(instanceTypeInfo) {
+		flavorSuffix = "-nvidia"
+	}
+
+	name := fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s%s/%s/latest/image_id", opts.KubernetesVersion, flavorSuffix, arch)
+	return r.getParameterValue(ctx, name)
+}
+
+// describeInstanceType fetches EC2 metadata for the instance type chosen by
+// getInstance. It returns an error, rather than panicking on an index out of
+// range, when opts has no instance types or EC2 returns an empty result.
+func (r *amiResolver) describeInstanceType(ctx context.Context, opts *deployerOptions) (ec2types.InstanceTypeInfo, error) {
+	if len(opts.InstanceTypes) == 0 {
+		return ec2types.InstanceTypeInfo{}, fmt.Errorf("at least one instance type is required to resolve an AMI")
+	}
+	instanceType := r.getInstance(opts)
+	out, err := r.clients.EC2().DescribeInstanceTypes(ctx, &ec2.DescribeInstanceTypesInput{
+		InstanceTypes: []ec2types.InstanceType{ec2types.InstanceType(instanceType)},
+	})
+	if err != nil {
+		return ec2types.InstanceTypeInfo{}, fmt.Errorf("describing instance type %q: %w", instanceType, err)
+	}
+	if len(out.InstanceTypes) == 0 {
+		return ec2types.InstanceTypeInfo{}, fmt.Errorf("no instance type info returned for %q", instanceType)
+	}
+	return out.InstanceTypes[0], nil
+}
+
+// getParameterValue reads the named SSM parameter and returns its value. A
+// missing or empty value is an error so callers never receive an empty AMI ID.
+func (r *amiResolver) getParameterValue(ctx context.Context, name string) (string, error) {
+	out, err := r.clients.SSM().GetParameter(ctx, &ssm.GetParameterInput{
+		Name: aws.String(name),
+	})
+	if err != nil {
+		return "", fmt.Errorf("getting SSM parameter %q: %w", name, err)
+	}
+	if out.Parameter == nil || aws.ToString(out.Parameter.Value) == "" {
+		return "", fmt.Errorf("SSM parameter %q has no value", name)
+	}
+	return aws.ToString(out.Parameter.Value), nil
+}
+
+// getInstance returns the first instance type in opts, logging a warning when
+// more are configured. Callers must ensure opts.InstanceTypes is non-empty.
+func (r *amiResolver) getInstance(opts *deployerOptions) string {
+	instanceType := opts.InstanceTypes[0]
+	if len(opts.InstanceTypes) > 1 {
+		klog.Warningf("only resolving AMI based on first instance type: %s", instanceType)
+	}
+	return instanceType
+}
+
+// resolveArch returns the first arm64 or x86_64 entry among the architectures
+// the instance type reports. The whole list is scanned so that a type listing
+// another architecture (e.g. i386) before x86_64 still resolves.
+func (r *amiResolver) resolveArch(instanceTypeInfo ec2types.InstanceTypeInfo) (string, error) {
+	if instanceTypeInfo.ProcessorInfo == nil || len(instanceTypeInfo.ProcessorInfo.SupportedArchitectures) == 0 {
+		return "", fmt.Errorf("no supported architectures reported for instance type %s", instanceTypeInfo.InstanceType)
+	}
+	archs := instanceTypeInfo.ProcessorInfo.SupportedArchitectures
+	for _, arch := range archs {
+		switch arch {
+		case ec2types.ArchitectureTypeArm64, ec2types.ArchitectureTypeX8664:
+			return string(arch), nil
+		}
+	}
+	return "", fmt.Errorf("unhandled arch: %v", archs)
+}
+
+// hasNvidiaGPU reports whether any GPU listed for the instance type has the
+// manufacturer "NVIDIA".
+func hasNvidiaGPU(instanceTypeInfo ec2types.InstanceTypeInfo) bool {
+	if instanceTypeInfo.GpuInfo == nil {
+		return false
+	}
+	for _, gpu := range instanceTypeInfo.GpuInfo.Gpus {
+		if aws.ToString(gpu.Manufacturer) == "NVIDIA" {
+			return true
+		}
+	}
+	return false
+}
diff --git a/internal/deployers/eksapi/ami_resolver_test.go b/internal/deployers/eksapi/ami_resolver_test.go
new file mode 100644
index 000000000..5a61b6379
--- /dev/null
+++ b/internal/deployers/eksapi/ami_resolver_test.go
@@ -0,0 +1,67 @@
+//go:build integration
+
+package eksapi
+
+import (
+	"context"
+	"testing"
+
+	"github.com/aws/aws-sdk-go-v2/config"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestAMIResolver(t *testing.T) {
+	ctx := context.Background()
+	awsCfg, err := config.LoadDefaultConfig(ctx)
+	if err != nil {
+		t.Fatalf("loading AWS config: %v", err)
+	}
+
+	amiResolver := NewAMIResolver(newAWSClients(awsCfg, ""))
+
+	t.Run("AL2023", func(t *testing.T) {
+		opts := deployerOptions{
+			UserDataFormat:    UserDataNodeadm,
+			KubernetesVersion: "1.33",
+		}
+		t.Run("nvidia", func(t *testing.T) {
+			opts := opts
+			opts.InstanceTypes = []string{"g5.xlarge"}
+
+			ami, err := amiResolver.Resolve(ctx, &opts)
+			assert.NoError(t, err)
+			assert.Regexp(t, "^ami-[0-9a-f]+$", ami)
+		})
+		t.Run("standard", func(t *testing.T) {
+			opts := opts
+			opts.InstanceTypes = []string{"m5.xlarge"}
+
+			ami, err := amiResolver.Resolve(ctx, &opts)
+			assert.NoError(t, err)
+			assert.Regexp(t, "^ami-[0-9a-f]+$", ami)
+		})
+	})
+
+	t.Run("Bottlerocket", func(t *testing.T) {
+		opts := deployerOptions{
+			UserDataFormat:    UserDataBottlerocket,
+			KubernetesVersion: "1.33",
+		}
+		t.Run("nvidia", func(t *testing.T) {
+			opts := opts
+			opts.InstanceTypes = []string{"g5.xlarge"}
+
+			ami, err := amiResolver.Resolve(ctx, &opts)
+			assert.NoError(t, err)
+			assert.Regexp(t, "^ami-[0-9a-f]+$", ami)
+		})
+		t.Run("standard", func(t *testing.T) {
+			opts := opts
+			opts.InstanceTypes = []string{"m5.xlarge"}
+
+			ami, err := amiResolver.Resolve(ctx, &opts)
+			assert.NoError(t, err)
+			assert.Regexp(t, "^ami-[0-9a-f]+$", ami)
+		})
+	})
+}
diff --git a/internal/deployers/eksapi/deployer.go b/internal/deployers/eksapi/deployer.go
index 35685d419..a7e9dd6a7 100644
--- a/internal/deployers/eksapi/deployer.go
+++ b/internal/deployers/eksapi/deployer.go
@@ -1,6 +1,7 @@
 package eksapi
 
 import (
+	"context"
 	"flag"
 	"fmt"
 	"path/filepath"
@@ -15,8 +16,8 @@ import (
 	"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
 	ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
 
-	"github.com/urfave/sflags/gen/gpflag"
 	"github.com/spf13/pflag"
+	"github.com/urfave/sflags/gen/gpflag"
 	"golang.org/x/exp/slices"
 	"k8s.io/klog"
 	"sigs.k8s.io/kubetest2/pkg/types"
@@ -299,9 +300,6 @@ func (d *deployer) verifyUpFlags() error {
 		return fmt.Errorf("--instance-types and --instance-type-archs are mutually exclusive")
 	}
 	if d.UnmanagedNodes {
-		if d.AMI == "" {
-			return fmt.Errorf("--ami must be specified for --unmanaged-nodes")
-		}
 		if d.AMIType != "" {
 			return fmt.Errorf("--ami-type should not be provided with --unmanaged-nodes")
 		}
@@ -314,9 +312,19 @@ func (d *deployer) verifyUpFlags() error {
 			}
 		}
 		if d.UserDataFormat == "" {
-			d.UserDataFormat = "bootstrap.sh"
+			d.UserDataFormat = UserDataBootstrapSh
 			klog.Infof("Using default user data format: %s", d.UserDataFormat)
 		}
+		// AMI ID check must come after user-data format resolution because we
+		// can try to infer the AMI type for unmanaged nodes.
+		if d.AMI == "" {
+			ami, err := NewAMIResolver(d.awsClients).Resolve(context.TODO(), &d.deployerOptions)
+			if err != nil {
+				return fmt.Errorf("failed to automatically resolve ami for unmanaged nodegroup (provide --ami to short circuit this): %w", err)
+			}
+			d.AMI = ami
+		}
+
 		if d.EFA && len(d.InstanceTypes) != 1 {
 			return fmt.Errorf("--efa requires a single instance type")
 		}
diff --git a/internal/deployers/eksapi/userdata.go b/internal/deployers/eksapi/userdata.go
index 43b5d8b87..78f57cb44 100644
--- a/internal/deployers/eksapi/userdata.go
+++ b/internal/deployers/eksapi/userdata.go
@@ -10,16 +10,23 @@ import (
 	"github.com/aws/aws-k8s-tester/internal/deployers/eksapi/templates"
 )
 
+// User data formats handled by generateUserData.
+const (
+	UserDataBootstrapSh  = "bootstrap.sh"
+	UserDataNodeadm      = "nodeadm"
+	UserDataBottlerocket = "bottlerocket"
+)
+
 func generateUserData(cluster *Cluster, opts *deployerOptions) (string, bool, error) {
 	userDataIsMimePart := true
 	var t *template.Template
 	switch opts.UserDataFormat {
-	case "bootstrap.sh":
+	case UserDataBootstrapSh:
 		t = templates.UserDataBootstrapSh
-	case "nodeadm":
+	case UserDataNodeadm:
 		// TODO: replace the YAML template with proper usage of the nodeadm API go types
 		t = templates.UserDataNodeadm
-	case "bottlerocket":
+	case UserDataBottlerocket:
 		t = templates.UserDataBottlerocket
 		userDataIsMimePart = false
 	default: