From 4e90d25f71d6f40aea5cead07616f2454e5e6b95 Mon Sep 17 00:00:00 2001
From: Carter McKinnon
Date: Fri, 28 Feb 2025 21:01:35 +0000
Subject: [PATCH 1/3] Add unmanaged nodegroup support

---
 tests/assets/asg_node_group.yaml              |  51 ++++++++
 ...eks_node_group_launch_template_al2023.yaml |  16 +++
 .../tasks/setup/eks/awscli-cfn-lt-al2023.yaml |   5 +
 tests/tasks/setup/eks/awscli-mng.yaml         | 110 +++++++++++++-----
 4 files changed, 151 insertions(+), 31 deletions(-)
 create mode 100644 tests/assets/asg_node_group.yaml

diff --git a/tests/assets/asg_node_group.yaml b/tests/assets/asg_node_group.yaml
new file mode 100644
index 00000000..7bba3de1
--- /dev/null
+++ b/tests/assets/asg_node_group.yaml
@@ -0,0 +1,51 @@
+---
+AWSTemplateFormatVersion: '2010-09-09'
+Description: 'Unmanaged EKS nodegroup using EC2 AutoScaling'
+Parameters:
+  ClusterName:
+    Type: String
+    Description: Name of EKS cluster.
+  AutoScalingGroupName:
+    Description: Name of ASG.
+    Type: String
+  VpcId:
+    Type: AWS::EC2::VPC::Id
+  SubnetIds:
+    Type: List<AWS::EC2::Subnet::Id>
+  SecurityGroup:
+    Type: AWS::EC2::SecurityGroup::Id
+  LaunchTemplateName:
+    Type: String
+    Description: Launch template name.
+  LaunchTemplateVersion:
+    Type: String
+    Description: Launch template version. Default is 1, since our launch templates are generally ephemeral/single-use.
+    Default: "1"
+  NodeCount:
+    Type: Number
+Resources:
+  AutoScalingGroup:
+    Type: AWS::AutoScaling::AutoScalingGroup
+    UpdatePolicy:
+      AutoScalingRollingUpdate:
+        WaitOnResourceSignals: true
+        PauseTime: PT15M
+    Properties:
+      AutoScalingGroupName: !Ref AutoScalingGroupName
+      DesiredCapacity: !Ref NodeCount
+      MinSize: !Ref NodeCount
+      MaxSize: !Ref NodeCount
+      MixedInstancesPolicy:
+        LaunchTemplate:
+          LaunchTemplateSpecification:
+            LaunchTemplateName: !Ref LaunchTemplateName
+            Version: !Ref LaunchTemplateVersion
+          # this will be replaced out-of-band, CFN really doesn't want you to pass in sub-structs as JSON
+          Overrides: PLACEHOLDER_LAUNCH_TEMPLATE_OVERRIDES
+      VPCZoneIdentifier:
+        !Ref SubnetIds
+      Tags:
+        # necessary for kubelet's legacy, in-tree cloud provider
+        - Key: !Sub kubernetes.io/cluster/${ClusterName}
+          Value: owned
+          PropagateAtLaunch: true
diff --git a/tests/assets/eks_node_group_launch_template_al2023.yaml b/tests/assets/eks_node_group_launch_template_al2023.yaml
index 3cb6b30e..241a5f3c 100644
--- a/tests/assets/eks_node_group_launch_template_al2023.yaml
+++ b/tests/assets/eks_node_group_launch_template_al2023.yaml
@@ -29,10 +29,22 @@ Parameters:
     Type: String
     Description: Launch template ImageId value, which may be an AMI ID or resolve:ssm reference.
     Default: ''
+  NodeRoleName:
+    Type: String
+    Description: Name of the IAM Role for the node instances.
+  SecurityGroup:
+    Type: AWS::EC2::SecurityGroup::Id
+    Description: EKS-created cluster security group that allows node communication with the control plane.
 Conditions:
   AMIProvided:
     !Not [!Equals [!Ref AMI, '']]
 Resources:
+  NodeInstanceProfile:
+    Type: AWS::IAM::InstanceProfile
+    Properties:
+      Path: "/"
+      Roles:
+        - !Ref NodeRoleName
   LaunchTemplate:
     Type: AWS::EC2::LaunchTemplate
     Properties:
@@ -50,6 +62,10 @@
         HttpPutResponseHopLimit: 2
         HttpEndpoint: enabled
         HttpTokens: required
+      IamInstanceProfile:
+        Arn: !GetAtt NodeInstanceProfile.Arn
+      SecurityGroupIds:
+        - !Ref SecurityGroup
       ImageId:
         !If
         - AMIProvided
diff --git a/tests/tasks/setup/eks/awscli-cfn-lt-al2023.yaml b/tests/tasks/setup/eks/awscli-cfn-lt-al2023.yaml
index 975e4694..619a3275 100644
--- a/tests/tasks/setup/eks/awscli-cfn-lt-al2023.yaml
+++ b/tests/tasks/setup/eks/awscli-cfn-lt-al2023.yaml
@@ -30,6 +30,8 @@ spec:
     - name: ami
      default: ""
      description: The AMI ID (or SSM parameter) to use for the launch template. If not provided, the launch template will not specify an AMI.
+    - name: node-role-name
+      description: The name of the IAM role to use for the node's instance profile, specified in the launch template.
   workspaces:
     - name: config
       mountPath: /config/
@@ -80,6 +82,8 @@ spec:
           --arg CertificateAuthority "$(jq -r .cluster.certificateAuthority.data cluster.json)" \
           --arg KubeletConfig '$(params.kubelet-config)' \
           --arg AMI "$(params.ami)" \
+          --arg SecurityGroup "$(jq -r .cluster.resourcesVpcConfig.clusterSecurityGroupId cluster.json)" \
+          --arg NodeRoleName '$(params.node-role-name)' \
           '$ARGS.named | to_entries | map({"ParameterKey": .key, "ParameterValue": .value})' \
           > parameters.json
 
@@ -88,6 +92,7 @@ spec:
           --stack-name $STACK_NAME \
           --template-body file://$(pwd)/amazon-ng-cfn \
           --parameters file://$(pwd)/parameters.json \
+          --capabilities CAPABILITY_IAM \
           --region $(params.region)
 
         aws cloudformation wait stack-create-complete --stack-name $STACK_NAME --region $(params.region)
diff --git a/tests/tasks/setup/eks/awscli-mng.yaml b/tests/tasks/setup/eks/awscli-mng.yaml
index be38065d..bba9f6e0 100644
--- a/tests/tasks/setup/eks/awscli-mng.yaml
+++ b/tests/tasks/setup/eks/awscli-mng.yaml
@@ -6,8 +6,8 @@ metadata:
   namespace: scalability
 spec:
   description: |
-    Create an EKS managed nodegroup for a given cluster.
-    This Task can be used to create an EKS managed nodegroup for a given VPC Subnets, security groups and service role in an AWS account.
+    Create an EKS nodegroup, managed or unmanaged, for a given cluster.
+    This Task can be used to create an EKS managed or unmanaged nodegroup with the given VPC subnets, security groups, and service role in an AWS account.
   params:
     - name: cluster-name
       description: The name of the EKS cluster you want to spin managed nodegroups for.
@@ -36,6 +36,12 @@ spec:
     - name: nodegroup-prefix
       description: Prefix that needs to be appended to asg names.
       default: ""
+    - name: unmanaged-nodegroup-cfn-url
+      default: ""
+      description: URL for "unmanaged nodegroup" (AutoScaling group) CloudFormation template. If not specified, a managed nodegroup will be created.
+    - name: launch-template-name
+      default: "$(params.cluster-name)-launchTemplate"
+      description: Name of the launch template to be used for the nodegroup.
   workspaces:
     - name: config
       mountPath: /config/
@@ -47,6 +53,11 @@ spec:
     - name: create-nodegroup
       image: alpine/k8s:1.23.7
       script: |
+        set -o xtrace
+        set -o errexit
+        set -o pipefail
+        set -o nounset
+
         ENDPOINT_FLAG=""
 
         NODE_ROLE_NAME=$(params.host-cluster-node-role-name)
@@ -60,9 +71,8 @@ spec:
           TAINTS_FLAG="--taints $(params.host-taints)"
         fi
 
-        NG_SUBNETS=$(aws eks $ENDPOINT_FLAG --region $(params.region) describe-cluster --name $(params.cluster-name) \
-          --query cluster.resourcesVpcConfig.subnetIds --output text \
-        )
+        aws eks $ENDPOINT_FLAG --region $(params.region) describe-cluster --name $(params.cluster-name) --output json > cluster.json
+        NG_SUBNETS=$(jq -r '.cluster.resourcesVpcConfig.subnetIds | join(" ")' cluster.json)
 
         max_nodes=$(params.max-nodes)
         nodes=$(params.desired-nodes)
@@ -72,33 +82,71 @@ spec:
 
         create_and_validate_dp_nodes() {
          node_group_name=$node_group-$1
-          launch_template_name=$(params.cluster-name)-launchTemplate
-          CREATED_NODEGROUP=$(aws eks $ENDPOINT_FLAG --region $(params.region) list-nodegroups --cluster-name $(params.cluster-name) --query 'nodegroups[?@==`'$node_group_name'`]' --output text)
          EC2_INSTANCES=$3
-          if [ "$CREATED_NODEGROUP" == "" ]; then
-            #create node group
-            aws eks $ENDPOINT_FLAG create-nodegroup \
-            --cluster-name $(params.cluster-name) \
-            --nodegroup-name $node_group_name \
-            --node-role $NODE_ROLE_ARN \
-            --launch-template name=$launch_template_name\
-            --region $(params.region) \
-            --instance-types $EC2_INSTANCES \
-            --scaling-config minSize=$(params.min-nodes),maxSize=$2,desiredSize=$2 \
-            --subnets $NG_SUBNETS $TAINTS_FLAG
+          # if no unmanaged nodegroup cfn template is provided, assume we want managed nodegroups
+          if [ "$(params.unmanaged-nodegroup-cfn-url)" = "" ]; then
+            CREATED_NODEGROUP=$(aws eks $ENDPOINT_FLAG --region $(params.region) list-nodegroups --cluster-name $(params.cluster-name) --query 'nodegroups[?@==`'$node_group_name'`]' --output text)
+            if [ "$CREATED_NODEGROUP" == "" ]; then
+              aws eks $ENDPOINT_FLAG create-nodegroup \
+                --cluster-name $(params.cluster-name) \
+                --nodegroup-name $node_group_name \
+                --node-role $NODE_ROLE_ARN \
+                --launch-template name=$(params.launch-template-name) \
+                --region $(params.region) \
+                --instance-types $EC2_INSTANCES \
+                --scaling-config minSize=$(params.min-nodes),maxSize=$2,desiredSize=$2 \
+                --subnets $NG_SUBNETS $TAINTS_FLAG
+            fi
+            echo "CREATED_NODEGROUP=$node_group_name"
+            while [[ "$(aws eks $ENDPOINT_FLAG --region $(params.region) describe-nodegroup --cluster-name $(params.cluster-name) --nodegroup-name $node_group_name --query nodegroup.status --output text)" == "CREATING" ]]
+            do
+              echo "$node_group_name is "CREATING" at $(date)"
+              sleep 2
+            done
+            # TODO: do this for unmanaged nodes as well
+            # right now we don't have an appropriate label to filter on for unmanaged nodes
+            while true; do
+              ready_node=$(kubectl get nodes -l eks.amazonaws.com/nodegroup=$node_group_name --no-headers 2>/dev/null | grep -w Ready | wc -l)
+              echo "ready-nodes=$ready_node out of $2, for nodegroup: $node_group_name"
+              if [[ "$ready_node" -eq $2 ]]; then break; fi
+              sleep 5
+            done
+          else
+            STACK_NAME=$node_group_name
+            STACK_STATUS=$(aws cloudformation describe-stacks --query 'Stacks[?StackName==`'${STACK_NAME}'`].StackStatus' --output text --region $(params.region))
+            if [[ "$STACK_STATUS" == "" ]]; then
+              curl -s $(params.unmanaged-nodegroup-cfn-url) -o ./cfn-template
+
+              # assemble the stack parameters as a JSON file
+              # the AWS CLI can't handle a JSON string as a ParameterValue in the flag representation
+              # and we need that for kubelet-config
+              jq --null-input \
+                --arg LaunchTemplateName "$(params.launch-template-name)" \
+                --arg ClusterName "$(params.cluster-name)" \
+                --arg AutoScalingGroupName "${node_group_name}" \
+                --arg NodeCount "$2" \
+                --arg SubnetIds $(jq -r '.cluster.resourcesVpcConfig.subnetIds | join(",")' cluster.json) \
+                --arg SecurityGroup "$(jq -r '.cluster.resourcesVpcConfig.clusterSecurityGroupId' cluster.json)" \
+                --arg VpcId $(jq -r '.cluster.resourcesVpcConfig.vpcId' cluster.json) \
+                '$ARGS.named | to_entries | map({"ParameterKey": .key, "ParameterValue": .value})' \
+                > parameters.json
+
+              # cloudformation really fights you every step of the way to pass JSON in, so let's just hack it
+              LAUNCH_TEMPLATE_OVERRIDES=$(echo "$EC2_INSTANCES" | jq -R -c 'split(" ") | map({"InstanceType": .})')
+              sed -i "s/PLACEHOLDER_LAUNCH_TEMPLATE_OVERRIDES/$LAUNCH_TEMPLATE_OVERRIDES/g" cfn-template
+
+              aws cloudformation create-stack \
+                --region $(params.region) \
+                --stack-name $STACK_NAME \
+                --template-body file://$(pwd)/cfn-template \
+                --parameters file://$(pwd)/parameters.json
+
+              aws cloudformation wait stack-create-complete --stack-name $STACK_NAME --region $(params.region)
+              echo "CREATED_CFN_STACK=$STACK_NAME"
+            else
+              echo "$STACK_NAME Already exists"
+            fi
           fi
-          echo "CREATED_NODEGROUP=$node_group_name"
-          while [[ "$(aws eks $ENDPOINT_FLAG --region $(params.region) describe-nodegroup --cluster-name $(params.cluster-name) --nodegroup-name $node_group_name --query nodegroup.status --output text)" == "CREATING" ]]
-          do
-            echo "$node_group_name is "CREATING" at $(date)"
-            sleep 2
-          done
-          while true; do
-            ready_node=$(kubectl get nodes -l eks.amazonaws.com/nodegroup=$node_group_name --no-headers 2>/dev/null | grep -w Ready | wc -l)
-            echo "ready-nodes=$ready_node out of $2, for nodegroup: $node_group_name"
-            if [[ "$ready_node" -eq $2 ]]; then break; fi
-            sleep 5
-          done
         }
         for i in $(seq 1 $asgs)
         do
@@ -119,4 +167,4 @@ spec:
         kubectl describe clusterrole eks:node-manager
         kubectl get nodes -o wide
         kubectl get ns
-        kubectl get cs
\ No newline at end of file
+        kubectl get cs

From 37fd87aa2df0ae585857c944ba638f43e8ef4c53 Mon Sep 17 00:00:00 2001
From: Shiv Bhosale
Date: Tue, 25 Mar 2025 08:00:20 +0000
Subject: [PATCH 2/3] tiny fixes

---
 .../tasks/setup/eks/awscli-cfn-lt-al2023.yaml          | 2 +-
 tests/tekton-resources/tasks/setup/eks/awscli-mng.yaml | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/tekton-resources/tasks/setup/eks/awscli-cfn-lt-al2023.yaml b/tests/tekton-resources/tasks/setup/eks/awscli-cfn-lt-al2023.yaml
index 619a3275..bf98b905 100644
--- a/tests/tekton-resources/tasks/setup/eks/awscli-cfn-lt-al2023.yaml
+++ b/tests/tekton-resources/tasks/setup/eks/awscli-cfn-lt-al2023.yaml
@@ -64,7 +64,7 @@ spec:
           aws ec2 create-key-pair --region $(params.region) --key-name $SSH_KEY_NAME --query 'KeyMaterial' --output text
         fi
 
-        aws eks describe-cluster --name $(params.cluster-name) --region $(params.region) --output json > cluster.json
+        aws eks $ENDPOINT_FLAG describe-cluster --name $(params.cluster-name) --region $(params.region) --output json > cluster.json
 
         launch_template_name=$(params.cluster-name)-launchTemplate
         STACK_NAME=$(params.stack-name)
diff --git a/tests/tekton-resources/tasks/setup/eks/awscli-mng.yaml b/tests/tekton-resources/tasks/setup/eks/awscli-mng.yaml
index bba9f6e0..6ded585a 100644
--- a/tests/tekton-resources/tasks/setup/eks/awscli-mng.yaml
+++ b/tests/tekton-resources/tasks/setup/eks/awscli-mng.yaml
@@ -40,7 +40,7 @@ spec:
       default: ""
       description: URL for "unmanaged nodegroup" (AutoScaling group) CloudFormation template. If not specified, a managed nodegroup will be created.
     - name: launch-template-name
-      default: "$(params.cluster-name)-launchTemplate"
+      default: ""
       description: Name of the launch template to be used for the nodegroup.
   workspaces:
     - name: config
@@ -167,4 +167,4 @@ spec:
         kubectl describe clusterrole eks:node-manager
         kubectl get nodes -o wide
         kubectl get ns
-        kubectl get cs
+        kubectl get cs
\ No newline at end of file

From cf68361be7f7f8479db7712274c2673020cbf968 Mon Sep 17 00:00:00 2001
From: Shiv Bhosale
Date: Wed, 26 Mar 2025 10:38:15 +0000
Subject: [PATCH 3/3] apply an aws-auth config map with groups necessary for letting nodes join the cluster

---
 .../tasks/setup/eks/awscli-mng.yaml | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/tests/tekton-resources/tasks/setup/eks/awscli-mng.yaml b/tests/tekton-resources/tasks/setup/eks/awscli-mng.yaml
index 6ded585a..ff25c90e 100644
--- a/tests/tekton-resources/tasks/setup/eks/awscli-mng.yaml
+++ b/tests/tekton-resources/tasks/setup/eks/awscli-mng.yaml
@@ -112,6 +112,35 @@ spec:
               sleep 5
             done
           else
+            echo "Creating or updating aws-auth ConfigMap..."
+            if ! kubectl get configmap aws-auth -n kube-system &> /dev/null; then
+              # Download the official template as shown in the Amazon EKS User Guide:
+              # https://docs.aws.amazon.com/eks/latest/userguide/auth-configmap.html#aws-auth-configmap
+              curl -O https://s3.us-west-2.amazonaws.com/amazon-eks/cloudformation/2020-10-29/aws-auth-cm.yaml
+
+              # Replace the placeholder with our role ARN and apply the configmap
+              sed -i.bak -e "s|<ARN of instance role (not instance profile)>|${NODE_ROLE_ARN}|" aws-auth-cm.yaml
+
+              kubectl apply -f aws-auth-cm.yaml
+              echo "Created aws-auth ConfigMap"
+              # Wait for the config map to be ready
+              echo "Verifying aws-auth ConfigMap..."
+              for i in {1..10}; do
+                if kubectl get configmap aws-auth -n kube-system -o yaml | grep -q "${NODE_ROLE_ARN}"; then
+                  echo "aws-auth ConfigMap verified successfully"
+                  break
+                fi
+                if [ $i -eq 10 ]; then
+                  echo "Warning: Could not verify aws-auth ConfigMap after 10 attempts"
+                else
+                  echo "Waiting for aws-auth ConfigMap to be ready... attempt $i"
+                  sleep 5
+                fi
+              done
+            else
+              echo "aws-auth ConfigMap already exists"
+            fi
+
             STACK_NAME=$node_group_name
             STACK_STATUS=$(aws cloudformation describe-stacks --query 'Stacks[?StackName==`'${STACK_NAME}'`].StackStatus' --output text --region $(params.region))
             if [[ "$STACK_STATUS" == "" ]]; then
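
For context on the aws-auth step in PATCH 3/3: the aws-auth-cm.yaml downloaded from the EKS User Guide URL is a stock ConfigMap manifest whose rolearn placeholder is what the sed line substitutes with $NODE_ROLE_ARN. A rough sketch of the rendered manifest after substitution, based on the template published in the EKS User Guide (the file served at that URL may differ, and the ARN below is a hypothetical example):

apiVersion: v1
kind: ConfigMap
metadata:
  name: aws-auth
  namespace: kube-system
data:
  mapRoles: |
    - rolearn: arn:aws:iam::111122223333:role/example-node-role  # hypothetical value of $NODE_ROLE_ARN
      username: system:node:{{EC2PrivateDNSName}}
      groups:
        - system:bootstrappers
        - system:nodes

The system:bootstrappers and system:nodes groups are what allow kubelets on the unmanaged (AutoScaling group) nodes to authenticate and register with the cluster; managed nodegroups get this role mapping from EKS automatically, which is why the ConfigMap is only applied on the unmanaged path.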