From fa95cb46c3d54b83bf2c55a7b0c9585e6217c8d5 Mon Sep 17 00:00:00 2001 From: Ihor Dvoretskyi Date: Tue, 18 Mar 2025 19:47:22 +0200 Subject: [PATCH 1/3] add README for Akamai provider in CNCF self-hosted runners Signed-off-by: Ihor Dvoretskyi --- ci/cluster/akamai/README.md | 56 +++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 ci/cluster/akamai/README.md diff --git a/ci/cluster/akamai/README.md b/ci/cluster/akamai/README.md new file mode 100644 index 0000000..1eb33e4 --- /dev/null +++ b/ci/cluster/akamai/README.md @@ -0,0 +1,56 @@ +# Akamai Provider for CNCF Self-Hosted Runners + +This directory contains automation tools and configurations for deploying and managing CNCF GitHub self-hosted runners on Akamai infrastructure. + +## Overview + +The Akamai provider enables CNCF projects to leverage Akamai's cloud infrastructure for running CI/CD workflows with GitHub Actions. These self-hosted runners offer enhanced performance, customized environments, and dedicated resources tailored to CNCF project needs. + +This automation specifically provisions and manages **Linode managed Kubernetes clusters** and deploys **Actions Runner Controller (ARC)** to handle GitHub Actions workloads efficiently. 
+ +## Features + +- Automated provisioning of managed Kubernetes clusters on Linode +- Deployment and configuration of Actions Runner Controller (ARC) +- Runner configuration and registration with GitHub +- Auto-scaling capabilities based on workflow demand +- Monitoring and maintenance utilities +- Support for multiple GitHub organizations and repositories + +## Prerequisites + +- Akamai cloud account with appropriate permissions +- Linode API credentials for Kubernetes cluster management +- Service account credentials configured for automation +- GitHub Personal Access Token (PAT) with appropriate permissions + +## Configuration + +Configuration is managed through environment variables and config files: + +- `AKAMAI_API_KEY`: API key for accessing Akamai services +- `AKAMAI_API_SECRET`: API secret for authentication +- `LINODE_API_TOKEN`: API token for Linode Kubernetes service +- `GITHUB_PAT`: GitHub Personal Access Token for runner registration + +See the sample configuration file in `config-example.yaml` for detailed settings. + +## Usage + +Detailed usage instructions for provisioning and managing runners are coming soon. + +## Kubernetes Deployment + +This provider automatically: +1. Creates a Kubernetes cluster in Linode +2. Installs and configures Actions Runner Controller using Helm +3. Sets up runner scale sets for GitHub repositories/organizations +4. Configures auto-scaling based on workflow demand + +## Troubleshooting + +Common issues and their solutions will be documented as they are encountered. + +## Contributing + +Contributions to improve the Akamai provider are welcome! Please follow the contributing guidelines in the root of this repository. 
From 4f61f1b8e3e45fd23f56a99f63ef7d5ffa456c63 Mon Sep 17 00:00:00 2001 From: Ihor Dvoretskyi Date: Tue, 18 Mar 2025 19:59:59 +0200 Subject: [PATCH 2/3] add Akamai provider setup and documentation for CNCF self-hosted runners PoC Signed-off-by: Ihor Dvoretskyi --- ci/cluster/akamai/README.md | 8 +- ci/cluster/akamai/SETUP.md | 134 ++++++++++++++++++++++++++ ci/cluster/akamai/config-example.yaml | 29 ++++++ ci/cluster/akamai/main.tf | 125 ++++++++++++++++++++++++ ci/cluster/akamai/outputs.tf | 19 ++++ ci/cluster/akamai/providers.tf | 3 + ci/cluster/akamai/variables.tf | 41 ++++++++ 7 files changed, 358 insertions(+), 1 deletion(-) create mode 100644 ci/cluster/akamai/SETUP.md create mode 100644 ci/cluster/akamai/config-example.yaml create mode 100644 ci/cluster/akamai/main.tf create mode 100644 ci/cluster/akamai/outputs.tf create mode 100644 ci/cluster/akamai/providers.tf create mode 100644 ci/cluster/akamai/variables.tf diff --git a/ci/cluster/akamai/README.md b/ci/cluster/akamai/README.md index 1eb33e4..41f7c57 100644 --- a/ci/cluster/akamai/README.md +++ b/ci/cluster/akamai/README.md @@ -1,7 +1,9 @@ -# Akamai Provider for CNCF Self-Hosted Runners +# Akamai Provider for CNCF Self-Hosted Runners (PoC) This directory contains automation tools and configurations for deploying and managing CNCF GitHub self-hosted runners on Akamai infrastructure. +> **Note:** This implementation is currently in Proof of Concept (PoC) stage. + ## Overview The Akamai provider enables CNCF projects to leverage Akamai's cloud infrastructure for running CI/CD workflows with GitHub Actions. These self-hosted runners offer enhanced performance, customized environments, and dedicated resources tailored to CNCF project needs. @@ -39,6 +41,10 @@ See the sample configuration file in `config-example.yaml` for detailed settings Detailed usage instructions for provisioning and managing runners are coming soon. 
+### Proof of Concept Deployment + +This PoC uses an intentionally cost-effective setup with spot instances to demonstrate the functionality at minimal expense. The configuration is not intended for production use without appropriate adjustments. + ## Kubernetes Deployment This provider automatically: diff --git a/ci/cluster/akamai/SETUP.md b/ci/cluster/akamai/SETUP.md new file mode 100644 index 0000000..106a11e --- /dev/null +++ b/ci/cluster/akamai/SETUP.md @@ -0,0 +1,134 @@ +# Setting Up Linode Kubernetes Engine with OpenTofu + +This guide walks you through deploying a Linode Kubernetes Engine (LKE) cluster using OpenTofu. + +## Prerequisites + +- [OpenTofu](https://opentofu.org/docs/intro/install/) installed +- Linode account with API token +- GitHub Personal Access Token with appropriate permissions + +## Installation Steps + +### 1. Install OpenTofu + +If you haven't already installed OpenTofu, follow these instructions: + +```bash +# For Linux/macOS +brew install opentofu/tap/opentofu + +# Alternatively, you can download directly from the releases +# https://github.com/opentofu/opentofu/releases +``` + +### 2. Configure Environment Variables + +Create a `.env` file (which is ignored by git) to store your sensitive credentials: + +```bash +# Create and edit .env file +touch .env +``` + +Add the following content to the `.env` file: + +``` +export TF_VAR_linode_api_token="your-linode-api-token" +export TF_VAR_github_token="your-github-pat" +``` + +Source the environment variables: + +```bash +source .env +``` + +### 3. Initialize OpenTofu + +```bash +cd ci/cluster/akamai +tofu init +``` + +This will download the necessary providers defined in the configuration. + +### 4. Review the Execution Plan + +```bash +tofu plan +``` + +This will show you what resources will be created without actually creating them. + +### 5. 
Apply the Configuration + +When you're ready to create the cluster: + +```bash +tofu apply +``` + +Review the planned changes and type `yes` to confirm. + +### 6. Access Your Kubernetes Cluster + +After successful deployment, OpenTofu will generate a `kubeconfig.yaml` file in the current directory: + +```bash +export KUBECONFIG=$(pwd)/kubeconfig.yaml +kubectl get nodes +``` + +### 7. Verify Actions Runner Controller Installation + +Check that ARC is running in the cluster: + +```bash +kubectl -n arc-system get pods +``` + +### 8. Create Runner Scale Sets + +After ARC is installed, you can create runner scale sets for your GitHub organizations or repositories: + +```bash +kubectl apply -f - < Date: Tue, 16 Dec 2025 17:57:29 +0200 Subject: [PATCH 3/3] refactor: update Akamai provider setup and documentation for GitHub Actions runners - Enhance .gitignore to include OpenTofu and Terraform files - Revise README.md for clarity and detail on Akamai provider usage - Remove outdated SETUP.md and config-example.yaml files - Update main.tf with improved resource configurations and variable handling - Add terraform.tfvars.example for user configuration guidance - Enhance variables.tf with validation rules for better input handling - Modify outputs.tf to provide clearer output descriptions - Update providers.tf to streamline provider configurations Signed-off-by: Ihor Dvoretskyi --- .gitignore | 20 ++ ci/cluster/akamai/README.md | 268 ++++++++++++++++++--- ci/cluster/akamai/SETUP.md | 134 ----------- ci/cluster/akamai/config-example.yaml | 29 --- ci/cluster/akamai/main.tf | 98 +++++--- ci/cluster/akamai/outputs.tf | 31 ++- ci/cluster/akamai/providers.tf | 19 ++ ci/cluster/akamai/terraform.tfvars.example | 22 ++ ci/cluster/akamai/variables.tf | 93 ++++++- 9 files changed, 458 insertions(+), 256 deletions(-) delete mode 100644 ci/cluster/akamai/SETUP.md delete mode 100644 ci/cluster/akamai/config-example.yaml create mode 100644 ci/cluster/akamai/terraform.tfvars.example diff 
--git a/.gitignore b/.gitignore index 74e02e1..74adf3b 100644 --- a/.gitignore +++ b/.gitignore @@ -98,3 +98,23 @@ python_env_cncfpeople python_venv_ambassadors credentials.json token.json + +# OpenTofu and Terraform +**/.terraform/* +**/.terraform.lock.hcl +*.tfstate +*.tfstate.* +*.tfvars +*.tfvars.json +override.tf +override.tf.json +*_override.tf +*_override.tf.json +.terraformrc +terraform.rc +crash.log +crash.*.log + +# Kubernetes configs generated by IaC +**/kubeconfig.yaml +**/kubeconfig.yml diff --git a/ci/cluster/akamai/README.md b/ci/cluster/akamai/README.md index 41f7c57..9a5b963 100644 --- a/ci/cluster/akamai/README.md +++ b/ci/cluster/akamai/README.md @@ -1,62 +1,252 @@ -# Akamai Provider for CNCF Self-Hosted Runners (PoC) +# GitHub Actions Self-Hosted Runners on Akamai/Linode -This directory contains automation tools and configurations for deploying and managing CNCF GitHub self-hosted runners on Akamai infrastructure. - -> **Note:** This implementation is currently in Proof of Concept (PoC) stage. +OpenTofu configuration for deploying GitHub Actions self-hosted runners on Akamai infrastructure using Linode Kubernetes Engine (LKE). ## Overview -The Akamai provider enables CNCF projects to leverage Akamai's cloud infrastructure for running CI/CD workflows with GitHub Actions. These self-hosted runners offer enhanced performance, customized environments, and dedicated resources tailored to CNCF project needs. +This infrastructure-as-code provisions a Linode Kubernetes Engine cluster and deploys Actions Runner Controller (ARC) to manage GitHub Actions self-hosted runners. OpenTofu is used as the open source, Linux Foundation-maintained infrastructure-as-code tool. -This automation specifically provisions and manages **Linode managed Kubernetes clusters** and deploys **Actions Runner Controller (ARC)** to handle GitHub Actions workloads efficiently. 
+## Prerequisites -## Features +**Required Tools:** +- [OpenTofu](https://opentofu.org/docs/intro/install/) v1.6.0+ +- [kubectl](https://kubernetes.io/docs/tasks/tools/) -- Automated provisioning of managed Kubernetes clusters on Linode -- Deployment and configuration of Actions Runner Controller (ARC) -- Runner configuration and registration with GitHub -- Auto-scaling capabilities based on workflow demand -- Monitoring and maintenance utilities -- Support for multiple GitHub organizations and repositories +**Required Credentials:** +- Linode API Token with read/write permissions +- GitHub Personal Access Token with `admin:org` scope (required for managing organization runners) -## Prerequisites +## Configuration -- Akamai cloud account with appropriate permissions -- Linode API credentials for Kubernetes cluster management -- Service account credentials configured for automation -- GitHub Personal Access Token (PAT) with appropriate permissions +### Using Environment Variables -## Configuration +The recommended approach is to use environment variables: -Configuration is managed through environment variables and config files: +```bash +export TF_VAR_linode_api_token="your-linode-token" +export TF_VAR_github_token="your-github-pat" +export TF_VAR_github_organization="your-org-name" +``` -- `AKAMAI_API_KEY`: API key for accessing Akamai services -- `AKAMAI_API_SECRET`: API secret for authentication -- `LINODE_API_TOKEN`: API token for Linode Kubernetes service -- `GITHUB_PAT`: GitHub Personal Access Token for runner registration +### Using terraform.tfvars File -See the sample configuration file in `config-example.yaml` for detailed settings. +Alternatively, copy `terraform.tfvars.example` to `terraform.tfvars` and fill in your values: -## Usage +```bash +cp terraform.tfvars.example terraform.tfvars +# Edit terraform.tfvars with your values +``` -Detailed usage instructions for provisioning and managing runners are coming soon. 
+**Required Variables:** +- `linode_api_token` - Linode API token +- `github_token` - GitHub PAT for runner registration +- `github_organization` - GitHub organization name -### Proof of Concept Deployment +**Optional Variables with Defaults:** +- `cluster_name` - Cluster name (default: "github-runners") +- `kubernetes_version` - Kubernetes version (default: "1.34") +- `region` - Linode region (default: "us-east") +- `node_type` - Instance type (default: "g6-standard-1") +- `node_count` - Initial node count (default: 1) +- `autoscaler_min` - Minimum nodes (default: 1) +- `autoscaler_max` - Maximum nodes (default: 3) +- `environment` - Environment name (default: "dev") +- `arc_version` - ARC Helm chart version (default: "0.23.7") -This PoC uses an intentionally cost-effective setup with spot instances to demonstrate the functionality at minimal expense. The configuration is not intended for production use without appropriate adjustments. +See [variables.tf](variables.tf) for complete variable documentation. -## Kubernetes Deployment +## Deployment -This provider automatically: -1. Creates a Kubernetes cluster in Linode -2. Installs and configures Actions Runner Controller using Helm -3. Sets up runner scale sets for GitHub repositories/organizations -4. 
Configures auto-scaling based on workflow demand +### Phase 1: Initialize and Deploy Cluster -## Troubleshooting +```bash +# Initialize OpenTofu +tofu init + +# Validate configuration +tofu validate + +# Deploy cluster and kubeconfig +tofu apply -target=linode_lke_cluster.github_runners -target=local_file.kubeconfig +``` + +### Phase 2: Deploy Kubernetes Resources + +```bash +# Deploy namespace, secrets, and ARC controller +tofu apply -target=kubernetes_namespace.arc_system \ + -target=kubernetes_secret.github_token \ + -target=helm_release.arc +``` + +### Phase 3: Deploy Runner Scale Set + +```bash +# Deploy runner deployment (requires ARC CRDs to be installed) +tofu apply +``` + +### Access the Cluster + +```bash +export KUBECONFIG=$(pwd)/kubeconfig.yaml +kubectl get nodes +kubectl get pods -n arc-system +``` + +## Multi-Phase Deployment Requirement + +The deployment must be executed in multiple phases due to OpenTofu provider dependencies: + +1. **Phase 1**: Create the LKE cluster and generate kubeconfig file + - The Kubernetes and Helm providers require the cluster to exist + - The kubeconfig is needed for provider configuration -Common issues and their solutions will be documented as they are encountered. +2. **Phase 2**: Deploy Kubernetes namespace, secrets, and ARC Helm chart + - Requires the cluster from Phase 1 + - Installs ARC Custom Resource Definitions (CRDs) -## Contributing +3. **Phase 3**: Deploy runner scale set manifest + - Requires ARC CRDs to be installed from Phase 2 + - Creates the actual runner deployment + +This phased approach ensures proper dependency resolution and successful deployment. 
+ +## What Gets Deployed + +- **LKE Cluster**: Managed Kubernetes cluster with auto-scaling nodes +- **Actions Runner Controller**: Helm chart deployed in `arc-system` namespace +- **Runner Scale Set**: GitHub Actions runners managed as Kubernetes pods +- **Kubeconfig**: Local file for cluster access (automatically generated) + +## Viewing Outputs + +After successful deployment, view cluster information: + +```bash +tofu output +``` + +Available outputs: +- `cluster_id` - Unique identifier of the LKE cluster +- `api_endpoints` - Kubernetes API endpoints +- `cluster_status` - Operational status +- `kubeconfig_path` - Path to generated kubeconfig file +- `region` - Deployment region +- `k8s_version` - Kubernetes version + +## Cleanup + +To destroy all infrastructure: + +```bash +tofu destroy +``` + +This will remove all resources including the cluster, runner controller, and associated resources. + +## Cost Optimization + +This configuration uses minimal resources for cost-effectiveness: +- Instance type: `g6-standard-1` (1 vCPU, 2GB RAM) +- Node count: 1 (minimum), auto-scaling up to 3 +- Single availability zone deployment + +For production workloads, consider adjustments for: +- Performance requirements (larger instance types) +- High availability (multiple nodes, multi-zone) +- Security requirements (private networking, ACLs) + +Estimated costs vary by region and usage. Check current [Linode pricing](https://www.linode.com/pricing/) for accurate estimates. + +## Troubleshooting -Contributions to improve the Akamai provider are welcome! Please follow the contributing guidelines in the root of this repository. 
+### Runners Not Registering + +If runners show `0 CURRENT` in the deployment: + +```bash +# Check ARC controller logs +kubectl logs -n arc-system -l app.kubernetes.io/name=actions-runner-controller + +# Common issue: GitHub token lacks admin:org scope +# Solution: Create a new token with admin:org scope and redeploy +export TF_VAR_github_token="new-token-with-admin-org-scope" +tofu apply +``` + +### Provider Configuration Errors + +If you encounter provider initialization errors: +- Ensure Phase 1 completes successfully before Phase 2 +- Verify the kubeconfig file was created: `ls -l kubeconfig.yaml` +- Check file permissions on kubeconfig: `stat kubeconfig.yaml` + +### Helm Chart Installation Failures + +If ARC Helm chart fails to install: +- Verify Kubernetes cluster is running: `kubectl get nodes` +- Check namespace exists: `kubectl get namespaces` +- Review Helm release status: `kubectl get all -n arc-system` + +## Security Best Practices + +**State File Management:** +- Terraform state files contain sensitive credentials including API tokens and cluster credentials +- Never commit state files to version control (already in `.gitignore`) +- For production environments, use remote state backends with encryption +- Consider using S3 with encryption, Terraform Cloud, or HashiCorp Consul + +**Credential Management:** +- Use environment variables for credentials (do not commit in terraform.tfvars) +- Rotate API tokens and GitHub PATs regularly +- Use GitHub fine-grained tokens with minimal required permissions +- Store credentials in secure secret management systems +- Avoid using long-lived credentials in production + +**Access Control:** +- Apply principle of least privilege to all API tokens +- Use separate credentials for dev/staging/prod environments +- Enable audit logging on cloud provider accounts +- Review access logs and runner activity regularly + +**Network Security:** +- Configure firewall rules to restrict cluster access +- Use private networking 
where possible +- Enable LKE control plane ACLs for production clusters +- Review and implement Linode security best practices + +**Production Deployment Considerations:** +- Enable high availability for the control plane +- Implement Pod Security Standards and network policies +- Configure resource quotas and limits +- Set up comprehensive monitoring and alerting +- Implement backup and disaster recovery procedures +- Use separate node pools for different workload types + +## Architecture + +**Components:** +- **Linode LKE Cluster**: Managed Kubernetes with auto-scaling (1-3 nodes by default) +- **Actions Runner Controller**: Manages GitHub Actions runners as Kubernetes pods +- **Runner Deployment**: Scales runners based on GitHub Actions workload +- **Kubeconfig**: Automatically generated for cluster access with 0600 permissions + +**Resource Dependencies:** +``` +linode_lke_cluster.github_runners + ├── local_file.kubeconfig + ├── kubernetes_namespace.arc_system + │ ├── kubernetes_secret.github_token + │ └── helm_release.arc + │ └── kubernetes_manifest.runner_scale_set +``` + +## Provider Versions + +This configuration uses the following provider versions: +- Linode provider: `~> 2.41.0` +- Helm provider: `~> 2.17.0` +- Kubernetes provider: `~> 2.38.0` + +These are pinned to minor versions for stability while allowing patch updates. diff --git a/ci/cluster/akamai/SETUP.md b/ci/cluster/akamai/SETUP.md deleted file mode 100644 index 106a11e..0000000 --- a/ci/cluster/akamai/SETUP.md +++ /dev/null @@ -1,134 +0,0 @@ -# Setting Up Linode Kubernetes Engine with OpenTofu - -This guide walks you through deploying a Linode Kubernetes Engine (LKE) cluster using OpenTofu. - -## Prerequisites - -- [OpenTofu](https://opentofu.org/docs/intro/install/) installed -- Linode account with API token -- GitHub Personal Access Token with appropriate permissions - -## Installation Steps - -### 1. 
Install OpenTofu - -If you haven't already installed OpenTofu, follow these instructions: - -```bash -# For Linux/macOS -brew install opentofu/tap/opentofu - -# Alternatively, you can download directly from the releases -# https://github.com/opentofu/opentofu/releases -``` - -### 2. Configure Environment Variables - -Create a `.env` file (which is ignored by git) to store your sensitive credentials: - -```bash -# Create and edit .env file -touch .env -``` - -Add the following content to the `.env` file: - -``` -export TF_VAR_linode_api_token="your-linode-api-token" -export TF_VAR_github_token="your-github-pat" -``` - -Source the environment variables: - -```bash -source .env -``` - -### 3. Initialize OpenTofu - -```bash -cd ci/cluster/akamai -tofu init -``` - -This will download the necessary providers defined in the configuration. - -### 4. Review the Execution Plan - -```bash -tofu plan -``` - -This will show you what resources will be created without actually creating them. - -### 5. Apply the Configuration - -When you're ready to create the cluster: - -```bash -tofu apply -``` - -Review the planned changes and type `yes` to confirm. - -### 6. Access Your Kubernetes Cluster - -After successful deployment, OpenTofu will generate a `kubeconfig.yaml` file in the current directory: - -```bash -export KUBECONFIG=$(pwd)/kubeconfig.yaml -kubectl get nodes -``` - -### 7. Verify Actions Runner Controller Installation - -Check that ARC is running in the cluster: - -```bash -kubectl -n arc-system get pods -``` - -### 8. Create Runner Scale Sets - -After ARC is installed, you can create runner scale sets for your GitHub organizations or repositories: - -```bash -kubectl apply -f - < 0 && length(var.cluster_name) <= 32 + error_message = "Cluster name must be between 1 and 32 characters." 
+ } +} + +variable "environment" { + description = "Environment name (e.g., dev, staging, prod)" + type = string + default = "dev" + + validation { + condition = contains(["dev", "staging", "prod"], var.environment) + error_message = "Environment must be one of: dev, staging, prod." + } } variable "kubernetes_version" { description = "The Kubernetes version to use for the cluster" type = string - default = "1.32" # Updated to the most recent version + default = "1.34" + + validation { + condition = can(regex("^[0-9]+\\.[0-9]+$", var.kubernetes_version)) + error_message = "Kubernetes version must be in format X.Y (e.g., 1.34)." + } } variable "region" { description = "The region where the cluster will be deployed" type = string - default = "us-east" # Choose an appropriate region + default = "us-east" + + validation { + condition = length(var.region) > 0 + error_message = "Region must be specified." + } } variable "node_count" { - description = "The number of nodes in the cluster" + description = "The initial number of nodes in the cluster" type = number - default = 1 # Start with minimal resources for PoC + default = 1 + + validation { + condition = var.node_count >= 1 && var.node_count <= 100 + error_message = "Node count must be between 1 and 100." + } } +# Credentials and Authentication variable "github_token" { description = "GitHub Personal Access Token for Actions Runner Controller" type = string @@ -37,5 +73,50 @@ variable "linode_api_token" { variable "github_organization" { description = "GitHub organization for runners" type = string - default = "cncf" + + validation { + condition = length(var.github_organization) > 0 + error_message = "GitHub organization must be specified." 
+ } +} + +# Node Pool Configuration +variable "node_type" { + description = "Linode instance type for cluster nodes" + type = string + default = "g6-standard-1" +} + +variable "autoscaler_min" { + description = "Minimum number of nodes for autoscaler" + type = number + default = 1 + + validation { + condition = var.autoscaler_min >= 1 + error_message = "Autoscaler minimum must be at least 1." + } +} + +variable "autoscaler_max" { + description = "Maximum number of nodes for autoscaler" + type = number + default = 3 + + validation { + condition = var.autoscaler_max >= var.autoscaler_min + error_message = "Autoscaler maximum must be greater than or equal to minimum." + } +} + +# Actions Runner Controller Configuration +variable "arc_version" { + description = "Version of Actions Runner Controller Helm chart" + type = string + default = "0.23.7" + + validation { + condition = can(regex("^[0-9]+\\.[0-9]+\\.[0-9]+$", var.arc_version)) + error_message = "ARC version must be in semantic versioning format (X.Y.Z)." + } }