diff --git a/.github/workflows/alarm-to-slack-checks.yml b/.github/workflows/alarm-to-slack-checks.yml index 582e1e2b..8381a901 100644 --- a/.github/workflows/alarm-to-slack-checks.yml +++ b/.github/workflows/alarm-to-slack-checks.yml @@ -7,12 +7,18 @@ on: paths: - 'terraform/services/alarm-to-slack/lambda_src/**/*.py' - 'terraform/services/alarm-to-slack/lambda_src/**/requirements.txt' - - 'terraform/modules/function/**' jobs: - python-checks: - uses: ./.github/workflows/python-checks-reusable.yml - with: - source_path: terraform/services/alarm-to-slack/lambda_src - sonar_project_key: cdap-alarm-to-slack - sonar_project_name: "CDAP Alarm to Slack" + python-tests: + runs-on: codebuild-cdap-${{ github.ref_name == 'main' && 'prod' || 'non-prod' }}-${{ github.run_id }}-${{ github.run_attempt }} + defaults: + run: + working-directory: terraform/services/alarm-to-slack/lambda_src + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.3.0 + - name: Install dependencies + run: | + pip install -r requirements.txt + pip install pytest pytest-cov + - name: Run tests + run: pytest test_lambda_function.py -v --cov=lambda_function --cov-report=term-missing diff --git a/.github/workflows/set_log_retention_checks.yml b/.github/workflows/set_log_retention_checks.yml index 2339c08b..c49abf78 100644 --- a/.github/workflows/set_log_retention_checks.yml +++ b/.github/workflows/set_log_retention_checks.yml @@ -7,7 +7,6 @@ on: paths: - 'scripts/set_log_retention/*.py' - 'scripts/set_log_retention/requirements.txt' - - 'terraform/modules/function/**' jobs: python-checks: diff --git a/.github/workflows/tftesting-function.yml b/.github/workflows/tftesting-function.yml new file mode 100644 index 00000000..b367daa1 --- /dev/null +++ b/.github/workflows/tftesting-function.yml @@ -0,0 +1,67 @@ +name: tftesting-lambda + +on: + workflow_dispatch: + pull_request: + paths: + - 'terraform/services/tftesting/function/**' + - 'terraform/modules/function/**' + - 
'.github/workflows/tftesting-function.yml' + push: + branches: + - main + paths: + - 'terraform/services/tftesting/function/**' + - 'terraform/modules/function/**' + - '.github/workflows/tftesting-function.yml' + +concurrency: + group: tftesting-function + cancel-in-progress: true + +env: + TENV_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + APP: cdap + ENV: test + TF_DIR: terraform/services/tftesting/function + +permissions: + contents: read + id-token: write + +jobs: + apply: + name: Tofu Apply + runs-on: codebuild-cdap-non-prod-${{ github.run_id }}-${{ github.run_attempt }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.3.0 + - uses: cmsgov/cdap/actions/setup-tenv@f4c14d47cc20e7f6de9112d7155af1213c9bca5a + - uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6.1.0 + with: + role-to-assume: arn:aws:iam::${{ secrets.NON_PROD_ACCOUNT }}:role/delegatedadmin/developer/${{ env.APP }}-${{ env.ENV }}-github-actions + aws-region: ${{ vars.AWS_REGION }} + - name: Tofu Init + working-directory: ${{ env.TF_DIR }} + run: tofu init -reconfigure -backend-config="../../../backends/${{ env.APP }}-${{ env.ENV }}.s3.tfbackend" + - name: Tofu Apply + working-directory: ${{ env.TF_DIR }} + run: tofu apply -auto-approve  # CI is non-interactive: without -auto-approve the approval prompt can never be answered and the step fails + + destroy: + name: Tofu Destroy + if: success() + needs: apply + runs-on: codebuild-cdap-non-prod-${{ github.run_id }}-${{ github.run_attempt }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.3.0 + - uses: cmsgov/cdap/actions/setup-tenv@f4c14d47cc20e7f6de9112d7155af1213c9bca5a + - uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6.1.0 + with: + role-to-assume: arn:aws:iam::${{ secrets.NON_PROD_ACCOUNT }}:role/delegatedadmin/developer/${{ env.APP }}-${{ env.ENV }}-github-actions + aws-region: ${{ vars.AWS_REGION }} + - name: Tofu Init + working-directory: ${{ env.TF_DIR }} + run: tofu init -reconfigure -backend-config="../../../backends/${{ 
env.APP }}-${{ env.ENV }}.s3.tfbackend" + - name: Tofu Destroy + working-directory: ${{ env.TF_DIR }} + run: tofu destroy -auto-approve diff --git a/terraform/modules/function/README.md b/terraform/modules/function/README.md index 2ab524b2..2c4d1c60 100644 --- a/terraform/modules/function/README.md +++ b/terraform/modules/function/README.md @@ -2,4 +2,126 @@ This is a generic module for creating lambda function resources in CMS Cloud. Use it in terraform services where a lambda function is needed. -Note that a dummy function is included to allow for initialization. It is meant to be replaced once the function has been created. +Note that a dummy function is created only when source_dir is not provided and github_actions_repos is empty. +The dummy function allows for infrastructure scaffolding before source code is written. +If source code is written and its lifecycle is managed outside of terraform, set github_actions_repos. + + + +## Providers + +| Name | Version | +|------|---------| +| [archive](#provider\_archive) | n/a | +| [aws](#provider\_aws) | n/a | + + +## Requirements + +No requirements. + + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [app](#input\_app) | The application name (ab2d, bcda, cdap dpc) | `string` | n/a | yes | +| [description](#input\_description) | Description of the lambda function | `string` | n/a | yes | +| [env](#input\_env) | The application environment (dev, test, sandbox, prod) | `string` | n/a | yes | +| [name](#input\_name) | Name of the lambda function | `string` | n/a | yes | +| [architecture](#input\_architecture) | Lambda function CPU architecture. Use arm64 for Graviton (better price/performance for most workloads). | `string` | `"x86_64"` | no | +| [egress\_rules](#input\_egress\_rules) | List of egress rules to apply to the security group | 
list(object({
name = string
from_port = number
to_port = number
protocol = string
cidr_ipv4 = optional(string)
cidr_ipv6 = optional(string)
referenced_sg_id = optional(string)
description = optional(string)
}))
|
[
{
"cidr_ipv4": "0.0.0.0/0",
"description": "Allow all egress traffic (IPv4) - migration default",
"from_port": 0,
"name": "allow-all-ipv4",
"protocol": "-1",
"to_port": 0
},
{
"cidr_ipv6": "::/0",
"description": "Allow all egress traffic (IPv6) - migration default",
"from_port": 0,
"name": "allow-all-ipv6",
"protocol": "-1",
"to_port": 0
}
]
| no | +| [environment\_variables](#input\_environment\_variables) | Map of environment variables for the function | `map(string)` | `{}` | no | +| [extra\_kms\_key\_arns](#input\_extra\_kms\_key\_arns) | Optional list of additional KMS key ARNs the Lambda can use | `list(string)` | `[]` | no | +| [function\_role\_inline\_policies](#input\_function\_role\_inline\_policies) | Inline policies (in JSON) for the function IAM role | `map(string)` | `{}` | no | +| [github\_actions\_repos](#input\_github\_actions\_repos) | Used for integration tests and, when source\_dir is null,
for CI/CD workflows that upload the function zip.
Format: "repo:CMSgov/REPO_NAME:*" or a more specific ref pattern.
Defaults to empty — no GitHub Actions access unless explicitly granted. | `list(string)` | `[]` | no | +| [handler](#input\_handler) | Lambda function handler | `string` | `"function_handler"` | no | +| [layer\_arns](#input\_layer\_arns) | Optional list of layer arns | `list(string)` | `[]` | no | +| [liveness\_check\_enabled](#input\_liveness\_check\_enabled) | Enables a deploy-time liveness check that invokes the Lambda function
immediately after deployment to verify it is healthy and correctly configured.

When enabled, an aws\_lambda\_invocation resource is created that sends a
{ "RequestType": "LivenessCheck" } payload to the Lambda function after
each deployment. The invocation is re-triggered whenever the Lambda source
code changes (tracked via source\_code\_hash).

The Lambda function is responsible for implementing the liveness check logic
in its handler. This may include verifying external dependencies, validating
configuration, checking connectivity to downstream services, or any other
health validation relevant to the function's purpose.

If the liveness check fails, the Lambda should raise an exception. This
surfaces as a function error and causes the Tofu apply to fail, alerting
the deploying team immediately.

Recommended: true in all environments to catch misconfiguration at deploy time. | `bool` | `true` | no | +| [log\_retention\_days](#input\_log\_retention\_days) | Number of days to retain Lambda function logs in CloudWatch. If null, no retention policy is set and retention is managed externally (e.g., via cdap/scripts/set\_log\_retention/). | `number` | `180` | no | +| [memory\_size](#input\_memory\_size) | Lambda function memory size | `number` | `null` | no | +| [rollback\_version](#input\_rollback\_version) | Pin the live alias to a specific version for rollback. Set to null for normal deploys (alias tracks latest published version). | `string` | `null` | no | +| [runtime](#input\_runtime) | Lambda function runtime | `string` | `"python3.11"` | no | +| [schedule\_expression](#input\_schedule\_expression) | Cron or rate expression for a scheduled function | `string` | `""` | no | +| [source\_code\_version](#input\_source\_code\_version) | Optional S3 object version of function.zip uploaded to module's zip\_bucket by external sources. | `string` | `null` | no | +| [source\_dir](#input\_source\_dir) | Path to the Lambda source directory to zip and upload. If set, the module manages zipping and deployment. If null, an external process (or dummy zip) is used. | `string` | `null` | no | +| [source\_dir\_excludes](#input\_source\_dir\_excludes) | List of glob (**/*) patterns to exclude when zipping the source directory. | `list(string)` | `[]` | no | +| [ssm\_parameter\_paths](#input\_ssm\_parameter\_paths) | List of SSM parameter ARNs or path patterns this function is permitted to read.
Each entry should be a full ARN or ARN pattern. This can be retrieved from platform.module.ssm.ssm\_root\_name.parameter\_name.arn.
If empty (default), the function receives no SSM access.
Do not use broad wildcards — scope each entry to the specific parameters this function requires. | `list(string)` | `[]` | no | +| [timeout](#input\_timeout) | Lambda function timeout | `number` | `900` | no | + + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [subnets](#module\_subnets) | ../subnets | n/a | +| [vpc](#module\_vpc) | ../vpc | n/a | +| [zip\_bucket](#module\_zip\_bucket) | ../bucket | n/a | + + +## Resources + +| Name | Type | +|------|------| +| [aws_cloudwatch_event_rule.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource | +| [aws_cloudwatch_event_target.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource | +| [aws_cloudwatch_log_group.function](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | +| [aws_iam_role.function](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role_policy.default_function](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | +| [aws_iam_role_policy.extra_policies](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | +| [aws_lambda_alias.live](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_alias) | resource | +| [aws_lambda_function.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | +| [aws_lambda_invocation.liveness_check](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_invocation) | resource | +| [aws_lambda_permission.cloudwatch_events](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource | +| 
[aws_s3_object.empty_function_zip](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.function_zip](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_security_group.function](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | +| [aws_vpc_security_group_egress_rule.ipv4](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/vpc_security_group_egress_rule) | resource | +| [aws_vpc_security_group_egress_rule.ipv6](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/vpc_security_group_egress_rule) | resource | +| [aws_vpc_security_group_egress_rule.sg_source](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/vpc_security_group_egress_rule) | resource | +| [archive_file.function](https://registry.terraform.io/providers/hashicorp/archive/latest/docs/data-sources/file) | data source | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_iam_openid_connect_provider.github](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_openid_connect_provider) | data source | +| [aws_iam_policy_document.cicd_manage_lambda_objects](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.default_function](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.function_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_role.admin](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_role) | data source | +| 
[aws_iam_role.dasg_admin](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_role) | data source | +| [aws_kms_alias.kms_key](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/kms_alias) | data source | + + +## Outputs + +| Name | Description | +|------|-------------| +| [alias\_arn](#output\_alias\_arn) | ARN of the live alias | +| [function\_version](#output\_function\_version) | Published version number of the Lambda function | +| [name](#output\_name) | Name for the lambda function | +| [role\_arn](#output\_role\_arn) | ARN of the IAM role for the function | +| [security\_group\_id](#output\_security\_group\_id) | ID for the security group for the function | +| [zip\_bucket](#output\_zip\_bucket) | Bucket name for the function.zip file | + diff --git a/terraform/modules/function/iam.tf b/terraform/modules/function/iam.tf new file mode 100644 index 00000000..542312ad --- /dev/null +++ b/terraform/modules/function/iam.tf @@ -0,0 +1,154 @@ +data "aws_iam_openid_connect_provider" "github" { + url = "https://${local.provider_domain}" +} + +data "aws_iam_role" "admin" { + name = "ct-ado-bcda-application-admin" +} + +data "aws_iam_role" "dasg_admin" { + name = "ct-ado-dasg-application-admin" +} + +data "aws_iam_policy_document" "function_assume_role" { + statement { + actions = ["sts:AssumeRole"] + + principals { + type = "Service" + identifiers = ["lambda.amazonaws.com"] + } + } + + # Allow access from GitHub-hosted runners via OIDC for integration tests + dynamic "statement" { + for_each = length(var.github_actions_repos) > 0 ? 
[1] : [] + content { + actions = ["sts:AssumeRoleWithWebIdentity", "sts:TagSession"] + principals { + type = "Federated" + identifiers = [data.aws_iam_openid_connect_provider.github.arn] + } + condition { + test = "StringEquals" + variable = "${local.provider_domain}:aud" + values = ["sts.amazonaws.com"] + } + condition { + test = "StringLike" + variable = "${local.provider_domain}:sub" + values = var.github_actions_repos + } + } + } + + # Allow access from admin role for manual checks + statement { + actions = [ + "sts:AssumeRole", + ] + + principals { + type = "AWS" + identifiers = [data.aws_iam_role.admin.arn, data.aws_iam_role.dasg_admin.arn] + } + } +} + +data "aws_caller_identity" "current" {} + +data "aws_iam_policy_document" "default_function" { + dynamic "statement" { + for_each = length(var.ssm_parameter_paths) > 0 ? [1] : [] + content { + sid = "SSMParameterRead" + actions = [ + "ssm:GetParameter", + "ssm:GetParameters", + ] + resources = var.ssm_parameter_paths + } + } + + statement { + sid = "VPCNetworkingENI" + actions = [ + "ec2:CreateNetworkInterface", + "ec2:DeleteNetworkInterface", + "ec2:DescribeAccountAttributes", + "ec2:DescribeNetworkInterfaces", + ] + resources = ["*"] + } + + statement { + sid = "CloudWatchLogsWrite" + actions = [ + "logs:CreateLogStream", + "logs:PutLogEvents", + ] + resources = ["${aws_cloudwatch_log_group.function.arn}:*"] + } + + statement { + sid = "KMSKeyAccess" + actions = [ + "kms:Encrypt", + "kms:Decrypt", + "kms:GenerateDataKey", + ] + resources = concat( + [data.aws_kms_alias.kms_key.target_key_arn], + var.extra_kms_key_arns + ) + } +} + +resource "aws_iam_role" "function" { + name = "${local.full_name_string}-function" + path = "/delegatedadmin/developer/" + + assume_role_policy = data.aws_iam_policy_document.function_assume_role.json +} + +resource "aws_iam_role_policy" "default_function" { + name = "${local.full_name_string}-default" + role = aws_iam_role.function.id + policy = 
data.aws_iam_policy_document.default_function.json +} + +resource "aws_iam_role_policy" "extra_policies" { + for_each = var.function_role_inline_policies + + name = each.key + role = aws_iam_role.function.id + policy = each.value +} + +# Allow CICD management outside of Tofu runs +data "aws_iam_policy_document" "cicd_manage_lambda_objects" { + statement { + sid = "CICDZipUpload" + actions = [ + "s3:GetObject", + "s3:GetObjectTagging", + "s3:GetObjectVersion", + "s3:GetObjectVersionTagging", + "s3:ListBucket", + "s3:PutObject", + "s3:PutObjectTagging", + ] + + principals { + type = "AWS" + identifiers = [ + "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/delegatedadmin/developer/${var.app}-${var.env}-github-actions", + ] + } + + resources = [ + module.zip_bucket.arn, + "${module.zip_bucket.arn}/*", + ] + } +} diff --git a/terraform/modules/function/main.tf b/terraform/modules/function/main.tf index 4a380452..c3f0c7c1 100644 --- a/terraform/modules/function/main.tf +++ b/terraform/modules/function/main.tf @@ -1,196 +1,53 @@ locals { - provider_domain = "token.actions.githubusercontent.com" - - repos = { - bcda = [ - "repo:CMSgov/bcda-app:*", - ] - cdap = [ - "repo:CMSgov/cdap:*", - ] - dpc = [ - "repo:CMSgov/dpc-app:*", - ] - } + provider_domain = "token.actions.githubusercontent.com" + full_name_string = "${var.app}-${var.env}-${var.name}" } data "aws_kms_alias" "kms_key" { name = "alias/${var.app}-${var.env}" } -data "aws_iam_openid_connect_provider" "github" { - url = "https://${local.provider_domain}" -} - -data "aws_iam_role" "admin" { - name = "ct-ado-bcda-application-admin" -} - -data "aws_iam_role" "dasg_admin" { - name = "ct-ado-dasg-application-admin" -} - -data "aws_iam_policy_document" "function_assume_role" { - statement { - actions = ["sts:AssumeRole"] - - principals { - type = "Service" - identifiers = ["lambda.amazonaws.com"] - } - } - - # Allow access from GitHub-hosted runners via OIDC for integration tests - statement { - actions 
= [ - "sts:AssumeRoleWithWebIdentity", - "sts:TagSession", - ] - - principals { - type = "Federated" - identifiers = [data.aws_iam_openid_connect_provider.github.arn] - } - - condition { - test = "StringEquals" - variable = "${local.provider_domain}:aud" - values = ["sts.amazonaws.com"] - } - - condition { - test = "StringLike" - variable = "${local.provider_domain}:sub" - values = local.repos[var.app] - } - } - - # Allow access from admin role for manual checks - statement { - actions = [ - "sts:AssumeRole", - "sts:TagSession", - ] - - principals { - type = "AWS" - identifiers = [data.aws_iam_role.admin.arn, data.aws_iam_role.dasg_admin.arn] - } - } -} - -data "aws_caller_identity" "current" {} - -data "aws_iam_policy_document" "default_function" { - statement { - actions = [ - "ec2:CreateNetworkInterface", - "ec2:DeleteNetworkInterface", - "ec2:DescribeAccountAttributes", - "ec2:DescribeNetworkInterfaces", - "logs:CreateLogGroup", - "logs:CreateLogStream", - "logs:PutLogEvents", - "sqs:DeleteMessage", - "sqs:GetQueueAttributes", - "sqs:ReceiveMessage", - "ssm:GetParameter", - "ssm:GetParameters", - ] - resources = ["*"] - } - - statement { - actions = [ - "kms:Encrypt", - "kms:Decrypt", - "kms:GenerateDataKey" - ] - resources = concat( - [data.aws_kms_alias.kms_key.target_key_arn], - var.extra_kms_key_arns - ) - } -} - -resource "aws_iam_role" "function" { - name = "${var.name}-function" - path = "/delegatedadmin/developer/" - - permissions_boundary = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:policy/cms-cloud-admin/developer-boundary-policy" - - assume_role_policy = data.aws_iam_policy_document.function_assume_role.json -} - -resource "aws_iam_role_policy" "default_function" { - name = "default-function" - role = aws_iam_role.function.id - policy = data.aws_iam_policy_document.default_function.json -} - -resource "aws_iam_role_policy" "extra_policies" { - for_each = var.function_role_inline_policies - - name = each.key - role = 
aws_iam_role.function.id - policy = each.value -} - -data "aws_ssm_parameter" "prod_account_id" { - count = var.env == "test" ? 1 : 0 - name = "/prod/account-id" -} +# Only used when source_dir is provided +data "archive_file" "function" { + count = var.source_dir != null ? 1 : 0 -data "aws_iam_policy_document" "bucket_cross_account_read_roles_policy" { - count = var.env == "test" ? 1 : 0 - - statement { - actions = [ - "s3:GetObject", - "s3:GetObjectTagging", - "s3:GetObjectVersion", - "s3:GetObjectVersionTagging", - "s3:ListBucket", - ] - - principals { - type = "AWS" - identifiers = [ - "arn:aws:iam::${data.aws_ssm_parameter.prod_account_id[0].value}:role/delegatedadmin/developer/${var.app}-prod-github-actions", - "arn:aws:iam::${data.aws_ssm_parameter.prod_account_id[0].value}:role/delegatedadmin/developer/${var.app}-${var.app == "cdap" ? "mgmt" : "sandbox"}-github-actions", - ] - } - - resources = [ - module.zip_bucket.arn, - "${module.zip_bucket.arn}/*", - ] - - sid = "CrossAccountRead" - } + type = "zip" + source_dir = var.source_dir + output_path = "${path.module}/.terraform/tmp/${var.name}-function.zip" + excludes = var.source_dir_excludes } module "zip_bucket" { source = "../bucket" - additional_bucket_policies = var.env == "test" ? [data.aws_iam_policy_document.bucket_cross_account_read_roles_policy[0].json] : [] + additional_bucket_policies = length(var.github_actions_repos) > 0 ? [data.aws_iam_policy_document.cicd_manage_lambda_objects.json] : [] app = var.app env = var.env - name = "${var.name}-function" + name = "${var.app}-${var.env}-${var.name}-function" ssm_parameter = "/${var.app}/${var.env}/${var.name}-bucket" } +# Managed zip upload — used when source_dir is provided +resource "aws_s3_object" "function_zip" { + count = var.source_dir != null ? 
1 : 0 + + bucket = module.zip_bucket.id + key = "function.zip" + source = data.archive_file.function[0].output_path + + # Use the hash so S3 object (and Lambda) updates when source changes + source_hash = data.archive_file.function[0].output_base64sha256 + + # KMS encryption + kms_key_id = data.aws_kms_alias.kms_key.target_key_arn +} + resource "aws_s3_object" "empty_function_zip" { - count = var.create_function_zip ? 1 : 0 + count = var.source_dir == null && length(var.github_actions_repos) == 0 ? 1 : 0 bucket = module.zip_bucket.id key = "function.zip" source = "${path.module}/dummy_function.zip" - - # This resource only exists to initialize the function, not manage it - lifecycle { - ignore_changes = all - } } module "vpc" { @@ -207,25 +64,61 @@ module "subnets" { } resource "aws_security_group" "function" { - name = "${var.name}-function" - description = "For the ${var.name} function" + name = "${local.full_name_string}-function" + description = "For the ${local.full_name_string} function" vpc_id = module.vpc.id +} + +resource "aws_vpc_security_group_egress_rule" "ipv4" { + for_each = { + for idx, rule in var.egress_rules : tostring(idx) => rule + if rule.cidr_ipv4 != null + } + + security_group_id = aws_security_group.function.id + cidr_ipv4 = each.value.cidr_ipv4 + ip_protocol = each.value.protocol + from_port = each.value.from_port + to_port = each.value.to_port + description = each.value.description +} - egress { - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - ipv6_cidr_blocks = ["::/0"] +resource "aws_vpc_security_group_egress_rule" "ipv6" { + for_each = { + for idx, rule in var.egress_rules : tostring(idx) => rule + if rule.cidr_ipv6 != null } + + security_group_id = aws_security_group.function.id + cidr_ipv6 = each.value.cidr_ipv6 + ip_protocol = each.value.protocol + from_port = each.value.from_port + to_port = each.value.to_port + description = each.value.description +} + +resource "aws_vpc_security_group_egress_rule" 
"sg_source" { + for_each = { + for idx, rule in var.egress_rules : tostring(idx) => rule + if rule.referenced_sg_id != null + } + + security_group_id = aws_security_group.function.id + referenced_security_group_id = each.value.referenced_sg_id + ip_protocol = each.value.protocol + from_port = each.value.from_port + to_port = each.value.to_port + description = each.value.description } resource "aws_lambda_function" "this" { - description = var.description - function_name = var.name - s3_key = "function.zip" - s3_bucket = module.zip_bucket.id - s3_object_version = var.source_code_version + description = var.description + function_name = local.full_name_string + s3_key = "function.zip" + s3_bucket = module.zip_bucket.id + # If source_dir is managed by this module, track the uploaded object version. + # Otherwise, fall back to the externally-supplied version (or null). + s3_object_version = var.source_dir != null ? aws_s3_object.function_zip[0].version_id : var.source_code_version kms_key_arn = data.aws_kms_alias.kms_key.target_key_arn role = aws_iam_role.function.arn handler = var.handler @@ -252,7 +145,7 @@ resource "aws_lambda_function" "this" { resource "aws_cloudwatch_event_rule" "this" { count = var.schedule_expression != "" ? 1 : 0 - name = "${var.name}-function" + name = "${local.full_name_string}-function" description = "Trigger ${var.name} function" schedule_expression = var.schedule_expression } @@ -273,3 +166,32 @@ resource "aws_lambda_permission" "cloudwatch_events" { principal = "events.amazonaws.com" source_arn = aws_cloudwatch_event_rule.this[0].arn } + +# Manage the CloudWatch log group explicitly so encryption and retention are enforced; its name must match the function name (local.full_name_string), since Lambda logs to /aws/lambda/<function_name> by default +resource "aws_cloudwatch_log_group" "function" { + name = "/aws/lambda/${local.full_name_string}" + kms_key_id = data.aws_kms_alias.kms_key.target_key_arn + skip_destroy = strcontains(var.env, "prod") + retention_in_days = var.log_retention_days +} + +resource "aws_lambda_invocation" "liveness_check" { + count = var.liveness_check_enabled ? 
1 : 0 + function_name = aws_lambda_function.this.function_name + qualifier = aws_lambda_alias.live.name + + # Re-runs whenever the Lambda source code changes + triggers = { + redeployment = aws_lambda_function.this.source_code_hash + } + + input = jsonencode({ + RequestType = "LivenessCheck" + }) +} + +resource "aws_lambda_alias" "live" { + name = "live" + function_name = aws_lambda_function.this.function_name + function_version = var.rollback_version != null ? var.rollback_version : aws_lambda_function.this.version +} diff --git a/terraform/modules/function/outputs.tf b/terraform/modules/function/outputs.tf index ebde26b3..6a0d77c2 100644 --- a/terraform/modules/function/outputs.tf +++ b/terraform/modules/function/outputs.tf @@ -3,6 +3,16 @@ output "name" { value = aws_lambda_function.this.function_name } +output "alias_arn" { + description = "ARN of the live alias" + value = aws_lambda_alias.live.arn +} + +output "function_version" { + description = "Published version number of the Lambda function" + value = aws_lambda_function.this.version +} + output "role_arn" { description = "ARN of the IAM role for the function" value = aws_iam_role.function.arn diff --git a/terraform/modules/function/variables.tf b/terraform/modules/function/variables.tf index c8a2006e..0c80a40b 100644 --- a/terraform/modules/function/variables.tf +++ b/terraform/modules/function/variables.tf @@ -26,6 +26,8 @@ variable "description" { type = string } +# ── Core Function Config + variable "handler" { description = "Lambda function handler" type = string @@ -33,7 +35,7 @@ variable "handler" { } variable "architecture" { - description = "" + description = "Lambda function CPU architecture. Use arm64 for Graviton (better price/performance for most workloads)." 
type = string default = "x86_64" validation { @@ -60,44 +62,156 @@ variable "memory_size" { default = null } -variable "function_role_inline_policies" { - description = "Inline policies (in JSON) for the function IAM role" - type = map(string) - default = {} +# ── Source / Deployment ─────────────────────────────────────────────────────── + +variable "source_dir" { + description = "Path to the Lambda source directory to zip and upload. If set, the module manages zipping and deployment. If null, an external process (or dummy zip) is used." + type = string + default = null } -variable "environment_variables" { - description = "Map of environment variables for the function" - type = map(string) - default = {} +variable "source_dir_excludes" { + description = "List of glob (**/*) patterns to exclude when zipping the source directory." + type = list(string) + default = [] } -variable "create_function_zip" { - description = "Create the function zip file, necessary for initialization (defaults to true)" +variable "source_code_version" { + description = "Optional S3 object version of function.zip uploaded to module's zip_bucket by external sources." + type = string + default = null +} + +variable "liveness_check_enabled" { + description = <<-EOT + Enables a deploy-time liveness check that invokes the Lambda function + immediately after deployment to verify it is healthy and correctly configured. + + When enabled, an aws_lambda_invocation resource is created that sends a + { "RequestType": "LivenessCheck" } payload to the Lambda function after + each deployment. The invocation is re-triggered whenever the Lambda source + code changes (tracked via source_code_hash). + + The Lambda function is responsible for implementing the liveness check logic + in its handler. This may include verifying external dependencies, validating + configuration, checking connectivity to downstream services, or any other + health validation relevant to the function's purpose. 
+ + If the liveness check fails, the Lambda should raise an exception. This + surfaces as a function error and causes the Tofu apply to fail, alerting + the deploying team immediately. + + Recommended: true in all environments to catch misconfiguration at deploy time. + EOT type = bool default = true } +variable "rollback_version" { + description = "Pin the live alias to a specific version for rollback. Set to null for normal deploys (alias tracks latest published version)." + type = string + default = null +} + +# ── Runtime Behavior ────────────────────────────────────────────────────────── + +variable "environment_variables" { + description = "Map of environment variables for the function" + type = map(string) + default = {} +} + variable "schedule_expression" { description = "Cron or rate expression for a scheduled function" type = string default = "" } -variable "extra_kms_key_arns" { +variable "log_retention_days" { + description = "Number of days to retain Lambda function logs in CloudWatch. If null, no retention policy is set and retention is managed externally (e.g., via cdap/scripts/set_log_retention/)." + type = number + default = 180 +} + +# ── IAM / Permissions ───────────────────────────────────────────────────────── + +variable "ssm_parameter_paths" { + description = <<-EOT + List of SSM parameter ARNs or path patterns this function is permitted to read. + Each entry should be a full ARN or ARN pattern. This can be retrieved from platform.module.ssm.ssm_root_name.parameter_name.arn. + If empty (default), the function receives no SSM access. + Do not use broad wildcards — scope each entry to the specific parameters this function requires. + EOT type = list(string) default = [] + validation { + condition = alltrue([ + for arn in var.ssm_parameter_paths : + can(regex("^arn:aws:ssm:", arn)) + ]) + error_message = "Each entry in ssm_parameter_paths must be a valid SSM parameter ARN starting with 'arn:aws:ssm:'." 
+ } +} + +variable "function_role_inline_policies" { + description = "Inline policies (in JSON) for the function IAM role" + type = map(string) + default = {} +} + +# ── Advanced / Migration strategies ───────────────────────────────────────────────── + +variable "extra_kms_key_arns" { description = "Optional list of additional KMS key ARNs the Lambda can use" + type = list(string) + default = [] } variable "layer_arns" { + description = "Optional list of layer arns" type = list(string) default = [] - description = "Optional list of layer arns" } -variable "source_code_version" { - description = "Optional S3 object version of function.zip uploaded to module's zip_bucket." - type = string - default = null +variable "github_actions_repos" { + description = <<-EOT + Used for integration tests and, when source_dir is null, + for CI/CD workflows that upload the function zip. + Format: "repo:CMSgov/REPO_NAME:*" or a more specific ref pattern. + Defaults to empty — no GitHub Actions access unless explicitly granted. 
+ EOT + type = list(string) + default = [] +} + +variable "egress_rules" { + description = "List of egress rules to apply to the security group" + type = list(object({ + name = string + from_port = number + to_port = number + protocol = string + cidr_ipv4 = optional(string) + cidr_ipv6 = optional(string) + referenced_sg_id = optional(string) + description = optional(string) + })) + default = [ + { + name = "allow-all-ipv4" + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_ipv4 = "0.0.0.0/0" + description = "Allow all egress traffic (IPv4) - migration default" + }, + { + name = "allow-all-ipv6" + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_ipv6 = "::/0" + description = "Allow all egress traffic (IPv6) - migration default" + } + ] } diff --git a/terraform/services/alarm-to-slack/README.md b/terraform/services/alarm-to-slack/README.md index 4309e4d9..fa626beb 100644 --- a/terraform/services/alarm-to-slack/README.md +++ b/terraform/services/alarm-to-slack/README.md @@ -1,6 +1,6 @@ # OpenTofu for alarm-to-slack function and associated infra -This service sets up the infrastructure for the alarm-to-slack lambda function in upper and lower environments for DPC +This service sets up the infrastructure for the alarm-to-slack lambda function in upper and lower environments for all applications in var.apps_served. ## Updating the lambda code @@ -20,3 +20,76 @@ AWS_REGION=us-east-1 tofu apply ## Automated deploy This terraform is automatically applied on merge to main by the tf-alarm-to-slack-apply.yml workflow. + + + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | 5.100.0 | + + +## Requirements + +No requirements. 
+ + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [app](#input\_app) | The application name (bcda, cdap) | `string` | n/a | yes | +| [env](#input\_env) | The application environment (test, prod) | `string` | n/a | yes | +| [apps\_served](#input\_apps\_served) | List of app names whose Slack webhook URLs this function reads from SSM at runtime. | `list(string)` |
[
"bcda",
"cdap",
"dpc"
]
| no | + + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [sns\_to\_slack\_function](#module\_sns\_to\_slack\_function) | ../../modules/function | n/a | +| [sns\_to\_slack\_queue](#module\_sns\_to\_slack\_queue) | github.com/CMSgov/cdap/terraform/modules/queue | b177921621c97d02dc4a21f830e4532147aa0749 | +| [standards](#module\_standards) | ../../modules/standards | n/a | + + +## Resources + +| Name | Type | +|------|------| +| [aws_iam_policy_document.sqs_queue_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_ssm_parameters_by_path.slack_webhook_urls](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ssm_parameters_by_path) | data source | + + +## Outputs + +| Name | Description | +|------|-------------| +| [function\_role\_arn](#output\_function\_role\_arn) | n/a | +| [sqs\_queue\_arn](#output\_sqs\_queue\_arn) | n/a | +| [zip\_bucket](#output\_zip\_bucket) | n/a | + \ No newline at end of file diff --git a/terraform/services/alarm-to-slack/iam.tf b/terraform/services/alarm-to-slack/iam.tf new file mode 100644 index 00000000..0bf7f8c2 --- /dev/null +++ b/terraform/services/alarm-to-slack/iam.tf @@ -0,0 +1,40 @@ +data "aws_iam_policy_document" "sqs_trigger" { + statement { + sid = "SQSTriggerReceive" + actions = [ + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:ReceiveMessage", + ] + resources = [module.sns_to_slack_queue.arn] + } +} + +data "aws_iam_policy_document" "sqs_queue_policy" { + statement { + sid = "allow_sns_access" + effect = "Allow" + + principals { + type = "Service" + identifiers = ["sns.amazonaws.com"] + } + + actions = [ + "SQS:SendMessage", + ] + + resources = [ + module.sns_to_slack_queue.arn + ] + + condition { + test = "ArnLike" + variable = "aws:SourceArn" + + values = [ + "arn:aws:sns:us-east-1:${module.standards.account_id}:*" + ] + } + } +} diff --git 
a/terraform/services/alarm-to-slack/lambda_src/lambda_function.py b/terraform/services/alarm-to-slack/lambda_src/lambda_function.py index 4453eb9c..511f511f 100644 --- a/terraform/services/alarm-to-slack/lambda_src/lambda_function.py +++ b/terraform/services/alarm-to-slack/lambda_src/lambda_function.py @@ -7,7 +7,7 @@ import json import os from urllib import request -from urllib.error import URLError +from urllib.error import URLError, HTTPError import boto3 from botocore.exceptions import ClientError @@ -44,12 +44,129 @@ def is_ignore_ok(): """ return os.environ.get('IGNORE_OK', 'false').lower() == 'true' + +def ping_slack_webhook(webhook, app, message_id=None): + """ + Sends a liveness ping to a Slack webhook using an empty payload. + Slack returns 400 for empty payloads, but a 400 still confirms the + webhook URL is reachable. A URLError or non-reachable host indicates + a broken webhook. + """ + try: + jsondata = json.dumps({}).encode('utf-8') + req = request.Request(webhook) + req.add_header('Content-Type', 'application/json; charset=utf-8') + req.add_header('Content-Length', str(len(jsondata))) + with request.urlopen(req, jsondata) as resp: + log({'msg': f'Liveness ping succeeded for app: {app}', + 'status': resp.status, 'messageId': message_id}) + return True + except HTTPError as e: + # Slack returns 400 for empty payloads — still means the URL is reachable + if e.code == 400: + log({'msg': f'Liveness ping reachable (400 expected) for app: {app}', + 'messageId': message_id}) + return True + log({'msg': f'Liveness ping FAILED (HTTP {e.code}) for app: {app}', + 'messageId': message_id}) + return False + except URLError as e: + log({'msg': f'Liveness ping FAILED for app: {app}, reason: {e.reason}', + 'messageId': message_id}) + return False + +def get_app_list(): + apps_env = os.environ.get('APPS', '') + return [app.strip() for app in apps_env.split(',') if app.strip()] + +def liveness_check(): + """ + Iterates over all configured apps (from the APPS env var), 
retrieves each + app's Slack webhook SSM parameter, and performs a connectivity ping. + + Returns a dict with: + - 'results': per-app status (ssm_ok, webhook_reachable) + - 'all_ok': True only if every app passed both checks + """ + apps = get_app_list() + if not apps: + log({'msg': 'Liveness check: No apps configured in APPS environment variable'}) + return {'results': {}, 'all_ok': True} + + results = {} + all_ok = True + + for app in apps: + param_name = f'/{app}/lambda/slack_webhook_url' + webhook = get_ssm_parameter(param_name) + + ssm_ok = webhook is not None + webhook_reachable = False + + if ssm_ok: + webhook_reachable = ping_slack_webhook(webhook, app) + else: + log({'msg': f'Liveness check FAILED: SSM parameter missing or broken for app: {app}', + 'param': param_name}) + + app_ok = ssm_ok and webhook_reachable + all_ok = all_ok and app_ok + + results[app] = { + 'ssm_ok': ssm_ok, + 'webhook_reachable': webhook_reachable, + 'ok': app_ok, + } + + log({ + 'msg': 'Liveness check result', + 'app': app, + 'ssm_ok': ssm_ok, + 'webhook_reachable': webhook_reachable, + 'ok': app_ok, + }) + + return {'results': results, 'all_ok': all_ok} + +def handle_liveness_event(event): + """ + Handles a deploy-time liveness check invocation from Tofu's aws_lambda_invocation. + Raises RuntimeError if any app's SSM parameter or Slack webhook is unreachable, + which surfaces as a function error and fails the Tofu apply. + """ + check = liveness_check() + + log({ + 'msg': 'Liveness check complete', + 'all_ok': check['all_ok'], + 'results': check['results'], + }) + + if not check['all_ok']: + failed = [app for app, r in check['results'].items() if not r['ok']] + raise RuntimeError( + f"Liveness check failed for app(s): {', '.join(failed)}. " + "Check CloudWatch logs for details." + ) + + return { + 'statusCode': 200, + 'body': 'Liveness check passed', + 'results': check['results'], + } + def lambda_handler(event, _): """ Main entry point for the Lambda function. 
- It iterates through the SQS records, processes each CloudWatch alarm, + Handles two event types: + 1) A liveness check that can be invoked via Tofu changes + 2) Primary function: Iteration through the SQS records, processes each CloudWatch alarm, and forwards it to the appropriate Slack channel. """ + + if event.get('RequestType') == 'LivenessCheck': + return handle_liveness_event(event) + processed_count = 0 for record in event['Records']: message = enriched_cloudwatch_message(record) diff --git a/terraform/services/alarm-to-slack/lambda_src/test_lambda_function.py b/terraform/services/alarm-to-slack/lambda_src/test_lambda_function.py index 66cc1557..58167bf0 100644 --- a/terraform/services/alarm-to-slack/lambda_src/test_lambda_function.py +++ b/terraform/services/alarm-to-slack/lambda_src/test_lambda_function.py @@ -28,12 +28,6 @@ def mock_boto3_client(): with patch('lambda_function.boto3.client') as mock_client: yield mock_client -def reload_lambda(): - """Reload the lambda_function module to pick up environment variable changes.""" - if 'lambda_function' in sys.modules: - importlib.reload(sys.modules['lambda_function']) - return sys.modules['lambda_function'] - def test_cloudwatch_message_sqs_record(): """Test parsing a valid CloudWatch message from an SQS record.""" cloudwatch_message = { @@ -77,7 +71,6 @@ def test_enriched_cloudwatch_message_alarm_record(): @patch.dict(os.environ, {'IGNORE_OK': 'false'}, clear=True) def test_enriched_cloudwatch_message_alarm_record_ok_ignored(): """Test enrichment when IGNORE_OK is false and state is ALARM.""" - reload_lambda() cloudwatch_message = { 'AlarmName': 'bcda-dev-SomeAlarm', 'OldStateValue': 'OK', @@ -121,7 +114,6 @@ def test_enriched_cloudwatch_message_ok_record(): @patch.dict(os.environ, {'IGNORE_OK': 'false'}, clear=True) def test_enriched_cloudwatch_message_ok_record_ignore_false(): """Test OK state message with IGNORE_OK explicitly set to false.""" - reload_lambda() cloudwatch_message = { 'AlarmName': 
'bcda-dev-SomeAlarm', 'OldStateValue': 'ALARM', @@ -144,7 +136,6 @@ def test_enriched_cloudwatch_message_ok_record_ignore_false(): @patch.dict(os.environ, {'IGNORE_OK': 'true'}, clear=True) def test_enriched_cloudwatch_message_ok_record_ok_ignored(): """Test that OK state message is ignored when IGNORE_OK is true.""" - reload_lambda() cloudwatch_message = { 'AlarmName': 'bcda-dev-SomeAlarm', 'OldStateValue': 'ALARM', @@ -259,3 +250,81 @@ def test_logger(capsys): log_output = json.loads(captured.out) assert log_output['test'] == 'log' assert 'time' in log_output + +@patch.dict(os.environ, {'APPS': 'bcda,cdap,dpc'}, clear=True) +def test_get_app_list_returns_list(): + """Test that APPS env var is parsed into a list correctly.""" + assert lambda_function.get_app_list() == ['bcda', 'cdap', 'dpc'] + +@patch.dict(os.environ, {}, clear=True) +def test_get_app_list_empty(): + """Test that missing APPS env var returns empty list.""" + assert lambda_function.get_app_list() == [] + +# ── ping_slack_webhook ───────────────────────────────────────────────────── + +@patch('urllib.request.urlopen') +def test_ping_slack_webhook_success(mock_urlopen): + """200 response → reachable.""" + cm = MagicMock() + cm.status = 200 + cm.__enter__.return_value = cm + mock_urlopen.return_value = cm + assert lambda_function.ping_slack_webhook('https://hooks.slack.com/test', 'bcda') is True + + +@patch('urllib.request.urlopen') +def test_ping_slack_webhook_400_treated_as_alive(mock_urlopen): + """Slack's 400 for empty payload still means the URL is reachable.""" + from urllib.error import HTTPError + mock_urlopen.side_effect = HTTPError( + url='https://hooks.slack.com/test', code=400, + msg='no_text', hdrs=None, fp=None, + ) + assert lambda_function.ping_slack_webhook('https://hooks.slack.com/test', 'bcda') is True + + +@patch('urllib.request.urlopen') +def test_ping_slack_webhook_network_failure(mock_urlopen): + """Genuine network error → not reachable.""" + from urllib.error import URLError + 
mock_urlopen.side_effect = URLError('connection refused') + assert lambda_function.ping_slack_webhook('https://hooks.slack.com/test', 'bcda') is False + + +# ── liveness_check ───────────────────────────────────────────────────────── + +@patch.dict(os.environ, {'APPS': 'bcda,dpc'}, clear=True) +@patch('lambda_function.ping_slack_webhook', return_value=True) +@patch('lambda_function.get_ssm_parameter', return_value='https://hooks.slack.com/test') +def test_liveness_check_all_ok(mock_ssm, mock_ping): + """All apps pass → all_ok is True.""" + result = lambda_function.liveness_check() + assert result['all_ok'] is True + + +@patch.dict(os.environ, {'APPS': 'bcda'}, clear=True) +@patch('lambda_function.get_ssm_parameter', return_value=None) +def test_liveness_check_ssm_missing(mock_ssm): + """Missing SSM parameter → app fails, all_ok is False.""" + result = lambda_function.liveness_check() + assert result['all_ok'] is False + assert result['results']['bcda']['ssm_ok'] is False + + +@patch.dict(os.environ, {'APPS': 'bcda'}, clear=True) +@patch('lambda_function.ping_slack_webhook', return_value=True) +@patch('lambda_function.get_ssm_parameter', return_value='https://hooks.slack.com/test') +def test_handle_liveness_event_passes(mock_ssm, mock_ping): + """Returns 200 when all checks pass.""" + response = lambda_function.handle_liveness_event({'RequestType': 'LivenessCheck'}) + assert response['statusCode'] == 200 + + +@patch.dict(os.environ, {'APPS': 'bcda'}, clear=True) +@patch('lambda_function.ping_slack_webhook', return_value=False) +@patch('lambda_function.get_ssm_parameter', return_value='https://hooks.slack.com/test') +def test_handle_liveness_event_raises_on_failure(mock_ssm, mock_ping): + """Raises RuntimeError when a check fails — surfaces as Lambda error in Tofu.""" + with pytest.raises(RuntimeError, match='bcda'): + lambda_function.handle_liveness_event({'RequestType': 'LivenessCheck'}) \ No newline at end of file diff --git 
a/terraform/services/alarm-to-slack/main.tf b/terraform/services/alarm-to-slack/main.tf index 9ce9d454..f9212072 100644 --- a/terraform/services/alarm-to-slack/main.tf +++ b/terraform/services/alarm-to-slack/main.tf @@ -2,25 +2,50 @@ locals { full_name = "${var.app}-${var.env}-alarm-to-slack" } -data "aws_caller_identity" "current" {} +import { + to = module.sns_to_slack_function.aws_cloudwatch_log_group.function + id = "/aws/lambda/${local.full_name}" +} -module "sns_to_slack_function" { - source = "github.com/CMSgov/cdap/terraform/modules/function?ref=2874c72ccd4c4821e5e3f77ccf61cf77ed05169f" +data "aws_ssm_parameters_by_path" "slack_webhook_urls" { + for_each = toset(var.apps_served) + path = "/${each.value}/lambda/slack_webhook_url" +} - app = var.app - env = var.env - architecture = "arm64" +module "sns_to_slack_function" { + source = "../../modules/function" + app = var.app + env = var.env name = local.full_name description = "Listens for CloudWatch Alerts and forwards to Slack" - # TODO use zip file + architecture = "arm64" + handler = "lambda_function.lambda_handler" + runtime = "python3.13" + + ssm_parameter_paths = flatten([ + for app, data in data.aws_ssm_parameters_by_path.slack_webhook_urls : + data.arns + ]) - handler = "lambda_function.lambda_handler" - runtime = "python3.13" + function_role_inline_policies = { + sqs-trigger = data.aws_iam_policy_document.sqs_trigger.json + } + + # Point to the local source directory — module handles zip + upload + source_dir = "${path.module}/lambda_src" + + # Optionally exclude tests and cache + source_dir_excludes = [ + "__pycache__", + "test_*.py", + "*.pyc", + ] environment_variables = { IGNORE_OK = true + APPS = join(",", var.apps_served) } } @@ -36,32 +61,3 @@ module "sns_to_slack_queue" { data.aws_iam_policy_document.sqs_queue_policy.json ] } - -data "aws_iam_policy_document" "sqs_queue_policy" { - statement { - sid = "allow_sns_access" - effect = "Allow" - - principals { - type = "Service" - identifiers = 
["sns.amazonaws.com"] - } - - actions = [ - "SQS:SendMessage", - ] - - resources = [ - module.sns_to_slack_queue.arn - ] - - condition { - test = "ArnLike" - variable = "aws:SourceArn" - - values = [ - "arn:aws:sns:us-east-1:${data.aws_caller_identity.current.account_id}:*" - ] - } - } -} diff --git a/terraform/services/alarm-to-slack/terraform.tf b/terraform/services/alarm-to-slack/terraform.tf deleted file mode 100644 index 4ae8cd4d..00000000 --- a/terraform/services/alarm-to-slack/terraform.tf +++ /dev/null @@ -1,18 +0,0 @@ -provider "aws" { - default_tags { - tags = { - application = var.app - business = "oeda" - code = "https://github.com/CMSgov/cdap/tree/main/terraform/services/alarm-to-slack" - component = "alarm-to-slack" - environment = var.env - terraform = true - } - } -} - -terraform { - backend "s3" { - key = "alarm-to-slack/terraform.tfstate" - } -} diff --git a/terraform/services/alarm-to-slack/tofu.tf b/terraform/services/alarm-to-slack/tofu.tf new file mode 100644 index 00000000..881e5a4b --- /dev/null +++ b/terraform/services/alarm-to-slack/tofu.tf @@ -0,0 +1,30 @@ +terraform { + backend "s3" { + key = "alarm-to-slack/terraform.tfstate" + } +} + +provider "aws" { + region = "us-east-1" + default_tags { + tags = module.standards.default_tags + } +} + +provider "aws" { + alias = "secondary" + region = "us-west-2" + default_tags { + tags = module.standards.default_tags + } +} + +module "standards" { + source = "../../modules/standards" + providers = { aws = aws, aws.secondary = aws.secondary } + + app = var.app + env = var.env + root_module = "https://github.com/CMSgov/cdap/tree/main/terraform/services/${basename(abspath(path.module))}/" + service = replace(basename(abspath(path.module)), "/^[0-9]+-/", "") +} diff --git a/terraform/services/alarm-to-slack/variables.tf b/terraform/services/alarm-to-slack/variables.tf index c2220adf..70bb3c76 100644 --- a/terraform/services/alarm-to-slack/variables.tf +++ 
b/terraform/services/alarm-to-slack/variables.tf @@ -15,3 +15,9 @@ variable "env" { error_message = "Valid values for env are test, prod." } } + +variable "apps_served" { + description = "List of app names whose Slack webhook URLs this function reads from SSM at runtime." + type = list(string) + default = ["bcda", "cdap", "dpc"] +} diff --git a/terraform/services/github-actions-role/main.tf b/terraform/services/github-actions-role/main.tf index 3ebbe950..72313c73 100644 --- a/terraform/services/github-actions-role/main.tf +++ b/terraform/services/github-actions-role/main.tf @@ -398,6 +398,7 @@ data "aws_iam_policy_document" "github_actions_policy" { "logs:DescribeLogGroups", "logs:DescribeLogStreams", "logs:DescribeSubscriptionFilters", + "logs:ListTagsForResource", "logs:PutRetentionPolicy" ] resources = ["*"] diff --git a/terraform/services/tftesting/function/README.md b/terraform/services/tftesting/function/README.md new file mode 100644 index 00000000..6ec21fb7 --- /dev/null +++ b/terraform/services/tftesting/function/README.md @@ -0,0 +1,3 @@ +# Lambda function module +This root module allows manual and CI testing of the module at terraform/modules/function. +It provisions a basic Lambda from the source code in ./lambda_src/ and verifies liveness using the aws_lambda_invocation resource in the function module. 
diff --git a/terraform/services/tftesting/function/iam.tf b/terraform/services/tftesting/function/iam.tf new file mode 100644 index 00000000..c5f9634e --- /dev/null +++ b/terraform/services/tftesting/function/iam.tf @@ -0,0 +1,10 @@ +data "aws_iam_policy_document" "ssm_inline_test" { + statement { + sid = "InlinePolicySSMRead" + effect = "Allow" + actions = [ + "ssm:GetParameter" + ] + resources = [aws_ssm_parameter.inline_policy_test.arn] + } +} diff --git a/terraform/services/tftesting/function/lambda_src/lambda_function.py b/terraform/services/tftesting/function/lambda_src/lambda_function.py new file mode 100644 index 00000000..09c9c4cd --- /dev/null +++ b/terraform/services/tftesting/function/lambda_src/lambda_function.py @@ -0,0 +1,45 @@ +import json +import logging +import os + +import boto3 + +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +ssm = boto3.client("ssm") + + +def function_handler(event, context): + logger.info("Received event: %s", json.dumps(event)) + + request_type = event.get("RequestType") or event.get("source", "") + + if request_type == "LivenessCheck": + return _liveness_check() + + logger.warning("Unknown RequestType: %s", request_type) + return {"status": "ok", "event": event} + + +def _liveness_check(): + """ + Validates that the function can reach dependencies. + Raises on failure so tofu apply fails. + """ + param_name = os.environ["SSM_PARAM_PATH"] + + # Validates: egress rules, IAM SSM permissions, KMS decrypt permission + response = ssm.get_parameter(Name=param_name, WithDecryption=True) + value = response["Parameter"]["Value"] + + if not value: + raise ValueError("SSM parameter was empty") + + if os.environ.get("ENVIRONMENT") != "tftesting": + raise ValueError( + f"ENVIRONMENT env var not set correctly: {os.environ.get('ENVIRONMENT')!r}" + ) + + logger.info("Liveness check passed. 
SSM value retrieved successfully.") + return {"status": "ok", "message": "Lambda is healthy"} diff --git a/terraform/services/tftesting/function/main.tf b/terraform/services/tftesting/function/main.tf new file mode 100644 index 00000000..83b8ee01 --- /dev/null +++ b/terraform/services/tftesting/function/main.tf @@ -0,0 +1,86 @@ +resource "aws_ssm_parameter" "test_config" { + name = "/cdap/test/tftesting/function/testvalue" + # only setting as secure for testing + type = "SecureString" + # not an actually secure string + value = "tftesting" + + key_id = module.platform.kms_alias_primary.id +} + +# This parameter is NOT in ssm_parameter_paths — only accessible via inline policy +resource "aws_ssm_parameter" "inline_policy_test" { + name = "/cdap/test/tftesting/function/inline-policy-test" + type = "SecureString" + value = "inline-policy-access-confirmed" + key_id = module.platform.kms_alias_primary.id +} + +module "tftesting_function" { + source = "../../../modules/function" + + app = "cdap" + env = "test" + name = "tftesting" + description = "Ephemeral Lambda for CI/CD integration testing — exercises module features" + + source_dir = "${path.module}/lambda_src" + source_dir_excludes = ["**/__pycache__/**", "**/*.pyc", "**/tests/**"] + + handler = "lambda_function.function_handler" + runtime = "python3.11" + architecture = "arm64" + timeout = 30 + memory_size = 256 # Evaluates non-default memory + + liveness_check_enabled = true + + log_retention_days = 7 + + # Exercises environment_variables + environment_variables = { + ENVIRONMENT = "tftesting" + SSM_PARAM_PATH = aws_ssm_parameter.test_config.name + INLINE_POLICY_PARAM_PATH = aws_ssm_parameter.inline_policy_test.name + } + + # Exercises ssm_parameter_paths + ssm_parameter_paths = [aws_ssm_parameter.test_config.arn] + + # Exercises schedule_expression — can be set for scheduler testing + schedule_expression = "" + + # Exercises function_role_inline_policies — + function_role_inline_policies = { + "ssm-inline-test" 
= data.aws_iam_policy_document.ssm_inline_test.json + } + + # Placeholder if evaluating github_actions_repos for deploys outside of Tofu + github_actions_repos = [] + + # Scoped egress — HTTPS only to allow testing of SSM parameter retrieval ; remove when VPC endpoint is introduced + egress_rules = [ + { + name = "allow-https-ipv4" + from_port = 443 + to_port = 443 + protocol = "tcp" + cidr_ipv4 = "0.0.0.0/0" + description = "Allow HTTPS egress for AWS API calls" + } + ] + + # Rollback support + rollback_version = null # null = track latest published version +} + + +module "platform" { + providers = { aws = aws, aws.secondary = aws.secondary } + + source = "../../../modules/platform" + app = "cdap" + env = "test" + root_module = "https://github.com/CMSgov/cdap/tree/main/terraform/services/tftesting/${basename(abspath(path.module))}/" + service = replace(basename(abspath(path.module)), "/^[0-9]+-/", "") +} diff --git a/terraform/services/tftesting/function/outputs.tf b/terraform/services/tftesting/function/outputs.tf new file mode 100644 index 00000000..d98ce5e5 --- /dev/null +++ b/terraform/services/tftesting/function/outputs.tf @@ -0,0 +1,14 @@ +output "function_name" { + description = "Name of the test Lambda function" + value = module.tftesting_function.name +} + +output "function_arn" { + description = "ARN of the test Lambda function" + value = module.tftesting_function.alias_arn +} + +output "function_version" { + description = "Published version of the test Lambda" + value = module.tftesting_function.function_version +} diff --git a/terraform/services/tftesting/function/tofu.tf b/terraform/services/tftesting/function/tofu.tf new file mode 100644 index 00000000..d9149467 --- /dev/null +++ b/terraform/services/tftesting/function/tofu.tf @@ -0,0 +1,20 @@ +provider "aws" { + region = "us-east-1" + default_tags { + tags = module.platform.default_tags + } +} + +provider "aws" { + alias = "secondary" + region = "us-west-2" + default_tags { + tags = 
module.platform.default_tags + } +} + +terraform { + backend "s3" { + key = "tftesting/function/terraform.tfstate" + } +}