diff --git a/.github/workflows/alarm-to-slack-checks.yml b/.github/workflows/alarm-to-slack-checks.yml index 582e1e2b..8381a901 100644 --- a/.github/workflows/alarm-to-slack-checks.yml +++ b/.github/workflows/alarm-to-slack-checks.yml @@ -7,12 +7,18 @@ on: paths: - 'terraform/services/alarm-to-slack/lambda_src/**/*.py' - 'terraform/services/alarm-to-slack/lambda_src/**/requirements.txt' - - 'terraform/modules/function/**' jobs: - python-checks: - uses: ./.github/workflows/python-checks-reusable.yml - with: - source_path: terraform/services/alarm-to-slack/lambda_src - sonar_project_key: cdap-alarm-to-slack - sonar_project_name: "CDAP Alarm to Slack" + python-tests: + runs-on: codebuild-cdap-${{ github.ref_name == 'main' && 'prod' || 'non-prod' }}-${{ github.run_id }}-${{ github.run_attempt }} + defaults: + run: + working-directory: terraform/services/alarm-to-slack/lambda_src + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.3.0 + - name: Install dependencies + run: | + pip install -r requirements.txt + pip install pytest pytest-cov + - name: Run tests + run: pytest test_lambda_function.py -v --cov=lambda_function --cov-report=term-missing diff --git a/.github/workflows/set_log_retention_checks.yml b/.github/workflows/set_log_retention_checks.yml index 2339c08b..c49abf78 100644 --- a/.github/workflows/set_log_retention_checks.yml +++ b/.github/workflows/set_log_retention_checks.yml @@ -7,7 +7,6 @@ on: paths: - 'scripts/set_log_retention/*.py' - 'scripts/set_log_retention/requirements.txt' - - 'terraform/modules/function/**' jobs: python-checks: diff --git a/.github/workflows/tftesting-function.yml b/.github/workflows/tftesting-function.yml new file mode 100644 index 00000000..b367daa1 --- /dev/null +++ b/.github/workflows/tftesting-function.yml @@ -0,0 +1,67 @@ +name: tftesting-lambda + +on: + workflow_dispatch: + pull_request: + paths: + - 'terraform/services/tftesting/function/**' + - 'terraform/modules/function/**' + - 
'.github/workflows/tftesting-function.yml' + push: + branches: + - main + paths: + - 'terraform/services/tftesting/function/**' + - 'terraform/modules/function/**' + - '.github/workflows/tftesting-function.yml' + +concurrency: + group: tftesting-function + cancel-in-progress: true + +env: + TENV_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + APP: cdap + ENV: test + TF_DIR: terraform/services/tftesting/function + +permissions: + contents: read + id-token: write + +jobs: + apply: + name: Tofu Apply + runs-on: codebuild-cdap-non-prod-${{ github.run_id }}-${{ github.run_attempt }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.3.0 + - uses: cmsgov/cdap/actions/setup-tenv@f4c14d47cc20e7f6de9112d7155af1213c9bca5a + - uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6.1.0 + with: + role-to-assume: arn:aws:iam::${{ secrets.NON_PROD_ACCOUNT }}:role/delegatedadmin/developer/${{ env.APP }}-${{ env.ENV }}-github-actions + aws-region: ${{ vars.AWS_REGION }} + - name: Tofu Init + working-directory: ${{ env.TF_DIR }} + run: tofu init -reconfigure -backend-config="../../../backends/${{ env.APP }}-${{ env.ENV }}.s3.tfbackend" + - name: Tofu Apply + working-directory: ${{ env.TF_DIR }} + run: tofu apply -auto-approve # CI is non-interactive; nobody can answer the approval prompt + + destroy: + name: Tofu Destroy + if: success() + needs: apply + runs-on: codebuild-cdap-non-prod-${{ github.run_id }}-${{ github.run_attempt }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.3.0 + - uses: cmsgov/cdap/actions/setup-tenv@f4c14d47cc20e7f6de9112d7155af1213c9bca5a + - uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6.1.0 + with: + role-to-assume: arn:aws:iam::${{ secrets.NON_PROD_ACCOUNT }}:role/delegatedadmin/developer/${{ env.APP }}-${{ env.ENV }}-github-actions + aws-region: ${{ vars.AWS_REGION }} + - name: Tofu Init + working-directory: ${{ env.TF_DIR }} + run: tofu init -reconfigure -backend-config="../../../backends/${{ 
env.APP }}-${{ env.ENV }}.s3.tfbackend" + - name: Tofu Destroy + working-directory: ${{ env.TF_DIR }} + run: tofu destroy -auto-approve diff --git a/terraform/modules/function/README.md b/terraform/modules/function/README.md index 2ab524b2..2c4d1c60 100644 --- a/terraform/modules/function/README.md +++ b/terraform/modules/function/README.md @@ -2,4 +2,126 @@ This is a generic module for creating lambda function resources in CMS Cloud. Use it in terraform services where a lambda function is needed. -Note that a dummy function is included to allow for initialization. It is meant to be replaced once the function has been created. +Note that a dummy function will be made if source_dir with function logic is not yet provided or github_actions_repos is not defined. +The dummy function allows for infrastructure scaffolding before source code is written. +If source code is written and the lifecycle is managed outside of terraform, set github_actions_repos. + + + +## Providers + +| Name | Version | +|------|---------| +| [archive](#provider\_archive) | n/a | +| [aws](#provider\_aws) | n/a | + + +## Requirements + +No requirements. + + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [app](#input\_app) | The application name (ab2d, bcda, cdap dpc) | `string` | n/a | yes | +| [description](#input\_description) | Description of the lambda function | `string` | n/a | yes | +| [env](#input\_env) | The application environment (dev, test, sandbox, prod) | `string` | n/a | yes | +| [name](#input\_name) | Name of the lambda function | `string` | n/a | yes | +| [architecture](#input\_architecture) | Lambda function CPU architecture. Use arm64 for Graviton (better price/performance for most workloads). | `string` | `"x86_64"` | no | +| [egress\_rules](#input\_egress\_rules) | List of egress rules to apply to the security group |
list(object({
name = string
from_port = number
to_port = number
protocol = string
cidr_ipv4 = optional(string)
cidr_ipv6 = optional(string)
referenced_sg_id = optional(string)
description = optional(string)
})) | [| no | +| [environment\_variables](#input\_environment\_variables) | Map of environment variables for the function | `map(string)` | `{}` | no | +| [extra\_kms\_key\_arns](#input\_extra\_kms\_key\_arns) | Optional list of additional KMS key ARNs the Lambda can use | `list(string)` | `[]` | no | +| [function\_role\_inline\_policies](#input\_function\_role\_inline\_policies) | Inline policies (in JSON) for the function IAM role | `map(string)` | `{}` | no | +| [github\_actions\_repos](#input\_github\_actions\_repos) | Used for integration tests and, when source\_dir is null,
{
"cidr_ipv4": "0.0.0.0/0",
"description": "Allow all egress traffic (IPv4) - migration default",
"from_port": 0,
"name": "allow-all-ipv4",
"protocol": "-1",
"to_port": 0
},
{
"cidr_ipv6": "::/0",
"description": "Allow all egress traffic (IPv6) - migration default",
"from_port": 0,
"name": "allow-all-ipv6",
"protocol": "-1",
"to_port": 0
}
]
[| no | + + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [sns\_to\_slack\_function](#module\_sns\_to\_slack\_function) | ../../modules/function | n/a | +| [sns\_to\_slack\_queue](#module\_sns\_to\_slack\_queue) | github.com/CMSgov/cdap/terraform/modules/queue | b177921621c97d02dc4a21f830e4532147aa0749 | +| [standards](#module\_standards) | ../../modules/standards | n/a | + + +## Resources + +| Name | Type | +|------|------| +| [aws_iam_policy_document.sqs_queue_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_ssm_parameters_by_path.slack_webhook_urls](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ssm_parameters_by_path) | data source | + + +## Outputs + +| Name | Description | +|------|-------------| +| [function\_role\_arn](#output\_function\_role\_arn) | n/a | +| [sqs\_queue\_arn](#output\_sqs\_queue\_arn) | n/a | +| [zip\_bucket](#output\_zip\_bucket) | n/a | + \ No newline at end of file diff --git a/terraform/services/alarm-to-slack/iam.tf b/terraform/services/alarm-to-slack/iam.tf new file mode 100644 index 00000000..0bf7f8c2 --- /dev/null +++ b/terraform/services/alarm-to-slack/iam.tf @@ -0,0 +1,40 @@ +data "aws_iam_policy_document" "sqs_trigger" { + statement { + sid = "SQSTriggerReceive" + actions = [ + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:ReceiveMessage", + ] + resources = [module.sns_to_slack_queue.arn] + } +} + +data "aws_iam_policy_document" "sqs_queue_policy" { + statement { + sid = "allow_sns_access" + effect = "Allow" + + principals { + type = "Service" + identifiers = ["sns.amazonaws.com"] + } + + actions = [ + "SQS:SendMessage", + ] + + resources = [ + module.sns_to_slack_queue.arn + ] + + condition { + test = "ArnLike" + variable = "aws:SourceArn" + + values = [ + "arn:aws:sns:us-east-1:${module.standards.account_id}:*" + ] + } + } +} diff --git 
a/terraform/services/alarm-to-slack/lambda_src/lambda_function.py b/terraform/services/alarm-to-slack/lambda_src/lambda_function.py index 4453eb9c..511f511f 100644 --- a/terraform/services/alarm-to-slack/lambda_src/lambda_function.py +++ b/terraform/services/alarm-to-slack/lambda_src/lambda_function.py @@ -7,7 +7,7 @@ import json import os from urllib import request -from urllib.error import URLError +from urllib.error import URLError, HTTPError import boto3 from botocore.exceptions import ClientError @@ -44,12 +44,129 @@ def is_ignore_ok(): """ return os.environ.get('IGNORE_OK', 'false').lower() == 'true' + +def ping_slack_webhook(webhook, app, message_id=None): + """ + Sends a liveness ping to a Slack webhook using an empty payload. + Slack returns 400 for empty payloads, but a 400 still confirms the + webhook URL is reachable. A URLError or non-reachable host indicates + a broken webhook. + """ + try: + jsondata = json.dumps({}).encode('utf-8') + req = request.Request(webhook) + req.add_header('Content-Type', 'application/json; charset=utf-8') + req.add_header('Content-Length', str(len(jsondata))) + with request.urlopen(req, jsondata) as resp: + log({'msg': f'Liveness ping succeeded for app: {app}', + 'status': resp.status, 'messageId': message_id}) + return True + except HTTPError as e: + # Slack returns 400 for empty payloads — still means the URL is reachable + if e.code == 400: + log({'msg': f'Liveness ping reachable (400 expected) for app: {app}', + 'messageId': message_id}) + return True + log({'msg': f'Liveness ping FAILED (HTTP {e.code}) for app: {app}', + 'messageId': message_id}) + return False + except URLError as e: + log({'msg': f'Liveness ping FAILED for app: {app}, reason: {e.reason}', + 'messageId': message_id}) + return False + +def get_app_list(): + apps_env = os.environ.get('APPS', '') + return [app.strip() for app in apps_env.split(',') if app.strip()] + +def liveness_check(): + """ + Iterates over all configured apps (from the APPS env var), 
retrieves each + app's Slack webhook SSM parameter, and performs a connectivity ping. + + Returns a dict with: + - 'results': per-app status (ssm_ok, webhook_reachable) + - 'all_ok': True only if every app passed both checks + """ + apps = get_app_list() + if not apps: + log({'msg': 'Liveness check: No apps configured in APPS environment variable'}) + return {'results': {}, 'all_ok': True} + + results = {} + all_ok = True + + for app in apps: + param_name = f'/{app}/lambda/slack_webhook_url' + webhook = get_ssm_parameter(param_name) + + ssm_ok = webhook is not None + webhook_reachable = False + + if ssm_ok: + webhook_reachable = ping_slack_webhook(webhook, app) + else: + log({'msg': f'Liveness check FAILED: SSM parameter missing or broken for app: {app}', + 'param': param_name}) + + app_ok = ssm_ok and webhook_reachable + all_ok = all_ok and app_ok + + results[app] = { + 'ssm_ok': ssm_ok, + 'webhook_reachable': webhook_reachable, + 'ok': app_ok, + } + + log({ + 'msg': 'Liveness check result', + 'app': app, + 'ssm_ok': ssm_ok, + 'webhook_reachable': webhook_reachable, + 'ok': app_ok, + }) + + return {'results': results, 'all_ok': all_ok} + +def handle_liveness_event(event): + """ + Handles a deploy-time liveness check invocation from Tofu's aws_lambda_invocation. + Raises RuntimeError if any app's SSM parameter or Slack webhook is unreachable, + which surfaces as a function error and fails the Tofu apply. + """ + check = liveness_check() + + log({ + 'msg': 'Liveness check complete', + 'all_ok': check['all_ok'], + 'results': check['results'], + }) + + if not check['all_ok']: + failed = [app for app, r in check['results'].items() if not r['ok']] + raise RuntimeError( + f"Liveness check failed for app(s): {', '.join(failed)}. " + "Check CloudWatch logs for details." + ) + + return { + 'statusCode': 200, + 'body': 'Liveness check passed', + 'results': check['results'], + } + def lambda_handler(event, _): """ Main entry point for the Lambda function. 
- It iterates through the SQS records, processes each CloudWatch alarm, + Handles two event types: + 1) A liveness check that can be invoked via Tofu changes + 2) Primary function: Iteration through the SQS records, processes each CloudWatch alarm, and forwards it to the appropriate Slack channel. """ + + if event.get('RequestType') == 'LivenessCheck': + return handle_liveness_event(event) + processed_count = 0 for record in event['Records']: message = enriched_cloudwatch_message(record) diff --git a/terraform/services/alarm-to-slack/lambda_src/test_lambda_function.py b/terraform/services/alarm-to-slack/lambda_src/test_lambda_function.py index 66cc1557..58167bf0 100644 --- a/terraform/services/alarm-to-slack/lambda_src/test_lambda_function.py +++ b/terraform/services/alarm-to-slack/lambda_src/test_lambda_function.py @@ -28,12 +28,6 @@ def mock_boto3_client(): with patch('lambda_function.boto3.client') as mock_client: yield mock_client -def reload_lambda(): - """Reload the lambda_function module to pick up environment variable changes.""" - if 'lambda_function' in sys.modules: - importlib.reload(sys.modules['lambda_function']) - return sys.modules['lambda_function'] - def test_cloudwatch_message_sqs_record(): """Test parsing a valid CloudWatch message from an SQS record.""" cloudwatch_message = { @@ -77,7 +71,6 @@ def test_enriched_cloudwatch_message_alarm_record(): @patch.dict(os.environ, {'IGNORE_OK': 'false'}, clear=True) def test_enriched_cloudwatch_message_alarm_record_ok_ignored(): """Test enrichment when IGNORE_OK is false and state is ALARM.""" - reload_lambda() cloudwatch_message = { 'AlarmName': 'bcda-dev-SomeAlarm', 'OldStateValue': 'OK', @@ -121,7 +114,6 @@ def test_enriched_cloudwatch_message_ok_record(): @patch.dict(os.environ, {'IGNORE_OK': 'false'}, clear=True) def test_enriched_cloudwatch_message_ok_record_ignore_false(): """Test OK state message with IGNORE_OK explicitly set to false.""" - reload_lambda() cloudwatch_message = { 'AlarmName': 
'bcda-dev-SomeAlarm', 'OldStateValue': 'ALARM', @@ -144,7 +136,6 @@ def test_enriched_cloudwatch_message_ok_record_ignore_false(): @patch.dict(os.environ, {'IGNORE_OK': 'true'}, clear=True) def test_enriched_cloudwatch_message_ok_record_ok_ignored(): """Test that OK state message is ignored when IGNORE_OK is true.""" - reload_lambda() cloudwatch_message = { 'AlarmName': 'bcda-dev-SomeAlarm', 'OldStateValue': 'ALARM', @@ -259,3 +250,81 @@ def test_logger(capsys): log_output = json.loads(captured.out) assert log_output['test'] == 'log' assert 'time' in log_output + +@patch.dict(os.environ, {'APPS': 'bcda,cdap,dpc'}, clear=True) +def test_get_app_list_returns_list(): + """Test that APPS env var is parsed into a list correctly.""" + assert lambda_function.get_app_list() == ['bcda', 'cdap', 'dpc'] + +@patch.dict(os.environ, {}, clear=True) +def test_get_app_list_empty(): + """Test that missing APPS env var returns empty list.""" + assert lambda_function.get_app_list() == [] + +# ── ping_slack_webhook ───────────────────────────────────────────────────── + +@patch('urllib.request.urlopen') +def test_ping_slack_webhook_success(mock_urlopen): + """200 response → reachable.""" + cm = MagicMock() + cm.status = 200 + cm.__enter__.return_value = cm + mock_urlopen.return_value = cm + assert lambda_function.ping_slack_webhook('https://hooks.slack.com/test', 'bcda') is True + + +@patch('urllib.request.urlopen') +def test_ping_slack_webhook_400_treated_as_alive(mock_urlopen): + """Slack's 400 for empty payload still means the URL is reachable.""" + from urllib.error import HTTPError + mock_urlopen.side_effect = HTTPError( + url='https://hooks.slack.com/test', code=400, + msg='no_text', hdrs=None, fp=None, + ) + assert lambda_function.ping_slack_webhook('https://hooks.slack.com/test', 'bcda') is True + + +@patch('urllib.request.urlopen') +def test_ping_slack_webhook_network_failure(mock_urlopen): + """Genuine network error → not reachable.""" + from urllib.error import URLError + 
mock_urlopen.side_effect = URLError('connection refused') + assert lambda_function.ping_slack_webhook('https://hooks.slack.com/test', 'bcda') is False + + +# ── liveness_check ───────────────────────────────────────────────────────── + +@patch.dict(os.environ, {'APPS': 'bcda,dpc'}, clear=True) +@patch('lambda_function.ping_slack_webhook', return_value=True) +@patch('lambda_function.get_ssm_parameter', return_value='https://hooks.slack.com/test') +def test_liveness_check_all_ok(mock_ssm, mock_ping): + """All apps pass → all_ok is True.""" + result = lambda_function.liveness_check() + assert result['all_ok'] is True + + +@patch.dict(os.environ, {'APPS': 'bcda'}, clear=True) +@patch('lambda_function.get_ssm_parameter', return_value=None) +def test_liveness_check_ssm_missing(mock_ssm): + """Missing SSM parameter → app fails, all_ok is False.""" + result = lambda_function.liveness_check() + assert result['all_ok'] is False + assert result['results']['bcda']['ssm_ok'] is False + + +@patch.dict(os.environ, {'APPS': 'bcda'}, clear=True) +@patch('lambda_function.ping_slack_webhook', return_value=True) +@patch('lambda_function.get_ssm_parameter', return_value='https://hooks.slack.com/test') +def test_handle_liveness_event_passes(mock_ssm, mock_ping): + """Returns 200 when all checks pass.""" + response = lambda_function.handle_liveness_event({'RequestType': 'LivenessCheck'}) + assert response['statusCode'] == 200 + + +@patch.dict(os.environ, {'APPS': 'bcda'}, clear=True) +@patch('lambda_function.ping_slack_webhook', return_value=False) +@patch('lambda_function.get_ssm_parameter', return_value='https://hooks.slack.com/test') +def test_handle_liveness_event_raises_on_failure(mock_ssm, mock_ping): + """Raises RuntimeError when a check fails — surfaces as Lambda error in Tofu.""" + with pytest.raises(RuntimeError, match='bcda'): + lambda_function.handle_liveness_event({'RequestType': 'LivenessCheck'}) \ No newline at end of file diff --git 
a/terraform/services/alarm-to-slack/main.tf b/terraform/services/alarm-to-slack/main.tf index 9ce9d454..f9212072 100644 --- a/terraform/services/alarm-to-slack/main.tf +++ b/terraform/services/alarm-to-slack/main.tf @@ -2,25 +2,50 @@ locals { full_name = "${var.app}-${var.env}-alarm-to-slack" } -data "aws_caller_identity" "current" {} +import { + to = module.sns_to_slack_function.aws_cloudwatch_log_group.function + id = "/aws/lambda/${local.full_name}" +} -module "sns_to_slack_function" { - source = "github.com/CMSgov/cdap/terraform/modules/function?ref=2874c72ccd4c4821e5e3f77ccf61cf77ed05169f" +data "aws_ssm_parameters_by_path" "slack_webhook_urls" { + for_each = toset(var.apps_served) + path = "/${each.value}/lambda/slack_webhook_url" +} - app = var.app - env = var.env - architecture = "arm64" +module "sns_to_slack_function" { + source = "../../modules/function" + app = var.app + env = var.env name = local.full_name description = "Listens for CloudWatch Alerts and forwards to Slack" - # TODO use zip file + architecture = "arm64" + handler = "lambda_function.lambda_handler" + runtime = "python3.13" + + ssm_parameter_paths = flatten([ + for app, data in data.aws_ssm_parameters_by_path.slack_webhook_urls : + data.arns + ]) - handler = "lambda_function.lambda_handler" - runtime = "python3.13" + function_role_inline_policies = { + sqs-trigger = data.aws_iam_policy_document.sqs_trigger.json + } + + # Point to the local source directory — module handles zip + upload + source_dir = "${path.module}/lambda_src" + + # Optionally exclude tests and cache + source_dir_excludes = [ + "__pycache__", + "test_*.py", + "*.pyc", + ] environment_variables = { IGNORE_OK = true + APPS = join(",", var.apps_served) } } @@ -36,32 +61,3 @@ module "sns_to_slack_queue" { data.aws_iam_policy_document.sqs_queue_policy.json ] } - -data "aws_iam_policy_document" "sqs_queue_policy" { - statement { - sid = "allow_sns_access" - effect = "Allow" - - principals { - type = "Service" - identifiers = 
["sns.amazonaws.com"] - } - - actions = [ - "SQS:SendMessage", - ] - - resources = [ - module.sns_to_slack_queue.arn - ] - - condition { - test = "ArnLike" - variable = "aws:SourceArn" - - values = [ - "arn:aws:sns:us-east-1:${data.aws_caller_identity.current.account_id}:*" - ] - } - } -} diff --git a/terraform/services/alarm-to-slack/terraform.tf b/terraform/services/alarm-to-slack/terraform.tf deleted file mode 100644 index 4ae8cd4d..00000000 --- a/terraform/services/alarm-to-slack/terraform.tf +++ /dev/null @@ -1,18 +0,0 @@ -provider "aws" { - default_tags { - tags = { - application = var.app - business = "oeda" - code = "https://github.com/CMSgov/cdap/tree/main/terraform/services/alarm-to-slack" - component = "alarm-to-slack" - environment = var.env - terraform = true - } - } -} - -terraform { - backend "s3" { - key = "alarm-to-slack/terraform.tfstate" - } -} diff --git a/terraform/services/alarm-to-slack/tofu.tf b/terraform/services/alarm-to-slack/tofu.tf new file mode 100644 index 00000000..881e5a4b --- /dev/null +++ b/terraform/services/alarm-to-slack/tofu.tf @@ -0,0 +1,30 @@ +terraform { + backend "s3" { + key = "alarm-to-slack/terraform.tfstate" + } +} + +provider "aws" { + region = "us-east-1" + default_tags { + tags = module.standards.default_tags + } +} + +provider "aws" { + alias = "secondary" + region = "us-west-2" + default_tags { + tags = module.standards.default_tags + } +} + +module "standards" { + source = "../../modules/standards" + providers = { aws = aws, aws.secondary = aws.secondary } + + app = var.app + env = var.env + root_module = "https://github.com/CMSgov/cdap/tree/main/terraform/services/${basename(abspath(path.module))}/" + service = replace(basename(abspath(path.module)), "/^[0-9]+-/", "") +} diff --git a/terraform/services/alarm-to-slack/variables.tf b/terraform/services/alarm-to-slack/variables.tf index c2220adf..70bb3c76 100644 --- a/terraform/services/alarm-to-slack/variables.tf +++ 
b/terraform/services/alarm-to-slack/variables.tf @@ -15,3 +15,9 @@ variable "env" { error_message = "Valid values for env are test, prod." } } + +variable "apps_served" { + description = "List of app names whose Slack webhook URLs this function reads from SSM at runtime." + type = list(string) + default = ["bcda", "cdap", "dpc"] +} diff --git a/terraform/services/github-actions-role/main.tf b/terraform/services/github-actions-role/main.tf index 3ebbe950..72313c73 100644 --- a/terraform/services/github-actions-role/main.tf +++ b/terraform/services/github-actions-role/main.tf @@ -398,6 +398,7 @@ data "aws_iam_policy_document" "github_actions_policy" { "logs:DescribeLogGroups", "logs:DescribeLogStreams", "logs:DescribeSubscriptionFilters", + "logs:ListTagsForResource", "logs:PutRetentionPolicy" ] resources = ["*"] diff --git a/terraform/services/tftesting/function/README.md b/terraform/services/tftesting/function/README.md new file mode 100644 index 00000000..6ec21fb7 --- /dev/null +++ b/terraform/services/tftesting/function/README.md @@ -0,0 +1,3 @@ +# Lambda function module +This allows manual testing of the module at terraform/modules/function. +This will provision a basic lambda, from the source code in ./lambda_src/ and verify liveness using the invocation in the function module. 
diff --git a/terraform/services/tftesting/function/iam.tf b/terraform/services/tftesting/function/iam.tf new file mode 100644 index 00000000..c5f9634e --- /dev/null +++ b/terraform/services/tftesting/function/iam.tf @@ -0,0 +1,10 @@ +data "aws_iam_policy_document" "ssm_inline_test" { + statement { + sid = "InlinePolicySSMRead" + effect = "Allow" + actions = [ + "ssm:GetParameter" + ] + resources = [aws_ssm_parameter.inline_policy_test.arn] + } +} diff --git a/terraform/services/tftesting/function/lambda_src/lambda_function.py b/terraform/services/tftesting/function/lambda_src/lambda_function.py new file mode 100644 index 00000000..09c9c4cd --- /dev/null +++ b/terraform/services/tftesting/function/lambda_src/lambda_function.py @@ -0,0 +1,45 @@ +import json +import logging +import os + +import boto3 + +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +ssm = boto3.client("ssm") + + +def function_handler(event, context): + logger.info("Received event: %s", json.dumps(event)) + + request_type = event.get("RequestType") or event.get("source", "") + + if request_type == "LivenessCheck": + return _liveness_check() + + logger.warning("Unknown RequestType: %s", request_type) + return {"status": "ok", "event": event} + + +def _liveness_check(): + """ + Validates that the function can reach dependencies. + Raises on failure so tofu apply fails. + """ + param_name = os.environ["SSM_PARAM_PATH"] + + # Validates: egress rules, IAM SSM permissions, KMS decrypt permission + response = ssm.get_parameter(Name=param_name, WithDecryption=True) + value = response["Parameter"]["Value"] + + if not value: + raise ValueError("SSM parameter was empty") + + if os.environ.get("ENVIRONMENT") != "tftesting": + raise ValueError( + f"ENVIRONMENT env var not set correctly: {os.environ.get('ENVIRONMENT')!r}" + ) + + logger.info("Liveness check passed. 
SSM value retrieved successfully.") + return {"status": "ok", "message": "Lambda is healthy"} diff --git a/terraform/services/tftesting/function/main.tf b/terraform/services/tftesting/function/main.tf new file mode 100644 index 00000000..83b8ee01 --- /dev/null +++ b/terraform/services/tftesting/function/main.tf @@ -0,0 +1,86 @@ +resource "aws_ssm_parameter" "test_config" { + name = "/cdap/test/tftesting/function/testvalue" + # only setting as secure for testing + type = "SecureString" + # not an actually secure string + value = "tftesting" + + key_id = module.platform.kms_alias_primary.id +} + +# This parameter is NOT in ssm_parameter_paths — only accessible via inline policy +resource "aws_ssm_parameter" "inline_policy_test" { + name = "/cdap/test/tftesting/function/inline-policy-test" + type = "SecureString" + value = "inline-policy-access-confirmed" + key_id = module.platform.kms_alias_primary.id +} + +module "tftesting_function" { + source = "../../../modules/function" + + app = "cdap" + env = "test" + name = "tftesting" + description = "Ephemeral Lambda for CI/CD integration testing — exercises module features" + + source_dir = "${path.module}/lambda_src" + source_dir_excludes = ["**/__pycache__/**", "**/*.pyc", "**/tests/**"] + + handler = "lambda_function.function_handler" + runtime = "python3.11" + architecture = "arm64" + timeout = 30 + memory_size = 256 # Evaluates non-default memory + + liveness_check_enabled = true + + log_retention_days = 7 + + # Exercises environment_variables + environment_variables = { + ENVIRONMENT = "tftesting" + SSM_PARAM_PATH = aws_ssm_parameter.test_config.name + INLINE_POLICY_PARAM_PATH = aws_ssm_parameter.inline_policy_test.name + } + + # Exercises ssm_parameter_paths + ssm_parameter_paths = [aws_ssm_parameter.test_config.arn] + + # Exercises schedule_expression — can be set for scheduler testing + schedule_expression = "" + + # Exercises function_role_inline_policies — + function_role_inline_policies = { + "ssm-inline-test" 
= data.aws_iam_policy_document.ssm_inline_test.json + } + + # Placeholder if evaluating github_actions_repos for deploys outside of Tofu + github_actions_repos = [] + + # Scoped egress — HTTPS only to allow testing of SSM parameter retrieval ; remove when VPC endpoint is introduced + egress_rules = [ + { + name = "allow-https-ipv4" + from_port = 443 + to_port = 443 + protocol = "tcp" + cidr_ipv4 = "0.0.0.0/0" + description = "Allow HTTPS egress for AWS API calls" + } + ] + + # Rollback support + rollback_version = null # null = track latest published version +} + + +module "platform" { + providers = { aws = aws, aws.secondary = aws.secondary } + + source = "../../../modules/platform" + app = "cdap" + env = "test" + root_module = "https://github.com/CMSgov/cdap/tree/main/terraform/services/tftesting/${basename(abspath(path.module))}/" + service = replace(basename(abspath(path.module)), "/^[0-9]+-/", "") +} diff --git a/terraform/services/tftesting/function/outputs.tf b/terraform/services/tftesting/function/outputs.tf new file mode 100644 index 00000000..d98ce5e5 --- /dev/null +++ b/terraform/services/tftesting/function/outputs.tf @@ -0,0 +1,14 @@ +output "function_name" { + description = "Name of the test Lambda function" + value = module.tftesting_function.name +} + +output "function_arn" { + description = "ARN of the test Lambda function" + value = module.tftesting_function.alias_arn +} + +output "function_version" { + description = "Published version of the test Lambda" + value = module.tftesting_function.function_version +} diff --git a/terraform/services/tftesting/function/tofu.tf b/terraform/services/tftesting/function/tofu.tf new file mode 100644 index 00000000..d9149467 --- /dev/null +++ b/terraform/services/tftesting/function/tofu.tf @@ -0,0 +1,20 @@ +provider "aws" { + region = "us-east-1" + default_tags { + tags = module.platform.default_tags + } +} + +provider "aws" { + alias = "secondary" + region = "us-west-2" + default_tags { + tags = 
module.platform.default_tags + } +} + +terraform { + backend "s3" { + key = "tftesting/function/terraform.tfstate" + } +}
"bcda",
"cdap",
"dpc"
]