|
# Terraform deployment for the Data Commons Import Automation Workflow.
# This file sets up:
# - Necessary GCP APIs
# - Secret Manager secrets: import-config and dc-api-key
# - GCS Buckets for imports, mounting, and Dataflow templates
# - Spanner Instance and Database with schema, plus the initial IngestionLock row
# - Artifact Registry for hosting Docker images (Flex Template & Executor)
# - Pub/Sub Topic and Subscription for triggering imports
# - Cloud Build Triggers for CI/CD of Executor, Functions, Workflows, Ingestion Pipeline, and Import Tool
# - Unified Service Account with necessary IAM roles for Workflows, Functions, and Pub/Sub
| 11 | + |
# Provider requirements for this configuration.
terraform {
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = ">= 5.0.0"
    }
    archive = {
      source = "hashicorp/archive"
    }
    # Declared explicitly because this configuration uses null_resource
    # (Spanner lock bootstrap and the initial build kick-off); relying on
    # implicit provider resolution hides the dependency from readers and
    # from dependency lock tooling.
    null = {
      source = "hashicorp/null"
    }
  }
}
| 23 | + |
# ---- Input variables ----

# Target project; required, no default.
variable "project_id" {
  type        = string
  description = "The GCP Project ID"
}

# Region used for regional resources in this file.
variable "region" {
  type        = string
  default     = "us-central1"
  description = "The GCP Region"
}

# GitHub coordinates used by the Cloud Build triggers.
variable "github_owner" {
  type        = string
  default     = "datacommonsorg"
  description = "The owner of the GitHub repository"
}

variable "github_repo_name" {
  type        = string
  default     = "data"
  description = "The name of the GitHub repository (data)"
}

variable "github_repo_ingestion_name" {
  type        = string
  default     = "import"
  description = "The name of the GitHub repository (import)"
}

# Spanner identifiers.
variable "spanner_instance_id" {
  type        = string
  default     = "datcom-import-instance"
  description = "Spanner Instance ID"
}

variable "spanner_database_id" {
  type        = string
  default     = "dc-import-db"
  description = "Spanner Database ID"
}

# BigQuery dataset passed to the workflow build as a substitution.
variable "bq_dataset_id" {
  type        = string
  default     = "datacommons"
  description = "BigQuery Dataset ID for aggregation"
}

# API key stored in Secret Manager; required and marked sensitive so it is
# redacted from plan/apply output.
variable "dc_api_key" {
  type        = string
  sensitive   = true
  description = "Data Commons API Key"
}
| 76 | + |
| 77 | +# --- APIs --- |
| 78 | + |
locals {
  # Fully-qualified names of the Google Cloud APIs to enable. The short names
  # are listed alphabetically and suffixed with ".googleapis.com".
  services = [
    for api in [
      "artifactregistry",
      "batch",
      "cloudbuild",
      "cloudfunctions",
      "cloudscheduler",
      "compute",
      "dataflow",
      "iam",
      "pubsub",
      "run",
      "secretmanager",
      "spanner",
      "storage",
      "workflows",
    ] : "${api}.googleapis.com"
  ]
}

# Enable each API in local.services on the target project.
resource "google_project_service" "services" {
  project  = var.project_id
  for_each = toset(local.services)
  service  = each.value

  # Keep the API enabled even if this resource is destroyed.
  disable_on_destroy = false
}
| 105 | + |
| 106 | +# --- Secret Manager --- |
| 107 | + |
# Secret "import-config": a JSON document that currently carries only the
# Data Commons API key.
resource "google_secret_manager_secret" "import_config" {
  project   = var.project_id
  secret_id = "import-config"

  replication {
    auto {}
  }

  depends_on = [google_project_service.services]
}

resource "google_secret_manager_secret_version" "import_config_v1" {
  secret = google_secret_manager_secret.import_config.id

  # Payload is the JSON encoding of {"dc_api_key": <key>}.
  secret_data = jsonencode({
    dc_api_key = var.dc_api_key
  })
}

# Secret "dc-api-key": the same API key stored as a raw string.
resource "google_secret_manager_secret" "dc_api_key" {
  project   = var.project_id
  secret_id = "dc-api-key"

  replication {
    auto {}
  }

  depends_on = [google_project_service.services]
}

resource "google_secret_manager_secret_version" "dc_api_key_v1" {
  secret      = google_secret_manager_secret.dc_api_key.id
  secret_data = var.dc_api_key
}
| 141 | + |
| 142 | +# --- GCS Buckets --- |
| 143 | + |
# Bucket "<project>-imports". Its name is passed to the Cloud Build triggers
# in this file as the GCS / template bucket substitution.
resource "google_storage_bucket" "import_bucket" {
  name                        = "${var.project_id}-imports"
  location                    = var.region
  project                     = var.project_id
  uniform_bucket_level_access = true

  depends_on = [google_project_service.services]
}

# Bucket "<project>-mount". The workflow Cloud Build trigger passes exactly
# this name as _GCS_MOUNT_BUCKET, so it is provisioned here to make sure the
# name refers to a bucket that exists.
# NOTE(review): if this bucket is already managed elsewhere, import it into
# state instead of creating it.
resource "google_storage_bucket" "mount_bucket" {
  name                        = "${var.project_id}-mount"
  location                    = var.region
  project                     = var.project_id
  uniform_bucket_level_access = true

  depends_on = [google_project_service.services]
}
| 152 | + |
| 153 | +# --- Spanner --- |
| 154 | + |
# Single-node regional Spanner instance located in var.region.
resource "google_spanner_instance" "import_instance" {
  project      = var.project_id
  name         = var.spanner_instance_id
  display_name = "Import Automation Spanner Instance"

  # Instance configuration name is derived from the region.
  config    = "regional-${var.region}"
  num_nodes = 1

  depends_on = [google_project_service.services]
}
| 164 | + |
# Spanner database whose schema is read from ../workflow/spanner_schema.sql.
resource "google_spanner_database" "import_db" {
  project  = var.project_id
  instance = google_spanner_instance.import_instance.name
  name     = var.spanner_database_id

  # The schema file is split on ";" into individual statements; each one is
  # trimmed and chunks that are empty after trimming are dropped.
  ddl = [
    for stmt in split(";", file("${path.module}/../workflow/spanner_schema.sql")) :
    trimspace(stmt)
    if trimspace(stmt) != ""
  ]

  # Guard against accidental deletion of the database via Terraform.
  deletion_protection = true
}
| 173 | + |
# Seed the IngestionLock table with the 'global_ingestion_lock' row. This is
# DML, so it is run through gcloud after the database (and its DDL) exists.
# Requires the gcloud CLI to be installed and authenticated on the machine
# running Terraform.
resource "null_resource" "init_spanner_lock" {
  # Re-run the seed if the database is ever replaced.
  triggers = {
    database = google_spanner_database.import_db.id
  }

  provisioner "local-exec" {
    # INSERT OR IGNORE makes the statement idempotent: an existing row is not
    # an error, so genuine failures (auth, missing table, missing gcloud) are
    # reported instead of being masked by a catch-all "|| echo".
    command = <<EOT
gcloud spanner databases execute-sql ${google_spanner_database.import_db.name} \
  --instance=${google_spanner_instance.import_instance.name} \
  --project=${var.project_id} \
  --sql="INSERT OR IGNORE INTO IngestionLock (LockID) VALUES ('global_ingestion_lock')"
EOT
  }

  depends_on = [google_spanner_database.import_db]
}
| 187 | + |
| 188 | +# --- IAM --- |
| 189 | + |
# Service account shared by the Cloud Build triggers and the Pub/Sub push
# subscription defined in this file.
resource "google_service_account" "automation_sa" {
  account_id   = "import-automation-sa"
  display_name = "Service Account for Import Automation (Workflows & Functions)"
  project      = var.project_id

  # Wait for API enablement (iam.googleapis.com is in local.services), matching
  # the other resources in this file; otherwise creation can race with it on a
  # fresh project.
  depends_on = [google_project_service.services]
}
| 195 | + |
# Project-level role bindings for the automation service account, one
# google_project_iam_member per role.
# NOTE(review): several of these are admin-level roles; consider narrowing
# them once the exact permissions needed by the builds are known.
resource "google_project_iam_member" "automation_roles" {
  for_each = toset([
    "roles/workflows.admin",
    "roles/cloudfunctions.admin",
    "roles/run.admin",
    "roles/run.invoker",
    "roles/batch.jobsEditor",
    "roles/dataflow.admin",
    "roles/logging.logWriter",
    "roles/storage.objectAdmin",
    "roles/iam.serviceAccountUser",
    "roles/spanner.databaseAdmin",
    "roles/bigquery.dataEditor",
    "roles/bigquery.jobUser",
    "roles/artifactregistry.admin",
    "roles/secretmanager.secretAccessor",
    "roles/cloudbuild.builds.builder",
  ])
  project = var.project_id
  role    = each.key

  # IAM principal identifiers use the camelCase "serviceAccount:" prefix;
  # "service_account:" is rejected by the API as an invalid member.
  member = "serviceAccount:${google_service_account.automation_sa.email}"
}
| 218 | + |
| 219 | +# --- Artifact Registry --- |
| 220 | + |
# Docker-format Artifact Registry repository; the Cloud Build triggers in this
# file build their image paths from it.
resource "google_artifact_registry_repository" "automation_repo" {
  project       = var.project_id
  location      = var.region
  repository_id = "import-automation"
  format        = "DOCKER"
  description   = "Docker repository for import automation images"

  depends_on = [google_project_service.services]
}
| 230 | + |
| 231 | +# --- Cloud Build Triggers --- |
| 232 | + |
# Cloud Build trigger for the executor image: fires on pushes to main of the
# data repository and runs import-automation/executor/cloudbuild.yaml.
resource "google_cloudbuild_trigger" "executor_trigger" {
  project  = var.project_id
  location = var.region
  name     = "dc-import-executor"

  # Builds run as the automation service account.
  service_account = google_service_account.automation_sa.id

  filename = "import-automation/executor/cloudbuild.yaml"

  github {
    owner = var.github_owner
    name  = var.github_repo_name

    push {
      branch = "^main$"
    }
  }

  substitutions = {
    # <region>-docker.pkg.dev/<project>/<repo>/dc-import-executor
    _DOCKER_IMAGE = format(
      "%s-docker.pkg.dev/%s/%s/dc-import-executor",
      var.region,
      var.project_id,
      google_artifact_registry_repository.automation_repo.name,
    )
  }

  depends_on = [google_artifact_registry_repository.automation_repo]
}
| 255 | + |
# Cloud Build trigger for the workflow: fires on pushes to main of the data
# repository and runs import-automation/workflow/cloudbuild_main.yaml.
resource "google_cloudbuild_trigger" "workflow_trigger" {
  project  = var.project_id
  location = var.region
  name     = "import-workflow-trigger"

  # Builds run as the automation service account.
  service_account = google_service_account.automation_sa.id

  filename = "import-automation/workflow/cloudbuild_main.yaml"

  github {
    owner = var.github_owner
    name  = var.github_repo_name

    push {
      branch = "^main$"
    }
  }

  # Values handed to the build as user-defined substitutions.
  substitutions = {
    # Project and location
    _PROJECT_ID = var.project_id
    _LOCATION   = var.region

    # Spanner coordinates
    _SPANNER_PROJECT_ID  = var.project_id
    _SPANNER_INSTANCE_ID = var.spanner_instance_id
    _SPANNER_DATABASE_ID = var.spanner_database_id

    # Storage; the mount bucket is referenced by name only
    _GCS_BUCKET_ID    = google_storage_bucket.import_bucket.name
    _GCS_MOUNT_BUCKET = "${var.project_id}-mount"

    # BigQuery
    _BQ_DATASET_ID = var.bq_dataset_id
  }
}
| 284 | + |
# Cloud Build trigger for the ingestion pipeline: fires on pushes to main of
# the import repository and runs pipeline/ingestion/cloudbuild.yaml.
resource "google_cloudbuild_trigger" "ingestion_trigger" {
  project  = var.project_id
  location = var.region
  name     = "ingestion-pipeline-trigger"

  # Builds run as the automation service account.
  service_account = google_service_account.automation_sa.id

  filename = "pipeline/ingestion/cloudbuild.yaml"

  github {
    owner = var.github_owner
    name  = var.github_repo_ingestion_name

    push {
      branch = "^main$"
    }
  }

  substitutions = {
    _VERSION         = "0.1-SNAPSHOT"
    _TEMPLATE_BUCKET = google_storage_bucket.import_bucket.name

    # <region>-docker.pkg.dev/<project>/<repo>/dataflow-templates/ingestion
    _IMAGE_GCR_PATH = format(
      "%s-docker.pkg.dev/%s/%s/dataflow-templates/ingestion",
      var.region,
      var.project_id,
      google_artifact_registry_repository.automation_repo.name,
    )
  }

  depends_on = [
    google_artifact_registry_repository.automation_repo,
    google_storage_bucket.import_bucket
  ]
}
| 312 | + |
# Cloud Build trigger for the import tool: fires on pushes to main of the
# import repository and runs the cloudbuild.yaml at the repository root.
resource "google_cloudbuild_trigger" "import_tool_trigger" {
  project  = var.project_id
  location = var.region
  name     = "dc-import-tool-trigger"

  # Builds run as the automation service account.
  service_account = google_service_account.automation_sa.id

  filename = "cloudbuild.yaml"

  github {
    owner = var.github_owner
    name  = var.github_repo_ingestion_name

    push {
      branch = "^main$"
    }
  }

  substitutions = {
    _GCS_BUCKET = google_storage_bucket.import_bucket.name

    # Same image path that the executor trigger uses:
    # <region>-docker.pkg.dev/<project>/<repo>/dc-import-executor
    _DOCKER_IMAGE = format(
      "%s-docker.pkg.dev/%s/%s/dc-import-executor",
      var.region,
      var.project_id,
      google_artifact_registry_repository.automation_repo.name,
    )
  }
}
| 335 | + |
| 336 | +# --- Pub/Sub --- |
| 337 | + |
# Pub/Sub topic that carries import-automation trigger messages.
resource "google_pubsub_topic" "import_automation_trigger" {
  name    = "import-automation-trigger"
  project = var.project_id

  # Wait for API enablement (pubsub.googleapis.com is in local.services),
  # matching the other resources in this file; otherwise creation can race
  # with it on a fresh project.
  depends_on = [google_project_service.services]
}
| 342 | + |
# Push subscription that forwards completed-transfer messages to the helper
# function over HTTPS, authenticated with an OIDC token minted for the
# automation service account.
resource "google_pubsub_subscription" "import_automation_sub" {
  name    = "import-automation-sub"
  topic   = google_pubsub_topic.import_automation_trigger.name
  project = var.project_id

  # Only messages whose transfer_status attribute is TRANSFER_COMPLETED are
  # delivered.
  filter = "attributes.transfer_status=\"TRANSFER_COMPLETED\""

  push_config {
    # Note: This endpoint is deployed via Cloud Build.
    # The previous value ("<name>-<project_id>-<region>.a.run.app") does not
    # match any Cloud Run URL scheme: run.app hosts embed either a generated
    # hash or the project *number*, neither of which is available here. The
    # cloudfunctions.net form is fully determined by region, project ID and
    # function name.
    # NOTE(review): assumes the function is deployed as
    # "import-automation-helper" in var.region — confirm against the workflow
    # cloudbuild config, and confirm the function accepts this URL as the OIDC
    # audience.
    push_endpoint = "https://${var.region}-${var.project_id}.cloudfunctions.net/import-automation-helper"
    oidc_token {
      service_account_email = google_service_account.automation_sa.email
    }
  }
}
| 359 | + |
| 360 | +# --- Trigger Initial Builds --- |
| 361 | + |
# Kick off one build per trigger right after creation so the images,
# templates and workflow exist without waiting for the first push to main.
# Requires the gcloud CLI to be installed and authenticated on the machine
# running Terraform.
resource "null_resource" "trigger_initial_builds" {
  provisioner "local-exec" {
    # "set -e" stops at the first failing gcloud call; without it the shell
    # reports only the exit status of the last command, so earlier failures
    # would go unnoticed.
    command = <<EOT
set -e
gcloud builds triggers run ${google_cloudbuild_trigger.executor_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
gcloud builds triggers run ${google_cloudbuild_trigger.ingestion_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
gcloud builds triggers run ${google_cloudbuild_trigger.workflow_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
gcloud builds triggers run ${google_cloudbuild_trigger.import_tool_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
EOT
  }

  depends_on = [
    google_cloudbuild_trigger.executor_trigger,
    google_cloudbuild_trigger.ingestion_trigger,
    google_cloudbuild_trigger.workflow_trigger,
    google_cloudbuild_trigger.import_tool_trigger,
    # The builds run as the automation service account, so its role bindings
    # must be in place before any build is started.
    google_project_iam_member.automation_roles,
  ]
}
| 379 | + |
# ---- Outputs: IDs of the Cloud Build triggers created above ----

output "executor_trigger_id" {
  description = "ID of the executor Cloud Build trigger"
  value       = google_cloudbuild_trigger.executor_trigger.id
}

output "workflow_trigger_id" {
  description = "ID of the workflow Cloud Build trigger"
  value       = google_cloudbuild_trigger.workflow_trigger.id
}

output "ingestion_trigger_id" {
  description = "ID of the ingestion pipeline Cloud Build trigger"
  value       = google_cloudbuild_trigger.ingestion_trigger.id
}

output "import_tool_trigger_id" {
  description = "ID of the import tool Cloud Build trigger"
  value       = google_cloudbuild_trigger.import_tool_trigger.id
}
0 commit comments