Skip to content

Commit 99c5361

Browse files
committed
Add terraform deployment script for import automation
1 parent dc738c8 commit 99c5361

1 file changed

Lines changed: 395 additions & 0 deletions

File tree

Lines changed: 395 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,395 @@
# Terraform deployment for Data Commons Import Automation Workflow
# This file sets up:
# - Necessary GCP APIs
# - Secret Manager for the import-config secret
# - GCS Buckets for imports, mounting, and Dataflow templates
# - Spanner Instance and Database with schema
# - Artifact Registry for hosting Docker images (Flex Template & Executor)
# - Pub/Sub Topic and Subscription for triggering imports
# - Cloud Build Triggers for CI/CD of Executor, Functions, Workflows, and Ingestion Pipeline
# - Unified Service Account with necessary IAM roles for Workflows, Functions, and Pub/Sub
12+
terraform {
  required_providers {
    # Google Cloud provider, pinned to the 5.x major line or newer.
    google = {
      source  = "hashicorp/google"
      version = ">= 5.0.0"
    }
    # Archive provider; presumably used to zip function sources elsewhere
    # in this configuration — no version constraint is pinned here.
    archive = {
      source = "hashicorp/archive"
    }
  }
}
23+
24+
# GCP project that hosts every resource in this configuration.
variable "project_id" {
  description = "The GCP Project ID"
  type        = string
}

# Region used for buckets, triggers, Artifact Registry and Spanner config.
variable "region" {
  description = "The GCP Region"
  type        = string
  default     = "us-central1"
}

# GitHub organization that owns the source repositories.
variable "github_owner" {
  description = "The owner of the GitHub repository"
  type        = string
  default     = "datacommonsorg"
}

# Repository driving the executor and workflow builds.
variable "github_repo_name" {
  description = "The name of the GitHub repository (data)"
  type        = string
  default     = "data"
}

# Repository driving the ingestion-pipeline and import-tool builds.
variable "github_repo_ingestion_name" {
  description = "The name of the GitHub repository (import)"
  type        = string
  default     = "import"
}

# Identifier for the Spanner instance created below.
variable "spanner_instance_id" {
  description = "Spanner Instance ID"
  type        = string
  default     = "datcom-import-instance"
}

# Identifier for the Spanner database created below.
variable "spanner_database_id" {
  description = "Spanner Database ID"
  type        = string
  default     = "dc-import-db"
}

# BigQuery dataset passed through to the workflow build trigger.
variable "bq_dataset_id" {
  description = "BigQuery Dataset ID for aggregation"
  type        = string
  default     = "datacommons"
}

# API key stored in Secret Manager; marked sensitive so it is redacted
# from plan/apply output.
variable "dc_api_key" {
  description = "Data Commons API Key"
  type        = string
  sensitive   = true
}
76+
77+
# --- APIs ---

locals {
  # Every GCP service API that must be enabled before the resources in
  # this file can be created.
  services = [
    "artifactregistry.googleapis.com",
    "batch.googleapis.com",
    "cloudbuild.googleapis.com",
    "cloudfunctions.googleapis.com",
    "cloudscheduler.googleapis.com",
    "compute.googleapis.com",
    "dataflow.googleapis.com",
    "iam.googleapis.com",
    "pubsub.googleapis.com",
    "run.googleapis.com",
    "secretmanager.googleapis.com",
    "spanner.googleapis.com",
    "storage.googleapis.com",
    "workflows.googleapis.com",
  ]
}

# Enable each required API. disable_on_destroy is off so that destroying
# this stack does not switch the APIs off for the rest of the project.
resource "google_project_service" "services" {
  for_each = toset(local.services)
  project  = var.project_id
  service  = each.key

  disable_on_destroy = false
}
105+
106+
# --- Secret Manager ---

# JSON config secret consumed by the import automation; currently only
# carries the Data Commons API key.
resource "google_secret_manager_secret" "import_config" {
  project   = var.project_id
  secret_id = "import-config"

  replication {
    auto {}
  }

  depends_on = [google_project_service.services]
}

# First version of import-config: {"dc_api_key": "..."}.
resource "google_secret_manager_secret_version" "import_config_v1" {
  secret = google_secret_manager_secret.import_config.id
  secret_data = jsonencode({
    dc_api_key = var.dc_api_key
  })
}

# Bare API-key secret for consumers that want the raw key rather than
# the JSON config above.
resource "google_secret_manager_secret" "dc_api_key" {
  project   = var.project_id
  secret_id = "dc-api-key"

  replication {
    auto {}
  }

  depends_on = [google_project_service.services]
}

# First version of dc-api-key: the raw key string.
resource "google_secret_manager_secret_version" "dc_api_key_v1" {
  secret      = google_secret_manager_secret.dc_api_key.id
  secret_data = var.dc_api_key
}
141+
142+
# --- GCS Buckets ---

# Bucket holding import artifacts; also used as the template bucket for
# the ingestion build trigger below.
# NOTE(review): the workflow trigger references a "${var.project_id}-mount"
# bucket that is NOT managed by this configuration — confirm it is created
# elsewhere.
resource "google_storage_bucket" "import_bucket" {
  project  = var.project_id
  name     = "${var.project_id}-imports"
  location = var.region

  uniform_bucket_level_access = true

  depends_on = [google_project_service.services]
}
152+
153+
# --- Spanner ---

# Single-node regional Spanner instance backing the import workflow.
resource "google_spanner_instance" "import_instance" {
  project      = var.project_id
  name         = var.spanner_instance_id
  config       = "regional-${var.region}"
  display_name = "Import Automation Spanner Instance"
  num_nodes    = 1

  depends_on = [google_project_service.services]
}

# Database created from the checked-in schema file. The DDL is produced by
# splitting the file on ";" and dropping empty statements.
# NOTE(review): this naive split would break if the schema ever contains a
# semicolon inside a string literal — confirm against spanner_schema.sql.
resource "google_spanner_database" "import_db" {
  project  = var.project_id
  instance = google_spanner_instance.import_instance.name
  name     = var.spanner_database_id
  ddl      = [for s in split(";", file("${path.module}/../workflow/spanner_schema.sql")) : trimspace(s) if trimspace(s) != ""]

  # Guard against accidental `terraform destroy` of the database.
  deletion_protection = true
}
173+
174+
# Seed the IngestionLock table with its single global row via DML.
# The trailing `|| echo` makes the command tolerate the row already
# existing (and, by extension, any other gcloud failure) so re-applies
# do not fail.
resource "null_resource" "init_spanner_lock" {
  depends_on = [google_spanner_database.import_db]

  provisioner "local-exec" {
    command = <<EOT
      gcloud spanner databases execute-sql ${google_spanner_database.import_db.name} \
        --instance=${google_spanner_instance.import_instance.name} \
        --project=${var.project_id} \
        --sql="INSERT INTO IngestionLock (LockID) VALUES ('global_ingestion_lock')" || echo 'Lock already exists'
    EOT
  }
}
187+
188+
# --- IAM ---

# Unified service account used by Workflows, Functions, Cloud Build and
# the Pub/Sub push subscription below.
resource "google_service_account" "automation_sa" {
  project      = var.project_id
  account_id   = "import-automation-sa"
  display_name = "Service Account for Import Automation (Workflows & Functions)"
}

# Grant the automation service account every role the pipeline needs.
resource "google_project_iam_member" "automation_roles" {
  for_each = toset([
    "roles/workflows.admin",
    "roles/cloudfunctions.admin",
    "roles/run.admin",
    "roles/run.invoker",
    "roles/batch.jobsEditor",
    "roles/dataflow.admin",
    "roles/logging.logWriter",
    "roles/storage.objectAdmin",
    "roles/iam.serviceAccountUser",
    "roles/spanner.databaseAdmin",
    "roles/bigquery.dataEditor",
    "roles/bigquery.jobUser",
    "roles/artifactregistry.admin",
    "roles/secretmanager.secretAccessor",
    "roles/cloudbuild.builds.builder",
  ])
  project = var.project_id
  role    = each.key
  # BUG FIX: the IAM member prefix must be "serviceAccount:" (camelCase).
  # The previous "service_account:" prefix is rejected by the IAM API,
  # so none of these bindings could be applied.
  member = "serviceAccount:${google_service_account.automation_sa.email}"
}
218+
219+
# --- Artifact Registry ---

# Docker repository hosting the executor image and the Dataflow
# ingestion template image built by the triggers below.
resource "google_artifact_registry_repository" "automation_repo" {
  project       = var.project_id
  location      = var.region
  repository_id = "import-automation"
  description   = "Docker repository for import automation images"
  format        = "DOCKER"

  depends_on = [google_project_service.services]
}
230+
231+
# --- Cloud Build Triggers ---

# Rebuilds the executor image on every push to main of the data repo.
resource "google_cloudbuild_trigger" "executor_trigger" {
  project  = var.project_id
  location = var.region
  name     = "dc-import-executor"

  github {
    owner = var.github_owner
    name  = var.github_repo_name
    push {
      branch = "^main$"
    }
  }

  filename = "import-automation/executor/cloudbuild.yaml"

  substitutions = {
    # Fully-qualified Artifact Registry path for the executor image.
    _DOCKER_IMAGE = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.automation_repo.name}/dc-import-executor"
  }

  service_account = google_service_account.automation_sa.id
  depends_on      = [google_artifact_registry_repository.automation_repo]
}
255+
256+
# Deploys the workflow stack on every push to main of the data repo.
resource "google_cloudbuild_trigger" "workflow_trigger" {
  project  = var.project_id
  location = var.region
  name     = "import-workflow-trigger"

  github {
    owner = var.github_owner
    name  = var.github_repo_name
    push {
      branch = "^main$"
    }
  }

  filename = "import-automation/workflow/cloudbuild_main.yaml"

  # Wire the infrastructure created in this file into the build.
  substitutions = {
    _PROJECT_ID          = var.project_id
    _SPANNER_PROJECT_ID  = var.project_id
    _SPANNER_INSTANCE_ID = var.spanner_instance_id
    _SPANNER_DATABASE_ID = var.spanner_database_id
    _GCS_BUCKET_ID       = google_storage_bucket.import_bucket.name
    _LOCATION            = var.region
    _GCS_MOUNT_BUCKET    = "${var.project_id}-mount"
    _BQ_DATASET_ID       = var.bq_dataset_id
  }

  service_account = google_service_account.automation_sa.id
}
284+
285+
# Builds the Dataflow ingestion template on pushes to main of the
# import repo.
resource "google_cloudbuild_trigger" "ingestion_trigger" {
  project  = var.project_id
  location = var.region
  name     = "ingestion-pipeline-trigger"

  github {
    owner = var.github_owner
    name  = var.github_repo_ingestion_name
    push {
      branch = "^main$"
    }
  }

  filename = "pipeline/ingestion/cloudbuild.yaml"

  substitutions = {
    _TEMPLATE_BUCKET = google_storage_bucket.import_bucket.name
    _IMAGE_GCR_PATH  = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.automation_repo.name}/dataflow-templates/ingestion"
    _VERSION         = "0.1-SNAPSHOT"
  }

  service_account = google_service_account.automation_sa.id

  depends_on = [
    google_artifact_registry_repository.automation_repo,
    google_storage_bucket.import_bucket
  ]
}
312+
313+
# Builds the import tool from the root cloudbuild.yaml of the import repo
# on pushes to main.
resource "google_cloudbuild_trigger" "import_tool_trigger" {
  project  = var.project_id
  location = var.region
  name     = "dc-import-tool-trigger"

  github {
    owner = var.github_owner
    name  = var.github_repo_ingestion_name
    push {
      branch = "^main$"
    }
  }

  filename = "cloudbuild.yaml"

  substitutions = {
    _GCS_BUCKET   = google_storage_bucket.import_bucket.name
    _DOCKER_IMAGE = "${var.region}-docker.pkg.dev/${var.project_id}/${google_artifact_registry_repository.automation_repo.name}/dc-import-executor"
  }

  service_account = google_service_account.automation_sa.id
}
335+
336+
# --- Pub/Sub ---

# Topic that receives transfer notifications for import automation.
resource "google_pubsub_topic" "import_automation_trigger" {
  name    = "import-automation-trigger"
  project = var.project_id

  # FIX: ensure pubsub.googleapis.com is enabled before creating the topic
  # on a fresh project; every other API-backed resource in this file
  # already carries this dependency. The subscription below depends on the
  # services transitively through its topic reference.
  depends_on = [google_project_service.services]
}

# Push subscription that forwards only completed-transfer events to the
# automation helper service, authenticating with an OIDC token minted for
# the automation service account.
resource "google_pubsub_subscription" "import_automation_sub" {
  name    = "import-automation-sub"
  topic   = google_pubsub_topic.import_automation_trigger.name
  project = var.project_id

  # Deliver only messages flagged as completed transfers.
  filter = "attributes.transfer_status=\"TRANSFER_COMPLETED\""

  push_config {
    # Note: This endpoint is deployed via Cloud Build.
    # The URL pattern below assumes Function Gen2 deployment.
    push_endpoint = "https://import-automation-helper-${var.project_id}-${var.region}.a.run.app"
    oidc_token {
      service_account_email = google_service_account.automation_sa.email
    }
  }
}
359+
360+
# --- Trigger Initial Builds ---

# Kick off one build per trigger immediately after creation so the images
# and deployments exist without waiting for the next push to main.
resource "null_resource" "trigger_initial_builds" {
  depends_on = [
    google_cloudbuild_trigger.executor_trigger,
    google_cloudbuild_trigger.ingestion_trigger,
    google_cloudbuild_trigger.workflow_trigger,
    google_cloudbuild_trigger.import_tool_trigger
  ]

  provisioner "local-exec" {
    command = <<EOT
      gcloud builds triggers run ${google_cloudbuild_trigger.executor_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
      gcloud builds triggers run ${google_cloudbuild_trigger.ingestion_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
      gcloud builds triggers run ${google_cloudbuild_trigger.workflow_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
      gcloud builds triggers run ${google_cloudbuild_trigger.import_tool_trigger.name} --region=${var.region} --project=${var.project_id} --branch=main
    EOT
  }
}
379+
380+
# Outputs

# Resource IDs of the four Cloud Build triggers, exposed for use by
# wrapping modules or for manual inspection.
output "executor_trigger_id" {
  value = google_cloudbuild_trigger.executor_trigger.id
}

output "workflow_trigger_id" {
  value = google_cloudbuild_trigger.workflow_trigger.id
}

output "ingestion_trigger_id" {
  value = google_cloudbuild_trigger.ingestion_trigger.id
}

output "import_tool_trigger_id" {
  value = google_cloudbuild_trigger.import_tool_trigger.id
}

0 commit comments

Comments
 (0)