キーワード
GCP/Google Cloud
CloudSQL
BigQuery
スケジュールされたクエリ
terraform
本記事では、terraformを使ってCloudSQLからBigQueryにデータを転送するための、BigQueryのtfファイルの作成方法を説明します。
terraformコード
bigquery.tf
# --------------------------------------------------
# RESOURCES
# --------------------------------------------------
# Look up the current project; its id and number are referenced below.
data "google_project" "project" {}
# Enable the BigQuery Data Transfer Service API on the project.
# Kept enabled on destroy so other consumers of the API are unaffected.
resource "google_project_service" "dts" {
  project                    = data.google_project.project.project_id
  service                    = "bigquerydatatransfer.googleapis.com"
  disable_dependent_services = false
  disable_on_destroy         = false
}
# Service account that the scheduled queries will run as.
resource "google_service_account" "bigquery_scheduled_queries" {
  account_id   = "bigquery-scheduled-queries"
  display_name = "BigQuery Scheduled Queries Service Account"
  description  = "Used to run BigQuery Data Transfer jobs."
}
# Give the newly enabled service and service account time to propagate
# before IAM bindings and transfer configs reference them.
resource "time_sleep" "wait_for_settings_propagation" {
  depends_on = [
    google_project_service.dts,
    google_service_account.bigquery_scheduled_queries
  ]

  # Propagation time varies; tune this value until deployments
  # succeed consistently.
  create_duration = "10s"
}
# IAM permissions
# Allow the BigQuery Data Transfer service agent to mint short-term
# tokens for (i.e. impersonate) the scheduled-queries service account.
resource "google_project_iam_member" "bigquery_scheduler_permissions" {
  project = data.google_project.project.project_id
  role    = "roles/iam.serviceAccountShortTermTokenMinter"
  # Alternative, broader role kept for reference:
  #role = "roles/iam.serviceAccountTokenCreator"
  member  = "serviceAccount:service-${data.google_project.project.number}@gcp-sa-bigquerydatatransfer.iam.gserviceaccount.com"

  depends_on = [time_sleep.wait_for_settings_propagation]
}
# Grant the scheduled-queries service account BigQuery admin rights.
# NOTE(review): google_project_iam_binding is authoritative for the role —
# it removes any other members of roles/bigquery.admin on the project.
# Consider google_project_iam_member if that is not intended.
resource "google_project_iam_binding" "bigquery_datatransfer_admin" {
  project = data.google_project.project.project_id
  role    = "roles/bigquery.admin"
  members = ["serviceAccount:${google_service_account.bigquery_scheduled_queries.email}"]

  # BUG FIX: the original depends_on referenced this resource itself
  # (google_project_iam_binding.bigquery_datatransfer_admin), creating a
  # self-referential cycle that Terraform rejects. Wait for the service
  # account / API propagation instead.
  depends_on = [time_sleep.wait_for_settings_propagation]
}
# Dataset that will hold the tables populated by the scheduled queries.
resource "google_bigquery_dataset" "your_dataset" {
  dataset_id    = "your_dataset"
  friendly_name = "Your Dataset"
  description   = "Your Dataset with Scheduled Queries"
  location      = "asia-northeast1" # BigQuery region

  depends_on = [google_project_iam_member.bigquery_scheduler_permissions]
}
# Connection from BigQuery to the Cloud SQL instance.
resource "google_bigquery_connection" "your_connection" {
  provider      = google-beta
  connection_id = "test-cloudsql-connection"
  friendly_name = "Cloud SQL connection"
  location      = "asia-northeast1" # connection region

  # Target Cloud SQL database to connect to.
  cloud_sql {
    instance_id = "your_cloudsql_database_instance_id"
    database    = "testdb"
    type        = "MYSQL"

    # NOTE(review): credentials are hard-coded and will also end up in
    # plain text in Terraform state — consider input variables or
    # Secret Manager instead. Verify with your security policy.
    credential {
      username = "your_cloudsql_database_instance_username"
      password = "your_cloudsql_database_instance_password"
    }
  }
}
# Destination table for the data imported from Cloud SQL.
resource "google_bigquery_table" "test_table" {
  dataset_id = google_bigquery_dataset.your_dataset.dataset_id
  table_id   = "test_table"

  # Must be false, otherwise Terraform refuses to delete the table.
  deletion_protection = false

  # Column definitions mirroring the Cloud SQL table to be transferred.
  # BUG FIX: the original heredoc had a trailing comma after the last
  # column object, which is invalid JSON and is rejected when the
  # provider parses the schema.
  schema = <<EOF
[
  {
    "name": "user_id",
    "type": "STRING",
    "mode": "REQUIRED",
    "description": "user_id"
  },
  {
    "name": "user_name",
    "type": "STRING",
    "mode": "REQUIRED",
    "description": "user_name"
  }
]
EOF
}
# Scheduled-query (Data Transfer Service) configuration that runs the
# EXTERNAL_QUERY against Cloud SQL and writes the result into BigQuery.
resource "google_bigquery_data_transfer_config" "test_table_query_config" {
  display_name   = "test-table-query"
  location       = "asia-northeast1" # region of the transfer config
  data_source_id = "scheduled_query"
  # Transfer the data every day at 00:00.
  schedule = "every day 00:00"
  # Idiom fix: direct reference instead of the redundant "${...}"
  # interpolation wrapper, consistent with the rest of this file.
  service_account_name   = google_service_account.bigquery_scheduled_queries.email
  destination_dataset_id = google_bigquery_dataset.your_dataset.dataset_id

  params = {
    destination_table_name_template = "test_table"
    # Overwrite the destination table on every run.
    write_disposition = "WRITE_TRUNCATE"
    query             = "SELECT * FROM EXTERNAL_QUERY('your_project_id.asia-northeast1.test-cloudsql-connection', 'SELECT * FROM testdb.your_cloudsql_table;')"
  }

  depends_on = [google_project_iam_member.bigquery_scheduler_permissions]
}