LoginSignup
1
1

More than 1 year has passed since last update.

CloudSQLからBigQueryにデータの転送方法

Posted at

キーワード

GCP/Google Cloud
CloudSQL
BigQuery
スケジュールされたクエリ
terraform

本記事では、terraformを使ってCloudSQLからBigQueryにデータを転送するための、BigQuery関連のtfファイルの作成方法を説明します。

terraformコード

bigquery.tf
# --------------------------------------------------
# RESOURCES
# Note the comments below
# --------------------------------------------------

# Look up the current provider project so its id/number can be
# referenced by the resources below (no project argument: uses the
# project configured on the google provider).
data "google_project" "project" {
}

# Enable the BigQuery Data Transfer Service API on the project.
# disable_on_destroy = false keeps the API enabled even if this
# resource is destroyed, so other consumers of the API are not broken.
resource "google_project_service" "dts" {
  project                    = data.google_project.project.project_id
  service                    = "bigquerydatatransfer.googleapis.com"
  disable_dependent_services = false
  disable_on_destroy         = false
}

# Service Account
# Create the service account that the scheduled queries
# (BigQuery Data Transfer jobs) will run as.
resource "google_service_account" "bigquery_scheduled_queries" {
  account_id   = "bigquery-scheduled-queries"
  display_name = "BigQuery Scheduled Queries Service Account"
  description  = "Used to run BigQuery Data Transfer jobs."
}

# Wait for the newly enabled service and the new service account to
# propagate before any IAM resources reference them.
resource "time_sleep" "wait_for_settings_propagation" {
  # It can take a while for the enabled services
  # and service accounts to propagate. Experiment
  # with this value until you find a time that is
  # consistently working for all the deployments.
  create_duration = "10s"

  depends_on = [
    google_project_service.dts,
    google_service_account.bigquery_scheduled_queries
  ]
}

# IAM Permissions
# Grant the BigQuery Data Transfer service agent
# (service-<project-number>@gcp-sa-bigquerydatatransfer.iam.gserviceaccount.com)
# the ability to mint short-term tokens, so it can impersonate the
# service account that runs the scheduled queries.
resource "google_project_iam_member" "bigquery_scheduler_permissions" {
  project = data.google_project.project.project_id
  role   = "roles/iam.serviceAccountShortTermTokenMinter"
  #role   = "roles/iam.serviceAccountTokenCreator"
  member = "serviceAccount:service-${data.google_project.project.number}@gcp-sa-bigquerydatatransfer.iam.gserviceaccount.com"

  # Wait until the API/service-account propagation delay has elapsed.
  depends_on = [time_sleep.wait_for_settings_propagation]
}

# Grant the scheduled-queries service account BigQuery admin rights.
# NOTE(review): google_project_iam_binding is AUTHORITATIVE for the
# role — it removes any other members already holding
# roles/bigquery.admin in this project. If other members must keep
# the role, use google_project_iam_member instead.
resource "google_project_iam_binding" "bigquery_datatransfer_admin" {
  project = data.google_project.project.project_id
  role    = "roles/bigquery.admin"
  members = ["serviceAccount:${google_service_account.bigquery_scheduled_queries.email}"]

  # Bug fix: the original depended on itself
  # (google_project_iam_binding.bigquery_datatransfer_admin), which is
  # a self-referential cycle Terraform rejects. Depend on the
  # propagation wait instead, like the sibling IAM resource.
  depends_on = [time_sleep.wait_for_settings_propagation]
}

# Create the BigQuery dataset that will hold the transferred tables.
resource "google_bigquery_dataset" "your_dataset" {
  # Ensure the transfer service agent permissions exist first.
  depends_on = [google_project_iam_member.bigquery_scheduler_permissions]

  dataset_id    = "your_dataset"
  friendly_name = "Your Dataset"
  description   = "Your Dataset with Scheduled Queries"
    # Region of the BigQuery dataset (must match the connection and
    # transfer-config locations below).
  location      = "asia-northeast1"
}


# BigQueryとCloudSQLのコネクションの設定
resource "google_bigquery_connection" "your_connection" { 
    provider = google-beta 

    connection_id = "test-cloudsql-connection" 
    friendly_name = "Cloud SQL connection" 
    # コネクションのリージョン
    location = "asia-northeast1" 
    # 接続したいCloudSQLデータベースの情報
    cloud_sql { 
        database = "testdb" 
        instance_id = "your_cloudsql_database_instance_id" 
        type = "MYSQL" 
        
        credential { 
            username = "your_cloudsql_database_instance_username" 
            password = "your_cloudsql_database_instance_password" 
        } 
    } 
}

# Destination table for the data imported from Cloud SQL.
resource "google_bigquery_table" "test_table" {
  dataset_id = google_bigquery_dataset.your_dataset.dataset_id
  table_id   = "test_table"
  # Must be false, otherwise Terraform refuses to destroy the table.
  deletion_protection = false

  # Column definitions for the table.
  # Mirror here the schema of the Cloud SQL table being transferred.
  # Bug fix: the original JSON had a trailing comma after the last
  # field object, which is invalid JSON and is rejected by BigQuery.
  schema = <<EOF
[
  {
    "name": "user_id",
    "type": "STRING",
    "mode": "REQUIRED",
    "description": "user_id"
  },
  {
    "name": "user_name",
    "type": "STRING",
    "mode": "REQUIRED",
    "description": "user_name"
  }
]
EOF
}

# Scheduled query (Data Transfer config) that copies the Cloud SQL
# table into BigQuery via EXTERNAL_QUERY over the connection above.
resource "google_bigquery_data_transfer_config" "test_table_query_config" {
  display_name           = "test-table-query"
  # Region of the transfer config (must match the dataset location).
  location               = "asia-northeast1"
  data_source_id         = "scheduled_query"
  # Run the transfer every day at 00:00.
  schedule               = "every day 00:00"
  # Idiom fix: direct reference instead of the redundant "${...}"
  # interpolation-only string (deprecated since Terraform 0.12).
  service_account_name   = google_service_account.bigquery_scheduled_queries.email
  destination_dataset_id = google_bigquery_dataset.your_dataset.dataset_id
  params = {
    destination_table_name_template = "test_table"
    # WRITE_TRUNCATE replaces the destination table contents each run.
    write_disposition               = "WRITE_TRUNCATE"
    query = "SELECT * FROM EXTERNAL_QUERY('your_project_id.asia-northeast1.test-cloudsql-connection', 'SELECT * FROM testdb.your_cloudsql_table;')"
  }

  depends_on = [google_project_iam_member.bigquery_scheduler_permissions]
}
1
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
1