はじめに
LLM の Trace や Prompt を管理できる Langfuse を、自分たちのクラウド環境 (Google Cloud) にセルフホストする必要があったので、必要なインフラを Terraform で実装しました。
Langfuse
https://langfuse.com/
実装にあたり、以下の記事を参考にさせていただきました。
- https://zenn.dev/machinelearning/articles/langfuse_self_host
- https://book.st-hakky.com/data-science/langfuse-self-hosting/
実装
ディレクトリ構成はこちらです。
terraform-for-langfuse/
├── environments/
│   └── dev/
│       ├── main.tf
│       ├── backend.tf
│       ├── provider.tf
│       ├── versions.tf
│       └── variables.tf
└── modules/
    ├── project.tf
    ├── variables.tf
    ├── iam.tf
    ├── vpc.tf
    ├── secretmanager.tf
    ├── cloudsql.tf
    └── cloudrun.tf
ソースコードはこちらです。(長いので折りたたみ)
環境ごとに必要なソースコード
# Instantiates the shared Langfuse module for this environment.
module "langfuse" {
  # Relative path from this environment directory to the shared module.
  source = "../../modules"

  # Forwarded from this environment's sensitive input variable.
  db_password = var.db_password
}
# Remote state: the state file is kept in a Cloud Storage bucket.
terraform {
  backend "gcs" {
    # Placeholders: replace with the real bucket name and the state path
    # prefix inside that bucket before running `terraform init`.
    prefix = "<PREFIX>"
    bucket = "<BUCKET_NAME>"
  }
}
# Default project and region for every google_* resource that does not set
# its own. Both values are placeholders to be filled in per environment.
provider "google" {
  region  = "<REGION_NAME>"
  project = "<PROJECT_ID>"
}
# Provider plugins used by this configuration, pinned to exact versions.
terraform {
  required_providers {
    # Generates the random bytes used for the Langfuse secrets.
    random = {
      version = "3.6.3"
      source  = "hashicorp/random"
    }

    # Manages all Google Cloud resources.
    google = {
      version = "6.6.0"
      source  = "hashicorp/google"
    }
  }
}
# Terraform CLI version constraint: exactly 1.4.2 (no range operator).
terraform {
  required_version = "1.4.2"
}
# Password handed to the Langfuse module. It has no default, so a value must
# be supplied at plan/apply time; `sensitive` redacts it from CLI output.
variable "db_password" {
  sensitive = true
  type      = string
}
モジュール化済みの各リソースを実装したソースコード
(長いので、各リソースの実装も折りたたんでいます)
project.tf
# Project the provider is configured for; other files read its project_id
# and project number from here.
data "google_project" "project" {}

# ---------------------------------------------------------------------------
# Google Cloud APIs enabled by this module.
# disable_on_destroy = false leaves each API enabled when the corresponding
# Terraform resource is destroyed.
# ---------------------------------------------------------------------------

# Service Networking API.
resource "google_project_service" "servicenetworking" {
  disable_on_destroy = false
  service            = "servicenetworking.googleapis.com"
}

# Compute Engine API.
resource "google_project_service" "compute" {
  disable_on_destroy = false
  service            = "compute.googleapis.com"
}

# Cloud SQL Admin API.
resource "google_project_service" "sqladmin" {
  disable_on_destroy = false
  service            = "sqladmin.googleapis.com"
}

# Secret Manager API.
resource "google_project_service" "secretmanager" {
  disable_on_destroy = false
  service            = "secretmanager.googleapis.com"
}

# Cloud Run Admin API.
resource "google_project_service" "run" {
  disable_on_destroy = false
  service            = "run.googleapis.com"
}
variables.tf
# Region used for the Cloud SQL instance, its backups, and the Cloud Run
# service.
variable "location" {
  default = "asia-northeast1"
  type    = string
}

# Root password for the Cloud SQL instance. Required (no default) and
# redacted from plan/apply output.
variable "db_password" {
  sensitive = true
  type      = string
}
# Shared values for the Langfuse deployment.
locals {
  # Container image that the Cloud Run service runs.
  langfuse_image = "docker.io/langfuse/langfuse:2"

  # Database name used in the connection URL path.
  langfuse_db_name = "langfuse-db"

  # Name of the Cloud Run service; also part of nextauth_url below.
  langfuse_service_name = "langfuse-service"

  # PostgreSQL connection URL that reaches Cloud SQL through the Unix socket
  # directory mounted at /cloudsql/<connection_name>.
  # The password is percent-encoded with urlencode() so that reserved URL
  # characters in it (e.g. "@", ":", "/", "?", "#", "%") cannot corrupt the
  # URL. Passwords made only of unreserved characters yield the same string
  # as before.
  # NOTE(review): "sslmode=none" is kept as-is; confirm the client accepts
  # this value.
  direct_url = "postgresql://postgres:${urlencode(google_sql_database_instance.main.root_password)}@localhost/${local.langfuse_db_name}/?host=/cloudsql/${google_sql_database_instance.main.connection_name}&sslmode=none"

  # Public URL of the service, assembled from the service name, the project
  # number, and the region.
  nextauth_url = "https://${local.langfuse_service_name}-${data.google_project.project.number}.${var.location}.run.app"
}
iam.tf
# Identity that the Langfuse Cloud Run service runs as.
resource "google_service_account" "langfuse-service-account" {
  display_name = "Cloud Run for Langfuse Service Account"
  account_id   = "cloud-run-langfuse-sa"
}

# Project-level role grants for that service account. Each one is a
# non-authoritative `iam_member`, so other members of the same role are left
# untouched.

# Read access to Secret Manager secret payloads.
resource "google_project_iam_member" "secret-accessor" {
  member  = format("serviceAccount:%s", google_service_account.langfuse-service-account.email)
  role    = "roles/secretmanager.secretAccessor"
  project = data.google_project.project.project_id
}

# NOTE(review): roles/run.serviceAgent is normally held by the Google-managed
# Cloud Run service agent; confirm this grant to a user-managed account is
# actually required.
resource "google_project_iam_member" "cloud-run-service-agent" {
  member  = format("serviceAccount:%s", google_service_account.langfuse-service-account.email)
  role    = "roles/run.serviceAgent"
  project = data.google_project.project.project_id
}

# Permission to connect to Cloud SQL instances.
resource "google_project_iam_member" "cloud-sql-client" {
  member  = format("serviceAccount:%s", google_service_account.langfuse-service-account.email)
  role    = "roles/cloudsql.client"
  project = data.google_project.project.project_id
}
vpc.tf
# Custom-mode VPC for Langfuse; subnets are declared explicitly instead of
# being auto-created.
resource "google_compute_network" "langfuse-network" {
  name                    = "langfuse-network"
  auto_create_subnetworks = false

  # A VPC network is a Compute Engine resource, so wait for the Compute
  # Engine API as well as the Service Networking API before creating it.
  depends_on = [
    google_project_service.compute,
    google_project_service.servicenetworking,
  ]
}
# Subnet that the Cloud Run service attaches its network interface to.
resource "google_compute_subnetwork" "langfuse-subnetwork" {
  name          = "langfuse-subnetwork"
  network       = google_compute_network.langfuse-network.id
  ip_cidr_range = "10.1.0.0/24"

  # Pin the subnet to the same region as the Cloud Run service and Cloud SQL
  # instance (both use var.location) instead of inheriting the provider's
  # default region, which is configured separately and may differ.
  region = var.location
}
# Internal range 10.2.0.0/24 reserved in the Langfuse VPC for VPC peering.
# It is handed to the service networking connection and to Cloud SQL as the
# allocated IP range.
resource "google_compute_global_address" "langfuse-range" {
  name    = "langfuse-range"
  network = google_compute_network.langfuse-network.id

  address_type  = "INTERNAL"
  purpose       = "VPC_PEERING"
  address       = "10.2.0.0"
  prefix_length = 24
}
# Private services connection between the Langfuse VPC and the Service
# Networking service, using the reserved peering range.
resource "google_service_networking_connection" "langfuse-conn" {
  service                 = "servicenetworking.googleapis.com"
  network                 = google_compute_network.langfuse-network.id
  reserved_peering_ranges = [google_compute_global_address.langfuse-range.name]

  # Create the connection only after the Service Networking API is enabled.
  depends_on = [google_project_service.servicenetworking]
}
# Route exchange settings for the peering created by the service networking
# connection: custom routes are imported but not exported.
resource "google_compute_network_peering_routes_config" "peering-routes" {
  network = google_compute_network.langfuse-network.name
  peering = google_service_networking_connection.langfuse-conn.peering

  import_custom_routes = true
  export_custom_routes = false
}
secretmanager.tf
# 32 random bytes each; their base64 encodings are stored in Secret Manager
# as the NextAuth secret and the salt.

resource "random_bytes" "langfuse-nextauth-secret" {
  length = 32
}

resource "random_bytes" "langfuse-salt" {
  length = 32
}
# Secret Manager entries read by the Cloud Run service as environment
# variables. Each secret container has one version holding the value.
# Every secret waits for the Secret Manager API to be enabled, so a fresh
# project can be applied in a single run without enabling the API by hand.

# --- DATABASE_URL: the direct URL with "&pgbouncer=true" appended ----------
resource "google_secret_manager_secret" "langfuse-database-url" {
  secret_id = "langfuse-database-url"

  replication {
    auto {}
  }

  depends_on = [google_project_service.secretmanager]
}

resource "google_secret_manager_secret_version" "langfuse-database-url-data" {
  secret      = google_secret_manager_secret.langfuse-database-url.id
  secret_data = "${local.direct_url}&pgbouncer=true"
}

# --- NEXTAUTH_URL: URL of the Langfuse service -----------------------------
resource "google_secret_manager_secret" "langfuse-nextauth-url" {
  secret_id = "langfuse-nextauth-url"

  replication {
    auto {}
  }

  depends_on = [google_project_service.secretmanager]
}

resource "google_secret_manager_secret_version" "langfuse-nextauth-url-data" {
  secret      = google_secret_manager_secret.langfuse-nextauth-url.id
  secret_data = local.nextauth_url
}

# --- NEXTAUTH_SECRET: base64 of 32 random bytes ----------------------------
resource "google_secret_manager_secret" "langfuse-nextauth-secret" {
  secret_id = "langfuse-nextauth-secret"

  replication {
    auto {}
  }

  depends_on = [google_project_service.secretmanager]
}

resource "google_secret_manager_secret_version" "langfuse-nextauth-secret-data" {
  secret      = google_secret_manager_secret.langfuse-nextauth-secret.id
  secret_data = random_bytes.langfuse-nextauth-secret.base64
}

# --- SALT: base64 of 32 random bytes ---------------------------------------
resource "google_secret_manager_secret" "langfuse-salt" {
  secret_id = "langfuse-salt"

  replication {
    auto {}
  }

  depends_on = [google_project_service.secretmanager]
}

resource "google_secret_manager_secret_version" "langfuse-salt-data" {
  secret      = google_secret_manager_secret.langfuse-salt.id
  secret_data = random_bytes.langfuse-salt.base64
}

# --- DIRECT_URL: connection URL without the pgbouncer flag -----------------
resource "google_secret_manager_secret" "langfuse-direct-url" {
  secret_id = "langfuse-direct-url"

  replication {
    auto {}
  }

  depends_on = [google_project_service.secretmanager]
}

resource "google_secret_manager_secret_version" "langfuse-direct-url-data" {
  secret      = google_secret_manager_secret.langfuse-direct-url.id
  secret_data = local.direct_url
}
cloudsql.tf
# PostgreSQL 16 instance backing Langfuse. It has no public IPv4 address and
# is attached to the Langfuse VPC through the reserved peering range.
resource "google_sql_database_instance" "main" {
  name             = "langfuse-db"
  database_version = "POSTGRES_16"
  region           = var.location
  root_password    = var.db_password

  # Terraform-side destroy guard is off here, while
  # settings.deletion_protection_enabled below is on.
  # NOTE(review): with these two flags combined, `terraform destroy` is
  # expected to be rejected by the Cloud SQL API until the latter is turned
  # off; confirm this is intended.
  deletion_protection = false

  # Wait for the Cloud SQL Admin API to be enabled and for the private
  # services connection that the instance's private IP relies on.
  depends_on = [
    google_project_service.sqladmin,
    google_service_networking_connection.langfuse-conn,
  ]

  settings {
    tier                        = "db-custom-1-3840" # custom tier: 1 vCPU, 3840 MB RAM
    availability_type           = "REGIONAL"
    deletion_protection_enabled = true
    disk_autoresize             = true
    disk_size                   = 10

    # Private IP only, allocated from the reserved peering range.
    ip_configuration {
      private_network    = google_compute_network.langfuse-network.id
      allocated_ip_range = google_compute_global_address.langfuse-range.name
      ipv4_enabled       = false
    }

    # Daily backups with point-in-time recovery; the last 7 backups are kept.
    backup_configuration {
      enabled                        = true
      start_time                     = "00:00"
      point_in_time_recovery_enabled = true
      location                       = var.location

      backup_retention_settings {
        retained_backups = 7
        retention_unit   = "COUNT"
      }
    }
  }
}
# Module outputs that expose identifiers of the Cloud SQL instance.

# Self link (URI) of the instance.
output "cloud_sql_self_link_output" {
  value = google_sql_database_instance.main.self_link
}

# Connection name of the instance, the same value used for the /cloudsql
# socket path and the Cloud Run volume.
output "cloud_sql_conn_name_output" {
  value = google_sql_database_instance.main.connection_name
}
cloudrun.tf
# Makes the Langfuse service publicly invocable by binding roles/run.invoker
# to allUsers. This is an authoritative binding for that role on the service.
resource "google_cloud_run_service_iam_binding" "public_invoker" {
  service  = google_cloud_run_v2_service.langfuse-service.name
  location = var.location
  project  = data.google_project.project.project_id

  role    = "roles/run.invoker"
  members = ["allUsers"]
}
# Cloud Run service that runs the Langfuse container. It runs as the
# dedicated service account, mounts the Cloud SQL instance at /cloudsql,
# reads its configuration from Secret Manager, and attaches to the Langfuse
# VPC subnet.
resource "google_cloud_run_v2_service" "langfuse-service" {
  name                = local.langfuse_service_name
  location            = var.location
  deletion_protection = false
  launch_stage        = "GA"

  # Explicit ordering for dependencies that are not expressed through
  # attribute references:
  #  - the Cloud Run API must be enabled;
  #  - the service account's role grants must exist;
  #  - every secret must already have a version, because the env vars below
  #    resolve version "latest" while referencing only the secret itself.
  depends_on = [
    google_project_service.run,
    google_project_iam_member.cloud-run-service-agent,
    google_project_iam_member.cloud-sql-client,
    google_project_iam_member.secret-accessor,
    google_secret_manager_secret_version.langfuse-database-url-data,
    google_secret_manager_secret_version.langfuse-nextauth-url-data,
    google_secret_manager_secret_version.langfuse-nextauth-secret-data,
    google_secret_manager_secret_version.langfuse-salt-data,
    google_secret_manager_secret_version.langfuse-direct-url-data,
  ]

  template {
    service_account = google_service_account.langfuse-service-account.email

    # Cloud SQL volume; mounted into the container at /cloudsql below.
    volumes {
      name = "cloudsql"
      cloud_sql_instance {
        instances = [google_sql_database_instance.main.connection_name]
      }
    }

    containers {
      image = local.langfuse_image

      ports {
        container_port = 3000
      }

      volume_mounts {
        mount_path = "/cloudsql"
        name       = "cloudsql"
      }

      # Environment variables, each resolved from the latest version of a
      # Secret Manager secret.
      env {
        name = "DATABASE_URL"
        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.langfuse-database-url.name
            version = "latest"
          }
        }
      }

      env {
        name = "NEXTAUTH_URL"
        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.langfuse-nextauth-url.name
            version = "latest"
          }
        }
      }

      env {
        name = "NEXTAUTH_SECRET"
        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.langfuse-nextauth-secret.name
            version = "latest"
          }
        }
      }

      env {
        name = "SALT"
        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.langfuse-salt.name
            version = "latest"
          }
        }
      }

      env {
        name = "DIRECT_URL"
        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.langfuse-direct-url.name
            version = "latest"
          }
        }
      }
    }

    # Keep at least one instance running; allow up to 100.
    scaling {
      min_instance_count = 1
      max_instance_count = 100
    }

    # Network interface in the Langfuse VPC subnet.
    vpc_access {
      network_interfaces {
        network    = google_compute_network.langfuse-network.name
        subnetwork = google_compute_subnetwork.langfuse-subnetwork.name
      }
    }
  }
}
実行の事前準備
- Google Cloud のコンソール上で行う準備
API の有効化も Terraform で実装していますが、以下の Google Cloud の API は terraform apply の前に有効化しておくのがおすすめです (API の有効化が反映される前に依存リソースの作成が始まると、apply が失敗することがあるためです)。すでに有効化されている場合は、そのままで構いません。
・Cloud SQL Admin API
・Secret Manager API
・Cloud Run Admin API
また、今回の実装では、.tfstate ファイルを Cloud Storage で管理するようにしているため、管理先のバケットを準備しておく必要があります。(backend.tf でバケットを指定します)
- ソースコードの準備
backend.tf と provider.tf に、必要な情報を入れます。
bucket = "<BUCKET_NAME>"
prefix = "<PREFIX>"
BUCKET_NAME は、事前準備で用意した Cloud Storage のバケット名です。PREFIX は、バケット内で state ファイルを保存するパスの prefix で、state は <PREFIX>/default.tfstate として保存されます。
project = "<PROJECT_ID>"
region = "<REGION_NAME>"
こちらは、各環境の Project ID と Region の名前です。
実行
$ cd environments/dev
$ terraform init
$ terraform plan
問題なければ
$ terraform apply
(今回の実装では、Cloud SQL の root_password をコードで管理していないため、plan, apply 時に対話形式で db_password を入力します。)
動作確認
Cloud Run のサービスにアクセスしてみて、Langfuse が利用できれば完了です!