LoginSignup
11
5

More than 5 years have passed since last update.

TerraformでAWSのスポットインスタンスを作成する

Last updated at Posted at 2018-05-25

概要

最近仕事でDeepLearningをしていまして、
AWSでp系のスポットインスタンスを立ててちょっと学習させる、みたいな事が増えてきました。
毎回コンソールでポチポチするのが手間だったのでTerraformでできるようにしました。

コード(2018/8/10修正)

※IAMロールの作成部分を2018/8/10に追加しました

公式ドキュメントを見ながら実装しました。
aws_spot_instance_requestを使おうとしてハマったのですが(自分が作ったVPCの指定ができなかった)、結局aws_spot_fleet_requestで台数を1台で指定することで解決しました。
あとポイントとしては、特にp系は1AZだとリソースが取れないことがあるので、複数AZを指定するようにしています。

ここにあげてます。(デフォルトでp2を使うようになっているので料金お気をつけください。)
こんな感じです。

main.tf
# AWS provider. The region and both credentials are taken from input
# variables instead of being written into this file.
provider "aws" {
  region     = "${var.region}"
  access_key = "${var.aws_access_key}"
  secret_key = "${var.aws_secret_key}"
}

# IAM role
# Trust policy document: allows the Spot Fleet service principal to call
# sts:AssumeRole. Its rendered JSON is consumed by the IAM role's
# assume_role_policy.
data "aws_iam_policy_document" "assume_role" {
  statement {
    principals {
      identifiers = ["spotfleet.amazonaws.com"]
      type        = "Service"
    }

    actions = ["sts:AssumeRole"]
  }
}

# Role passed to the Spot Fleet request as its iam_fleet_role.
resource "aws_iam_role" "spot-fleet-role" {
  assume_role_policy = "${data.aws_iam_policy_document.assume_role.json}"
  name               = "ml-role"
}

# Attach the AWS-managed Spot Fleet tagging policy to the fleet role.
#
# aws_iam_role_policy_attachment is used rather than aws_iam_policy_attachment
# because the latter manages the attachments of a policy exclusively: applying
# it detaches the policy from every other role, user and group in the account
# that is not listed in the resource. The per-role attachment only touches
# this one role.
resource "aws_iam_role_policy_attachment" "policy-attach" {
  role       = "${aws_iam_role.spot-fleet-role.name}"
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2SpotFleetTaggingRole"
}

# VPC
resource "aws_vpc" "ml-vpc" {
  cidr_block       = "10.1.0.0/16"
  instance_tenancy = "default"

  # DNS support and DNS hostnames are both enabled for this VPC.
  enable_dns_hostnames = true
  enable_dns_support   = true

  tags {
    Name = "ml-vpc"
  }
}

# Internet Gateway
resource "aws_internet_gateway" "ml-igw" {
  tags {
    Name = "ml-igw"
  }

  # Attached to the ML VPC; the public route table points its default route here.
  vpc_id = "${aws_vpc.ml-vpc.id}"
}

# Subnet
# One public subnet per entry in var.availability_zones. Subnets are numbered
# from 1, so they get 10.1.1.0/24, 10.1.2.0/24, ... and matching Name tags.
resource "aws_subnet" "ml-subnet-public" {
  count  = "${length(var.availability_zones)}"
  vpc_id = "${aws_vpc.ml-vpc.id}"

  # NOTE(review): lookup() by count.index suggests var.availability_zones is a
  # map keyed by index — confirm against the variable definition.
  availability_zone       = "${lookup(var.availability_zones, count.index)}"
  cidr_block              = "10.1.${count.index + 1}.0/24"
  map_public_ip_on_launch = true

  tags {
    Name = "ml-subnet-public-${count.index + 1}"
  }
}

# Route Table
resource "aws_route_table" "ml-route-public" {
  vpc_id = "${aws_vpc.ml-vpc.id}"

  tags {
    Name = "ml-route-table-public"
  }

  # Default route: everything (0.0.0.0/0) goes out through the internet gateway.
  route {
    gateway_id = "${aws_internet_gateway.ml-igw.id}"
    cidr_block = "0.0.0.0/0"
  }
}

# Associates each public subnet (one per availability zone entry) with the
# public route table.
resource "aws_route_table_association" "ml-assoc" {
  count          = "${length(var.availability_zones)}"
  route_table_id = "${aws_route_table.ml-route-public.id}"
  subnet_id      = "${element(aws_subnet.ml-subnet-public.*.id, count.index)}"
}

# Security Group
### Web
resource "aws_security_group" "ml-web-sg" {
  vpc_id      = "${aws_vpc.ml-vpc.id}"
  name        = "ml-web-sg"
  description = "Allow SSH inbound traffic"

  # Inbound: SSH (tcp/22) only, restricted to the CIDR given in var.my_ip_address.
  ingress {
    protocol    = "tcp"
    from_port   = 22
    to_port     = 22
    cidr_blocks = ["${var.my_ip_address}"]
  }

  # Outbound: every protocol and port, to any destination.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags {
    Name = "ml-web-sg"
  }
}

# Key Pair
# Registers the public key from var.aws_public_key under the name "ml-key";
# the Spot Fleet launch specifications refer to it through key_name.
resource "aws_key_pair" "ml-key" {
  public_key = "${var.aws_public_key}"
  key_name   = "ml-key"
}

# Data source for the identity of the AWS credentials in use.
# NOTE(review): nothing in the visible part of this file references
# data.aws_caller_identity.current — confirm it is still needed.
data "aws_caller_identity" "current" {}

# Spot Fleet Request
resource "aws_spot_fleet_request" "ml-spot-request" {
  iam_fleet_role  = "${aws_iam_role.spot-fleet-role.arn}"
  target_capacity = "${var.spot_target_capacity}"

  # Max price; when left unset the default is the On-Demand price.
  # spot_price = "0.1290"

  # Terraform blocks until the request has been fulfilled.
  wait_for_fulfillment                = true
  terminate_instances_with_expiration = true

  # NOTE(review): this resource has no explicit dependency on the policy
  # attached to the fleet role — consider adding depends_on if the request
  # ever races the IAM setup.

  # The three launch specifications below are identical except for the subnet
  # (public subnets 0, 1 and 2), so the fleet can take capacity from several
  # availability zones.
  # NOTE(review): three specifications are hard-coded while the number of
  # subnets follows var.availability_zones — confirm the two stay in sync.

  # Public subnet index 0
  launch_specification {
    subnet_id                   = "${element(aws_subnet.ml-subnet-public.*.id, 0)}"
    associate_public_ip_address = true
    vpc_security_group_ids      = ["${aws_security_group.ml-web-sg.id}"]

    instance_type = "${var.spot_instance_type}"
    ami           = "${var.spot_instance_ami}"
    key_name      = "${aws_key_pair.ml-key.key_name}"

    root_block_device {
      volume_type = "gp2"
      volume_size = "${var.gp2_volume_size}"
    }

    # The aws_instance data source finds the instance through this Name tag.
    tags {
      Name = "ml-instance"
    }
  }

  # Public subnet index 1
  launch_specification {
    subnet_id                   = "${element(aws_subnet.ml-subnet-public.*.id, 1)}"
    associate_public_ip_address = true
    vpc_security_group_ids      = ["${aws_security_group.ml-web-sg.id}"]

    instance_type = "${var.spot_instance_type}"
    ami           = "${var.spot_instance_ami}"
    key_name      = "${aws_key_pair.ml-key.key_name}"

    root_block_device {
      volume_type = "gp2"
      volume_size = "${var.gp2_volume_size}"
    }

    tags {
      Name = "ml-instance"
    }
  }

  # Public subnet index 2
  launch_specification {
    subnet_id                   = "${element(aws_subnet.ml-subnet-public.*.id, 2)}"
    associate_public_ip_address = true
    vpc_security_group_ids      = ["${aws_security_group.ml-web-sg.id}"]

    instance_type = "${var.spot_instance_type}"
    ami           = "${var.spot_instance_ami}"
    key_name      = "${aws_key_pair.ml-key.key_name}"

    root_block_device {
      volume_type = "gp2"
      volume_size = "${var.gp2_volume_size}"
    }

    tags {
      Name = "ml-instance"
    }
  }
}

# Looks up the instance launched by the Spot Fleet request so that its public
# IP can be exported as an output.
#
# The lookup must match exactly one instance. Filtering on the Name tag alone
# is ambiguous when an older instance with the same tag still exists (for
# example one that is shutting down after the fleet was destroyed and
# recreated), so the match is also limited to instances that are pending or
# running.
# NOTE(review): this still assumes the fleet's target capacity is 1 — with a
# larger capacity more than one instance carries the tag.
data "aws_instance" "ml-instance" {
  filter {
    name   = "tag:Name"
    values = ["ml-instance"]
  }

  filter {
    name   = "instance-state-name"
    values = ["pending", "running"]
  }

  # Read only after the fleet request exists, otherwise there is nothing to find.
  depends_on = ["aws_spot_fleet_request.ml-spot-request"]
}

# Public IP address of the instance found by the aws_instance data source.
output "ip" {
  depends_on = ["aws_spot_fleet_request.ml-spot-request"]
  value      = "${data.aws_instance.ml-instance.public_ip}"
}

使い方

いつも通りです。

使う時

terraform plan
terraform apply

消す時

terraform plan --destroy
terraform destroy

その他

スポットリクエスト以外のリソースを毎回消して作るのも非効率だなと思って、
自分は以下のような感じで使ってます。

terraform apply --target=aws_spot_fleet_request.ml-spot-request
terraform destroy --target=aws_spot_fleet_request.ml-spot-request

今後の予定

次はEC2でS3をマウントさせる部分を自動化しようと思っています。

11
5
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
11
5