4
4

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 5 years have passed since last update.

AWS AutoScalingで増減したEC2インスタンスに、動的にCloudWatchのAlarmをLambda(Go)で設定する

Last updated at Posted at 2019-08-12

はじめに

こんにちは。Wano株式会社のエンジニアのnariと申します。

前回の記事で、EC2のメトリクスを要件を満たす形で収集できるようになりました。

しかし、このメトリクスに対してアラートを設定しようとすると、AutoScalingを設定しているインスタンスに関しては、スケールするたびにいちいちアラートを削除したり追加したりする必要があります。

それを手動でやるのは流石に現実的ではないため、前の記事「ApexとTerraformとGoでAWS上に構築したCD Pipelineのステータスをslackに通知」(Qiita)で作成したステータスチェックシステム(Apex, Terraform)とLambda(Go)を使い、インスタンスの増減に合わせてアラームを動的に設定することにしました。

システム全体像

スクリーンショット 2019-08-12 21.50.30.png

どう設定したか

1.LambdaのRoleに以下のpolicyを設定する

policy.json
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents"
            ],
            "Resource": "arn:aws:logs:*:*:*"
        },
        {
            "Effect": "Allow",
            "Action": [
                "ec2:DescribeInstances",
                "cloudwatch:PutMetricAlarm",
                "cloudwatch:DeleteAlarms",
                "cloudwatch:DescribeAlarms"
            ],
            "Resource": "*"
        }
    ]
}

2.Lambda(Go)の以下のスクリプトをデプロイする(Apex)

main.go
package main

import (
	"context"
	"encoding/json"
	"fmt"
	"github.com/aws/aws-lambda-go/events"
	"github.com/aws/aws-lambda-go/lambda"
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/cloudwatch"
	"github.com/labstack/gommon/log"
	"os"
	"strings"
)

// AutoScalingStatus holds the fields this function reads from the detail
// payload of an Auto Scaling CloudWatch event. It is populated by
// json.Unmarshal in controlAutoScalingInstanceHandler.
type AutoScalingStatus struct {
	// AutoScalingGroupName is used in alarm names and as a metric dimension value.
	AutoScalingGroupName string
	// EC2InstanceId is used in alarm names and as the InstanceId dimension value.
	EC2InstanceId        string
	// Description is split on spaces by the handler; its first word
	// ("Launching" / "Terminating") selects whether alarms are created or deleted.
	Description          string
}

// main hands controlAutoScalingInstanceHandler to lambda.Start as the
// function's handler.
func main() {
	lambda.Start(controlAutoScalingInstanceHandler)
}

// controlAutoScalingInstanceHandler is the Lambda handler for Auto Scaling
// CloudWatch events. It decodes event.Detail into an AutoScalingStatus and
// dispatches on the first space-separated word of its Description:
//
//   - "Launching":   create the per-instance alarms (createAlarm)
//   - "Terminating": delete the per-instance alarms (deleteAlarm)
//   - anything else: log and return nil
//
// The SNS topic ARN used for alarm/OK actions is read from the SNS_ARN
// environment variable, and the CloudWatch client is pinned to the
// ap-northeast-1 region.
//
// It returns an error when the detail payload cannot be decoded, when the AWS
// session cannot be created, or when the CloudWatch call fails.
func controlAutoScalingInstanceHandler(ctx context.Context, event events.CloudWatchEvent) error {
	status := &AutoScalingStatus{}
	if err := json.Unmarshal([]byte(event.Detail), status); err != nil {
		log.Error(err)
		return err
	}

	// session.New is deprecated because it hides configuration errors;
	// NewSession reports them so the invocation fails with a clear cause.
	sess, err := session.NewSession()
	if err != nil {
		log.Error(err)
		return err
	}
	cw := cloudwatch.New(sess, &aws.Config{Region: aws.String("ap-northeast-1")})
	snsArn := os.Getenv("SNS_ARN")

	// strings.Split always yields at least one element, so splits[0] is safe
	// even when Description is empty.
	splits := strings.Split(status.Description, " ")
	scaleStatus := splits[0]
	log.Infof("splits:%v", splits)

	switch scaleStatus {
	case "Launching":
		return createAlarm(status, cw, snsArn)
	case "Terminating":
		return deleteAlarm(status, cw, snsArn)
	default:
		log.Info("該当のstatusTypeではない")
		return nil
	}
}

// createAlarm submits every alarm definition returned by getAlarmInputs for
// the instance described by status to CloudWatch via PutMetricAlarm.
//
// Each successful response is logged. On the first failing call the error is
// logged and returned immediately; the remaining definitions are not
// submitted and no rollback of earlier ones is attempted.
func createAlarm(status *AutoScalingStatus, svc *cloudwatch.CloudWatch, snsArn string) error {
	alarmInputs := getAlarmInputs(status, snsArn)
	for i := 0; i < len(alarmInputs); i++ {
		out, putErr := svc.PutMetricAlarm(alarmInputs[i])
		if putErr == nil {
			log.Infof("putMetricAlarmOutput:%v", out)
			continue
		}
		log.Error(putErr)
		return putErr
	}
	return nil
}

// deleteAlarm removes the CloudWatch alarms that createAlarm registers for the
// instance described by status.
//
// The alarm names are taken from getAlarmInputs rather than being rebuilt by
// hand, so the set of deleted alarms always matches the set that is created
// (disk, load average, memory and one per CPU). snsArn is only forwarded to
// getAlarmInputs; it does not influence which alarms are deleted.
//
// All names are sent in a single DeleteAlarms call. The error from that call,
// if any, is logged and returned.
func deleteAlarm(status *AutoScalingStatus, svc *cloudwatch.CloudWatch, snsArn string) error {
	inputs := getAlarmInputs(status, snsArn)
	alarmNames := make([]*string, 0, len(inputs))
	for _, in := range inputs {
		alarmNames = append(alarmNames, in.AlarmName)
	}

	deleteAlarmOutput, err := svc.DeleteAlarms(&cloudwatch.DeleteAlarmsInput{
		AlarmNames: alarmNames,
	})
	if err != nil {
		log.Error(err)
		return err
	}
	log.Infof("deleteAlarmOutput:%v", deleteAlarmOutput)
	return nil
}

// getAlarmInputs returns every alarm definition for the instance described by
// status, in this order: disk, load average, memory, then the per-CPU alarms
// from getCPUAlarmInput. snsArn is passed to each builder as the alarm/OK
// action target.
func getAlarmInputs(status *AutoScalingStatus, snsArn string) []*cloudwatch.PutMetricAlarmInput {
	inputs := []*cloudwatch.PutMetricAlarmInput{
		getDiscAlarmInput(status, snsArn),
		getLoadAverageAlarmInput(status, snsArn),
		getMemoryAlarmInput(status, snsArn),
	}
	cpuInputs := getCPUAlarmInput(status, snsArn)
	inputs = append(inputs, cpuInputs...)
	return inputs
}
// getDiscAlarmInput builds the PutMetricAlarm request for the CWAgent
// "disk_used_percent" metric of the instance described by status.
//
// The metric is selected by the dimensions AutoScalingGroupName, InstanceId,
// device=tmpfs, fstype=tmpfs and path=/dev/shm. The alarm uses the Average
// statistic with Period 60, EvaluationPeriods 2 and a GreaterThanThreshold
// comparison against 80.0. snsArn is set as both the alarm action and the OK
// action. The alarm name and description are both
// "disk_used_percent-<group>-<instance>".
func getDiscAlarmInput(status *AutoScalingStatus, snsArn string) *cloudwatch.PutMetricAlarmInput {
	// Name and description share the same format, so build the text once.
	label := fmt.Sprintf("disk_used_percent-%v-%v", status.AutoScalingGroupName, status.EC2InstanceId)

	return &cloudwatch.PutMetricAlarmInput{
		AlarmName:        aws.String(label),
		AlarmDescription: aws.String(label),
		AlarmActions:     []*string{aws.String(snsArn)},
		OKActions:        []*string{aws.String(snsArn)},

		Namespace:  aws.String("CWAgent"),
		MetricName: aws.String("disk_used_percent"),
		Dimensions: []*cloudwatch.Dimension{
			{Name: aws.String("AutoScalingGroupName"), Value: aws.String(status.AutoScalingGroupName)},
			{Name: aws.String("InstanceId"), Value: aws.String(status.EC2InstanceId)},
			{Name: aws.String("device"), Value: aws.String("tmpfs")},
			{Name: aws.String("fstype"), Value: aws.String("tmpfs")},
			{Name: aws.String("path"), Value: aws.String("/dev/shm")},
		},

		Statistic:          aws.String(cloudwatch.StatisticAverage),
		Period:             aws.Int64(60),
		EvaluationPeriods:  aws.Int64(2),
		Threshold:          aws.Float64(80.0),
		ComparisonOperator: aws.String(cloudwatch.ComparisonOperatorGreaterThanThreshold),
	}
}

// getMemoryAlarmInput builds the PutMetricAlarm request for the CWAgent
// "mem_used_percent" metric of the instance described by status.
//
// The metric is selected by the dimensions AutoScalingGroupName and
// InstanceId. The alarm uses the Average statistic with Period 60,
// EvaluationPeriods 2 and a GreaterThanThreshold comparison against 80.0.
// snsArn is set as both the alarm action and the OK action. The alarm name
// and description are both "mem_used_percent-<group>-<instance>".
func getMemoryAlarmInput(status *AutoScalingStatus, snsArn string) *cloudwatch.PutMetricAlarmInput {
	// Name and description share the same format, so build the text once.
	label := fmt.Sprintf("mem_used_percent-%v-%v", status.AutoScalingGroupName, status.EC2InstanceId)

	return &cloudwatch.PutMetricAlarmInput{
		AlarmName:        aws.String(label),
		AlarmDescription: aws.String(label),
		AlarmActions:     []*string{aws.String(snsArn)},
		OKActions:        []*string{aws.String(snsArn)},

		Namespace:  aws.String("CWAgent"),
		MetricName: aws.String("mem_used_percent"),
		Dimensions: []*cloudwatch.Dimension{
			{Name: aws.String("AutoScalingGroupName"), Value: aws.String(status.AutoScalingGroupName)},
			{Name: aws.String("InstanceId"), Value: aws.String(status.EC2InstanceId)},
		},

		Statistic:          aws.String(cloudwatch.StatisticAverage),
		Period:             aws.Int64(60),
		EvaluationPeriods:  aws.Int64(2),
		Threshold:          aws.Float64(80.0),
		ComparisonOperator: aws.String(cloudwatch.ComparisonOperatorGreaterThanThreshold),
	}
}

// getCPUAlarmInput builds one PutMetricAlarm request per CPU ("cpu0" and
// "cpu1") for the CWAgent "cpu_usage_system" metric of the instance described
// by status.
//
// Each metric is selected by the dimensions AutoScalingGroupName, InstanceId
// and cpu. The alarms use the Average statistic with Period 60,
// EvaluationPeriods 2 and a GreaterThanThreshold comparison against 60.0.
// snsArn is set as both the alarm action and the OK action. The alarm name
// and description are both "cpu_usage_system-<cpu>-<group>-<instance>".
//
// The returned slice has the same order as the CPU list.
func getCPUAlarmInput(status *AutoScalingStatus, snsArn string) []*cloudwatch.PutMetricAlarmInput {
	cpus := []string{"cpu0", "cpu1"}

	inputs := make([]*cloudwatch.PutMetricAlarmInput, 0, len(cpus))
	for _, cpu := range cpus {
		alarmName := fmt.Sprintf("cpu_usage_system-%v-%v-%v", cpu, status.AutoScalingGroupName, status.EC2InstanceId)

		inputs = append(inputs, &cloudwatch.PutMetricAlarmInput{
			AlarmName:        aws.String(alarmName),
			AlarmDescription: aws.String(alarmName),
			AlarmActions:     []*string{aws.String(snsArn)},
			OKActions:        []*string{aws.String(snsArn)},

			Namespace:  aws.String("CWAgent"),
			MetricName: aws.String("cpu_usage_system"),
			Dimensions: []*cloudwatch.Dimension{
				{Name: aws.String("AutoScalingGroupName"), Value: aws.String(status.AutoScalingGroupName)},
				{Name: aws.String("InstanceId"), Value: aws.String(status.EC2InstanceId)},
				// aws.String copies the value. Taking &cpu instead would, on Go
				// versions before 1.22, make every alarm share the single range
				// variable and end up with the last CPU's name as its dimension.
				{Name: aws.String("cpu"), Value: aws.String(cpu)},
			},

			Statistic:          aws.String(cloudwatch.StatisticAverage),
			Period:             aws.Int64(60),
			EvaluationPeriods:  aws.Int64(2),
			Threshold:          aws.Float64(60.0),
			ComparisonOperator: aws.String(cloudwatch.ComparisonOperatorGreaterThanThreshold),
		})
	}
	return inputs
}

// getLoadAverageAlarmInput builds the PutMetricAlarm request for the load
// average metric of the instance described by status.
//
// The metric lives in the "AmazonLinux/LoadAverage" namespace under the name
// "<instance>/LoadAverage" and is selected by the InstanceId dimension only.
// The alarm uses the Average statistic with Period 60, EvaluationPeriods 2
// and a GreaterThanThreshold comparison against 10.0. snsArn is set as both
// the alarm action and the OK action. The alarm name is
// "LoadAverage-<group>-<instance>"; the description omits the group and is
// "LoadAverage-<instance>".
func getLoadAverageAlarmInput(status *AutoScalingStatus, snsArn string) *cloudwatch.PutMetricAlarmInput {
	instanceID := status.EC2InstanceId

	return &cloudwatch.PutMetricAlarmInput{
		AlarmName:        aws.String(fmt.Sprintf("LoadAverage-%v-%v", status.AutoScalingGroupName, instanceID)),
		AlarmDescription: aws.String(fmt.Sprintf("LoadAverage-%v", instanceID)),
		AlarmActions:     []*string{aws.String(snsArn)},
		OKActions:        []*string{aws.String(snsArn)},

		Namespace:  aws.String("AmazonLinux/LoadAverage"),
		MetricName: aws.String(fmt.Sprintf("%v/LoadAverage", instanceID)),
		Dimensions: []*cloudwatch.Dimension{
			{Name: aws.String("InstanceId"), Value: aws.String(instanceID)},
		},

		Statistic:          aws.String(cloudwatch.StatisticAverage),
		Period:             aws.Int64(60),
		EvaluationPeriods:  aws.Int64(2),
		Threshold:          aws.Float64(10.0),
		ComparisonOperator: aws.String(cloudwatch.ComparisonOperatorGreaterThanThreshold),
	}
}

3.AutoScalingを既存のステータス監視機構に追加する

autoscaling.json
{
  "source": [
    "aws.autoscaling"
  ],
  "detail-type": [
    "EC2 Instance Launch Successful",
    "EC2 Instance Terminate Successful"
  ]
}

参考文献

4
4
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
4
4

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?