aws-scripts-mon
cloudwatchに登録した情報に対してterraformでalarmを設定していきます。
aws-scripts-monで--auto-scaling
オプションを付けることで、dimensionsにAutoScalingGroupNameを付与させます。
mon-put-instance-data.pl --mem-util --disk-path=/ --disk-space-util --auto-scaling --from-cron
このコマンドを、autoscalingで起動しているサーバのcronに登録します。
iamのpolicyにec2:DescribeTags
権限がないとAutoScalingGroupNameが取得できず、dimensionsに追加されない点に注意が必要です。
autoscaling
下記のようにすればcloudwatchのalarmを設定できます。aws_sns_topic.notify_to_slack.arn
およびaws_autoscaling_group.front
は作成済みという前提です。
# CPU alarm for the "front" Auto Scaling group.
# Enters ALARM when the group's average CPUUtilization is >= 80 for
# 2 consecutive 300-second periods.
resource "aws_cloudwatch_metric_alarm" "front_cpu" {
  alarm_name        = "${aws_autoscaling_group.front.name} CPU Utilization"
  alarm_description = "${aws_autoscaling_group.front.name} CPU Utilization"

  # Metric selection: the metric is looked up by the group's name.
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"

  dimensions = {
    AutoScalingGroupName = "${aws_autoscaling_group.front.name}"
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "80"

  # All three state transitions (ALARM, OK, INSUFFICIENT_DATA) publish to
  # the same SNS topic.
  alarm_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]

  ok_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]

  insufficient_data_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]
}
# Memory alarm for the "front" Auto Scaling group.
# Enters ALARM when the group's average MemoryUtilization is >= 90 for
# 2 consecutive 300-second periods.
resource "aws_cloudwatch_metric_alarm" "front_mem" {
  alarm_name        = "${aws_autoscaling_group.front.name} Memory Utilization"
  alarm_description = "${aws_autoscaling_group.front.name} Memory Utilization"

  # "System/Linux" is not an AWS-provided namespace; per the accompanying
  # text the data is pushed by mon-put-instance-data.pl (--mem-util) with
  # --auto-scaling, which is what makes this dimension available.
  namespace   = "System/Linux"
  metric_name = "MemoryUtilization"

  dimensions = {
    AutoScalingGroupName = "${aws_autoscaling_group.front.name}"
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "90"

  # All three state transitions (ALARM, OK, INSUFFICIENT_DATA) publish to
  # the same SNS topic.
  alarm_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]

  ok_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]

  insufficient_data_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]
}
instance
通常のインスタンスに対してはdimensionsとしてInstanceIdを使います。
aws_instance
作成時にcount
を使っているならaws_cloudwatch_metric_alarm
でもcount
を使うことで対応できます。
# Per-instance CPU alarm for the elasticsearch servers.
# One alarm is created per index in var.elasticsearch_server_count; each
# enters ALARM when that instance's average CPUUtilization is >= 80 for
# 2 consecutive 300-second periods.
resource "aws_cloudwatch_metric_alarm" "elasticsearch_cpu" {
  # NOTE(review): presumably the same count used by aws_instance.elasticsearch,
  # so that every instance gets exactly one alarm - confirm against that resource.
  count = "${var.elasticsearch_server_count}"

  alarm_name        = "${element(aws_instance.elasticsearch.*.id, count.index)} CPU Utilization"
  alarm_description = "${element(aws_instance.elasticsearch.*.id, count.index)} CPU Utilization"

  # Metric selection: scoped to the single instance picked by count.index.
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"

  dimensions = {
    InstanceId = "${element(aws_instance.elasticsearch.*.id, count.index)}"
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "80"

  # All three state transitions (ALARM, OK, INSUFFICIENT_DATA) publish to
  # the same SNS topic.
  alarm_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]

  ok_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]

  insufficient_data_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]
}
# Per-instance memory alarm for the elasticsearch servers.
# One alarm is created per index in var.elasticsearch_server_count; each
# enters ALARM when that instance's average MemoryUtilization is >= 90 for
# 2 consecutive 300-second periods.
resource "aws_cloudwatch_metric_alarm" "elasticsearch_mem" {
  # NOTE(review): presumably the same count used by aws_instance.elasticsearch,
  # so that every instance gets exactly one alarm - confirm against that resource.
  count = "${var.elasticsearch_server_count}"

  # alarm_name is intentionally left as "Memory Usage" so the existing alarm
  # keeps its identity; only the description below was corrected.
  alarm_name = "${element(aws_instance.elasticsearch.*.id, count.index)} Memory Usage"

  # Fixed: the description previously read "CPU Utilization" (copy-paste from
  # the CPU alarm), which mislabelled memory alerts in notifications.
  alarm_description = "${element(aws_instance.elasticsearch.*.id, count.index)} Memory Usage"

  # "System/Linux" is not an AWS-provided namespace; per the accompanying
  # text the data is pushed by mon-put-instance-data.pl (--mem-util).
  namespace   = "System/Linux"
  metric_name = "MemoryUtilization"

  # Scoped to the single instance picked by count.index.
  dimensions = {
    InstanceId = "${element(aws_instance.elasticsearch.*.id, count.index)}"
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "90"

  # All three state transitions (ALARM, OK, INSUFFICIENT_DATA) publish to
  # the same SNS topic.
  alarm_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]

  ok_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]

  insufficient_data_actions = [
    "${aws_sns_topic.notify_to_slack.arn}",
  ]
}