どもども、ランサーズでSREしているogimanです。
東京に在住なのですが、今年の夏はセミの声がほとんど聞こえなかった気がします。
暑すぎてセミも地上に出てこれなかったのでしょうか??
すでに鈴虫の声が聞こえたような聞こえなかったような(空耳アワー)
さて、今回は便利スクリプトの続編です。
https://qiita.com/ogiman/items/493062f88d9819e88e7b
前回はECSのスクリプトでしたが、今回はRDSです!!
結論
24H365D、動いていた検証環境のAurora/RDSを夜間止めることでコストを30%削減しましたよ!と言いたかったのですが、一部のインスタンスではReservedインスタンスを購入していたことを忘れており、30%削減には至りませんでした・・・残念。とはいっても削減できたことは確かです。
誰向け
- AWSを使う方
- Aurora/RDSを使っている方
目指すところ
システムでは各種ツールやクラウドを駆使して最適なシステムを実現するが、
一方でコストとの戦いとも向き合わないと・・・
右肩上がりのコストをどうやって抑え込むか?そして抑え込んだ上でどう最高のシステムにするのか?
そして、ユーザーに提供する価値を最大化できるのか?
そんな日々ですね。
対応したこと
AWSコストがマジで右肩上がり・・・
そこで前からずっと対応したかった、検証環境の夜間停止。
夜間は誰も使っていないのに動き続けているシステムはマジで無駄
Aurora/RDSサービスに対応!
実際のコード
以下の感じでLambdaにコード作成すれば解決できまっすー
EventBridgeのスケジュールで起動したい・停止したい時刻を指定し、下記形式のイベントを渡せばOK
複数のクラスターにも対応しているので便利よ
イベント形式:
{
"action": "start",
"targets": [
{ "type": "rds", "identifier": "インスタンス名" },
{ "type": "aurora", "identifier": "クラスター名" }
]
}
lambda_function.py
メインのコードです。このコードからRDSとAuroraを条件分岐して、それぞれのコードを呼び出してます。
"""
メインのLambda関数のエントリーポイント。
指定されたRDSインスタンスまたはAuroraクラスターの起動または停止を処理します。
イベント構造:
{
"action": "start" | "stop",
"targets": [
{ "type": "rds", "identifier": "db-instance-id" },
{ "type": "aurora", "identifier": "db-cluster-id" },
...
]
}
"""
import json
import logging
import os
import datetime
import boto3
from botocore.exceptions import ClientError
from rds_manager import RDSManager
from aurora_manager import AuroraManager
# Root logger; its level comes from the LOG_LEVEL environment variable
# (upper-cased, defaulting to INFO when the variable is not set).
logger = logging.getLogger()
log_level = os.environ.get('LOG_LEVEL', 'INFO').upper()
logger.setLevel(log_level)
# Single RDS API client created at import time; lambda_handler hands it to
# both the RDS and the Aurora manager.
rds_client = boto3.client('rds')
def _is_failure_message(message):
    """Return True when a manager result message reports a failed operation.

    The managers report outcomes as plain strings. Anything that is not a
    string, starts with "Error" / "Unexpected error", or carries one of the
    legacy markers ("Error:" or "skipped") is treated as a failure.
    """
    if not isinstance(message, str):
        return True
    return (
        message.startswith(('Error', 'Unexpected error'))
        or 'Error:' in message
        or 'skipped' in message
    )


def lambda_handler(event, context):
    """Start or stop every RDS instance / Aurora cluster listed in the event.

    Args:
        event: Dict with an 'action' ("start" or "stop", case-insensitive) and
            a non-empty 'targets' list of
            {"type": "rds" | "aurora", "identifier": "<id>"} objects.
        context: Lambda context object (unused).

    Returns:
        Dict with 'statusCode' and a JSON-encoded 'body'.
        400: 'action' or 'targets' is missing or invalid.
        200: every target was processed successfully.
        207: some targets succeeded and some failed or were skipped.
        500: no target was processed successfully.
    """
    logger.info("Received event: %s", json.dumps(event))
    action = event.get('action')
    targets = event.get('targets')

    # Input validation. The isinstance check keeps a non-string action
    # (e.g. a number) from crashing on .lower().
    if not isinstance(action, str) or action.lower() not in ('start', 'stop'):
        logger.error("Invalid or missing 'action'. Must be 'start' or 'stop'.")
        return {
            'statusCode': 400,
            'body': json.dumps({'error': "Invalid or missing 'action'. Must be 'start' or 'stop'."})
        }
    action = action.lower()

    if not targets or not isinstance(targets, list):
        logger.error("Invalid or missing 'targets'. Must be a list of target objects.")
        return {
            'statusCode': 400,
            'body': json.dumps({'error': "Invalid or missing 'targets'. Must be a list of target objects."})
        }

    # Both managers share the module-level RDS client and logger.
    rds_manager = RDSManager(rds_client, logger_instance=logger)
    aurora_manager = AuroraManager(rds_client, logger_instance=logger)

    # (target type, action) -> manager method performing it.
    operations = {
        ('rds', 'start'): rds_manager.start_instance,
        ('rds', 'stop'): rds_manager.stop_instance,
        ('aurora', 'start'): aurora_manager.start_cluster,
        ('aurora', 'stop'): aurora_manager.stop_cluster,
    }

    results = []
    processed_count = 0
    error_count = 0

    for target in targets:
        # A non-dict entry has neither field and is skipped like any other
        # malformed entry instead of aborting the whole invocation.
        if isinstance(target, dict):
            target_type = target.get('type')
            identifier = target.get('identifier')
        else:
            target_type = identifier = None

        if not target_type or not identifier:
            logger.warning("Skipping invalid target entry: %s. Missing 'type' or 'identifier'.", target)
            results.append({
                'target': target,
                'status': 'skipped',
                'message': "Missing 'type' or 'identifier'."
            })
            error_count += 1
            continue

        logger.info("Processing action '%s' for %s target '%s'...", action, target_type, identifier)

        # Only string types can match; this also avoids hashing odd values.
        operation = operations.get((target_type, action)) if isinstance(target_type, str) else None
        if operation is None:
            logger.warning("Skipping target with unknown type: %s", target_type)
            results.append({
                'target': target,
                'status': 'error',
                'message': f"Unknown target type '{target_type}'. Skipped."
            })
            # Counted exactly once per unknown target.
            error_count += 1
            continue

        try:
            result_message = operation(identifier)
        except Exception as e:
            # Managers normally return a message rather than raise; this is
            # the last line of defence so one target cannot abort the rest.
            logger.error("Unexpected error processing target %s: %s", target, e, exc_info=True)
            results.append({
                'target': target,
                'status': 'error',
                'message': f"Unexpected internal error: {e}"
            })
            error_count += 1
            continue

        if _is_failure_message(result_message):
            target_status = 'error'
            error_count += 1
        else:
            target_status = 'success'
            processed_count += 1
        results.append({
            'target': target,
            'status': target_status,
            'message': result_message
        })

    # Overall response: 200 all succeeded, 207 partial success, 500 all failed.
    if error_count == 0:
        status_code = 200
    elif processed_count > 0:
        status_code = 207
    else:
        status_code = 500

    response_body = {
        'action': action,
        'summary': f"Processed {processed_count} targets successfully, {error_count} errors/skipped.",
        'details': results
    }
    logger.info("Lambda execution finished. Response: %s", json.dumps(response_body))
    return {
        'statusCode': status_code,
        'body': json.dumps(response_body, indent=2)
    }
rds_manager.py
RDSの処理を行うコードです。
import logging
import datetime
import boto3
from botocore.exceptions import ClientError
logger = logging.getLogger()


class RDSManager:
    """Start and stop individual RDS DB instances.

    The public methods never raise: they return a human-readable result
    message. Every failure message starts with "Error:" so callers can tell
    failures apart from successful or no-op outcomes.
    """

    # Statuses in which the instance is busy and can be neither started nor
    # stopped right now.
    _BUSY_STATES = frozenset({
        'starting', 'stopping', 'rebooting', 'modifying',
        'deleting', 'backing-up', 'creating',
    })

    def __init__(self, rds_client, logger_instance=None):
        """Store the RDS client and the logger to use.

        Args:
            rds_client: Client object exposing describe_db_instances,
                start_db_instance and stop_db_instance.
            logger_instance: Logger to write to; defaults to this module's
                logger when omitted.
        """
        self.rds_client = rds_client
        self.logger = logger_instance or logging.getLogger(__name__)

    def start_instance(self, instance_identifier):
        """Start an RDS instance that is currently stopped.

        Args:
            instance_identifier: ID of the RDS instance.

        Returns:
            A string message describing the outcome.
        """
        return self._change_state(instance_identifier, 'start')

    def stop_instance(self, instance_identifier):
        """Stop an RDS instance that is currently available.

        Args:
            instance_identifier: ID of the RDS instance.

        Returns:
            A string message describing the outcome.
        """
        return self._change_state(instance_identifier, 'stop')

    def _change_state(self, instance_identifier, verb):
        """Look up the instance status and start or stop it when possible.

        Args:
            instance_identifier: ID of the RDS instance.
            verb: 'start' or 'stop'.

        Returns:
            A string message describing the outcome.
        """
        if verb == 'start':
            required, settled = 'stopped', 'available'
            gerund, participle = 'starting', 'started'
        else:
            required, settled = 'available', 'stopped'
            gerund, participle = 'stopping', 'stopped'

        try:
            response = self.rds_client.describe_db_instances(DBInstanceIdentifier=instance_identifier)
            if not response.get('DBInstances'):
                self.logger.error("RDS instance %s description not found in response.", instance_identifier)
                return f"Error: Could not retrieve details for RDS instance {instance_identifier}."

            status = response['DBInstances'][0]['DBInstanceStatus']
            self.logger.info("RDS instance %s current status: %s", instance_identifier, status)

            if status == required:
                self.logger.info("%s RDS instance %s...", gerund.capitalize(), instance_identifier)
                if verb == 'start':
                    self.rds_client.start_db_instance(DBInstanceIdentifier=instance_identifier)
                else:
                    self.rds_client.stop_db_instance(DBInstanceIdentifier=instance_identifier)
                return f"Successfully initiated {verb} for RDS instance {instance_identifier}."

            if status == settled:
                # Already in the desired state: nothing to do.
                self.logger.warning("RDS instance %s is already %s.", instance_identifier, settled)
                return f"RDS instance {instance_identifier} is already {settled}."

            if status in self._BUSY_STATES:
                self.logger.warning(
                    "RDS instance %s is in state '%s' and cannot be %s now.",
                    instance_identifier, status, participle,
                )
                return (
                    f"RDS instance {instance_identifier} is in an intermediate state "
                    f"({status}) and cannot be {participle} now."
                )

            self.logger.warning(
                "RDS instance %s is in an unhandled state: %s. Cannot %s.",
                instance_identifier, status, verb,
            )
            return f"RDS instance {instance_identifier} is in an unhandled state: {status}. Cannot {verb}."
        except ClientError as e:
            error = e.response['Error']
            code = error['Code']
            if code == 'DBInstanceNotFound':
                self.logger.error("RDS instance %s not found.", instance_identifier)
                return f"Error: RDS instance {instance_identifier} not found."
            if code == 'InvalidDBInstanceState':
                # The status changed between describe and start/stop; reported
                # as a warning, not a failure, for both directions.
                self.logger.warning(
                    "Cannot %s RDS instance %s due to its current state: %s",
                    verb, instance_identifier, error.get('Message'),
                )
                return (
                    f"Cannot {verb} RDS instance {instance_identifier}, "
                    f"likely already {gerund} or in an invalid state."
                )
            self.logger.error("Error %s RDS instance %s: %s", gerund, instance_identifier, e)
            return f"Error: Could not {verb} RDS instance {instance_identifier}: {code}"
        except Exception as e:
            self.logger.error(
                "Unexpected error %s RDS instance %s: %s",
                gerund, instance_identifier, e, exc_info=True,
            )
            return f"Error: Unexpected error {gerund} RDS instance {instance_identifier}."
aurora_manager.py
Auroraの処理を行うコードです。
import logging
import boto3
from botocore.exceptions import ClientError
logger = logging.getLogger()


class AuroraManager:
    """Start and stop Aurora DB clusters.

    The public methods never raise: they return a human-readable result
    message. Every failure message starts with "Error:" so callers can tell
    failures apart from successful or no-op outcomes.
    """

    # Statuses in which the cluster is not startable/stoppable right now.
    # 'failing-over' is the status Aurora reports during a failover; the
    # original 'failover' spelling is kept for backward compatibility.
    _BUSY_STATES = frozenset({
        'starting', 'stopping', 'creating', 'modifying', 'deleting',
        'failover', 'failing-over', 'backing-up', 'cloning-failed',
    })

    def __init__(self, rds_client, logger_instance=None):
        """Store the RDS client and the logger to use.

        Args:
            rds_client: Client object exposing describe_db_clusters,
                start_db_cluster and stop_db_cluster.
            logger_instance: Logger to write to; defaults to this module's
                logger when omitted.
        """
        self.rds_client = rds_client
        self.logger = logger_instance or logging.getLogger(__name__)

    def start_cluster(self, cluster_identifier):
        """Start an Aurora cluster that is currently stopped.

        Args:
            cluster_identifier: ID of the Aurora cluster.

        Returns:
            A string message describing the outcome.
        """
        return self._change_state(cluster_identifier, 'start')

    def stop_cluster(self, cluster_identifier):
        """Stop an Aurora cluster that is currently available.

        Args:
            cluster_identifier: ID of the Aurora cluster.

        Returns:
            A string message describing the outcome.
        """
        return self._change_state(cluster_identifier, 'stop')

    def _change_state(self, cluster_identifier, verb):
        """Look up the cluster status and start or stop it when possible.

        Args:
            cluster_identifier: ID of the Aurora cluster.
            verb: 'start' or 'stop'.

        Returns:
            A string message describing the outcome.
        """
        if verb == 'start':
            required, settled = 'stopped', 'available'
            gerund, participle = 'starting', 'started'
        else:
            required, settled = 'available', 'stopped'
            gerund, participle = 'stopping', 'stopped'

        try:
            response = self.rds_client.describe_db_clusters(DBClusterIdentifier=cluster_identifier)
            if not response.get('DBClusters'):
                self.logger.error("Aurora cluster %s description not found in response.", cluster_identifier)
                return f"Error: Could not retrieve details for Aurora cluster {cluster_identifier}."

            status = response['DBClusters'][0]['Status']
            self.logger.info("Aurora cluster %s current status: %s", cluster_identifier, status)

            if status == required:
                self.logger.info("%s Aurora cluster %s...", gerund.capitalize(), cluster_identifier)
                if verb == 'start':
                    self.rds_client.start_db_cluster(DBClusterIdentifier=cluster_identifier)
                else:
                    self.rds_client.stop_db_cluster(DBClusterIdentifier=cluster_identifier)
                return f"Successfully initiated {verb} for Aurora cluster {cluster_identifier}."

            if status == settled:
                # Already in the desired state: nothing to do.
                self.logger.warning("Aurora cluster %s is already %s.", cluster_identifier, settled)
                return f"Aurora cluster {cluster_identifier} is already {settled}."

            if status in self._BUSY_STATES:
                self.logger.warning(
                    "Aurora cluster %s is in state '%s' and cannot be %s now.",
                    cluster_identifier, status, participle,
                )
                return (
                    f"Aurora cluster {cluster_identifier} is in an intermediate state "
                    f"({status}) and cannot be {participle} now."
                )

            self.logger.warning("Aurora cluster %s is in an unhandled state: %s.", cluster_identifier, status)
            return f"Aurora cluster {cluster_identifier} is in an unhandled state: {status}."
        except ClientError as e:
            error = e.response['Error']
            code = error['Code']
            if code == 'DBClusterNotFoundFault':
                self.logger.error("Aurora cluster %s not found.", cluster_identifier)
                return f"Error: Aurora cluster {cluster_identifier} not found."
            if code == 'InvalidDBClusterStateFault':
                # The status changed between describe and start/stop; reported
                # as a warning, not a failure, for both directions.
                self.logger.warning(
                    "Cannot %s Aurora cluster %s due to its current state: %s",
                    verb, cluster_identifier, error.get('Message'),
                )
                return (
                    f"Cannot {verb} Aurora cluster {cluster_identifier}, "
                    f"likely already {gerund} or in an invalid state."
                )
            self.logger.error("Error %s Aurora cluster %s: %s", gerund, cluster_identifier, e)
            return f"Error: Could not {verb} Aurora cluster {cluster_identifier}: {code}"
        except Exception as e:
            self.logger.error(
                "Unexpected error %s Aurora cluster %s: %s",
                gerund, cluster_identifier, e, exc_info=True,
            )
            return f"Error: Unexpected error {gerund} Aurora cluster {cluster_identifier}."
最後に
次はGitHubから開発生産性の指標を取得するスクリプトの紹介を予定しています。
開発チームの状態を測るのに便利だと思います!乞うご期待♪
この記事が少しでも参考になったら、いいねをお願いします!
励みになります♪
以上です!
暑いので体調にくれぐれもお気をつけてー