https://repost.aws/knowledge-center/aurora-mysql-stop-cluster-seven-days

 

Stop Amazon Aurora cluster for longer than seven days

I want to keep an Amazon Aurora cluster (Amazon Relational Database Service) stopped for longer than the seven-day limit.

repost.aws

 

  • Aurora 특성상 7 Days 이후에는 자동으로 시작
  • Lambda - EventBridge(스케줄)로 AutoStop 생성
    • AWS SNS 서비스로 Event 수신
  • Chatops 
    • Slackbot 방식으로 Aurora 현재 상태 확인 및 테스트 이후 Stop & Start 구현
import json
import os
import time
from urllib import request, parse
import boto3

def lambda_handler(event, context):
    """Handle a Slack Events API request and stop an Aurora cluster on demand.

    Flow:
      1. Acknowledge (and ignore) Slack re-deliveries of an earlier event.
      2. Answer Slack's URL-verification handshake by echoing ``challenge``.
      3. For event callbacks, require the payload ``token`` to equal the
         ``VERIFICATION_TOKEN`` environment variable.
      4. For channel messages of the form ``stop <cluster>``, call
         ``stop_specific_rds_cluster`` and post its result to the channel the
         message came from; when the stop was initiated, also post whatever
         ``wait_for_cluster_stop`` reports.

    Args:
        event: Invocation payload. Must contain a JSON string under ``body``
            and may contain a ``headers`` mapping.
        context: Lambda context object (unused).

    Returns:
        dict: ``statusCode`` plus a JSON-encoded ``body``. 200 for handled or
        ignored requests, 403 when token verification fails, 500 when any
        exception is raised while handling the request.
    """
    try:
        bot_token = os.environ.get('BOT_TOKEN')
        if not bot_token:
            raise ValueError("BOT_TOKEN environment variable is not set")

        # Slack re-sends an event when it does not receive a timely response
        # and marks the re-delivery with an X-Slack-Retry-Num header. The
        # first delivery is already being handled, so acting on a retry would
        # only produce duplicate stop attempts and duplicate channel messages.
        # Header names are compared case-insensitively because their casing
        # depends on the front end that invokes the function.
        headers = event.get('headers') or {}
        if any(str(name).lower() == 'x-slack-retry-num' for name in headers):
            print("Ignoring Slack retry delivery")
            return {
                'statusCode': 200,
                'body': json.dumps({'message': 'Ignored Slack retry'})
            }

        body = json.loads(event['body'])
        print(f"Received body: {body}")

        # URL-verification handshake: echo the challenge back.
        if 'challenge' in body:
            return {
                'statusCode': 200,
                'body': json.dumps({'challenge': body['challenge']})
            }

        if 'event' in body:
            # Fail closed: a payload without a token, or a deployment without
            # VERIFICATION_TOKEN configured, must not bypass verification.
            expected_token = os.environ.get('VERIFICATION_TOKEN')
            if not expected_token or body.get('token') != expected_token:
                return {
                    'statusCode': 403,
                    'body': json.dumps({'error': 'Invalid verification token'})
                }

            slack_event = body['event']
            if (slack_event.get('type') == 'message' and
                    slack_event.get('channel_type') == 'channel' and
                    'text' in slack_event):

                text = (slack_event.get('text') or '').strip()
                print(f"text: {text}")

                prefix = "stop "
                if text.startswith(prefix):
                    # Slice the prefix off instead of splitting on it so that
                    # a later "stop " inside the argument cannot truncate the
                    # cluster name.
                    cluster_name = text[len(prefix):].strip()
                    initial_response = stop_specific_rds_cluster(cluster_name)

                    # Send the initial response.
                    post_message_to_slack(initial_response, slack_event['channel'], bot_token)

                    # Only check the status when the stop was actually initiated.
                    if "Stopping cluster" in initial_response:
                        # Check the status and send the result.
                        status_response = wait_for_cluster_stop(cluster_name)
                        post_message_to_slack(status_response, slack_event['channel'], bot_token)

        return {
            'statusCode': 200,
            'body': json.dumps({'message': 'Success'})
        }

    except Exception as e:
        # Top-level boundary: log and convert any failure into an HTTP 500.
        print(f"Error in lambda_handler: {str(e)}")
        return {
            'statusCode': 500,
            'body': json.dumps({'error': str(e)})
        }

def wait_for_cluster_stop(cluster_name, max_attempts=30, delay_seconds=10, rds_client=None):
    """Poll an RDS cluster until it reports ``stopped`` and describe the outcome.

    The cluster status is read up to ``max_attempts`` times, sleeping
    ``delay_seconds`` between reads while the status is ``stopping``. With the
    defaults this can block for up to roughly 30 * 10 = 300 seconds, so the
    caller's own execution time limit must allow for that.

    Args:
        cluster_name (str): RDS cluster identifier.
        max_attempts (int): Maximum number of status checks.
        delay_seconds (int): Seconds to wait between two checks.
        rds_client: Optional RDS client to use; when ``None`` a new one is
            created with ``boto3.client('rds')``.

    Returns:
        str: Human-readable result message (stopped, unexpected status,
        timeout, cluster not found, or error). Exceptions are not propagated.
    """
    if rds_client is None:
        rds_client = boto3.client('rds')

    # Pre-bound so the timeout message is well-defined even when no status
    # check runs at all (max_attempts <= 0).
    current_status = 'unknown'

    try:
        for attempt in range(max_attempts):
            cluster = rds_client.describe_db_clusters(
                DBClusterIdentifier=cluster_name
            )['DBClusters'][0]

            current_status = cluster['Status']

            if current_status == 'stopped':
                return f"✅ Cluster {cluster_name} has been successfully stopped."
            if current_status != 'stopping':
                return f"❌ Unexpected cluster status: {current_status}. Please check manually."

            # Still stopping: wait before the next check, but do not sleep
            # after the final one since no further check follows.
            if attempt < max_attempts - 1:
                time.sleep(delay_seconds)

        return f"⚠️ Timeout waiting for cluster {cluster_name} to stop. Current status: {current_status}"

    except rds_client.exceptions.DBClusterNotFoundFault:
        return f"❌ Error: Cluster {cluster_name} not found during status check."
    except Exception as e:
        error_msg = f"❌ Error checking cluster status: {str(e)}"
        print(error_msg)
        return error_msg

def post_message_to_slack(response_text, channel_id, bot_token, timeout=10):
    """Post a plain-text message to a Slack channel via ``chat.postMessage``.

    This is best-effort: any failure (network error, timeout, malformed
    response, or a Slack reply with ``ok`` false) is logged with ``print`` and
    never raised to the caller.

    Args:
        response_text: Message text to post.
        channel_id: ID of the Slack channel to post to.
        bot_token: Bot token sent as the ``Authorization: Bearer`` credential.
        timeout: Seconds to wait for Slack before giving up, so that a hung
            connection cannot block the caller indefinitely.

    Returns:
        None
    """
    try:
        url = "https://slack.com/api/chat.postMessage"
        headers = {
            'Authorization': f"Bearer {bot_token}",
            # Declare the charset explicitly; the payload below is UTF-8.
            'Content-Type': 'application/json; charset=utf-8'
        }
        payload = json.dumps({
            'channel': channel_id,
            'text': response_text
        }).encode('utf-8')

        req = request.Request(url, data=payload, headers=headers)
        # The context manager closes the HTTP response even if reading or
        # decoding it fails.
        with request.urlopen(req, timeout=timeout) as response:
            response_data = json.loads(response.read().decode())

        if not response_data.get('ok'):
            print(f"Slack API error: {response_data.get('error')}")

    except Exception as e:
        print(f"Error posting message to Slack: {str(e)}")

def stop_specific_rds_cluster(cluster_name, rds_client=None):
    """Stop one RDS cluster if it is available and opted in via its tags.

    The cluster is stopped only when its status is ``available`` and it
    carries the tag ``autostop`` with the value ``yes``.

    Args:
        cluster_name: RDS cluster identifier.
        rds_client: Optional RDS client; when ``None`` one is created with
            ``boto3.client('rds')``.

    Returns:
        str: Message describing the outcome (stop initiated, wrong state,
        missing tag, cluster not found, or error). Exceptions are not
        propagated.
    """
    client = boto3.client('rds') if rds_client is None else rds_client

    try:
        described = client.describe_db_clusters(DBClusterIdentifier=cluster_name)
        cluster_info = described['DBClusters'][0]

        # Guard 1: only a cluster that is currently available can be stopped.
        if cluster_info['Status'] != 'available':
            state = cluster_info['Status']
            return f"❌ Cluster {cluster_name} is not in available state (current: {state})."

        # Guard 2: the cluster must be explicitly opted in with autostop=yes.
        tag_listing = client.list_tags_for_resource(
            ResourceName=cluster_info['DBClusterArn']
        )
        opted_in = False
        for entry in tag_listing['TagList']:
            if entry['Key'] == 'autostop' and entry['Value'] == 'yes':
                opted_in = True
                break
        if not opted_in:
            return f"❌ Cluster {cluster_name} does not have required autostop tag."

        client.stop_db_cluster(DBClusterIdentifier=cluster_name)
        return f"⏳ Stopping cluster: {cluster_name}..."

    except client.exceptions.DBClusterNotFoundFault:
        return f"❌ Cluster {cluster_name} not found."
    except Exception as exc:
        failure = f"❌ Error stopping cluster {cluster_name}: {str(exc)}"
        print(failure)
        return failure

+ Recent posts