Pythonでgzipを直接S3へPUTする

Posted at 2020-11-11

やりたいこと

Oracleから取得したデータのCSV作成
CSVをgzip形式に圧縮
ファイルを直接S3へ保存

サンプルコード

lambda_function.py

import csv
import gzip
import io
import os
import boto3
import cx_Oracle

# Oracle connection settings, read from the process environment.
# Each value is None when the corresponding variable is not set.
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_USER = os.getenv('DB_USER')
DB_PASS = os.getenv('DB_PASS')
DB_SID = os.getenv('DB_SID')

# Name of the S3 bucket that lambda_handler uploads the gzip file to.
AWS_S3_BUCKET_NAME = 'hoge'

# Module-level S3 client, created once at import time and used by lambda_handler.
s3 = boto3.client('s3')


def _dump_cursor_to_gzip_csv(cursor):
    """Serialize an executed cursor's result set into an in-memory gzip CSV.

    The first CSV row holds the column names taken from
    ``cursor.description``; the remaining rows are fetched from the cursor
    in batches via ``fetchmany()`` until it is exhausted.

    Args:
        cursor: A DB-API cursor on which a query has already been executed.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that contains the
        gzip-compressed, UTF-8 encoded CSV data.
    """
    # The first element of each description entry is the column name.
    header = [column[0] for column in cursor.description]

    buffer = io.BytesIO()
    with gzip.GzipFile(fileobj=buffer, mode='wb') as gzip_file:
        # newline='' turns off newline translation in the text layer, as the
        # csv module requires. Without it '\n' would be rewritten to
        # os.linesep on write, overriding lineterminator and altering
        # newlines embedded in field values.
        with io.TextIOWrapper(gzip_file, encoding='utf-8', newline='') as text_stream:
            writer = csv.writer(text_stream, lineterminator='\n', quoting=csv.QUOTE_ALL)
            writer.writerow(header)
            # Fetch in batches so the raw rows are never all held in memory.
            while True:
                rows = cursor.fetchmany()
                if not rows:
                    break
                writer.writerows(rows)
    # Closing the wrappers above flushed the gzip trailer into the buffer but
    # left the buffer itself open; rewind it so a reader starts at byte 0.
    buffer.seek(0)
    return buffer


def lambda_handler(event, context):
    """Export the ``hoge`` table as a gzip CSV and PUT it directly to S3.

    Connects to Oracle with the module-level ``DB_*`` settings, runs
    ``SELECT * FROM hoge``, builds the gzip-compressed CSV in memory (no
    temporary file) and uploads it to ``AWS_S3_BUCKET_NAME`` under the key
    ``hoge.csv.gz``.

    Args:
        event: The Lambda invocation event; not inspected.
        context: The Lambda context object; not used.

    Returns:
        The ``event`` argument, unchanged.
    """
    file_name = 'hoge.csv.gz'

    # Connect to Oracle.
    dsn = f'(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_HOST}) (PORT={DB_PORT}))(CONNECT_DATA=(SID={DB_SID})))'
    connection = cx_Oracle.connect(DB_USER, DB_PASS, dsn)
    with connection:
        with connection.cursor() as cursor:
            cursor.execute('SELECT * FROM hoge')
            body = _dump_cursor_to_gzip_csv(cursor)

    # Upload after leaving the database `with` blocks so the cursor is not
    # held open for the duration of the network transfer.
    s3.put_object(Bucket=AWS_S3_BUCKET_NAME, Body=body, Key=file_name)

    return event

まとめ

最初は /tmp へファイルを作成してからS3へアップロードしていましたが、
io.BytesIO を利用することで、一時ファイルを作成せずに直接S3へPUTできました。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up