More than 1 year has passed since last update.

【Python】【boto3】S3からオブジェクトを分割して読み込む

Posted at 2023-02-04

AWS Lambda の限られたメモリの中で大きなファイルをダウンロードする必要がありそうだったので、S3 オブジェクトを分割して読み込む方法を調査した。
Lambda の実行時間の上限(最大15分)については、一旦考えないことにする。

import boto3


def s3_get_object_chunk(bucket, key, buf_size=4096):
    """Yield the contents of an S3 object in pieces of at most ``buf_size`` bytes.

    The object size is looked up once, then each piece is fetched with its
    own ranged ``get_object`` request, so this function holds at most one
    piece in memory at a time.

    Args:
        bucket: Name of the S3 bucket.
        key: Key of the object inside the bucket.
        buf_size: Maximum number of bytes fetched per request. Must be >= 1.

    Yields:
        bytes: Consecutive pieces of the object, in order. Every piece is
        ``buf_size`` bytes long except possibly the last one. Nothing is
        yielded for an empty object.

    Raises:
        ValueError: If ``buf_size`` is smaller than 1. Because this is a
            generator, the error surfaces on the first iteration.
    """
    if buf_size < 1:
        raise ValueError('buf_size must be a positive integer')

    s3_client = boto3.client('s3')
    # head_object returns only metadata. Using get_object here would open a
    # streaming body for the whole object that is never read or closed.
    file_size = s3_client.head_object(Bucket=bucket, Key=key)['ContentLength']
    print('file_size: ', int(file_size))

    for offset in range(0, file_size, buf_size):
        # HTTP byte ranges are inclusive on both ends, hence the "- 1".
        # The last piece is clamped to the end of the object.
        end = min(offset + buf_size, file_size) - 1
        read_range = 'bytes={start}-{end}'.format(start=offset, end=end)
        response = s3_client.get_object(Bucket=bucket, Key=key, Range=read_range)
        yield response['Body'].read()


if __name__ == '__main__':
    # Example usage: copy an S3 object to a local file piece by piece.
    bucket = 'bucket'
    s3_path = 'test.csv'
    local_output = 'output.txt'

    total_size = 0
    # The context manager closes the file even if a download fails midway,
    # which the manual open()/close() pair did not guarantee.
    with open(local_output, 'wb') as local_file:
        for read_bytes in s3_get_object_chunk(bucket, s3_path):
            local_file.write(read_bytes)
            total_size += len(read_bytes)

    print('written: ', str(total_size))

参考

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up