前提
以下ログがS3に配置されいてること(非圧縮)
{"messageType": "DATA_MESSAGE", "owner": "<AccoundId>", "logGroup": "/test/CloudWatchLogs/LogsToFirehose/202111-01", "logStream": "LogsToFirehose-202111-01", "subscriptionFilters": ["Test-Firehose-CloudwatchlogsToS3-202111-02"], "logEvents": [{"id": "36495610292092529380037177934895819345077418786389491712", "timestamp": 1636519765021, "message": "test-01\na"}, {"id": "36495610399336813039770944622541088518243389268649312257", "timestamp": 1636519769830, "message": "test-02\nb"}, {"id": "36495610513516628456247735107203966074203000179269042178", "timestamp": 1636519774950, "message": "test-03\nc"}]}
{"messageType": "DATA_MESSAGE", "owner": "<AccoundId>", "logGroup": "/test/CloudWatchLogs/LogsToFirehose/202111-01", "logStream": "LogsToFirehose-202111-01", "subscriptionFilters": ["Test-Firehose-CloudwatchlogsToS3-202111-02"], "logEvents": [{"id": "36495611140234470770553837270328019036083635546410057728", "timestamp": 1636519803053, "message": "test-04\na"}, {"id": "36495611234767329667125148767297928793840039970261041153", "timestamp": 1636519807292, "message": "test-05\nb"}, {"id": "36495611392879613124707266840786171346916923047662190594", "timestamp": 1636519814382, "message": "test-06\nc"}]}
{"messageType": "DATA_MESSAGE", "owner": "<AccoundId>", "logGroup": "/test/CloudWatchLogs/LogsToFirehose/202111-01", "logStream": "LogsToFirehose-202111-01", "subscriptionFilters": ["Test-Firehose-CloudwatchlogsToS3-202111-02"], "logEvents": [{"id": "36495767037964727157786910348879192128772434082420162560", "timestamp": 1636526793749, "message": "test08"}]}
{"messageType": "DATA_MESSAGE", "owner": "<AccoundId>", "logGroup": "/test/CloudWatchLogs/LogsToFirehose/202111-02", "logStream": "LogsToFirehose-202111-02", "subscriptionFilters": ["Test-Firehose-CloudwatchlogsToS3-202111-02"], "logEvents": [{"id": "36495435695499634457656013139313993705878822386382929920", "timestamp": 1636511935839, "message": "a\na"}, {"id": "36495435878008933162430632929642312049233012951326654465", "timestamp": 1636511944023, "message": "b\nb"}, {"id": "36495435986814268986061543237195081501484368739005104130", "timestamp": 1636511948902, "message": "c\nc"}]}
{"messageType": "DATA_MESSAGE", "owner": "<AccoundId>", "logGroup": "/test/CloudWatchLogs/LogsToFirehose/202111-02", "logStream": "LogsToFirehose-202111-02", "subscriptionFilters": ["Test-Firehose-CloudwatchlogsToS3-202111-02"], "logEvents": [{"id": "36495436161295299419365138710787572588733193091870097408", "timestamp": 1636511956726, "message": "a\na"}, {"id": "36495436284729924093232137799187773227841874027471699969", "timestamp": 1636511962261, "message": "b\nb"}, {"id": "36495436393066944267693905020768292596367614223524560898", "timestamp": 1636511967119, "message": "c\nc"}]}
{"messageType": "DATA_MESSAGE", "owner": "<AccoundId>", "logGroup": "/test/CloudWatchLogs/LogsToFirehose/202111-02", "logStream": "LogsToFirehose-202111-02", "subscriptionFilters": ["Test-Firehose-CloudwatchlogsToS3-202111-02"], "logEvents": [{"id": "36495465864148445736669774644795726857087180459541594112", "timestamp": 1636513288648, "message": "d"}]}
{"messageType": "DATA_MESSAGE", "owner": "<AccoundId>", "logGroup": "/test/CloudWatchLogs/LogsToFirehose/202111-02", "logStream": "LogsToFirehose-202111-02", "subscriptionFilters": ["Test-Firehose-CloudwatchlogsToS3-202111-02"], "logEvents": [{"id": "36495465957633169608910146859159155584300292741498798080", "timestamp": 1636513292840, "message": "e"}]}
{"messageType": "DATA_MESSAGE", "owner": "<AccoundId>", "logGroup": "/test/CloudWatchLogs/LogsToFirehose/202111-02", "logStream": "LogsToFirehose-202111-02", "subscriptionFilters": ["Test-Firehose-CloudwatchlogsToS3-202111-02"], "logEvents": [{"id": "36495466054195396318547745067249555673879763060203716608", "timestamp": 1636513297170, "message": "f"}]}
.
└── log
└── yyyy
└── MM
├── dd
│ └── HH
│ ├── (ファイル)
│ └── (ファイル)
└── dd
└── HH
├── (ファイル)
└── (ファイル)
Athenaでの検索
Database作成
CREATE DATABASE awscloudwatchlogs;
- defaultでもよい
テーブル定義
CREATE EXTERNAL TABLE IF NOT EXISTS awscloudwatchlogs.CloudwatchLogsXXXlog (
messageType string,
owner string,
logGroup string,
logStream string,
subscriptionFilters array<string>,
logEvents array<
struct<
id:string,
`timestamp`:timestamp,
message:string
>
>
)
PARTITIONED BY (dt string)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1'
)
LOCATION 's3://<bucket>/log/test/'
TBLPROPERTIES ('has_encrypted_data'='false');
- logEvents要素が構造体の配列
-
timestamp
は予約語のため、backticks (`)で囲む
パーティション定義
ALTER TABLE awscloudwatchlogs.CloudwatchLogsXXXlog
ADD PARTITION (dt='2021-11')
LOCATION 's3://<bucket>/log/test/2021/11/'
検索
WITH logevents_work AS (
SELECT
logevents AS logevents_json_array
FROM
CloudwatchLogsXXXlog
WHERE
dt='2021-11' --PARTITION指定
)
SELECT
logevents_json."timestamp" AT TIME ZONE '+09:00' AS "timestamp", --JST変換
logevents_json.message AS message
FROM
logevents_work, UNNEST(logevents_json_array) AS t(logevents_json)
WHERE
logevents_json."timestamp" >= cast('2021-11-10 00:00:00 +09:00' as timestamp with time zone) --時間指定(From)
AND
logevents_json."timestamp" < cast('2021-11-11 23:00:00 +09:00' as timestamp with time zone) --時間指定(To)
ORDER BY "timestamp";
-
logevents_work
: logEventsのリストを取得したもの
[{"id": "36495611140234470770553837270328019036083635546410057728", "timestamp": 1636519803053, "message": "test-04\na"}, {"id": "36495611234767329667125148767297928793840039970261041153", "timestamp": 1636519807292, "message": "test-05\nb"}, {"id": "36495611392879613124707266840786171346916923047662190594", "timestamp": 1636519814382, "message": "test-06\nc"}]
-
logevents_json
: logEventsのリストを縦型データに変換したもの
{"id": "36495611140234470770553837270328019036083635546410057728", "timestamp": 1636519803053, "message": "test-04\na"}
{"id": "36495611234767329667125148767297928793840039970261041153", "timestamp": 1636519807292, "message": "test-05\nb"}
{"id": "36495611392879613124707266840786171346916923047662190594", "timestamp": 1636519814382, "message": "test-06\nc"}
結果
|timestamp|message|
|:--|:--||
|2021-11-10 11:38:55.839 +09:00|a a|
|2021-11-10 11:39:04.023 +09:00|b b|
|2021-11-10 11:39:08.902 +09:00|c c|
|2021-11-10 11:39:16.726 +09:00|a a|
|2021-11-10 11:39:16.726 +09:00|a a|
|2021-11-10 11:39:22.261 +09:00|b b|
|2021-11-10 11:39:22.261 +09:00|b b|
|2021-11-10 11:39:27.119 +09:00|c c|
|2021-11-10 11:39:27.119 +09:00|c c|
|2021-11-10 12:01:28.648 +09:00|d|
|2021-11-10 12:01:32.840 +09:00|e|
|2021-11-10 12:01:37.170 +09:00|f|
|2021-11-10 13:49:25.021 +09:00|test-01 a|
|2021-11-10 13:49:25.021 +09:00|test-01 a|
|2021-11-10 13:49:29.830 +09:00|test-02 b|
|2021-11-10 13:49:29.830 +09:00|test-02 b|
|2021-11-10 13:49:34.950 +09:00|test-03 c|
|2021-11-10 13:49:34.950 +09:00|test-03 c|
|2021-11-10 13:50:03.053 +09:00|test-04 a|
|2021-11-10 13:50:03.053 +09:00|test-04 a|
|2021-11-10 13:50:07.292 +09:00|test-05 b|
|2021-11-10 13:50:07.292 +09:00|test-05 b|
|2021-11-10 13:50:14.382 +09:00|test-06 c|
|2021-11-10 13:50:14.382 +09:00|test-06 c|
|2021-11-10 13:52:54.044 +09:00|test\10|
|2021-11-10 15:46:33.749 +09:00|test08|
|2021-11-10 15:47:28.209 +09:00|test8|
課題
- 改行コードが表示されない
- 改行コード
\n
は表示されていないがCSVでは改行されたデータが入っていたのでOK
- 改行コード
日本時間での表示時間指定でのSELECT- 圧縮データでの確認
テーブルパーティション化