直近のAthenaのALBアクセスログ見ようと思ってクエリしたら結果が空っぽになっていた。S3にログは随時上がってきているのに??
問題発生時期を遡っていくと2024/5/20あたりから、と思ったら2024/5/20にALBのバージョンアップでアクセスログにtraceability_id
が追加となりカラム数の不一致が原因であった。
Querying Application Load Balancer logs - Amazon Athena
AWS公式ののCREATE TABLE
スニペットの通りtraceability_id
を追加したテーブルを再構成すると無事にクエリは成功した。
新しいテーブル定義
athena
CREATE EXTERNAL TABLE `ms-alb-securite-access-log-rev20240607`(
`type` string COMMENT '',
`time` string COMMENT '',
`elb` string COMMENT '',
`client_ip` string COMMENT '',
`client_port` int COMMENT '',
`target_ip` string COMMENT '',
`target_port` int COMMENT '',
`request_processing_time` double COMMENT '',
`target_processing_time` double COMMENT '',
`response_processing_time` double COMMENT '',
`elb_status_code` string COMMENT '',
`target_status_code` string COMMENT '',
`received_bytes` bigint COMMENT '',
`sent_bytes` bigint COMMENT '',
`request_verb` string COMMENT '',
`request_url` string COMMENT '',
`request_proto` string COMMENT '',
`user_agent` string COMMENT '',
`ssl_cipher` string COMMENT '',
`ssl_protocol` string COMMENT '',
`target_group_arn` string COMMENT '',
`trace_id` string COMMENT '',
`domain_name` string COMMENT '',
`chosen_cert_arn` string COMMENT '',
`matched_rule_priority` string COMMENT '',
`request_creation_time` string COMMENT '',
`actions_executed` string COMMENT '',
`redirect_url` string COMMENT '',
`lambda_error_reason` string COMMENT '',
`target_port_list` string COMMENT '',
`target_status_code_list` string COMMENT '',
`classification` string COMMENT '',
`classification_reason` string COMMENT '',
-- 追加↓
`traceability_id` string COMMENT '')
PARTITIONED BY (
`log_date` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
-- 変更↓
'input.regex'='([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) (.*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^s]+?)\" \"([^s]+)\" \"([^ ]*)\" \"([^ ]*)\" ?([^ ]*)?( .*)?')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
's3://alb-access-log/AWSLogs/************/elasticloadbalancing/ap-northeast-1'
TBLPROPERTIES (
'projection.enabled'='true',
'projection.log_date.format'='yyyy/MM/dd',
'projection.log_date.interval'='1',
'projection.log_date.interval.unit'='DAYS',
'projection.log_date.range'='NOW-1YEARS,NOW',
'projection.log_date.type'='date',
'storage.location.template'='s3://alb-access-log/AWSLogs/************/elasticloadbalancing/ap-northeast-1/${log_date}',
'transient_lastDdlTime'='1717717328')
以前のテーブル定義
athena
CREATE EXTERNAL TABLE `alb-access-log`(
`type` string COMMENT '',
`time` string COMMENT '',
`elb` string COMMENT '',
`client_ip` string COMMENT '',
`client_port` int COMMENT '',
`target_ip` string COMMENT '',
`target_port` int COMMENT '',
`request_processing_time` double COMMENT '',
`target_processing_time` double COMMENT '',
`response_processing_time` double COMMENT '',
`elb_status_code` string COMMENT '',
`target_status_code` string COMMENT '',
`received_bytes` bigint COMMENT '',
`sent_bytes` bigint COMMENT '',
`request_verb` string COMMENT '',
`request_url` string COMMENT '',
`request_proto` string COMMENT '',
`user_agent` string COMMENT '',
`ssl_cipher` string COMMENT '',
`ssl_protocol` string COMMENT '',
`target_group_arn` string COMMENT '',
`trace_id` string COMMENT '',
`domain_name` string COMMENT '',
`chosen_cert_arn` string COMMENT '',
`matched_rule_priority` string COMMENT '',
`request_creation_time` string COMMENT '',
`actions_executed` string COMMENT '',
`redirect_url` string COMMENT '',
`lambda_error_reason` string COMMENT '',
`target_port_list` string COMMENT '',
`target_status_code_list` string COMMENT '',
`classification` string COMMENT '',
`classification_reason` string COMMENT '')
PARTITIONED BY (
`log_date` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'input.regex'='([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^s]+?)\" \"([^s]+)\" \"([^ ]*)\" \"([^ ]*)\"')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
's3://alb-access-log/AWSLogs/************/elasticloadbalancing/ap-northeast-1'
TBLPROPERTIES (
'projection.enabled'='true',
'projection.log_date.format'='yyyy/MM/dd',
'projection.log_date.interval'='1',
'projection.log_date.interval.unit'='DAYS',
'projection.log_date.range'='NOW-1YEARS,NOW',
'projection.log_date.type'='date',
'storage.location.template'='s3://alb-access-log/AWSLogs/************/elasticloadbalancing/ap-northeast-1/${log_date}',
'transient_lastDdlTime'='1645361779')