LoginSignup
1
0

本記事で実現できること

Athenaのクエリを作成する

前提

  • クエリの保存場所を指定する
    s3://xxxxx/Athena/

CloudFrontログ参照

テーブル作成

title: CloudFront Make Table
CREATE EXTERNAL TABLE IF NOT EXISTS default.cloudfront_logs (
  `date` DATE,
  time STRING,
  x_edge_location STRING,
  sc_bytes BIGINT,
  c_ip STRING,
  cs_method STRING,
  cs_host STRING,
  cs_uri_stem STRING,
  sc_status INT,
  cs_referrer STRING,
  cs_user_agent STRING,
  cs_uri_query STRING,
  cs_cookie STRING,
  x_edge_result_type STRING,
  x_edge_request_id STRING,
  x_host_header STRING,
  cs_protocol STRING,
  cs_bytes BIGINT,
  time_taken FLOAT,
  x_forwarded_for STRING,
  ssl_protocol STRING,
  ssl_cipher STRING,
  x_edge_response_result_type STRING,
  cs_protocol_version STRING,
  fle_status STRING,
  fle_encrypted_fields INT,
  c_port INT,
  time_to_first_byte FLOAT,
  x_edge_detailed_result_type STRING,
  sc_content_type STRING,
  sc_content_len BIGINT,
  sc_range_start BIGINT,
  sc_range_end BIGINT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LOCATION 's3://s3-cloudfront-log/'
TBLPROPERTIES ( 'skip.header.line.count'='2' )

ログ参照

title: CloudFront Watch Logs
SELECT  "date" AS log_date, time as log_time, x_edge_location as edge_location, sc_bytes, c_ip,
    cs_method, cs_uri_stem, sc_status,
    x_edge_result_type as result_type, cs_bytes, time_taken, x_forwarded_for as forwarded, ssl_protocol,
    ssl_cipher, x_edge_response_result_type, cs_protocol_version, c_port, time_to_first_byte,
    x_edge_detailed_result_type, sc_content_type, sc_content_len
FROM cloudfront_logs
WHERE "date" BETWEEN DATE '2024-01-09' AND DATE '2024-01-11'
ORDER BY "date" desc, time desc
LIMIT 1000;

WAFログ参照

テーブル作成

title: WAF Make Table
CREATE EXTERNAL TABLE default.waf_logs(
    `timestamp` bigint,
    `formatversion` int,
    `webaclid` string,
    `terminatingruleid` string,
    `terminatingruletype` string,
    `action` string,
    `terminatingrulematchdetails` array < struct < conditiontype: string,
    sensitivitylevel: string,
    location: string,
    matcheddata: array < string > > >,
    `httpsourcename` string,
    `httpsourceid` string,
    `rulegrouplist` array < struct < rulegroupid: string,
    terminatingrule: struct < ruleid: string,
    action: string,
    rulematchdetails: array < struct < conditiontype: string,
    sensitivitylevel: string,
    location: string,
    matcheddata: array < string > > > >,
    nonterminatingmatchingrules: array < struct < ruleid: string,
    action: string,
    overriddenaction: string,
    rulematchdetails: array < struct < conditiontype: string,
    sensitivitylevel: string,
    location: string,
    matcheddata: array < string > > > > >,
    excludedrules: string > >,
    `ratebasedrulelist` array < struct < ratebasedruleid: string,
    limitkey: string,
    maxrateallowed: int > >,
    `nonterminatingmatchingrules` array < struct < ruleid: string,
    action: string,
    rulematchdetails: array < struct < conditiontype: string,
    sensitivitylevel: string,
    location: string,
    matcheddata: array < string > > >,
    captcharesponse: struct < responsecode: string,
    solvetimestamp: string > > >,
    `requestheadersinserted` array < struct < name: string,
    value: string > >,
    `responsecodesent` string,
    `httprequest` struct < clientip: string,
    country: string,
    headers: array < struct < name: string,
    value: string > >,
    uri: string,
    args: string,
    httpversion: string,
    httpmethod: string,
    requestid: string >,
    `labels` array < struct < name: string > >,
    `captcharesponse` struct < responsecode: string,
    solvetimestamp: string,
    failureReason: string >,
    `challengeresponse` struct < responsecode: string,
    solvetimestamp: string,
    failureReason: string >,
    `ja3Fingerprint` string
)
PARTITIONED BY (`region` string, `date` string)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://waf-log-s3/AWSLogs/xxx(アカウントID)/WAFLogs/cloudfront/WAF-WebACL/'
TBLPROPERTIES(
    'projection.enabled' = 'true',
    'projection.region.type' = 'enum',
    'projection.region.values' = 'cloudfront,us-east-1,ap-northeast-1',
    'projection.date.type' = 'date',
    'projection.date.range' = '2021/01/01,NOW',
    'projection.date.format' = 'yyyy/MM/dd',
    'projection.date.interval' = '1',
    'projection.date.interval.unit' = 'DAYS',
    'storage.location.template' = 's3://aws-waf-logs-s3/AWSLogs/xxx(アカウントID)/WAFLogs/${region}/WAF-WebACL/${date}/'
)

ログ参照

title: WAF Watch Logs
SELECT timestamp,formatversion,webaclid,terminatingruleid,terminatingruletype,action,terminatingrulematchdetails,rulegrouplist,ratebasedrulelist,nonterminatingmatchingrules,requestheadersinserted,responsecodesent,httprequest,labels,captcharesponse,challengeresponse,ja3fingerprint
FROM waf_logs
WHERE from_unixtime(timestamp/1000) > now() - interval '1' day
LIMIT 1000;

VPCフローログ

テーブル作成

title: VPC Flowlogs Make Table
CREATE EXTERNAL default.vpc_flow_logs (
    `date` date,
    version int,
    account_id string,
    interface_id string,
    srcaddr string,
    dstaddr string,
    srcport int,
    dstport int,
    protocol bigint,
    packets bigint,
    bytes bigint,
    start bigint,
    `end` bigint,
    action string,
    log_status string,
    vpc_id string,
    subnet_id string,
    instance_id string,
    tcp_flags int,
    type string,
    pkt_srcaddr string,
    pkt_dstaddr string,
    region string,
    az_id string,
    sublocation_type string,
    sublocation_id string,
    pkt_src_aws_service string,
    pkt_dst_aws_service string,
    flow_direction string,
    traffic_path int
)
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '
LOCATION 's3://s3-vpc-log/AWSLogs/xxx(アカウントID)/vpcflowlogs/ap-northeast-1/'
TBLPROPERTIES ("skip.header.line.count" = "1")

S3ログ

テーブル作成

title: S3log Make Table
CREATE EXTERNAL TABLE `default.s3_xxx_logs`(
  `bucketowner` STRING,
  `bucket_name` STRING,
  `requestdatetime` STRING,
  `remoteip` STRING,
  `requester` STRING,
  `requestid` STRING,
  `operation` STRING,
  `key` STRING,
  `request_uri` STRING,
  `httpstatus` STRING,
  `errorcode` STRING,
  `bytessent` BIGINT,
  `objectsize` BIGINT,
  `totaltime` STRING,
  `turnaroundtime` STRING,
  `referrer` STRING,
  `useragent` STRING,
  `versionid` STRING,
  `hostid` STRING,
  `sigv` STRING,
  `ciphersuite` STRING,
  `authtype` STRING,
  `endpoint` STRING,
  `tlsversion` STRING)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
  'input.regex'='([^ ]*) ([^ ]*) \\[(.*?)\\] ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) (\"[^\"]*\"|-) (-|[0-9]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) (\"[^\"]*\"|-) ([^ ]*)(?: ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*))?.*$')
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  's3://s3-log/S3/s3-xxx/'
1
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
0