Viewing AWS WAF (v2) logs in Athena and Datadog


AWS WAF logs

AWS WAF emits logs like the example below.
Logging is disabled by default, and the officially supported delivery path is via Kinesis Data Firehose.
Let's build the infrastructure to make these logs viewable in both Athena and Datadog.

{
    "id": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890",
    "content": {
        "timestamp": "2021-05-15T00:00:00.000Z",
        "tags": [
            "aws_account:123456789012",
            "env:production",
            "region:ap-northeast-1",
            "service:waf",
            "source:waf",
            "sourcecategory:aws"
        ],
        "service": "waf",
        "attributes": {
            "http": {
                "url_details": {
                    "path": "/"
                },
                "method": "POST",
                "request_id": "abcdefghij1234567890"
            },
            "webaclId": "arn:aws:wafv2:ap-northeast-1:1234567890:regional/webacl/Sample/abcdefghij1234567890",
            "httpSourceId": "123456789012:abcdefghij1234567890:production",
            "httpSourceName": "APIGW",
            "system": {
                "action": "ALLOW"
            },
            "network": {
                "client": {
                    "ip": "100.100.100.100"
                }
            },
            "httpRequest": {
                "country": "JP",
                "httpVersion": "HTTP/1.1",
                "args": "",
                "headers": [
                    {
                        "name": "X-Forwarded-For",
                        "value": "100.100.100.100"
                    },
                    {
                        "name": "X-Forwarded-Proto",
                        "value": "https"
                    },
                    {
                        "name": "X-Forwarded-Port",
                        "value": "443"
                    },

                    ... (omitted)

                ]
            },
            "ruleGroupList": [
                {
                    "ruleGroupId": "AWS#AWSManagedRulesCommonRuleSet"
                }
            ],
            "terminatingRuleId": "Default_Action",
            "terminatingRuleType": "REGULAR",
            "formatVersion": 1,
            "timestamp": 1621036800
        }
    }
}

Architecture diagram

The overall architecture is shown below.

(Architecture diagram: waf_etl.png)

Infrastructure definitions

Everything is defined in Terraform.
The same setup can also be built in the AWS console, so readers who prefer that can translate the Terraform accordingly.

S3

First, create the S3 bucket where the WAF logs will be stored.

resource "aws_s3_bucket" "log_bucket" {
    bucket = "xxxxxxxxxxxxxxxxxxxx" // bucket name
    acl = "private"
}

Glue table

Create a Glue table so that the logs can be queried from Athena.
The logs are stored under paths such as
s3://xxxxxxxxxxxxxxxxxxxx/waf/year=2021/month=05/day=15/hour=00/XXXXXXXXXX.parquet
(this rule is defined in the Firehose delivery settings described later).

Partitioning uses the Partition Projection feature with the location template
year=${year}/month=${month}/day=${day}/hour=${hour}
so that year, month, day, and hour each serve as a partition key.

resource "aws_glue_catalog_database" "database" {
    name = "${var.service_name}_logs"
}

resource "aws_glue_catalog_table" "waf_catalog_table" {
    database_name = aws_glue_catalog_database.database.name
    name = "waf"

    parameters = {
        classification = "parquet"
        "projection.enabled" = true
        "projection.year.type" = "integer"
        "projection.year.digits" = "4"
        "projection.year.interval" = "1"
        "projection.year.range" = "2021,2099"
        "projection.month.type" = "enum"
        "projection.month.values" = "01,02,03,04,05,06,07,08,09,10,11,12"
        "projection.day.type" = "enum"
        "projection.day.values" = "01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31"
        "projection.hour.type" = "enum"
        "projection.hour.values" = "01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24"
        "storage.location.template" = "s3://${aws_s3_bucket.log_bucket.name}/waf/year=${"$"}{year}/month=${"$"}{month}/day=${"$"}{day}/hour=${"$"}{hour}"
    }

    partition_keys {
        name = "year"
        type = "string"
    }

    partition_keys {
        name = "month"
        type = "string"
    }

    partition_keys {
        name = "day"
        type = "string"
    }

    partition_keys {
        name = "hour"
        type = "string"
    }

    storage_descriptor {
        location = "s3://${aws_s3_bucket.log_bucket.bucket}/waf/"
        input_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
        output_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"

        ser_de_info {
            name = "waf"
            serialization_library = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"

            parameters = {
                "serialization.format" = 1
            }
        }

        columns {
            name = "timestamp"
            type = "bigint"
        }
        columns {
            name = "formatversion"
            type = "int"
        }
        columns {
            name = "webaclid"
            type = "string"
        }
        columns {
            name = "terminatingruleid"
            type = "string"
        }
        columns {
            name = "terminatingruletype"
            type = "string"
        }
        columns {
            name = "action"
            type = "string"
        }
        columns {
            name = "terminatingrulematchdetails"
            type = "array<struct<conditiontype:string,location:string,matcheddata:array<string>>>"
        }
        columns {
            name = "httpsourcename"
            type = "string"
        }
        columns {
            name = "httpsourceid"
            type = "string"
        }
        columns {
            name = "rulegrouplist"
            type = "array<struct<rulegroupid:string,terminatingrule:struct<ruleid:string,action:string>,nonterminatingmatchingrules:array<struct<action:string,ruleid:string>>,excludedrules:array<struct<exclusiontype:string,ruleid:string>>>>"
        }
        columns {
            name = "ratebasedrulelist"
            type = "array<struct<ratebasedruleid:string,limitkey:string,maxrateallowed:int>>"
        }
        columns {
            name = "nonterminatingmatchingrules"
            type = "array<struct<ruleid:string,action:string>>"
        }
        columns {
            name = "httprequest"
            type = "struct<clientIp:string,country:string,headers:array<struct<name:string,value:string>>,uri:string,args:string,httpVersion:string,httpMethod:string,requestId:string>"
        }
    }
}

Kinesis Firehose

The Firehose delivery stream name must begin with aws-waf-logs-.
We define two delivery streams: one that delivers to S3 and one that delivers to Datadog.

The S3 delivery Firehose runs a Lambda processor that also forwards the records to the Datadog delivery Firehose.
(The Lambda implementation is shown later.)

For variables such as the KMS key, decide on a definition approach that fits your environment.
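
For reference, a minimal sketch of the input variables this article assumes (the types here are assumptions, and passing the Datadog API key as a plain variable is a simplification; prefer a secret store in practice):

variable "service_name" {
    type = string
}

variable "aws_account_id" {
    type = string
}

variable "kms_key_arn" {
    type = string
}

variable "waf_firehose_lambda_name" {
    type = string
}

variable "datadog_api_key_value" {
    type      = string
    sensitive = true // keep the API key out of logs and plan output
}

variable "stage" {
    type = string // e.g. "production"; forwarded to Datadog as the env attribute
}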

Firehose for S3 delivery

The key point is the S3 prefix under which the logs are written.
It must line up with the partition keys in the Glue definition above, so define it as follows:

prefix = "waf/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/"

resource "aws_iam_role" "firehose_role" {
    name = "${var.service_name}-Firehose-Role"
    assume_role_policy = data.aws_iam_policy_document.firehose_assume_role.json
}

data "aws_iam_policy_document" "firehose_assume_role" {
    statement {
        actions = ["sts:AssumeRole"]

        principals {
            type = "Service"
            identifiers = ["firehose.amazonaws.com"]
        }

        condition {
            test = "StringEquals"
            variable = "sts:ExternalId"
            values = [
                var.aws_account_id
            ]
        }
    }
}

resource "aws_iam_policy" "firehose_policy" {
    name = "${var.service_name}-Firehose-Policy"
    policy = <<POLICY
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "",
            "Effect": "Allow",
            "Action": [
                "glue:GetTable",
                "glue:GetTableVersion",
                "glue:GetTableVersions"
            ],
            "Resource": "*"
        },
        {
            "Sid": "",
            "Effect": "Allow",
            "Action": [
                "s3:AbortMultipartUpload",
                "s3:GetBucketLocation",
                "s3:GetObject",
                "s3:ListBucket",
                "s3:ListBucketMultipartUploads",
                "s3:PutObject"
            ],
            "Resource": [
                "arn:aws:s3:::${aws_s3_bucket.log_bucket.name}",
                "arn:aws:s3:::${aws_s3_bucket.log_bucket.name}/*",
                "arn:aws:s3:::%FIREHOSE_BUCKET_NAME%",
                "arn:aws:s3:::%FIREHOSE_BUCKET_NAME%/*"
            ]
        },
        {
            "Sid": "",
            "Effect": "Allow",
            "Action": [
                "lambda:InvokeFunction",
                "lambda:GetFunctionConfiguration"
            ],
            "Resource": "arn:aws:lambda:ap-northeast-1:${var.aws_account_id}:function:*:*"
        },
        {
            "Sid": "",
            "Effect": "Allow",
            "Action": [
                "logs:PutLogEvents"
            ],
            "Resource": [
                "*"
            ]
        },
        {
            "Sid": "",
            "Effect": "Allow",
            "Action": [
                "kinesis:DescribeStream",
                "kinesis:GetShardIterator",
                "kinesis:GetRecords",
                "kinesis:ListShards"
            ],
            "Resource": "arn:aws:kinesis:ap-northeast-1:${var.aws_account_id}:stream/%FIREHOSE_STREAM_NAME%"
        },
        {
            "Effect": "Allow",
            "Action": [
                "kms:Decrypt"
            ],
            "Resource": [
                "arn:aws:kms:ap-northeast-1:${var.aws_account_id}:key/%SSE_KEY_ID%"
            ],
            "Condition": {
                "StringEquals": {
                    "kms:ViaService": "kinesis.%REGION_NAME%.amazonaws.com"
                },
                "StringLike": {
                    "kms:EncryptionContext:aws:kinesis:arn": "arn:aws:kinesis:%REGION_NAME%:${var.aws_account_id}:stream/%FIREHOSE_STREAM_NAME%"
                }
            }
        }
    ]
}
POLICY
}

resource "aws_iam_role_policy_attachment" "firehose_policy_0" {
    policy_arn = aws_iam_policy.firehose_policy.arn
    role = aws_iam_role.firehose_role.name
}

resource "aws_kinesis_firehose_delivery_stream" "waf_firehose" {
    destination = "extended_s3"
    name = "aws-waf-logs-${var.service_name}"

    extended_s3_configuration {
        role_arn = aws_iam_role.firehose_role.arn
        bucket_arn = aws_s3_bucket.log_bucket.arn
        prefix = "waf/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/"
        error_output_prefix = "waf/errors/!{firehose:random-string}/!{firehose:error-output-type}/!{timestamp:yyyy-MM-dd}/"

        compression_format = "UNCOMPRESSED"

        buffer_interval = 300
        buffer_size = 128

        kms_key_arn = var.kms_key_arn

        processing_configuration {
            enabled = true

            processors {
                type = "Lambda"
                parameters {
                    parameter_name = "LambdaArn"
                    parameter_value = "arn:aws:lambda:ap-northeast-1:${var.aws_account_id}:function:${var.waf_firehose_lambda_name}:$LATEST"
                }
            }
        }

        cloudwatch_logging_options {
            enabled = true
            log_group_name = "/aws/kinesisfirehose/aws-waf-logs-${var.service_name}"
            log_stream_name = "S3Delivery"
        }

        data_format_conversion_configuration {

            input_format_configuration {
                deserializer {
                    open_x_json_ser_de {}
                }
            }

            output_format_configuration {
                serializer {
                    parquet_ser_de {}
                }
            }

            schema_configuration {
                database_name = aws_glue_catalog_database.database.name
                table_name = aws_glue_catalog_table.waf_catalog_table.name
                role_arn = aws_iam_role.firehose_role.arn
            }
        }
    }

    server_side_encryption {
        enabled = true
        key_type = "AWS_OWNED_CMK"
    }
}

Firehose for Datadog delivery

Delivery to Datadog uses the HTTP endpoint destination.
The Datadog API key is injected through a variable.
Note that the IAM role and policy below reuse the same Terraform resource names as the S3 delivery section; if both sets live in the same module, rename one of them or split them into separate modules.

resource "aws_iam_role" "firehose_role" {
    name = "DatadogForwarder-Firehose-Role"
    assume_role_policy = data.aws_iam_policy_document.firehose_assume_role.json
}

data "aws_iam_policy_document" "firehose_assume_role" {
    statement {
        actions = ["sts:AssumeRole"]

        principals {
            type = "Service"
            identifiers = ["firehose.amazonaws.com"]
        }

        condition {
            test = "StringEquals"
            variable = "sts:ExternalId"
            values = [
                var.aws_account_id
            ]
        }
    }
}

resource "aws_iam_policy" "firehose_policy" {
    name = "DatadogForwarder-Firehose-Policy"
    policy = <<POLICY
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "",
            "Effect": "Allow",
            "Action": [
                "glue:GetTable",
                "glue:GetTableVersion",
                "glue:GetTableVersions"
            ],
            "Resource": "*"
        },
        {
            "Sid": "",
            "Effect": "Allow",
            "Action": [
                "s3:AbortMultipartUpload",
                "s3:GetBucketLocation",
                "s3:GetObject",
                "s3:ListBucket",
                "s3:ListBucketMultipartUploads",
                "s3:PutObject"
            ],
            "Resource": [
                "arn:aws:s3:::${aws_s3_bucket.log_bucket.name}",
                "arn:aws:s3:::${aws_s3_bucket.log_bucket.name}/*",
                "arn:aws:s3:::%FIREHOSE_BUCKET_NAME%",
                "arn:aws:s3:::%FIREHOSE_BUCKET_NAME%/*"
            ]
        },
        {
            "Sid": "",
            "Effect": "Allow",
            "Action": [
                "lambda:InvokeFunction",
                "lambda:GetFunctionConfiguration"
            ],
            "Resource": "arn:aws:lambda:ap-northeast-1:${var.aws_account_id}:function:*:*"
        },
        {
            "Sid": "",
            "Effect": "Allow",
            "Action": [
                "logs:PutLogEvents"
            ],
            "Resource": [
                "*"
            ]
        },
        {
            "Sid": "",
            "Effect": "Allow",
            "Action": [
                "kinesis:DescribeStream",
                "kinesis:GetShardIterator",
                "kinesis:GetRecords",
                "kinesis:ListShards"
            ],
            "Resource": "arn:aws:kinesis:ap-northeast-1:${var.aws_account_id}:stream/%FIREHOSE_STREAM_NAME%"
        },
        {
            "Effect": "Allow",
            "Action": [
                "kms:Decrypt"
            ],
            "Resource": [
                "arn:aws:kms:ap-northeast-1:${var.aws_account_id}:key/%SSE_KEY_ID%"
            ],
            "Condition": {
                "StringEquals": {
                    "kms:ViaService": "kinesis.%REGION_NAME%.amazonaws.com"
                },
                "StringLike": {
                    "kms:EncryptionContext:aws:kinesis:arn": "arn:aws:kinesis:%REGION_NAME%:${var.aws_account_id}:stream/%FIREHOSE_STREAM_NAME%"
                }
            }
        }
    ]
}
POLICY
}

resource "aws_iam_role_policy_attachment" "firehose_policy_0" {
    policy_arn = aws_iam_policy.firehose_policy.arn
    role = aws_iam_role.firehose_role.name
}

resource "aws_kinesis_firehose_delivery_stream" "waf_datadog_forwarder" {
    name = "aws-waf-logs-datadog-forwarder"
    destination = "http_endpoint"

    http_endpoint_configuration {
        name = "Datadog"
        url = "https://aws-kinesis-http-intake.logs.datadoghq.com/v1/input"
        access_key = var.datadog_api_key_value

        role_arn = aws_iam_role.firehose_role.arn

        buffering_interval = 60
        buffering_size = 4
        retry_duration = 60

        processing_configuration {
            enabled = false
        }

        request_configuration {
            content_encoding = "GZIP"

            common_attributes {
                name  = "env"
                value = var.stage
            }
        }

        s3_backup_mode = "FailedDataOnly"
    }

    s3_configuration {
        bucket_arn = aws_s3_bucket.log_bucket.arn
        prefix = "/firehose/aws-waf-logs-datadog-forwarder"
        compression_format = "GZIP"
        kms_key_arn = var.kms_key_arn
        role_arn = aws_iam_role.firehose_role.arn
    }

    server_side_encryption {
        enabled = true
        key_type = "AWS_OWNED_CMK"
    }
}

Lambda

A Lambda function is attached to the S3 delivery Firehose so that the records are also sent to the Datadog forwarding Firehose.
The received records are returned to the S3 delivery Firehose unchanged.

'use strict';

const AWS = require('aws-sdk');

const deliveryStreamName = 'aws-waf-logs-datadog-forwarder';

const firehose = new AWS.Firehose({
    region: 'ap-northeast-1',
});

// Firehose transformation Lambda:
// forwards each record to the Datadog delivery stream, then returns the
// records unchanged so the S3 delivery stream receives them as-is.
module.exports.forwarder = async (event) => {
    const data = event.records.map(record => {
        return {
            Data: Buffer.from(record.data, 'base64').toString('utf8'),
        };
    });

    // Await the call so the Lambda does not return before the batch is sent.
    // (PutRecordBatch accepts at most 500 records / 4 MB per call.)
    try {
        await firehose.putRecordBatch({
            DeliveryStreamName: deliveryStreamName,
            Records: data,
        }).promise();
    } catch (err) {
        console.error(err, err.stack);
    }

    const output = event.records.map(record => {
        return {
            recordId: record.recordId,
            result: 'Ok',
            data: record.data,
        };
    });

    return { records: output };
};
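
For reference, a minimal Terraform sketch of the transformation Lambda itself, assuming the handler above is packaged as forwarder.zip in a file named handler.js and that a hypothetical aws_iam_role.lambda_role exists for its execution role:

resource "aws_lambda_function" "waf_firehose_forwarder" {
    function_name = var.waf_firehose_lambda_name
    role          = aws_iam_role.lambda_role.arn // hypothetical Lambda execution role
    filename      = "forwarder.zip"              // hypothetical deployment package
    handler       = "handler.forwarder"          // <file name>.<exported function> of the code above
    runtime       = "nodejs14.x"
    timeout       = 60 // headroom for forwarding a full Firehose batch
    memory_size   = 128
}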

Do not forget to grant the Lambda's execution role permission to call Firehose.

{
    "Action": [
        "firehose:PutRecordBatch"
    ],
    "Resource": [
        "arn:aws:firehose:ap-northeast-1:123456789012:deliverystream/*"
    ],
    "Effect": "Allow"
}
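
A minimal Terraform sketch of attaching this permission, reusing the hypothetical aws_iam_role.lambda_role from the sketch above:

resource "aws_iam_role_policy" "lambda_firehose_policy" {
    name = "${var.service_name}-Lambda-Firehose-Policy"
    role = aws_iam_role.lambda_role.id // hypothetical Lambda execution role

    policy = jsonencode({
        Version = "2012-10-17"
        Statement = [
            {
                Effect   = "Allow"
                Action   = ["firehose:PutRecordBatch"]
                Resource = ["arn:aws:firehose:ap-northeast-1:${var.aws_account_id}:deliverystream/*"]
            }
        ]
    })
}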

WAF-side configuration

When enabling WAF logging you simply specify the Firehose delivery stream as the destination.
No complex configuration is required, so a detailed walkthrough is omitted here.
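
For completeness, a minimal Terraform sketch of that step, assuming a hypothetical aws_wafv2_web_acl.main resource for the Web ACL:

resource "aws_wafv2_web_acl_logging_configuration" "waf_logging" {
    resource_arn            = aws_wafv2_web_acl.main.arn // hypothetical Web ACL resource
    log_destination_configs = [aws_kinesis_firehose_delivery_stream.waf_firehose.arn]
}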

Conclusion

With the above in place, the logs are delivered to both Athena (via S3) and Datadog.

When querying in Athena, filter by the partition keys.

(Example)

select * from waf where year = '2021' and month = '05' and day = '15' and hour = '00'