LoginSignup
6
6

More than 5 years have passed since last update.

Scalaのパーザコンビネータでアクセスログをパースしてみる

Posted at

CloudFlareのログのパースをする必要があったので、seratchさんのブログをみながら、Parser Combinatorで書いてみた。

なんとなくjavapで見てみたところ、このコードだとParserのインスタンスがログの行数分だけ作成されてしまうようにみえるけど、これでいいのかな。

あと、ログ全体のなかの、以下のBoldで表示したカラムだけが欲しかったんだけど、

**www.cloudflare.com** **1.1.1.1** 1383426540 "GET / HTTP/1.1" 200 **11022** "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0)" "CLEAN.HUMAN 1383426470.808 off" **"http://blog.cloudflare.com/"**

~やら~>やらをいい感じに1行で書く方法がわからなかったので分割して書いたけど、どう書くのが驚き最小なんだろう。

package com.example.service

import scala.io.Source
import scala.util.parsing.combinator.RegexParsers

/**
 * Parses CloudFlare access-log records with Scala parser combinators and
 * keeps only the host, client IP address, response size and referrer of
 * each record.
 */
object CloudFlareLogParserService {

  /**
   * Demo entry point: parses a hard-coded two-record sample log one line at
   * a time and prints each parsed record, or the exception when a line is
   * rejected.
   */
  def resolve: Unit = {
    val accessLog =
      """
        |www.cloudflare.com 1.1.1.1 1383426540 "GET / HTTP/1.1" 200 11022 "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0)" "CLEAN.HUMAN 1383426470.808 off" "http://blog.cloudflare.com/"
        |www.cloudflare.com 1.1.1.1 1383426540 "GET / HTTP/1.1" 200 11022 "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0)" "CLEAN.HUMAN 1383426470.808 off" "http://blog.cloudflare.com/"
      """.stripMargin

    // The triple-quoted literal starts and ends with blank lines; skip them.
    accessLog.split('\n').filter(_.trim.nonEmpty).foreach { entry =>
      try println(CloudFlareLogParser.parseLine(entry))
      catch {
        case e: IllegalArgumentException => println(e)
      }
    }
  }

  /**
   * The subset of one access-log record that this service extracts.
   *
   * @param host      first column of the record
   * @param ipAddress second column of the record
   * @param size      sixth column of the record, converted to `Int`
   * @param referrer  content of the last quoted column
   */
  case class CloudFlareAccessLog(host: String, ipAddress: String, size: Int, referrer: String)

  /**
   * Grammar for the log format.
   *
   * All rules are `val`s, so every `Regex` and `Parser` is built once when
   * the object is initialised instead of on every call. Plain `val`s are
   * initialised top to bottom, which is why the leaf rules come first and
   * the composite rules that reference them come last.
   *
   * `RegexParsers` skips whitespace before string literals and regexes, so
   * columns may be separated by any amount of whitespace.
   */
  object CloudFlareLogParser extends RegexParsers {

    // Shared rule for a double-quoted column without embedded quotes;
    // yields the text between the quotes.
    private val quotedField: Parser[String] = "\"" ~> "[^\"]*".r <~ "\""

    val host: Parser[String] = """[\w.]+""".r

    val ipAddress: Parser[String] = """[\d.]+""".r

    val logWindow: Parser[String] = """\d+""".r

    val request: Parser[String] = quotedField

    val statusCode: Parser[String] = """\d{3}""".r

    val size: Parser[String] = """\d+""".r

    val userAgent: Parser[String] = quotedField

    val cloudFlareInfo: Parser[String] = "\"" ~> "[\\w. ]*".r <~ "\""

    val referrer: Parser[String] = quotedField

    // Consumes logWindow, request and statusCode, keeping only the size.
    val p2: Parser[String] = logWindow ~> request ~> statusCode ~> size

    // Consumes userAgent and cloudFlareInfo, keeping only the referrer.
    val p3: Parser[String] = userAgent ~> cloudFlareInfo ~> referrer

    // Record separator: a line feed, optionally preceded by a carriage return.
    val nl: Parser[Option[Char]] = opt('\r') <~ '\n'

    // One complete record. `toInt` throws NumberFormatException (a subclass
    // of IllegalArgumentException) when the size does not fit in an Int.
    val line: Parser[CloudFlareAccessLog] = host ~ ipAddress ~ p2 ~ p3 ^^ {
      case h ~ ip ~ bytes ~ ref => CloudFlareAccessLog(h, ip, bytes.toInt, ref)
    }

    val lines: Parser[List[CloudFlareAccessLog]] = repsep(line, nl)

    /**
     * Parses zero or more newline-separated records.
     *
     * @param input the complete text to parse
     * @return the parsed records in input order; empty when `input` holds no record
     * @throws IllegalArgumentException when `input` does not match the grammar;
     *                                  the message includes the parser's own diagnostic
     */
    def parseLines(input: String): List[CloudFlareAccessLog] =
      parseAll(lines, input) match {
        case Success(records, _) => records
        case failure: NoSuccess =>
          throw new IllegalArgumentException(s"Failed to parse: $input (${failure.msg})")
      }

    /**
     * Parses `input` and returns its first record.
     *
     * @param input text holding at least one record
     * @return the first parsed record
     * @throws IllegalArgumentException when `input` does not match the grammar or
     *                                  holds no record at all (for example an empty string)
     */
    def parseLine(input: String): CloudFlareAccessLog =
      parseLines(input).headOption.getOrElse {
        // Empty input parses successfully to an empty list; report it through
        // the same exception type as any other rejected input.
        throw new IllegalArgumentException("Failed to parse: " + input)
      }
  }

}
6
6
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
6
6