感想
abコマンドでApacheに大量アクセスをすると、td-agentプロセスがCPU100%となってしまう。
Forwarderでログパースを行っていることが原因なのか、それともFluentdのtail処理自体が重いのかは、まだ解明できていません。
本番で使うにはこの部分の検証が必要かと思いました。
環境
- CentOS7
ログ収集対象
- Apache combinedに%D(応答時間)追加
LogFormat "%h %l %u %t \"%r\" %>s %b %D \"%{Referer}i\" \"%{User-Agent}i\"" combined
- Postfix
/var/log/maillog
- Syslog
/var/log/messages
構成
Apache,Postfix,Syslog【Forwarderノード】 → 【Aggregatorノード】 → Elasticsearch ← Kibana
### Forwarderノード
- ログパース
- Forwarderノードのホスト名タグ付与
### Aggregatorノード
- apacheログに対しGeoIPを付与
インストール
参照
https://docs.fluentd.org/v1.0/articles/install-by-rpm
curl -L https://toolbelt.treasuredata.com/sh/install-redhat-td-agent3.sh | sh
Forwarderノード プラグインインストール
td-agent-gem install fluent-plugin-multi-format-parser
td-agent-gem install fluent-plugin-filter_typecast
Aggregatorノード プラグインインストール
td-agent-gem install fluent-plugin-elasticsearch
yum install epel-release
yum install geoip-devel
td-agent-gem install fluent-plugin-geoip
systemd編集
vi td-agent.service
User=root
Group=root
Forwarderノード設定
/etc/td-agent/td-agent.conf
#### Global Setting
<source>
  # Monitoring endpoint for this Forwarder: listens on all interfaces, port 24220.
  @type monitor_agent
  port 24220
  bind 0.0.0.0
</source>
#### Source
### Syslog
<source>
  # Follow the system log and emit each line, parsed as syslog, under the
  # tag syslog.messages. The read position is persisted in pos_file.
  @type tail
  path /var/log/messages
  pos_file /var/log/td-agent/syslog.messages.pos
  tag syslog.messages
  format syslog
</source>
### Apache
<source>
  # Follow the Apache access log (combined format plus a response-time field)
  # and emit parsed records under the tag apache.access.
  @type tail
  path /var/log/httpd/access_log
  pos_file /var/log/td-agent/apache_access.pos
  read_from_head true
  tag apache.access

  # Named captures: host, user, time, method, path, code, size,
  # response_time and, optionally, referer and agent.
  format /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*) (?<response_time>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
  time_format %d/%b/%Y:%H:%M:%S %z

  # Cast the numeric captures so they are not shipped as strings.
  types code:integer,size:integer,response_time:float
</source>
<source>
  # Follow the Apache error log from the beginning of the file and emit
  # records parsed with the apache_error format under the tag apache.error.
  @type tail
  path /var/log/httpd/error_log
  pos_file /var/log/td-agent/apache_error.pos
  read_from_head true
  tag apache.error
  format apache_error
</source>
### Postfix
# Follow the Postfix mail log and emit records under the tag postfix.maillog.
# Lines are parsed by the multi_format parser below; the patterns are listed
# from most specific to least specific, so their order matters.
<source>
@type tail
tag postfix.maillog
path /var/log/maillog
pos_file /var/log/td-agent/postfix.maillog.pos
read_from_head true
<parse>
@type multi_format
# Pattern 1: delivery result lines. Captures queue_id, to, optional orig_to,
# relay, delay, delays, dsn, status and the parenthesised status_msg.
<pattern>
format /^(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]+) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: (?<queue_id>[0-9A-F]+): ?(to=(<(?<to>[^>]+)>)?)?,( ?(orig_to=<(?<orig_to>[^>]+)>),)? ?(relay=(?<relay>[^ ]+)), ?(delay=(?<delay>[^ ]+)), ?(delays=(?<delays>[^ ]+)), ?(dsn=(?<dsn>[^ ]+)), ?(status=(?<status>[^,]+)) \((?<status_msg>.*)\)/
time_format %b %d %H:%M:%S
</pattern>
# Pattern 2: lines carrying a queue ID, with an optional from=<address>;
# the remainder of the line is captured as message.
<pattern>
format /^(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]+) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: (?<queue_id>[0-9A-F]+): ?(from=(<(?<from>[^>]+)>)?)? *(?<message>.*)$/
time_format %b %d %H:%M:%S
</pattern>
# Pattern 3: lines carrying a queue ID followed by a free-form message.
# NOTE(review): the from= part of pattern 2 is entirely optional, so pattern 2
# appears to match every line this pattern would; this one looks unreachable.
<pattern>
format /^(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]+) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: (?<queue_id>[0-9A-F]+): *(?<message>.*)$/
time_format %b %d %H:%M:%S
</pattern>
# Pattern 4: generic syslog-style line without a queue ID
# (time, host, ident, optional pid, message).
<pattern>
format /^(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]+) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/
time_format %b %d %H:%M:%S
</pattern>
# Pattern 5: last-resort fallback that uses the "none" format, i.e. no regex parsing.
<pattern>
format none
</pattern>
</parse>
</source>
#### Filter
### Add MyHostname
<filter **>
  # Stamp every record passing through this Forwarder with a "hostname"
  # field so the Aggregator can tell which node produced it.
  @type record_transformer

  <record>
    hostname ${hostname}
  </record>
</filter>
### Postfix
<filter postfix.maillog>
  # Convert the string captures from the Postfix parser into numeric types.
  #   pid   - captured with [0-9]+, a process ID, so it is an integer
  #           (it was previously cast to float).
  #   delay - delivery delay; kept as float.
  @type typecast
  types pid:integer,delay:float
</filter>
#### Output
# Discard Fluentd's own internal log events (tags fluent.*) before the
# catch-all below. Otherwise "**" would forward them to the Aggregator, which
# defines no match for them, and warnings logged while forwarding could be
# fed back into the same forward buffer.
<match fluent.**>
  @type null
</match>

# Ship every remaining event to the Aggregator node, staging chunks in a
# file buffer that is flushed every second.
<match **>
  @type forward

  <server>
    host 10.0.0.20
    port 24224
  </server>

  <buffer>
    @type file
    path /var/log/td-agent/buffer
    flush_interval 1s
  </buffer>
</match>
## Aggregatorノード設定
/etc/td-agent/td-agent.conf
#### Global Setting
<source>
  # Monitoring endpoint for this Aggregator: listens on all interfaces, port 24220.
  @type monitor_agent
  port 24220
  bind 0.0.0.0
</source>
#### Source
### Forwarder
<source>
  # Receive events sent by the Forwarder nodes' forward output
  # (all interfaces, port 24224).
  @type forward
  port 24224
  bind 0.0.0.0
</source>
### Add GeoIP
<filter apache.access>
  # Enrich Apache access records with geolocation looked up from the
  # "host" field (the client address captured on the Forwarder).
  @type geoip
  geoip_lookup_keys host

  # The same coordinates are emitted in three shapes: object, "lat,lon"
  # string, and [lon, lat] array.
  <record>
    location_properties '{ "lat" : ${location.latitude["host"]}, "lon" : ${location.longitude["host"]} }'
    location_string ${location.latitude["host"]},${location.longitude["host"]}
    location_array '[${location.longitude["host"]},${location.latitude["host"]}]'
  </record>

  skip_adding_null_record true
</filter>
#### Output
### syslog
<match syslog.messages>
  # Index syslog events into the local Elasticsearch using logstash-style
  # index names with the prefix "syslog.messages".
  @type elasticsearch
  host localhost
  port 9200

  logstash_format true
  logstash_prefix syslog.messages

  <buffer>
    @type file
    path /var/log/td-agent/buffer/syslog.messages
    flush_interval 1s
  </buffer>
</match>
### Apache
<match apache.access>
  # Index Apache access events into the local Elasticsearch using
  # logstash-style index names with the prefix "apache.access".
  @type elasticsearch
  host localhost
  port 9200

  logstash_format true
  logstash_prefix apache.access

  <buffer>
    @type file
    path /var/log/td-agent/buffer/apache.access
    flush_interval 1s
  </buffer>
</match>
<match apache.error>
  # Index Apache error events into the local Elasticsearch using
  # logstash-style index names with the prefix "apache.error".
  @type elasticsearch
  host localhost
  port 9200

  logstash_format true
  logstash_prefix apache.error

  <buffer>
    @type file
    path /var/log/td-agent/buffer/apache.error
    flush_interval 1s
  </buffer>
</match>
### Postfix
<match postfix.maillog>
  # Index Postfix mail log events into the local Elasticsearch using
  # logstash-style index names with the prefix "postfix.maillog".
  @type elasticsearch
  host localhost
  port 9200

  logstash_format true
  logstash_prefix postfix.maillog

  <buffer>
    @type file
    path /var/log/td-agent/buffer/postfix.maillog
    flush_interval 1s
  </buffer>
</match>