下記のようなログから <timestamp, sourceIP, destinationIP>のようなデータ列を作りたいとする。。
「タプルの列」が思い浮かんだ。
1"2019/07/02 00:00:48.073","2019/07/02 00:00:48","2019/07/02 00:00:48","841","32.249.185.200","25846","Uu","244.210.2.143","51321","V4","K6i","d9ecn1Blv","2lH","Jlx6g","Bo93lPJx","8"\
,"lAp3mgEhJPaOxnVYzaQhG6jo7E","912","198","336","769","278","554","rand-pa1"
2"2019/07/02 03:03:24.826","2019/07/02 03:03:24","2019/07/02 03:03:24","478","80.115.211.4","41214","ov","34.252.129.206","23907","er","dOG","NyhxNslii","tKa","QXniz","uKXh6OT7","5",\
"cFoCLZMoDrKQSnhgJap936q1Nl","953","917","636","718","142","607","rand-pa1"
*IPアドレスはランダムに生成
タプルについて調べる。。
「標準ライブラリには、単純に2つの型を格納するために定義されたpairというクラステンプレートがあります。」
「これは主に、.... 「関数から2つの値を返したい」といった場面で使われます。」
Boost C++ Libraries 稲葉一浩 秀和システム
tupleはあくまでpairの拡張であって、列(std::mapのような使い方)にはならないらしい。。
そのため、multimapを使って、<timestamp, sourceIP> , <timestamp, destinationIP>というペアを作ることにした。
セマンティックアクション部を見てみる。
12static int line_counter = 0; // index of the CSV row being parsed; used as the multimap key by MyAction
13std::multimap<int, std::string> m; // collected matches: row index -> matched text
14
15struct AddrParse : grammar<AddrParse> // Spirit grammar for the pattern INT.INT.INT.INT
16{
17 struct MyAction // semantic action: called with the iterator range [i1, i2) of the matched text
18 {
19 template<typename Ite>
20 void operator()( Ite i1, Ite i2 ) const
21 { cout << "文字数:" << i2 - i1 << endl // print the length of the match
22 << " 内容:" << string(i1,i2) << endl; // print the matched text itself
23 m.insert(std::make_pair(line_counter, string(i1,i2))); // store the match under the current row index
24 }
25 };
26
27 template<typename ScannerT>
28 struct definition
29 {
30 typedef rule<ScannerT> rule_t;
31 rule_t r; // the grammar's only rule; handed out by start()
32 definition( const AddrParse& self )
33 { // NOTE(review): the action runs as soon as the four-integer sequence matches, even if input remains after it
34 // r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
35 r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()]; // >> +( '*' >> int_p );
36 }
37 const rule_t& start() const { return r; }
38 };
39};
23行目で、パースした (INT).(INT).(INT).(INT) 形式の文字列を、行番号 line_counter をキーとして multimap に挿入する。
+c++
21 v.push_back(string(i1,i2));
33 r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()]; // >> +( '*' >> int_p );
+
少し長いが、コードを見てみる。。。
1#include <iostream>
2#include <fstream>
3#include <sstream>
4#include <string>
5#include <map>
6#include <boost/spirit.hpp>
7#include <boost/tokenizer.hpp>
8
9using namespace std;
10using namespace boost::spirit;
11
12static int line_counter = 0;
13std::multimap<int, std::string> m;
14
15struct AddrParse : grammar<AddrParse>
16{
17 struct MyAction
18 {
19 template<typename Ite> 20 void operator()( Ite i1, Ite i2 ) const
21 { cout << "文字数:" << i2 - i1 << endl
22 << " 内容:" << string(i1,i2) << endl;
23 m.insert(std::make_pair(line_counter, string(i1,i2)));
24 }
25 };
26
27 template<typename ScannerT>
28 struct definition
29 {
30 typedef rule<ScannerT> rule_t;
31 rule_t r;
32 definition( const AddrParse& self )
33 {
34 // r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
35 r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()]; // >> +( '*' >> int_p );
36 }
37 const rule_t& start() const { return r; }
38 };
39};
40
41std::vector < std::vector< std::string > > parse_csv(const char* filepath)
42{
43 std::vector< std::vector< std::string > > cells;
44 std::string line;
45 std::ifstream ifs(filepath);
46
47 while (std::getline(ifs, line)) {
48
49 std::vector< std::string > data;
50
51 boost::tokenizer< boost::escaped_list_separator< char > > tokens(line);
52 for (const std::string& token : tokens) {
53 data.push_back(token);
54 }
55 cells.push_back(data);
56 }
57
58 return cells;
59}
60
61#include <typeinfo>
62int main(int argc, char* argv[]){
63 using namespace std;
64
65 if(argc != 2){
66 cerr << "引数の数が間違っています." << endl;
67 cerr << "./spirit_file_read [INPUT_FILE_NAME]" << endl;
68 return 1;
69 }
70
71 ifstream ifs(argv[1], ios::in);
72 if(!ifs){
73 cerr << "Error: file not opened." << endl;
74 return 1;
75 }
76
77 string tmp;
78 string str;
79
80 int counter = 0;
81
82 const auto cells = parse_csv(argv[1]);
83 AddrParse parser;
84 85 for (const auto& rows : cells) {
86
87 for (const auto& cell : rows) {
88 parse_info<string::const_iterator> info =
89 parse( cell.begin(), cell.end(), parser );
90
91 if(info.full) {
92 cout << "line:" << counter << " " << cell << endl;
93 // push_vector(cell);
94 }
95 }
96 line_counter++;
97 counter++;
98 }
100 ifs.close();
102 cout << "Displaying elements..." << endl;
103
104 auto begin = m.begin(), end = m.end();
105 for (auto iter = begin; iter != end; iter++) {
106 cout << "key = " << iter->first << "\n";
107 cout << "value = " << iter->second << "\n";
108 }
109
110 /*
111 for (auto& x:m) {
112 std::cout << x.first << " => " << x.second << std::endl;
113 }
114 */
115
116 return 0;
117}
実行してみる(IPアドレスの値はランダムに生成)
$ g++ -o random_data random_data.cpp
$ ./random_data 3
$ cat random_data.txt
"2019/07/02 02:02:00.839","2019/07/02 02:02:00","2019/07/02 02:02:00","841","*.178.167.132","25846","iy","*.215.218.225","51321","bu","8MP","VX13Gpdkt","drN","deETa","gyAY4gdZ","8","TJoPPQuOKvrxzAjCd11rpqqSqs","912","198","336","769","278","554","rand-pa1"
"2019/07/02 02:02:52.006","2019/07/02 02:02:52","2019/07/02 02:02:52","478","*.40.197.93","41214","ol","*.213.36.241","23907","Vu","OYF","H6zQlnN5X","yV3","P2VPw","9D3viFsS","5","cogAhSIycmvdYl7RaZNjGCsWqj","953","917","636","718","142","607","rand-pa1"
"2019/07/02 12:12:16.086","2019/07/02 12:12:16","2019/07/02 12:12:16","17","*.175.225.23","15918","Hk","*.156.36.246","8994","u1","Tt2","hihIKl4xd","OX5","uj8uP","hKwtE4iF","8","9EuzqcBTUrBjAago2vY5MMugKb","157","401","130","109","999","219","rand-pa1"
$ g++ ipaddress5.cpp -o ipaddress5 -lboost_system
In file included from ipaddress5.cpp:6:0:
/usr/include/boost/spirit.hpp:18:4: warning: #warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp" [-Wcpp]
# warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp"
^~~~~~~
$ ./ipaddress5 random_data.txt
文字数:15
内容:255.178.167.132
line:0 255.178.167.132
文字数:14
内容:69.215.218.225
line:0 69.215.218.225
文字数:13
内容:104.40.197.93
line:1 104.40.197.93
文字数:13
内容:76.213.36.241
line:1 76.213.36.241
文字数:14
内容:120.175.225.23
line:2 120.175.225.23
文字数:13
内容:55.156.36.246
line:2 55.156.36.246
Displaying elements...
key = 0
value = 255.178.167.132
key = 0
value = 69.215.218.225
key = 1
value = 104.40.197.93
key = 1
value = 76.213.36.241
key = 2
value = 120.175.225.23
key = 2
value = 55.156.36.246
↑で、keyは行番号である。
(`ー´)b