↓このようなデータから、Boost.Spiritを使って、
$ cat random_data.txt (IPアドレスはランダムに生成)
"2019/07/02 10:10:54.405","2019/07/02 10:10:54","2019-07-02T10:10:54Z","841","183.230.214.247","25846","ud","196.243.1.131","51321","yP","6dT","ExCAbzsxH","EgN","D7BSJ","kXpUs4bu","8","oMycVswGUdz1gnopyawKsEhCR3","912","198","336","769","278","554","rand-pa1"
"2019/07/02 11:11:14.681","2019/07/02 11:11:14","2019-07-02T11:11:14Z","478","143.84.130.129","41214","FG","53.81.231.167","23907","9n","lrm","3Nfd2JmWb","woH","OxKYH","Yd6Pwx4f","5","YCsmTqS4kkxfH8b777FWOhn3TP","953","917","636","718","142","607","rand-pa1"
"2019/07/02 23:23:38.408","2019/07/02 23:23:38","2019-07-02T23:23:38Z","17","3.7.170.152","15918","Og","52.222.98.186","8994","D4","DjM","LYEFIUvrP","DQt","oECV0","7f5OKrz0","8","mYOaqbYPvBFLhtU5IZixuD7dPV","157","401","130","109","999","219","rand-pa1"
■ タイムスタンプ - 2019-07-02T10:10:54Z
■ IPアドレス - 196.243.1.131
を取りだして、multimapやmulti-indexに格納したくなった。
Boost.Spiritでは、IPアドレスは、
```c++
43 rule_t ipaddr;
49 ipaddr = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[Action_ipaddr()];
```
タイムスタンプは、
```c++
44 rule_t timestamp;
50 timestamp = (int_p >> '-' >> int_p >> '-' >> int_p >> 'T' >> int_p >> ':' >> int_p >> ':' >> int_p >> 'Z')[Action_timestamp()];
```
と書ける。
Boost.Spiritには、grammarという便利な機能がある。
grammarとは、ruleをまとめたものである。
例えば、Boost.Spiritで電卓を実装するときなどに使う。
8struct ArithCalc : grammar<ArithCalc> // Grammar that groups the expr/term/fctr rules of an arithmetic expression.
9{
10 template<typename ScannerT>
11 struct definition // Holds the rules, instantiated for a given scanner type.
12 {
13 typedef rule<ScannerT> rule_t; // Rule type bound to the scanner in use.
14 rule_t expr, fctr, term;
15
16 definition( const ArithCalc& ) // Wires the rules together; the grammar argument is not used.
17 {
18 expr = term >> *('+'>>term | '-'>>term); // expr: a term followed by zero or more "+ term" or "- term".
19 term = fctr >> *('*'>>fctr | '/'>>fctr); // term: a fctr followed by zero or more "* fctr" or "/ fctr".
20 fctr = real_p | '('>>expr>>')'; // fctr: a real number, or an expr wrapped in parentheses.
21 }
22
23 const rule_t& start() const { return expr; } // Top-level rule: parsing starts from expr.
24 };
25};
ここで、18行目から20行目で、>> と | を使ってルールの関係を設定している。
grammarの部分はこちら↓
15struct AddrParse : grammar<AddrParse> // Grammar that groups the timestamp and IP-address rules.
16{ // The numbering jumps to 38 next: lines 17-37 are not shown in this excerpt.
38
39 template<typename ScannerT>
40 struct definition // Holds the rules, instantiated for a given scanner type.
41 {
42 typedef rule<ScannerT> rule_t; // Rule type bound to the scanner in use.
43 rule_t ipaddr;
44 rule_t timestamp;
45 rule_t r; // Top-level rule returned by start().
46 definition( const AddrParse& self ) // Wires the rules together; self is not used in the body.
47 {
48 // r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
49 ipaddr = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[Action_ipaddr()]; // Four integers separated by '.'; Action_ipaddr is attached to the match. int_p does not limit an octet to 0-255.
50 timestamp = (int_p >> '-' >> int_p >> '-' >> int_p >> 'T' >> int_p >> ':' >> int_p >> ':' >> int_p >> 'Z')[Action_timestamp()]; // Shape int-int-intTint:int:intZ; Action_timestamp is attached to the match.
51 r = timestamp | ipaddr; // Accept either a timestamp or an IP address.
52 }
53 const rule_t& start() const { return r; } // Parsing starts from r.
54 };
55};
これを、↑の例(timestampとIPアドレス)に合わせて、下記のように書いてみた。
```c++
51 r = timestamp | ipaddr;
```
コードを見てみる。。。
1 #include <iostream>
2 #include <fstream>
3 #include <sstream>
4 #include <string>
5 #include <boost/spirit.hpp>
6 #include <boost/tokenizer.hpp>
7 using namespace std;
8 using namespace boost::spirit;
9 std::vector<string> v;
10 static int counter;
11 struct AddrParse : grammar<AddrParse> // Grammar that recognizes a timestamp or an IP address and reports each match on stdout.
12 {
13 struct Action_ipaddr // Semantic action attached to the ipaddr rule.
14 {
15 template<typename Ite>
16 void operator()( Ite i1, Ite i2 ) const // [i1, i2) is the matched character range.
17 { cout << "IPaddr - line:" << counter << endl; // counter is the file-scope row counter.
18 cout << "文字数:" << i2 - i1 << endl // Length of the matched text.
19 << " 内容:" << string(i1,i2) << endl; // The matched text itself.
20 cout << endl;
21 }
22 };
23 struct Action_timestamp // Semantic action attached to the timestamp rule.
24 {
25 template<typename Ite>
26 void operator()( Ite i1, Ite i2 ) const // [i1, i2) is the matched character range.
27 { cout << "timestamp - line:" << counter << endl; // counter is the file-scope row counter.
28 cout << "文字数:" << i2 - i1 << endl // Length of the matched text.
29 << " 内容:" << string(i1,i2) << endl; // The matched text itself.
30 cout << endl;
31 }
32 };
33 template<typename ScannerT>
34 struct definition // Holds the rules, instantiated for a given scanner type.
35 {
36 typedef rule<ScannerT> rule_t; // Rule type bound to the scanner in use.
37 rule_t ipaddr;
38 rule_t timestamp;
39 rule_t r; // Top-level rule returned by start().
40 definition( const AddrParse& self ) // Wires the rules together; self is not used in the body.
41 {
42 // r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
43 ipaddr = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[Action_ipaddr()]; // Four integers separated by '.'; int_p does not limit an octet to 0-255.
44 timestamp = (int_p >> '-' >> int_p >> '-' >> int_p >> 'T' >> int_p >> ':' >> int_p >> ':' >> int_p >> 'Z')[Action_timestamp()]; // Shape int-int-intTint:int:intZ.
45 r = timestamp | ipaddr; // Accept either a timestamp or an IP address.
46 }
47 const rule_t& start() const { return r; } // Parsing starts from r.
48 };
49 };
50 std::vector < std::vector< std::string > > parse_csv(const char* filepath) // Reads the file at filepath and returns one vector of fields per input line.
51 {
52 std::vector< std::vector< std::string > > cells; // Result: rows of fields.
53 std::string line;
54 std::ifstream ifs(filepath); // No explicit open check: if the open fails, the loop below never runs and an empty result is returned.
55 while (std::getline(ifs, line)) {
56 std::vector< std::string > data; // Fields of the current line.
57 boost::tokenizer< boost::escaped_list_separator< char > > tokens(line); // Splits the line with a default-constructed escaped_list_separator.
58 for (const std::string& token : tokens) {
59 data.push_back(token);
60 }
61 cells.push_back(data);
62 }
63 return cells;
64 }
65 #include <typeinfo>
66 int main(int argc, char* argv[]){ // Entry point: expects exactly one argument, the input file, and runs the AddrParse grammar over every CSV field in it.
67 using namespace std;
68 if(argc != 2){
69 cerr << "引数の数が間違っています." << endl;
70 cerr << "./spirit_file_read [INPUT_FILE_NAME]" << endl;
71 return 1;
72 }
73 ifstream ifs(argv[1], ios::in); // Used only for the open check below; parse_csv() opens the same path again itself.
74 if(!ifs){
75 cerr << "Error: file not opened." << endl;
76 return 1;
77 }
78 string tmp; // NOTE(review): never used in this function.
79 string str; // NOTE(review): never used in this function.
80
81 // int counter = 0;
82 const auto cells = parse_csv(argv[1]); // All rows of the file, each split into fields.
83 AddrParse parser;
84 for (const auto& rows : cells) { // One iteration per input line.
85
86 for (const auto& cell : rows) { // One iteration per field of that line.
87 parse_info<string::const_iterator> info =
88 parse( cell.begin(), cell.end(), parser ); // The grammar's semantic actions print any timestamp or IP address they match.
89 if(info.full) { // Intentionally empty for now: the body holds only commented-out code.
90 // cout << "line:" << counter << " " << cell << endl;
91 // push_vector(cell);
92 }
93 }
94 counter++; // File-scope row counter that the semantic actions print as "line:".
95 }
96
97 ifs.close();
98 return 0;
99 }
実行してみる。。。
$ g++ -o random_data random_data.cpp
$ ./random_data 3
$ g++ -o ipaddress8 ipaddress8.cpp -lboost_system
In file included from ipaddress8.cpp:5:0:
/usr/include/boost/spirit.hpp:18:4: warning: #warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp" [-Wcpp]
# warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp"
^~~~~~~
$ ./ipaddress8 random_data.txt
timestamp - line:0
文字数:20
内容:2019-07-02T10:10:46Z
IPaddr - line:0
文字数:13
内容:227.13.174.42
IPaddr - line:0
文字数:14
内容:102.103.46.163
timestamp - line:1
文字数:20
内容:2019-07-02T16:16:20Z
IPaddr - line:1
文字数:11
内容:14.24.68.63
IPaddr - line:1
文字数:11
内容:99.21.36.88
timestamp - line:2
文字数:20
内容:2019-07-02T17:17:23Z
IPaddr - line:2
文字数:14
内容:167.125.202.72
IPaddr - line:2
文字数:13
内容:27.227.146.28
(`ー´)b