Boost.Spirit の Semantic Action を使って、CSVファイル中のIPv4アドレスを取り出す方法を考えてみる。
ルールとアクションを書く。。。
13struct AddrParse : grammar<AddrParse>
14{
15 struct MyAction
16 {
17 template<typename Ite>
18 void operator()( Ite i1, Ite i2 ) const
19 { cout << "文字数:" << i2 - i1 << endl
20 << " 内容:" << string(i1,i2) << endl; }
21 };
22
23 template<typename ScannerT>
24 struct definition
25 {
26 typedef rule<ScannerT> rule_t;
27 rule_t r;
28 definition( const AddrParse& self )
29 {
30 // r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
31 r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()]; // >> +( '*' >> int_p );
32 }
33 const rule_t& start() const { return r; }
34 };
35};
31行目（definition のコンストラクタ内で r に代入している行）で、「整数 . 整数 . 整数 . 整数」の並びにマッチしたら MyAction を呼び出す。
31 r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()];
コードを見てみる。。。
1#include <iostream>
2#include <fstream>
3#include <sstream>
4#include <string>
5#include <boost/spirit.hpp>
6#include <boost/tokenizer.hpp>
7
8using namespace std;
9using namespace boost::spirit;
10
// Global list of strings. Nothing in this listing reads or writes it;
// presumably it was meant for the commented-out push_vector(cell) call
// in main -- TODO confirm or remove.
11std::vector<string> v;
12
13struct AddrParse : grammar<AddrParse>
14{
15 struct MyAction
16 {
17 template<typename Ite>
18 void operator()( Ite i1, Ite i2 ) const
19 { cout << "文字数:" << i2 - i1 << endl
20 << " 内容:" << string(i1,i2) << endl; }
21 };
22
23 template<typename ScannerT>
24 struct definition
25 {
26 typedef rule<ScannerT> rule_t;
27 rule_t r;
28 definition( const AddrParse& self )
29 {
30 // r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
31 r = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[MyAction()]; // >> +( '*' >> int_p );
32 }
33 const rule_t& start() const { return r; }
34 };
35};
36
37std::vector < std::vector< std::string > > parse_csv(const char* filepath)
38{
39 std::vector< std::vector< std::string > > cells;
40 std::string line;
41 std::ifstream ifs(filepath);
42
43 while (std::getline(ifs, line)) {
44
45 std::vector< std::string > data;
46
47 boost::tokenizer< boost::escaped_list_separator< char > > tokens(line);
48 for (const std::string& token : tokens) {
49 data.push_back(token);
50 }
51
52 cells.push_back(data);
53 }
54
55 return cells;
56}
57
58#include <typeinfo>
59int main(int argc, char* argv[]){
60 using namespace std;
61
62 if(argc != 2){
63 cerr << "引数の数が間違っています." << endl;
64 cerr << "./spirit_file_read [INPUT_FILE_NAME]" << endl;
65 return 1;
66 }
67
68 ifstream ifs(argv[1], ios::in);
69 if(!ifs){
70 cerr << "Error: file not opened." << endl;
71 return 1;
72 }
73
74 string tmp;
75 string str;
76
77 int counter = 0;
78
79 const auto cells = parse_csv(argv[1]);
80 AddrParse parser;
81
82 for (const auto& rows : cells) {
83
84 for (const auto& cell : rows) {
85 parse_info<string::const_iterator> info =
86 parse( cell.begin(), cell.end(), parser );
87
88 if(info.full) {
89 cout << "line:" << counter << " " << cell << endl;
90 // push_vector(cell);
91 }
92 }
93
94 counter++;
95 }
96
97 ifs.close();
98 return 0;
99}
実行してみる。。。
$ g++ -o random_data random_data.cpp
$ ./random_data 3
$ cat random_data.txt
"2019/07/02 02:02:00.839","2019/07/02 02:02:00","2019/07/02 02:02:00","841","*.178.167.132","25846","iy","*.215.218.225","51321","bu","8MP","VX13Gpdkt","drN","deETa","gyAY4gdZ","8","TJoPPQuOKvrxzAjCd11rpqqSqs","912","198","336","769","278","554","rand-pa1"
"2019/07/02 02:02:52.006","2019/07/02 02:02:52","2019/07/02 02:02:52","478","*.40.197.93","41214","ol","*.213.36.241","23907","Vu","OYF","H6zQlnN5X","yV3","P2VPw","9D3viFsS","5","cogAhSIycmvdYl7RaZNjGCsWqj","953","917","636","718","142","607","rand-pa1"
"2019/07/02 12:12:16.086","2019/07/02 12:12:16","2019/07/02 12:12:16","17","*.175.225.23","15918","Hk","*.156.36.246","8994","u1","Tt2","hihIKl4xd","OX5","uj8uP","hKwtE4iF","8","9EuzqcBTUrBjAago2vY5MMugKb","157","401","130","109","999","219","rand-pa1"
$ ./a.out random_data.txt
文字数:15
内容:255.178.167.132
line:0 255.178.167.132
文字数:14
内容:69.215.218.225
line:0 69.215.218.225
文字数:13
内容:104.40.197.93
line:1 104.40.197.93
文字数:13
内容:76.213.36.241
line:1 76.213.36.241
文字数:14
内容:120.175.225.23
line:2 120.175.225.23
文字数:13
内容:55.156.36.246
line:2 55.156.36.246
(`ー´)b