Boost Spiritを使ってCSVファイルからIPv4アドレスを切り出す方法を考えてみる。
ルールを書く
11struct MyGrammar : grammar<MyGrammar>
12{
13 template<typename ScannerT>
14 struct definition
15 {
16 typedef rule<ScannerT> rule_t;
17 rule_t r;
18
19 definition( const MyGrammar& )
20 {
21 r = int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p; ; // >> +( '*' >> int_p );
22 }
23
24 const rule_t& start() const { return r; }
25 };
26};
IPアドレス X.X.X.Xを21行目のようにする
+c++ 21 r = int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p; ; // >> +( '*' >> int_p ); +
テスト用のログファイルを生成する。
$ g++ -o random_data random_data.cpp
$ ./random_data 3
$ cat random_data.txt
"2019/07/02 08:08:32.547","2019/07/02 08:08:32","2019/07/02 08:08:32","841","..208.171","25846","bn","..12.224","51321","w7","oFD","tBJ3eQQSb","REi","BPo3a","SEhsNFI4","8","VlMhtG15LwnbVVc4TqijRLwLwg","912","198","336","769","278","554","rand-pa1"
"2019/07/02 13:13:51.113","2019/07/02 13:13:51","2019/07/02 13:13:51","478","..190.69","41214","oY","..2.229","23907","LU","Pe4","u3iHsabCn","ryI","Bc9J3","DTx4mO4a","5","ozmapPf9uJUusozrYoTIkfp1m3","953","917","636","718","142","607","rand-pa1"
"2019/07/02 16:16:12.140","2019/07/02 16:16:12","2019/07/02 16:16:12","17","..103.219","15918","Oc","..17.162","8994","zj","JuF","Xl2D2U7M8","0TZ","xZ0IU","cZKwmW0Z","8","pzfXONA0IoxjDiduGFHg59TLvI","157","401","130","109","999","219","rand-pa1"
コード全体を見てみる。
1#include <iostream>
2#include <fstream>
3#include <sstream>
4#include <string>
5#include <boost/spirit.hpp>
6#include <boost/tokenizer.hpp>
7
8using namespace std;
9using namespace boost::spirit;
10
11struct MyGrammar : grammar<MyGrammar>
12{
13 template<typename ScannerT>
14 struct definition
15 {
16 typedef rule<ScannerT> rule_t;
17 rule_t r;
18
19 definition( const MyGrammar& )
20 {
21 r = int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p; ; // >> +( '*' >> int_p );
22 }
23
24 const rule_t& start() const { return r; }
25 };
26};
27
28std::vector < std::vector< std::string > > parse_csv(const char* filepath)
29{
30 std::vector< std::vector< std::string > > cells;
31 std::string line;
32 std::ifstream ifs(filepath);
33
34 while (std::getline(ifs, line)) {
35
36 std::vector< std::string > data;
37
38 boost::tokenizer< boost::escaped_list_separator< char > > tokens(line);
39 for (const std::string& token : tokens) {
40 data.push_back(token);
41 }
42
43 cells.push_back(data);
44 }
45
46 return cells;
47}
48
49#include <typeinfo>
50int main(int argc, char* argv[]){
51 using namespace std;
52
53 if(argc != 2){
54 cerr << "引数の数が間違っています." << endl;
55 cerr << "./spirit_file_read [INPUT_FILE_NAME]" << endl;
56 return 1;
57 }
58
59 ifstream ifs(argv[1], ios::in);
60 if(!ifs){
61 cerr << "Error: file not opened." << endl;
62 return 1;
63 }
64
65 string tmp;
66 string str;
67
68 int counter = 0;
69
70 const auto cells = parse_csv(argv[1]);
71 MyGrammar parser;
72
73 for (const auto& rows : cells) {
74
75 for (const auto& cell : rows) {
76 parse_info<string::const_iterator> info =
77 parse( cell.begin(), cell.end(), parser );
78
79 if(info.full) {
80 cout << "line:" << counter << " " << cell << endl;
81 }
82 }
83
84 counter++;
85 }
86
87 ifs.close();
88 return 0;
89}
実行してみる。
$ g++ -o random_data random_data.cpp
$ ./random_data 3
$ g++ -o ipaddress ipaddress.cpp -lboost_system
In file included from ipaddress.cpp:5:0:
/usr/include/boost/spirit.hpp:18:4: warning: #warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp" [-Wcpp]
warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp"
^~~~~~
$ ./ipaddress random_data.txt
line:0 *.178.167.132
line:0 *.215.218.225
line:1 *.40.197.93
line:1 *.213.36.241
line:2 *.175.225.23
line:2 *.156.36.246
(`ー´)b