More than 3 years have passed since last update.

Boost.Spiritのgrammarを使いながらCSVファイルからtimestampとIPアドレスを取り出す

Last updated at 2020-11-03Posted at 2020-11-03

↓このようなデータから、Boost.Spiritを使って、

$ cat random_data.txt （IPアドレスはランダムに生成）
"2019/07/02 10:10:54.405","2019/07/02 10:10:54","2019-07-02T10:10:54Z","841","183.230.214.247","25846","ud","196.243.1.131","51321","yP","6dT","ExCAbzsxH","EgN","D7BSJ","kXpUs4bu","8","oMycVswGUdz1gnopyawKsEhCR3","912","198","336","769","278","554","rand-pa1"
"2019/07/02 11:11:14.681","2019/07/02 11:11:14","2019-07-02T11:11:14Z","478","143.84.130.129","41214","FG","53.81.231.167","23907","9n","lrm","3Nfd2JmWb","woH","OxKYH","Yd6Pwx4f","5","YCsmTqS4kkxfH8b777FWOhn3TP","953","917","636","718","142","607","rand-pa1"
"2019/07/02 23:23:38.408","2019/07/02 23:23:38","2019-07-02T23:23:38Z","17","3.7.170.152","15918","Og","52.222.98.186","8994","D4","DjM","LYEFIUvrP","DQt","oECV0","7f5OKrz0","8","mYOaqbYPvBFLhtU5IZixuD7dPV","157","401","130","109","999","219","rand-pa1"

■ タイムスタンプ - 2019-07-02T10:10:54Z
■ IPアドレス　 - 196.243.1.131

を取りだして、multimapやmulti-indexに格納したくなった。

Boost.Spiritでは、IPアドレスは、

```c++
43 rule_t ipaddr;
49 ipaddr = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[Action_ipaddr()];
```

タイムスタンプは、

```c++
44 rule_t timestamp;
50 timestamp = (int_p >> '-' >> int_p >> '-' >> int_p >> 'T' >> int_p >> ':' >> int_p >> ':' >> int_p >> 'Z')[Action_timestamp()];
```

と書ける。

Boost.Spiritには、grammarという便利な機能がある。
grammarとは、ruleをまとめたものである。

例えば、Boost.Spiritで電卓を実装するときなどに使う。

 8struct ArithCalc : grammar<ArithCalc>  // Boost.Spirit grammar that bundles the rules of an arithmetic calculator
 9{
10    template<typename ScannerT>
11      struct definition  // holds the rules, instantiated for the scanner type in use
12      {
13          typedef rule<ScannerT> rule_t;
14          rule_t expr, fctr, term;  // expression, factor and term rules
15
16          definition( const ArithCalc& )
17          {
18              expr = term >> *('+'>>term | '-'>>term);  // a term followed by zero or more "+ term" or "- term"
19              term = fctr >> *('*'>>fctr | '/'>>fctr);  // a factor followed by zero or more "* factor" or "/ factor"
20              fctr = real_p | '('>>expr>>')';  // a real number, or a parenthesized expression (recursion into expr)
21          }
22
23          const rule_t& start() const { return expr; }  // expr is the entry rule of this grammar
24      };
25};

ここで、18行目から20行目で、>> と | を使ってルールの関係を設定している。

grammarの部分はこちら↓

15struct AddrParse : grammar<AddrParse>  // Boost.Spirit grammar that recognizes a timestamp or an IP address
16{
38  // NOTE: the listing numbering jumps from 16 to 38; Action_ipaddr / Action_timestamp used below are not defined in this excerpt (presumably in the omitted lines)
39    template<typename ScannerT>
40      struct definition  // holds the rules, instantiated for the scanner type in use
41      {
42          typedef rule<ScannerT> rule_t;
43          rule_t ipaddr;
44          rule_t timestamp;
45          rule_t r;  // top-level rule returned by start()
46          definition( const AddrParse& self )
47          {
48            // Earlier experiment, left disabled: r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
49            ipaddr = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[Action_ipaddr()];  // four integers separated by '.'; Action_ipaddr runs on a match
50            timestamp = (int_p >> '-' >> int_p >> '-' >> int_p >> 'T' >> int_p >> ':' >> int_p >> ':' >> int_p >> 'Z')[Action_timestamp()];  // int-int-intTint:int:intZ; Action_timestamp runs on a match
51            r = timestamp | ipaddr;  // try timestamp first, then ipaddr
52          }
53          const rule_t& start() const { return r; }  // r is the entry rule of this grammar
54      };
55};

これを、↑の例(timestampとIPアドレス)に合わせて、下記のように書いてみた。

```c++
51 r = timestamp | ipaddr;
```

コードを見てみる。。。

 1  #include <iostream>
 2  #include <fstream>
 3  #include <sstream>
 4  #include <string>
 5  #include <boost/spirit.hpp>
 6  #include <boost/tokenizer.hpp>
   
 7  using namespace std;
 8  using namespace boost::spirit;
   
 9  std::vector<string> v;
   
10  static int counter;
   
11  struct AddrParse : grammar<AddrParse>  // Boost.Spirit grammar that recognizes a timestamp or an IP address and reports each match
12  {
13      struct Action_ipaddr  // semantic action attached to the ipaddr rule
14      {
15          template<typename Ite>
16            void operator()( Ite i1, Ite i2 ) const  // [i1, i2) is the matched character range
17              { cout << "IPaddr - line:" << counter << endl; 
18                cout << "文字数：" << i2 - i1 << endl  // length of the match
19                     << "　内容：" << string(i1,i2) << endl;  // matched text
20                cout << endl;
21              }
22      };
   
23      struct Action_timestamp  // semantic action attached to the timestamp rule
24      {
25          template<typename Ite>
26            void operator()( Ite i1, Ite i2 ) const  // [i1, i2) is the matched character range
27              { cout << "timestamp - line:" << counter << endl;
28                cout << "文字数：" << i2 - i1 << endl  // length of the match
29                     << "　内容：" << string(i1,i2) << endl;  // matched text
30                cout << endl;
31              }
32      };
   
33      template<typename ScannerT>
34        struct definition  // holds the rules, instantiated for the scanner type in use
35        {
36            typedef rule<ScannerT> rule_t;
37            rule_t ipaddr;
38            rule_t timestamp;
39            rule_t r;  // top-level rule returned by start()
40            definition( const AddrParse& self )
41            {
42              // Earlier experiment, left disabled: r = 'a' >> (*ch_p('b'))[MyAction()] >> 'c';
43              ipaddr = (int_p >> '.' >> int_p >> '.' >> int_p >> '.' >> int_p)[Action_ipaddr()]; // four integers separated by '.'
44              timestamp = (int_p >> '-' >> int_p >> '-' >> int_p >> 'T' >> int_p >> ':' >> int_p >> ':' >> int_p >> 'Z')[Action_timestamp()]; // int-int-intTint:int:intZ
45              r = timestamp | ipaddr;  // try timestamp first, then ipaddr
46            }
47            const rule_t& start() const { return r; }  // r is the entry rule of this grammar
48        };
49  };
   
50  std::vector < std::vector< std::string > > parse_csv(const char* filepath)  // reads the file at filepath and returns one vector of cell strings per input line
51  {
52      std::vector< std::vector< std::string > > cells;
53      std::string line;
54      std::ifstream ifs(filepath);  // no explicit open check here; if the stream is not readable the loop below does not run and an empty result is returned
   
55      while (std::getline(ifs, line)) {
   
56          std::vector< std::string > data;  // cells of the current line
   
57          boost::tokenizer< boost::escaped_list_separator< char > > tokens(line);  // split with a default-constructed escaped_list_separator (see Boost.Tokenizer docs for its separator/quote/escape defaults)
58          for (const std::string& token : tokens) {
59              data.push_back(token);
60          }
   
61          cells.push_back(data);
62      }
   
63      return cells;
64  }
   
65  #include <typeinfo>
66  int main(int argc, char* argv[]){  // entry point: runs the AddrParse grammar over every CSV cell of the file named by argv[1]
67      using namespace std;
   
68      if(argc != 2){  // exactly one argument (the input file name) is required
69          cerr << "引数の数が間違っています．" << endl;
70          cerr << "./spirit_file_read [INPUT_FILE_NAME]" << endl;
71          return 1;
72      }
   
73      ifstream ifs(argv[1], ios::in);  // used only for the open check below; the data itself is read by parse_csv
74      if(!ifs){
75          cerr << "Error: file not opened." << endl;
76          return 1;
77      }
   
78      string tmp;  // not used in the code shown
79      string str;  // not used in the code shown
80      
81      // int counter = 0;  (disabled local; the file-level `counter` is used instead)
   
82      const auto cells = parse_csv(argv[1]);  // all rows of the file, each row a vector of cell strings
83      AddrParse parser;
   
84      for (const auto& rows : cells) {
85      
86        for (const auto& cell : rows) {
87          parse_info<string::const_iterator> info =
88            parse( cell.begin(), cell.end(), parser );  // output is produced by the grammar's semantic actions when a rule matches
   
89          if(info.full) {  // placeholder: nothing is done with the result yet
90            // cout << "line:" << counter << " " << cell << endl;
91            // push_vector(cell);
92          }
93        }
   
94        counter++;  // row index printed by the semantic actions
95      }
96         
97      ifs.close();
98      return 0;
99  }

実行してみる。。。


$ g++ -o random_data random_data.cpp 
$ ./random_data 3

$ g++ -o ipaddress8 ipaddress8.cpp -lboost_system
In file included from ipaddress8.cpp:5:0:
/usr/include/boost/spirit.hpp:18:4: warning: #warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp" [-Wcpp]
 #  warning "This header is deprecated. Please use: boost/spirit/include/classic.hpp"
    ^~~~~~~
$ ./ipaddress8 random_data.txt 
timestamp - line:0
文字数：20
　内容：2019-07-02T10:10:46Z

IPaddr - line:0
文字数：13
　内容：227.13.174.42

IPaddr - line:0
文字数：14
　内容：102.103.46.163

timestamp - line:1
文字数：20
　内容：2019-07-02T16:16:20Z

IPaddr - line:1
文字数：11
　内容：14.24.68.63

IPaddr - line:1
文字数：11
　内容：99.21.36.88

timestamp - line:2
文字数：20
　内容：2019-07-02T17:17:23Z

IPaddr - line:2
文字数：14
　内容：167.125.202.72

IPaddr - line:2
文字数：13
　内容：27.227.146.28

（｀ー´）b

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up