Intel TBBは、並列処理を実装するためのライブラリである (Intelが提供している)
Concurrent HashMapは、Intel TBBコンテナのうちの1つである。(そのほかに、ベクターとキューもある)
↓このようにして使う。。
7#include "tbb/concurrent_hash_map.h"
8#include "tbb/blocked_range.h"
9#include "tbb/parallel_for.h"
10#include "tbb/tick_count.h"
11#include "tbb/task_scheduler_init.h"
12#include "tbb/concurrent_vector.h"
13
14using namespace std;
15using namespace tbb;
16
17typedef tbb::concurrent_hash_map<std::string, std::string> iTbb_addr_pair;
18static iTbb_addr_pair Tbb_Addr_Pair;
インサーション
69 iTbb_addr_pair::accessor t;
70 Tbb_Addr_Pair.insert(t, src_ipAddr);
71 t->second = dest_ipAddr;
格納した要素を表示
80 counter = 0;
81 for(auto itr = Tbb_Addr_Pair.begin(); itr != Tbb_Addr_Pair.end(); ++itr) {
82 if(counter > 0)
83 std::cout << counter << ":" << itr->first << "," << itr->second << std::endl;
84 counter++;
85 }
慣れてくると、利用はシンプルである。
例えば、↓のようなデータからIPアドレスのペア (71.153.59.69/149.38.245.191 - IPアドレスはランダムに生成)を格納したいとする。
"2019/07/02 00:00:48.033","2019/07/02 00:00:48","2019-07-02T00:00:48Z","841","149.38.245.191","25846","pU","71.153.59.69","51321","Yx","mU6","7gGd0vvjl","5pw","6KLBv","qQOS2G3d","8","nQTqVmphosHwgZlYtVANbxyXO8","912","198","336","769","278","554","rand-pa1"
"2019/07/02 02:02:54.230","2019/07/02 02:02:54","2019-07-02T02:02:54Z","478","70.146.59.78","41214","Pq","77.21.128.75","23907","Xd","3bt","N1ADubtI0","iJq","XvZpV","TqaGYZOW","5","TCkH2EMjrPpuVtUhZB3bEpuMpw","953","917","636","718","142","607","rand-pa1"
CSVファイルを読み込み、0始まりで数えて4番目と7番目の列 (つまり5列目の送信元IPと8列目の宛先IP) の項目を map(srcIP, destIP) に格納することにする。
コードを見てみる。。
1#include <iostream>
2#include <fstream>
3#include <string>
4#include <vector>
5#include <boost/tokenizer.hpp>
6
7#include "tbb/concurrent_hash_map.h"
8#include "tbb/blocked_range.h"
9#include "tbb/parallel_for.h"
10#include "tbb/tick_count.h"
11#include "tbb/task_scheduler_init.h"
12#include "tbb/concurrent_vector.h"
13
14using namespace std;
15using namespace tbb;
16
17typedef tbb::concurrent_hash_map iTbb_addr_pair;
18static iTbb_addr_pair Tbb_Addr_Pair;
19
20std::vector < std::vector< std::string > > parse_csv(const char* filepath)
21{
22 std::vector< std::vector< std::string > > cells;
23 std::string line;
24 std::ifstream ifs(filepath);
25
26 while (std::getline(ifs, line)) {
27
28 std::vector< std::string > data;
29
31 boost::tokenizer< boost::escaped_list_separator< char > > tokens(line);
32 for (const std::string& token : tokens) {
33 data.push_back(token);
34 }
35
37 cells.push_back(data);
38 }
39
40 return cells;
41}
42
43int main(int argc, char *argv[])
44{
45 int counter = 0;
46
47 std::string src_ipAddr;
48 std::string dest_ipAddr;
49
50 const auto cells = parse_csv(argv[1]);
51 for (const auto& rows : cells) {
52
53 counter = 0;
54 for (const auto& cell : rows) {
55 // std::cout << "<" << cell << "> " << std::endl;
56
57 if(counter == 4)
58 {
59 // std::cout << cell << std::endl;
60 src_ipAddr = string(cell);
61 }
62
63 if(counter == 7)
64 {
65 // std::cout << cell << std::endl;
66 dest_ipAddr = string(cell);
67 }
68
69 iTbb_addr_pair::accessor t;
70 Tbb_Addr_Pair.insert(t, src_ipAddr);
71 t->second = dest_ipAddr;
72
73 counter++;
74
75 }
76
78 }
79
80 counter = 0;
81 for(auto itr = Tbb_Addr_Pair.begin(); itr != Tbb_Addr_Pair.end(); ++itr) {
82 if(counter > 0)
83 std::cout << counter << ":" << itr->first << "," << itr->second << std::endl;
84 counter++;
85 }
86
87 return 0;
88}
実行してみる。。。
$ g++ tbb.cpp -ltbb
$ head -n 2 random_data.txt
"2019/07/02 00:00:00.033","2019/07/02 00:00:00","2019-07-02T00:00:00Z","841","68.104.166.4","25846","hY","142.2.153.83","51321","ip","O0I","4s38T52FF","TUy","uuYOm","MSBa7NoD","8","67RgzNBbmggPJsN5p5J7YxQou6","912","198","336","769","278","554","rand-pa1"
"2019/07/02 00:00:00.043","2019/07/02 00:00:00","2019-07-02T00:00:00Z","478","24.65.11.145","41214","0p","40.193.169.129","23907","Pz","Vh8","D7i2u4FKG","mUX","7Eupl","ZjBDfZbs","5","NheJki5vi0XlwrOVS8MFE9vgQ2","953","917","636","718","142","607","rand-pa1"
$ ./a.out random_data.txt
1:80.7.36.18,87.202.209.244
2:84.197.61.38,28.31.35.245
3:149.38.245.191,71.153.59.69
4:244.201.242.36,3.140.138.200
5:69.208.33.205,206.26.44.221
6:20.174.210.174,191.70.14.196
7:158.40.105.63,35.52.152.35
8:93.16.76.199,193.99.45.197
9:70.146.59.78,77.21.128.75
10:250.210.21.183,47.166.17.227
少しメモ:
STLのハッシュマップを並列処理に使う場合は、mutex(ロック)でラップしてスレッドセーフにするのが定石だが、この方法ではコンテナー(ハッシュマップ)への操作がロックによって1スレッドずつに逐次化され、コンテナー内部の並列化が行われないため、マルチコアが活きない模様。
一方で、Intel TBBのハッシュマップは逐次処理で使うと「重い」(遅い) ので、並列化によって得られる効果が逐次実行時の性能低下を上回る場合に使うとよい。
詳しくは、↓を参照
https://www.oreilly.co.jp/books/9784873113555/
https://www.amazon.com/Intel-Threading-Building-Blocks-Parallelism/dp/0596514808
「したがって、追加した並列化がより遅い順次パフォーマンスよりも重要である場合に、高度なコンカレント・コンテナーを利用してください」
とある。(86ページ)
(`ー´)b