LoginSignup
0
0

More than 3 years have passed since last update.

Intel TBBのConcurrent Hashmapを使ってみる

Last updated at Posted at 2020-12-15

Intel TBBは、並列処理を実装するためのライブラリである (Intelが提供している)

image.png

Concurrent HashMapは、Intel TBBコンテナのうちの1つである。(そのほかに、ベクターとキューもある)

以下のようにして使う。


     7#include "tbb/concurrent_hash_map.h"
     8#include "tbb/blocked_range.h"
     9#include "tbb/parallel_for.h"
    10#include "tbb/tick_count.h"
    11#include "tbb/task_scheduler_init.h"
    12#include "tbb/concurrent_vector.h"
    13
    14using namespace std;
    15using namespace tbb;
    16
    17typedef tbb::concurrent_hash_map iTbb_addr_pair;
    18static iTbb_addr_pair Tbb_Addr_Pair;

インサーション


    69          iTbb_addr_pair::accessor t;
    70          Tbb_Addr_Pair.insert(t, src_ipAddr);
    71          t->second = dest_ipAddr;

格納した要素を表示


    80    counter = 0;
    81    for(auto itr = Tbb_Addr_Pair.begin(); itr != Tbb_Addr_Pair.end(); ++itr) {
    82      if(counter > 0)
    83        std::cout << counter << ":" << itr->first << "," << itr->second << std::endl;
    84      counter++;
    85    }

慣れてくると、利用はシンプルである。

例えば、以下のようなデータからIPアドレスのペア (送信元 149.38.245.191 / 宛先 71.153.59.69。IPアドレスはランダムに生成したもの) を格納したいとする。


"2019/07/02 00:00:48.033","2019/07/02 00:00:48","2019-07-02T00:00:48Z","841","149.38.245.191","25846","pU","71.153.59.69","51321","Yx","mU6","7gGd0vvjl","5pw","6KLBv","qQOS2G3d","8","nQTqV
mphosHwgZlYtVANbxyXO8","912","198","336","769","278","554","rand-pa1"
"2019/07/02 02:02:54.230","2019/07/02 02:02:54","2019-07-02T02:02:54Z","478","70.146.59.78","41214","Pq","77.21.128.75","23907","Xd","3bt","N1ADubtI0","iJq","XvZpV","TqaGYZOW","5","TCkH2EM
jrPpuVtUhZB3bEpuMpw","953","917","636","718","142","607","rand-pa1"

CSVファイルを読み込み、0始まりで数えて4番目と7番目の列 (先頭から数えると5列目と8列目) の項目を map(srcIP, destIP) に格納することにする。

コードを見てみる。。


    #include <fstream>
    #include <iostream>
    #include <string>
    #include <vector>

    #include <boost/tokenizer.hpp>

    #include "tbb/concurrent_hash_map.h"
    #include "tbb/blocked_range.h"
    #include "tbb/parallel_for.h"
    #include "tbb/tick_count.h"
    #include "tbb/task_scheduler_init.h"
    #include "tbb/concurrent_vector.h"
    13
    14using namespace std;
    15using namespace tbb;
    16
    17typedef tbb::concurrent_hash_map iTbb_addr_pair;
    18static iTbb_addr_pair Tbb_Addr_Pair;
    19
    20std::vector < std::vector< std::string > > parse_csv(const char* filepath)
    21{
    22    std::vector< std::vector< std::string > > cells;
    23    std::string line;
    24    std::ifstream ifs(filepath);
    25
    26    while (std::getline(ifs, line)) {
    27
    28        std::vector< std::string > data;
    29                                                                                                                                                            
    31        boost::tokenizer< boost::escaped_list_separator< char > > tokens(line);
    32        for (const std::string& token : tokens) {
    33            data.push_back(token);
    34        }
    35                                                                                                                                              
    37        cells.push_back(data);
    38    }
    39
    40    return cells;
    41}
    42
    43int main(int argc, char *argv[])
    44{
    45    int counter = 0;
    46
    47    std::string src_ipAddr;
    48    std::string dest_ipAddr;
    49
    50    const auto cells = parse_csv(argv[1]);
    51    for (const auto& rows : cells) {
    52
    53        counter = 0;
    54        for (const auto& cell : rows) {
    55          // std::cout << " " << std::endl;                                                                                                                           
    56
    57          if(counter == 4)
    58            {
    59              // std::cout << cell << std::endl;                                                                                                                                      
    60             src_ipAddr = string(cell);
    61            }
    62
    63          if(counter == 7)
    64            {
    65              // std::cout << cell << std::endl;                                                                                                                                      
    66             dest_ipAddr = string(cell);
    67            }
    68
    69          iTbb_addr_pair::accessor t;
    70          Tbb_Addr_Pair.insert(t, src_ipAddr);
    71          t->second = dest_ipAddr;
    72
    73          counter++;
    74
    75        }
    76                                                                                                                                               
    78    }
    79
    80    counter = 0;
    81    for(auto itr = Tbb_Addr_Pair.begin(); itr != Tbb_Addr_Pair.end(); ++itr) {
    82      if(counter > 0)
    83        std::cout << counter << ":" << itr->first << "," << itr->second << std::endl;
    84      counter++;
    85    }
    86
    87    return 0;
    88} 

実行してみる。。。


$ g++ tbb.cpp -ltbb

$ head -n 2 random_data.txt 
"2019/07/02 00:00:00.033","2019/07/02 00:00:00","2019-07-02T00:00:00Z","841","68.104.166.4","25846","hY","142.2.153.83","51321","ip","O0I","4s38T52FF","TUy","uuYOm","MSBa7NoD","8","67RgzNBbmggPJsN5p5J7YxQou6","912","198","336","769","278","554","rand-pa1"
"2019/07/02 00:00:00.043","2019/07/02 00:00:00","2019-07-02T00:00:00Z","478","24.65.11.145","41214","0p","40.193.169.129","23907","Pz","Vh8","D7i2u4FKG","mUX","7Eupl","ZjBDfZbs","5","NheJki5vi0XlwrOVS8MFE9vgQ2","953","917","636","718","142","607","rand-pa1"

$ ./a.out random_data.txt 
1:80.7.36.18,87.202.209.244
2:84.197.61.38,28.31.35.245
3:149.38.245.191,71.153.59.69
4:244.201.242.36,3.140.138.200
5:69.208.33.205,206.26.44.221
6:20.174.210.174,191.70.14.196
7:158.40.105.63,35.52.152.35
8:93.16.76.199,193.99.45.197
9:70.146.59.78,77.21.128.75
10:250.210.21.183,47.166.17.227

少しメモ:

STLのハッシュマップを並列処理に使う場合、mutex (ロック) でラップしてスレッドセーフにするのが定石だが、そうするとコンテナー (ハッシュマップ) へのアクセスが直列化され、コンテナー内部の並列化が行われないため、マルチコアが活きない模様。

一方で、Intel TBBのハッシュマップは逐次実行では「重い」(遅い) ので、並列化によって得られる効果が逐次性能の低下を上回る場合に使うとよい。

詳しくは、↓を参照
https://www.oreilly.co.jp/books/9784873113555/
https://www.amazon.com/Intel-Threading-Building-Blocks-Parallelism/dp/0596514808

「したがって、追加した並列化がより遅い順次パフォーマンスよりも重要である場合に、高度なコンカレント・コンテナーを利用してください」
とある。(86ページ)

(`ー´)b

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0