C++の`std::string`にはsplitメソッドが定義されていない。そのため自分で実装しなければならない。
## 実装
### stringstreamを用いる
split.cpp
#include <vector>
#include <string>
#include <sstream>
using namespace std;
// Splits `s` on every occurrence of `delim` by reading fields from a
// stringstream with getline, and returns the non-empty fields in order.
//
// Empty fields (from leading, trailing, or consecutive delimiters) are
// dropped, so an empty or delimiter-only input yields an empty vector.
vector<string> split(const string &s, char delim) {
    vector<string> tokens;
    stringstream stream(s);
    for (string piece; getline(stream, piece, delim); ) {
        if (piece.empty()) {
            continue;  // nothing between two adjacent delimiters
        }
        tokens.push_back(piece);
    }
    return tokens;
}
### 愚直に実装
split_naive.cpp
#include <vector>
#include <string>
using namespace std;
// Splits `s` on `delim` by scanning it one character at a time, without
// any stream machinery, and returns the non-empty fields in order.
//
// Empty fields (from leading, trailing, or consecutive delimiters) are
// dropped, so an empty or delimiter-only input yields an empty vector.
vector<string> split_naive(const string &s, char delim) {
    vector<string> tokens;
    string current;
    for (string::size_type i = 0; i < s.size(); ++i) {
        const char ch = s[i];
        if (ch != delim) {
            current += ch;
            continue;
        }
        // Reached a delimiter: emit the accumulated field (if any) and
        // start collecting the next one.
        if (!current.empty()) {
            tokens.push_back(current);
        }
        current.clear();
    }
    // The last field has no trailing delimiter to trigger the emit above.
    if (!current.empty()) {
        tokens.push_back(current);
    }
    return tokens;
}
## ベンチマーク
### bench1: 入力文字列は短く、関数の実行回数が多い。
bench1.cpp
// Benchmark 1: a short, fixed sentence split on spaces, with each splitter
// invoked many times. Prints one "<name>: <value>ms" line per splitter.
//
// NOTE(review): `measeure` is defined outside this snippet; it presumably
// returns the elapsed clock ticks for the requested number of calls, which
// is why the result is divided by ticks-per-millisecond — confirm against
// its definition.
void bench1() {
    const int iterations = 100000;
    // Divisor applied to the measured value so it is reported in "ms".
    const auto ticks_per_ms = CLOCKS_PER_SEC / 1000;

    string input = "This is the part when I say I don't want ya";
    char delim = ' ';

    cout << "split: "
         << measeure(split, input, delim, iterations) / ticks_per_ms
         << "ms" << endl;
    cout << "naive: "
         << measeure(split_naive, input, delim, iterations) / ticks_per_ms
         << "ms" << endl;
}
### bench2: 入力文字列は長く、関数の実行回数は少ない。
bench2.cpp
// Benchmark 2: one long random string of lowercase letters, with each
// splitter invoked only a few times. Prints one "<name>: <value>ms" line
// per splitter.
//
// The delimiter is itself a random lowercase letter, drawn from the same
// default-constructed engine before the input characters are generated.
//
// NOTE(review): `measeure` is defined outside this snippet; it presumably
// returns the elapsed clock ticks for the requested number of calls —
// confirm against its definition.
void bench2() {
    const int length = 1000000;
    const int iterations = 10;
    // Divisor applied to the measured value so it is reported in "ms".
    const auto ticks_per_ms = CLOCKS_PER_SEC / 1000;

    mt19937 engine;
    uniform_int_distribution<int> letter_offset(0, 25);  // 'a' + 0..25

    // Draw the delimiter first so the random sequence is consumed in the
    // same order as before.
    char delim = static_cast<char>(letter_offset(engine) + 'a');

    string random_input = "";
    int generated = 0;
    while (generated < length) {
        const int offset = letter_offset(engine);
        random_input.push_back(static_cast<char>(offset + 'a'));
        ++generated;
    }

    cout << "split: "
         << measeure(split, random_input, delim, iterations) / ticks_per_ms
         << "ms" << endl;
    cout << "naive: "
         << measeure(split_naive, random_input, delim, iterations) / ticks_per_ms
         << "ms" << endl;
}
結果:
| | bench1[ms] | bench2[ms] |
|---|---|---|
| split | 604 | 477 |
| naive | 384 | 188 |
## Reference
[1] http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c