C++でCSVを１行ずつ読み取るクラス

Last updated at 2019-12-11 / Posted at 2019-12-10

UEC（電気通信大学）アドベントカレンダーの10日目の記事です。

概要

2年ほど前、C++でCSVファイルを扱うためのクラスを自作し、Qiitaに掲載したのですが、現在も稀にストックされることがあるので、現在私が使っている、もう少し実用性を高めたバージョンも記します。

外部ライブラリを追加するのが面倒な場合でも、簡易的な読み取りであればこれで十分かと思います。
１行ずつ読み取るので、巨大なCSVファイルでも読み取ることができます。

コード

loadcsv.cpp

#include <string>
#include <vector>

using namespace std;

typedef std::vector<std::string> CellsTy;

// Splits one line of delimiter-separated text into its cells.
//
// line  : NUL-terminated text of a single row, e.g. as filled in by fgets().
//         One trailing '\n' is ignored if present; the buffer is not modified.
// comma : delimiter character that separates cells.
//
// Returns the cells in order. Every '\r' is dropped, an empty field yields an
// empty string, and a null or empty `line` yields an empty vector.
// Quoted fields are not interpreted: a delimiter inside quotes still splits.
CellsTy cells_split(char* line, char comma) {
	CellsTy cells;
	if (!line) return cells;

	// char_traits::length comes from <string>, so <cstring> is not needed.
	std::size_t len = std::char_traits<char>::length(line);
	if (len == 0) return cells;

	// Drop the line terminator only when it is really there, so the final
	// line of a file that lacks a trailing newline keeps its last character.
	if (line[len - 1] == '\n') --len;

	std::string cell;
	for (std::size_t i = 0; i < len; ++i) {
		const char c = line[i];
		if (c == '\r') continue;
		if (c == comma) {
			cells.push_back(cell);
			cell.clear();
			continue;
		}
		cell.push_back(c);
	}
	cells.push_back(cell); // the last cell has no delimiter after it
	return cells;
}

使い方

必要な標準ライブラリをインクルードした後に上記のコードを貼り付け、下記のmain関数のように１行ずつ読み取ります。
下の使用例は、data.csvというCSVを１行ずつ読み取り、各セルを出力するプログラムです。

main.cpp

#include <string>
#include <vector>
#include <iostream>
#include <cstdio>

using namespace std;

typedef std::vector<std::string> CellsTy;

// Splits one line of delimiter-separated text into its cells.
//
// line  : NUL-terminated text of a single row, e.g. as filled in by fgets().
//         One trailing '\n' is ignored if present; the buffer is not modified.
// comma : delimiter character that separates cells.
//
// Returns the cells in order. Every '\r' is dropped, an empty field yields an
// empty string, and a null or empty `line` yields an empty vector.
// Quoted fields are not interpreted: a delimiter inside quotes still splits.
CellsTy cells_split(char* line, char comma) {
	CellsTy cells;
	if (!line) return cells;

	// char_traits::length comes from <string>, so <cstring> is not needed.
	std::size_t len = std::char_traits<char>::length(line);
	if (len == 0) return cells;

	// Drop the line terminator only when it is really there, so the final
	// line of a file that lacks a trailing newline keeps its last character.
	if (line[len - 1] == '\n') --len;

	std::string cell;
	for (std::size_t i = 0; i < len; ++i) {
		const char c = line[i];
		if (c == '\r') continue;
		if (c == comma) {
			cells.push_back(cell);
			cell.clear();
			continue;
		}
		cell.push_back(c);
	}
	cells.push_back(cell); // the last cell has no delimiter after it
	return cells;
}

const int MaxLine = 1024; // 読み取るファイルの１行あたりの最大文字数

int main() {
	FILE* fp = fopen("data.csv", "r");
	if (fp == NULL) return 1;
	while (!feof(fp)) {
		char line[MaxLine];
		fgets(line, MaxLine, fp);
		CellsTy cells = cells_split(line, ',');
		if (cells.size() == 0) continue;

		// セルの出力処理
		for (int i = 0; i < cells.size(); i++) {
			cout << cells[i] << "   ";
		}
		cout << endl;
	}
}

少し汎用性を持たせる

使用する型をtypedefで別途定義することで、汎用性を持たせました。wstringを使用する場合は、typedef~の行を変更してください。

loadcsv.cpp

#include <string>
#include <vector>
#include <cstdio>

using namespace std;

// Change these two typedefs together (e.g. to std::wstring / wchar_t) to
// switch the character type used by cells_split.
typedef std::string cells_string;
typedef char cells_char;
typedef std::vector<cells_string> CellsTy;

// Splits one line of delimiter-separated text into its cells.
//
// line  : NUL-terminated text of a single row, e.g. as filled in by fgets().
//         One trailing newline is ignored if present; the buffer is not
//         modified.
// comma : delimiter character that separates cells.
//
// Returns the cells in order. Every carriage return is dropped, an empty
// field yields an empty string, and a null or empty `line` yields an empty
// vector. Quoted fields are not interpreted.
CellsTy cells_split(cells_char* line, cells_char comma) {
	CellsTy cells;
	if (!line) return cells;

	// char_traits::length works for whichever character type cells_char
	// names, whereas strlen only accepts char.
	std::size_t len = std::char_traits<cells_char>::length(line);
	if (len == 0) return cells;

	// Drop the line terminator only when it is really there, so the final
	// line of a file that lacks a trailing newline keeps its last character.
	if (line[len - 1] == cells_char('\n')) --len;

	cells_string cell;
	for (std::size_t i = 0; i < len; ++i) {
		const cells_char c = line[i];
		if (c == cells_char('\r')) continue;
		if (c == comma) {
			cells.push_back(cell);
			cell.clear();
			continue;
		}
		cell.push_back(c);
	}
	cells.push_back(cell); // the last cell has no delimiter after it
	return cells;
}

template関数にする

さらに、引数の型をtemplate化したバージョンです。
char型にもwchar_t型にも対応することができます。

loadcsv.cpp

#include <string>
#include <vector>

using namespace std;

// Splits one line of delimiter-separated text into its cells, for any
// character type (char, wchar_t, ...).
//
// line  : NUL-terminated text of a single row. One trailing newline is
//         ignored if present; the buffer is not modified.
// comma : delimiter character that separates cells.
//
// Returns the cells in order. Every carriage return is dropped, an empty
// field yields an empty string, and a null or empty `line` yields an empty
// vector. Quoted fields are not interpreted.
template <typename cell_char> std::vector<std::basic_string<cell_char>> cells_split(cell_char* line, cell_char comma) {
	std::vector<std::basic_string<cell_char>> cells;
	if (!line) return cells;

	// strlen only accepts char; char_traits::length handles every cell_char.
	std::size_t len = std::char_traits<cell_char>::length(line);
	if (len == 0) return cells;

	// Drop the line terminator only when it is really there, so the final
	// line of a file that lacks a trailing newline keeps its last character.
	if (line[len - 1] == cell_char('\n')) --len;

	std::basic_string<cell_char> cell;
	for (std::size_t i = 0; i < len; ++i) {
		const cell_char c = line[i];
		if (c == cell_char('\r')) continue;
		if (c == comma) {
			cells.push_back(cell);
			cell.clear();
			continue;
		}
		cell.push_back(c);
	}
	cells.push_back(cell); // the last cell has no delimiter after it
	return cells;
}

こうした方がよいという改善案や、挙動がおかしな点がありましたらコメントお願い致します。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up