概要

簡単な CSV インポーターを作成したいと思います。
以下の CSV からデータを読み込んで、Student レコードとして保存します。
(今回の例ではレコードの新規作成しか考慮しません。)

hidamari.csv

201,ゆの,ゆの,山梨県,阿澄佳奈
202,宮子,みやこ,福岡県,水橋かおり
101,ヒロ,ひろ,山形県,後藤邑子
102,沙英,さえ,,新谷良子
103,乃莉,のり,関西,原田ひとみ
203,なずな,なずな,,小見川千明
101,茉里,まつり,,

hidamari_importer.rb

require 'csv'

# Imports Student records from a headerless CSV file that is read as SJIS.
class HidamariImporter
  # @param path [String] path of the CSV file to import
  def initialize(path)
    @path = path
  end

  # Reads the file row by row and calls Student.create! once per row.
  # The whole loop runs inside a Student.transaction block.
  def import
    Student.transaction do
      CSV.foreach(@path, 'r:SJIS') { |row| Student.create!(student_attributes(row)) }
    end
  end

  private

  # Picks the imported values by column position:
  # index 0 is the room, index 1 the name, index 3 the birthplace.
  def student_attributes(row)
    room, name, birthplace = row.values_at(0, 1, 3)

    { room: room, name: name, birthplace: birthplace }
  end
end

# Import every row of the sample CSV file.
HidamariImporter.new('hidamari.csv').import

これだけでも CSV データのインポート機能を実装できます。
しかし、ソースコードの中で配列のインデックス値を直接指定しているのでメンテナンスがしづらいと思います。

そこで便利なのが Struct です。

hidamari_importer.rb

require 'csv'

# Imports Student records from a headerless CSV file that is read as SJIS,
# naming the CSV columns through a Struct instead of array indexes.
class HidamariImporter
  # One CSV line, member by member in file order. Members whose name starts
  # with "_" are columns that are not used by the import.
  Row = Struct.new(:room, :name, :_furigana, :birthplace, :_cv)

  # @param path [String] path of the CSV file to import
  def initialize(path)
    @path = path
  end

  # Reads the file row by row and calls Student.create! once per row.
  # The whole loop runs inside a Student.transaction block.
  def import
    Student.transaction do
      CSV.foreach(@path, 'r:SJIS') { |fields| Student.create!(attributes_from(fields)) }
    end
  end

  private

  # Wraps the raw fields in a Row and keeps only the imported members.
  def attributes_from(fields)
    # Struct#to_h converts the row into a Hash keyed by member name.
    Row.new(*fields).to_h.slice(:room, :name, :birthplace)
  end
end

# Import every row of the sample CSV file.
HidamariImporter.new('hidamari.csv').import

こうすると Struct.new の引数を確認すれば CSV のフォーマットが分かるので、
メンテナンスが容易になると思います。

おまけ

Struct を使わない場合は、例えば Array#zip を使っても同じことが実現できます。

hidamari_importer.rb

require 'csv'

# Imports Student records from a headerless CSV file that is read as SJIS,
# naming the CSV columns through Array#zip instead of array indexes.
class HidamariImporter
  # Columns of the CSV file, in file order. Names starting with "_" are
  # columns that exist in the file but are not used by the import.
  COLUMNS = [:room, :name, :_furigana, :birthplace, :_cv].freeze

  # The columns that are actually handed to Student.create!.
  IMPORTED_COLUMNS = COLUMNS.reject { |column| column.to_s.start_with?('_') }.freeze

  # @param path [String] path of the CSV file to import
  def initialize(path)
    @path = path
  end

  # Reads the file row by row and calls Student.create! once per row.
  # The whole loop runs inside a Student.transaction block.
  def import
    Student.transaction do
      CSV.foreach(@path, 'r:SJIS') do |row|
        # Drop the "_"-prefixed columns so that only room, name and birthplace
        # reach Student.create!, exactly like the Struct-based importer does.
        attributes = COLUMNS.zip(row).to_h.slice(*IMPORTED_COLUMNS)

        Student.create!(attributes)
      end
    end
  end
end

# Import every row of the sample CSV file.
HidamariImporter.new('hidamari.csv').import

では Struct を使う方法と Array#zip を使う方法はどちらがパフォーマンスがよいのでしょうか。
ベンチマークを取ってみました。

# Benchmark is part of the standard library but is not loaded by default.
require 'benchmark'

COLUMNS = [:room, :name, :_furigana, :birthplace, :_cv].freeze
# Number of iterations per report. Integers are already frozen, so no .freeze.
N = 1_000_000
Row = Struct.new(*COLUMNS)

# One sample CSV row, already split into fields.
row = [201, 'ゆの', 'ゆの', '山梨県', '阿澄佳奈']

# Compare the two ways of turning one row into a Hash, N times each.
Benchmark.bm(20) do |x|
  x.report('Array#zip  -> Hash :') { N.times { COLUMNS.zip(row).to_h } }
  x.report('Struct.new -> Hash :') { N.times { Row.new(*row).to_h } }
end

                           user     system      total        real
Array#zip  -> Hash :   1.860000   0.110000   1.970000 (  1.967263)
Struct.new -> Hash :   1.380000   0.050000   1.430000 (  1.437352)

Struct を使うほうが速い！！！

Struct よりも Array の方が Hash に変換するコストが大きいのかなと思い、さらに計測してみました。

# Build both intermediate objects once, outside the timed blocks, so that the
# reports below time only the #to_h call on each of them.
# COLUMNS, row, Row and N are not defined in this snippet; it relies on them
# already being defined.
array  = COLUMNS.zip(row)
#=> [[:room, 201], [:name, "ゆの"], [:_furigana, "ゆの"], [:birthplace, "山梨県"], [:_cv, "阿澄佳奈"]]
struct = Row.new(*row)
#=> #<struct Row room=201, name="ゆの", _furigana="ゆの", birthplace="山梨県", _cv="阿澄佳奈">

# Each report repeats a single #to_h conversion N times.
Benchmark.bm(13) do |x|
  x.report('Array#to_h  :') { N.times { array.to_h } }
  x.report('Struct#to_h :') { N.times { struct.to_h } }
end

                    user     system      total        real
Array#to_h  :   0.860000   0.030000   0.890000 (  0.892108)
Struct#to_h :   1.040000   0.020000   1.060000 (  1.065418)

おや、Struct の方が遅い…。
ということは Array#zip のコストが大きいということでしょうか。

# Time only the construction step, N times each: pairing the column names with
# the row values via Array#zip versus instantiating the Struct.
# No #to_h call is made inside these reports.
Benchmark.bm(12) do |x|
  x.report('Array#zip  :') { N.times { COLUMNS.zip(row) } }
  x.report('Struct.new :') { N.times { Row.new(*row) } }
end

                   user     system      total        real
Array#zip  :   0.910000   0.010000   0.920000 (  0.923178)
Struct.new :   0.390000   0.000000   0.390000 (  0.393138)

やはり Struct のオブジェクトを生成するほうが Array#zip よりかなり速いんですね。

| Array#zip | Array#to_h | Array#zip → #to_h |
|---|---|---|
| 0.923178 | 0.892108 | 1.967263 |

| Struct.new | Struct#to_h | Struct.new → #to_h |
|---|---|---|
| 0.393138 | 1.065418 | 1.437352 |

Rails での CSV インポートの際に Struct がプチ便利

概要

おまけ