LoginSignup
0
0

csv を1行ずつ struct に読み込むやつ

Posted at

モチベーション

csv を1行ずつ読んで struct にパースして yield みたいなのがやりたかった。
csvutil の Decoder は Decode を呼ぶたびに csv.Reader から1行ずつ読み出すので、ファイル全体をメモリに載せずに済み、巨大な csv でもいけるはず。

サンプルデータは titanic で。
https://github.com/datasciencedojo/datasets/blob/master/titanic.csv

コード

package main

import (
	"encoding/csv"
	"flag"
	"fmt"
	"io"
	"os"

	"github.com/jszwec/csvutil"
)

// Titanic is the decode target for one row of the Titanic passenger CSV.
// Each field is bound to a CSV header name through its `csv` struct tag;
// the Go field names (C1..C12) simply number the columns in order.
type Titanic struct {
	C1  int32   `csv:"PassengerId"`
	C2  int32   `csv:"Survived"`
	C3  int32   `csv:"Pclass"`
	C4  string  `csv:"Name"`
	C5  string  `csv:"Sex"`
	// Age is kept as a string: the sample output includes a row whose Age
	// column is empty.
	C6  string  `csv:"Age"`
	// NOTE(review): SibSp and Parch look like integers in the sample output
	// but are declared as strings here — confirm whether that is intentional.
	C7  string  `csv:"SibSp"`
	C8  string  `csv:"Parch"`
	C9  string  `csv:"Ticket"`
	C10 float32 `csv:"Fare"`
	C11 string  `csv:"Cabin"`
	C12 string  `csv:"Embarked"`
}

// eachLineCsv decodes CSV records from filename one at a time into values of
// type T and sends each decoded value on ch.
//
// An empty filename or "-" selects os.Stdin; any other value is opened with
// os.Open. ch is always closed when the function returns, on success or on
// failure, so a `for ... range ch` loop on the receiving side terminates.
//
// It returns the first error from opening the file, constructing the decoder,
// or decoding a record. io.EOF from the decoder marks the normal end of input
// and results in a nil return.
func eachLineCsv[T any](filename string, ch chan T) error {
	// Registered first so it runs last: the input file (if any) is closed
	// before the channel is.
	defer close(ch)

	src := io.Reader(os.Stdin)
	if filename != "" && filename != "-" {
		file, openErr := os.Open(filename)
		if openErr != nil {
			return openErr
		}
		defer file.Close()
		src = file
	}

	records := csv.NewReader(src)
	records.Comma = ','

	decoder, err := csvutil.NewDecoder(records)
	if err != nil {
		return err
	}

	for {
		var row T
		switch decodeErr := decoder.Decode(&row); decodeErr {
		case nil:
			// Blocks until the receiver takes the value when ch is unbuffered.
			ch <- row
		case io.EOF:
			return nil
		default:
			return decodeErr
		}
	}
}

// main parses the -input flag, streams the rows of that CSV file (stdin when
// the flag is empty) through a channel as Titanic values, and prints each one
// with %+v.
//
// The producer's error is collected and reported. Launching eachLineCsv with a
// bare `go` statement discards its return value, so a missing file or a
// malformed row would end the output silently with exit status 0.
func main() {
	// The reader splits on ',', so the help text says csv rather than tsv.
	inputfile := flag.String("input", "", "入力ファイル(csv)")
	flag.Parse()

	ch := make(chan Titanic)
	// Buffered by one so the producer goroutine can hand over its result and
	// exit without waiting for main to reach the receive below.
	errCh := make(chan error, 1)
	go func() {
		errCh <- eachLineCsv(*inputfile, ch)
	}()

	for v := range ch {
		fmt.Printf("%+v\n", v)
	}

	// eachLineCsv closes ch before it returns, so the result is available
	// as soon as the loop above finishes.
	if err := <-errCh; err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

確認

$ go run main.go <train.csv | wc -l
891

$ go run main.go <train.csv | tail
{C1:882 C2:0 C3:3 C4:Markun, Mr. Johann C5:male C6:33 C7:0 C8:0 C9:349257 C10:7.8958 C11: C12:S}
{C1:883 C2:0 C3:3 C4:Dahlberg, Miss. Gerda Ulrika C5:female C6:22 C7:0 C8:0 C9:7552 C10:10.5167 C11: C12:S}
{C1:884 C2:0 C3:2 C4:Banfield, Mr. Frederick James C5:male C6:28 C7:0 C8:0 C9:C.A./SOTON 34068 C10:10.5 C11: C12:S}
{C1:885 C2:0 C3:3 C4:Sutehall, Mr. Henry Jr C5:male C6:25 C7:0 C8:0 C9:SOTON/OQ 392076 C10:7.05 C11: C12:S}
{C1:886 C2:0 C3:3 C4:Rice, Mrs. William (Margaret Norton) C5:female C6:39 C7:0 C8:5 C9:382652 C10:29.125 C11: C12:Q}
{C1:887 C2:0 C3:2 C4:Montvila, Rev. Juozas C5:male C6:27 C7:0 C8:0 C9:211536 C10:13 C11: C12:S}
{C1:888 C2:1 C3:1 C4:Graham, Miss. Margaret Edith C5:female C6:19 C7:0 C8:0 C9:112053 C10:30 C11:B42 C12:S}
{C1:889 C2:0 C3:3 C4:Johnston, Miss. Catherine Helen "Carrie" C5:female C6: C7:1 C8:2 C9:W./C. 6607 C10:23.45 C11: C12:S}
{C1:890 C2:1 C3:1 C4:Behr, Mr. Karl Howell C5:male C6:26 C7:0 C8:0 C9:111369 C10:30 C11:C148 C12:C}
{C1:891 C2:0 C3:3 C4:Dooley, Mr. Patrick C5:male C6:32 C7:0 C8:0 C9:370376 C10:7.75 C11: C12:Q}

cf.

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0