モチベーション
csv を1行ずつ読んで struct にパースして yield みたいなのがやりたかった。
csvutil の Decoder は Decode を呼ぶたびに1レコードずつ読み出すので、ファイル全体をメモリに載せる必要がなく、巨大な csv でもいけるはず。
サンプルデータは titanic で。
https://github.com/datasciencedojo/datasets/blob/master/titanic.csv
コード
package main
import (
"encoding/csv"
"flag"
"fmt"
"io"
"os"
"github.com/jszwec/csvutil"
)
// Titanic is one record of the Titanic passenger CSV. Each field is bound to
// a CSV header name through its `csv` struct tag, so the column order in the
// input file does not matter to the decoder.
//
// PassengerId, Survived and Pclass are decoded as integers and Fare as a
// float. Every other column, including Age, SibSp and Parch, is kept as the
// raw string; the sample data contains rows where Age is empty.
type Titanic struct {
	C1  int32   `csv:"PassengerId"`
	C2  int32   `csv:"Survived"`
	C3  int32   `csv:"Pclass"`
	C4  string  `csv:"Name"`
	C5  string  `csv:"Sex"`
	C6  string  `csv:"Age"`
	C7  string  `csv:"SibSp"`
	C8  string  `csv:"Parch"`
	C9  string  `csv:"Ticket"`
	C10 float32 `csv:"Fare"`
	C11 string  `csv:"Cabin"`
	C12 string  `csv:"Embarked"`
}
// eachLineCsv streams CSV input into ch, decoding one record at a time into a
// value of type T according to T's `csv` struct tags.
//
// An empty filename or "-" reads from standard input; any other value is
// opened as a file. The first line of the input is consumed as the header.
//
// ch is always closed before eachLineCsv returns, so the caller can range
// over it. Every record is sent on ch, which blocks while the receiver is not
// ready, so the caller must keep receiving until ch is closed.
//
// It returns nil once the input is exhausted, including when the input is
// completely empty. Otherwise it returns the first open, header or decode
// error, wrapped with context.
func eachLineCsv[T any](filename string, ch chan T) error {
	defer close(ch)

	var r io.Reader
	switch filename {
	case "", "-":
		r = os.Stdin
	default:
		f, err := os.Open(filename)
		if err != nil {
			return fmt.Errorf("opening csv input: %w", err)
		}
		// The file is only read, so a Close error carries nothing actionable.
		defer func() { _ = f.Close() }()
		r = f
	}

	csvReader := csv.NewReader(r)
	csvReader.Comma = ','

	dec, err := csvutil.NewDecoder(csvReader)
	if err == io.EOF {
		// There is no header line at all: empty input means zero records,
		// which is not a failure.
		return nil
	}
	if err != nil {
		return fmt.Errorf("reading csv header from %q: %w", filename, err)
	}

	for {
		var u T
		err := dec.Decode(&u)
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return fmt.Errorf("decoding csv record from %q: %w", filename, err)
		}
		ch <- u
	}
}
// main reads the CSV named by the -input flag (standard input when the flag
// is empty or "-"), prints every decoded Titanic record on its own line, and
// exits with status 1 if reading or decoding failed.
func main() {
	inputfile := flag.String("input", "", "入力ファイル(csv)")
	flag.Parse()

	ch := make(chan Titanic)
	// Buffered so the producer goroutine can always hand over its result and
	// exit, even though the result is only read after ch has been drained.
	errCh := make(chan error, 1)
	go func() {
		errCh <- eachLineCsv(*inputfile, ch)
	}()

	for v := range ch {
		fmt.Printf("%+v\n", v)
	}

	// eachLineCsv closes ch before returning, so its result is available once
	// the loop above ends. Report it instead of silently dropping it.
	if err := <-errCh; err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
確認
$ go run main.go <train.csv | wc -l
891
$ go run main.go <train.csv | tail
{C1:882 C2:0 C3:3 C4:Markun, Mr. Johann C5:male C6:33 C7:0 C8:0 C9:349257 C10:7.8958 C11: C12:S}
{C1:883 C2:0 C3:3 C4:Dahlberg, Miss. Gerda Ulrika C5:female C6:22 C7:0 C8:0 C9:7552 C10:10.5167 C11: C12:S}
{C1:884 C2:0 C3:2 C4:Banfield, Mr. Frederick James C5:male C6:28 C7:0 C8:0 C9:C.A./SOTON 34068 C10:10.5 C11: C12:S}
{C1:885 C2:0 C3:3 C4:Sutehall, Mr. Henry Jr C5:male C6:25 C7:0 C8:0 C9:SOTON/OQ 392076 C10:7.05 C11: C12:S}
{C1:886 C2:0 C3:3 C4:Rice, Mrs. William (Margaret Norton) C5:female C6:39 C7:0 C8:5 C9:382652 C10:29.125 C11: C12:Q}
{C1:887 C2:0 C3:2 C4:Montvila, Rev. Juozas C5:male C6:27 C7:0 C8:0 C9:211536 C10:13 C11: C12:S}
{C1:888 C2:1 C3:1 C4:Graham, Miss. Margaret Edith C5:female C6:19 C7:0 C8:0 C9:112053 C10:30 C11:B42 C12:S}
{C1:889 C2:0 C3:3 C4:Johnston, Miss. Catherine Helen "Carrie" C5:female C6: C7:1 C8:2 C9:W./C. 6607 C10:23.45 C11: C12:S}
{C1:890 C2:1 C3:1 C4:Behr, Mr. Karl Howell C5:male C6:26 C7:0 C8:0 C9:111369 C10:30 C11:C148 C12:C}
{C1:891 C2:0 C3:3 C4:Dooley, Mr. Patrick C5:male C6:32 C7:0 C8:0 C9:370376 C10:7.75 C11: C12:Q}
cf.