こんにちは。
MySQL で LOAD DATA INFILE
を使い、シェルスクリプト(+ヒアドキュメント)を利用して CSV データを読み込んでみました。これまでもすでに多く解説されているようです。
名前付きパイプ利用
第一例目は名前付きパイプ利用です(LOAD DATA INFILE '$FIFO'
)。
$ ./load_data.sh sampledata_CRLF.csv.gz
count: 4
1 Tokyo POINT(139.7541618 35.6822589)
2 Osaka POINT(135.5001783 34.6914242)
3 Kyoto POINT(135.7620049 35.0166064)
4 Nagoya POINT(136.9019222 35.1830687)
$ gzcat sampledata_CRLF.csv.gz | nkf --guess | ruby -ne 'puts $_.match(/\((\w+)\)/)[1]' | gsed -r 's/LF/\\n/;s/CR/\\r/' | echo \"`cat`\"
"\r\n"
$ cat load_data.sh
# !/bin/sh
USER="root"
PASSWORD=""
DATABASE=""
DAT_INFILE_GZ=$1
TABLENAME="temp_table"
FIELDSEPARATOR=","
LINEBREAK="\\r\\n"
FIFO="/tmp/dat.named_pipe"
[ -z `pidof mysqld` ] && echo "Please start mysqld: mysql.server start"; exit 1
[ ! -e ${FIFO} ] && mkfifo ${FIFO}
gzcat < ${DAT_INFILE_GZ} > ${FIFO} &
mysql -u${USER} -p${PASSWORD} -D ${DATABASE} -N -B 2>/dev/null << EOS
CREATE TEMPORARY TABLE $TABLENAME (
no integer,
city text,
location geometry not null,
SPATIAL INDEX (location)
) ENGINE=MyISAM;
SET @i=0;
LOAD DATA LOCAL INFILE '$FIFO'
INTO TABLE $TABLENAME
FIELDS TERMINATED BY '$FIELDSEPARATOR'
LINES TERMINATED BY '$LINEBREAK'
# IGNORE 1 LINES
# ENCLOSED BY '"'
(@a,@b,@lon,@lat)
SET no=(@i:=@i+1), city=@a, location=st_geomfromtext(concat('point(',@lon,' ',@lat,')'));
SELECT 'count:', count(location) from $TABLENAME;
SELECT no, city, st_astext(location) from $TABLENAME;
# SHOW CREATE TABLE $TABLENAME;
EOS
exit $?
jq コマンドを使って標準入力へ流し込み
第二例目は、jq コマンドを使って JSON データから切り出した CSV データを標準入力へ流し込んでいます(LOAD DATA INFILE '/dev/stdin'
)。
$ ./load_data_2.sh
count: 2
1 POINT(139.7541618 35.6822589)
2 POINT(135.5001783 34.6914242)
$ cat load_data_2.sh
# !/bin/sh
USER="root"
PASSWORD=""
DATABASE=""
TABLENAME="temp_table"
FIELDSEPARATOR=","
[ -z `pidof mysqld` ] && echo "Please start mysqld: mysql.server start"; exit 1
echo "`jq -r '.geometry.coordinates|@csv' << EOS
{"geometry": {"type":"Point","coordinates":[139.7541618,35.6822589]}}
{"geometry": {"type":"Point","coordinates":[135.5001783,34.6914242]}}
EOS
`" | mysql -u${USER} -p${PASSWORD} -D ${DATABASE} -N -B -e "
CREATE TEMPORARY TABLE $TABLENAME (
no integer,
location geometry not null,
SPATIAL INDEX (location)
) ENGINE=MyISAM;
SET @i=0;
LOAD DATA LOCAL INFILE '/dev/stdin'
INTO TABLE $TABLENAME
FIELDS TERMINATED BY '$FIELDSEPARATOR'
(@lon,@lat)
SET no=(@i:=@i+1), location=st_geomfromtext(concat('point(',@lon,' ',@lat,')'));
SELECT 'count:', count(location) from $TABLENAME;
SELECT no, st_astext(location) from $TABLENAME;
" 2>/dev/null
exit $?
直接読み込み
第三例目は、JSON データを直接読み込む形にしました。
$ ./load_data_3.sh
count: 2
1 POINT(139.7541618 35.6822589)
2 POINT(135.5001783 34.6914242)
$ cat load_data_3.sh
# !/bin/sh
USER="root"
PASSWORD=""
DATABASE=""
TABLENAME="temp_table"
FIELDSEPARATOR="\\t"
[ -z `pidof mysqld` ] && echo "Please start mysqld: mysql.server start"; exit 1
echo "`cat << EOS
{"geometry": {"type":"Point","coordinates":[139.7541618,35.6822589]}}
{"geometry": {"type":"Point","coordinates":[135.5001783,34.6914242]}}
EOS
`" | mysql -u${USER} -p${PASSWORD} -D ${DATABASE} -N -B -e "
CREATE TEMPORARY TABLE $TABLENAME (
no integer,
location geometry not null,
SPATIAL INDEX (location)
) ENGINE=MyISAM;
SET @i=0;
LOAD DATA LOCAL INFILE '/dev/stdin'
INTO TABLE $TABLENAME
FIELDS TERMINATED BY '$FIELDSEPARATOR'
(@json)
SET no=(@i:=@i+1), location=ST_GeomFromGeoJson(json_extract(@json,'$.geometry'));
SELECT 'count:', count(location) from $TABLENAME;
SELECT no, st_astext(location) from $TABLENAME;
" 2>/dev/null
exit $?