※雑に jq の備忘録にしちゃうことにした
csv を JSON Lines に変換する
こんな感じでできるけど雑だからダブルクォーテーションの考慮とかカラム内の改行とかガン無視してる
$ cat example.csv
c1,c2,c3,c4,c5
1,2,3,4,5
6,7,8,9,0
a,b,c,d,e
f,,g,h,
$ cat example.csv | tr -d '\r' | jq -Rsc 'split("\n") | [.[] | split(",") | select((.|length)!=0)] | (length - 1) as $len | transpose as $items | reduce range($len) as $idx ([]; . + [reduce $items[] as $item ({}; .[$item[0]] = $item[$idx+1])]) | .[]'
{"c1":"1","c2":"2","c3":"3","c4":"4","c5":"5"}
{"c1":"6","c2":"7","c3":"8","c4":"9","c5":"0"}
{"c1":"a","c2":"b","c3":"c","c4":"d","c5":"e"}
$ # 最低限,数字は integer にしておきたい場合
$ cat example.csv | tr -d '\r' | jq -Rsc 'split("\n") | [.[] | split(",") | select((.|length)!=0)] | (length - 1) as $len | transpose as $items | reduce range($len) as $idx ([]; . + [reduce $items[] as $item ({}; .[$item[0]] = ($item[$idx+1] as $v | try ($v | tonumber) catch $v))]) | .[]'
{"c1":1,"c2":2,"c3":3,"c4":4,"c5":5}
{"c1":6,"c2":7,"c3":8,"c4":9,"c5":0}
{"c1":"a","c2":"b","c3":"c","c4":"d","c5":"e"}
$ # "" なら null にもしてみる
cat example.csv | tr -d '\r' | jq -Rsc 'split("\n") | [.[] | split(",") | select((.|length)!=0)] | (length - 1) as $len | transpose as $items | reduce range($len) as $idx ([]; . + [reduce $items[] as $item ({}; .[$item[0]] = ($item[$idx+1] as $v | if $v == "" then null else try ($v | tonumber) catch $v end))]) | .[]'
{"c1":1,"c2":2,"c3":3,"c4":4,"c5":5}
{"c1":6,"c2":7,"c3":8,"c4":9,"c5":0}
{"c1":"a","c2":"b","c3":"c","c4":"d","c5":"e"}
{"c1":"f","c2":null,"c3":"h","c4":"i","c5":null}
- 先頭にカラム名を示す行がある前提
-
tr -d '\r'
は念のため -
split("\n")
で行毎の配列にして更にsplit(",")
でカンマ区切りのカラムを配列にしてる -
select((.|length)!=0)
は文末の改行で空の行存在する場合に困るので一応削除してる -
(length - 1) as $len
でデータ数を保持する -
transpose as $items
で各配列の N 番目の要素でグループ化した配列を生成 -
reduce $items[] as $item ({}; .[$item[0]] = $item[$idx+1])
でグループ化された各カラムのデータの中から,0番目のデータをキー,$idx番目のデータを値としてJSON化 - これを保存しておいた $len (データ数) だけ繰り返すと出来上がり
クエリ文字列との相互変換
$ cat example.json
{
"a": "str",
"b": 1,
"c": null,
"d": "http://localhost:8000",
"e": true
}
$ cat example.json | jq -r 'to_entries | map((.key | @uri) + "=" + (.value | @uri)) | join("&")'
a=str&b=1&c=null&d=http%3A%2F%2Flocalhost%3A8000&e=true
$ echo 'a=str&b=1&c=null&d=http%3A%2F%2Flocalhost%3A8000&e=true' | jq -R 'split("&") | map(split("=") | {key: .[0], value: .[1]})| from_entries'
{
"a": "str",
"b": "1",
"c": "null",
"d": "http%3A%2F%2Flocalhost%3A8000",
"e": "true"
}
- URLからJSONにするときは文字列になってしまう
- jq には何故か URL decode がないのでそれもできない (めんどいからしてない)