次のページのデータソースを PDF から xlsx に変更しました。
栃木県の新型コロナウイルス感染症患者の発生状況の表を作成
データのソースは栃木県です。
xlsx を JSON へ変換
./xlsx_tochigi.py 020426hasseijyoukyouitiran.xlsx data_tochigi.json
xlsx_tochigi.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# xlsx_tochigi.py
#
# Apr/27/2020
#
# -------------------------------------------------------------------
import sys
import json
from openpyxl import Workbook
from openpyxl import load_workbook
from dateutil.parser import parse
from datetime import timedelta
#
# -------------------------------------------------------------------
def convert_to_date_string_proc(tt):
    """Convert a spreadsheet date cell value to a 'YYYY-MM-DD' string.

    Args:
        tt: Either a numeric Excel date serial (int or float), or a
            ``datetime.date`` / ``datetime.datetime`` object.

    Returns:
        The date formatted as 'YYYY-MM-DD'.

    Raises:
        TypeError: If ``tt`` is neither numeric nor a date object
            (for example ``None`` from an empty cell).
    """
    # Imported locally so this function does not depend on which names the
    # file-level import section happens to provide.
    from datetime import date, datetime, timedelta

    # openpyxl may already hand back a date object for date-formatted cells
    # (see the openpyxl documentation); arithmetic like `tt - 2` would raise
    # TypeError on those, so format them directly.  datetime is a subclass
    # of date, so this check covers both.
    if isinstance(tt, date):
        return tt.strftime('%Y-%m-%d')

    # 1899-12-30 + tt days is the same instant as 1900-01-01 + (tt - 2) days,
    # i.e. the offset the serial numbers in this sheet are interpreted with.
    excel_epoch = datetime(1899, 12, 30)
    return (excel_epoch + timedelta(days=tt)).strftime('%Y-%m-%d')
# -------------------------------------------------------------------
def row_process(row, dict_aa):
    """Store one worksheet row in ``dict_aa`` as a patient record.

    The record is keyed by "t" followed by the zero-padded number taken from
    the first cell, and holds the values of cells 1-4 under the keys
    'age', 'sex', 'place' and 'date'.

    Args:
        row: Sequence of cell objects exposing ``.value``; at least five
            cells are read.
        dict_aa: Dictionary that is updated in place.

    Raises:
        ValueError / TypeError: If the first cell cannot be converted to int.
    """
    # The caller selects rows with str(value).isdigit(), which also accepts
    # numeric text such as "12"; "%03d" rejects str, so coerce explicitly.
    key = "t%03d" % int(row[0].value)
    dict_aa[key] = {
        'age': row[1].value,
        'sex': row[2].value,
        'place': row[3].value,
        'date': convert_to_date_string_proc(row[4].value),
    }
#
# -------------------------------------------------------------------
def xlsx_read_proc(xlsx_file):
    """Read the active worksheet of an xlsx file into a dictionary.

    Rows whose first cell looks like a non-negative integer are passed to
    ``row_process``; every other row is reported on stderr and skipped.

    Args:
        xlsx_file: Path of the workbook to load.

    Returns:
        The dictionary filled by ``row_process``.
    """
    dict_aa = {}
    wb = load_workbook(filename=xlsx_file)
    try:
        ws = wb.active
        # Row count goes to stdout, as before (all other diagnostics use stderr).
        print(ws.max_row)
        for row in ws.rows:
            vv0 = row[0].value
            if str(vv0).isdigit():
                row_process(row, dict_aa)
            elif vv0 is not None:
                # str() because the first cell may hold a float, a negative
                # number or a date, which cannot be concatenated to a string.
                sys.stderr.write("string: " + str(vv0) + "\n")
            else:
                sys.stderr.write("None\n")
    finally:
        # Close the workbook even when a row fails to convert.
        wb.close()
    #
    return dict_aa
# -------------------------------------------------------------------
# Script body: read the xlsx given as argv[1] and write the records,
# newest case number first, as JSON to the file given as argv[2].
sys.stderr.write("*** 開始 ***\n")
#
if len(sys.argv) < 3:
    # Fail with a usage hint instead of a bare IndexError traceback.
    sys.stderr.write("Usage: " + sys.argv[0] + " input.xlsx output.json\n")
    sys.exit(1)
#
xlsx_file = sys.argv[1]
file_json = sys.argv[2]
#
sys.stderr.write(xlsx_file + "\n")
sys.stderr.write(file_json + "\n")
#
dict_aa = xlsx_read_proc(xlsx_file)
#
# Keys look like "t001".  A plain string sort would place "t1000" between
# "t100" and "t101", so order by length first; for keys of equal length this
# is the same descending order as before.
dict_bb = {
    key: dict_aa[key]
    for key in sorted(dict_aa, key=lambda kk: (len(kk), kk), reverse=True)
}
#
json_str = json.dumps(dict_bb)
#
# The with-block closes the output file even if the write fails.
with open(file_json, mode='w', encoding='utf-8') as fp_out:
    fp_out.write(json_str)
#
sys.stderr.write("*** 終了 ***\n")
# -------------------------------------------------------------------
ホームページの表示はこちらと同じです。