次で公表されている PDF を JSON に変換します。
診療・検査医療機関の公表について
小山地区のデータを取得します。
wget https://www.pref.tochigi.lg.jp/e04/documents/20220112225313.pdf
PDF の表示
evince 20220112225313.pdf
変換 (pdftotext で PDF をテキストにし、そのテキストを medical_to_json.py で JSON にします)
pdftotext -layout 20220112225313.pdf
#
./medical_to_json.py 20220112225313.txt oyama_medical.json > tmp01
#
medical_to_json.py
#! /usr/bin/python
#
# medical_to_json.py
#
# Jan/24/2022
# ------------------------------------------------------------------
import sys
import json
# ------------------------------------------------------------------
def true_false_convert(ss_in):
    """Map a table cell to a boolean.

    Returns True only when the cell is exactly the circle mark '○';
    every other value yields False.
    """
    return True if ss_in == '○' else False
# ------------------------------------------------------------------
def convert_to_dict(ss):
    """Build one record from the whitespace-split columns of a table row.

    Columns are mapped purely by position: name, postal, address, phone,
    then the flag columns consult, inspect, only and others (True when the
    cell is '○'), and finally a free-text comment.  Only the columns that
    are actually present are stored, so a short row produces a partial
    record instead of raising part-way through.

    Args:
        ss: list of column strings; must contain at least the name.

    Returns:
        dict keyed by the column names listed above, in that order.

    Raises:
        IndexError: if ``ss`` is empty.
    """
    text_keys = ("postal", "address", "phone")
    flag_keys = ("consult", "inspect", "only", "others")
    unit_aa = {"name": ss[0]}
    # zip() stops at the shorter sequence, so missing trailing columns
    # are skipped rather than indexed.
    for key, cell in zip(text_keys, ss[1:4]):
        unit_aa[key] = cell
    for key, cell in zip(flag_keys, ss[4:8]):
        unit_aa[key] = true_false_convert(cell)
    if 8 < len(ss):
        unit_aa["comment"] = ss[8]
    #
    return unit_aa
#
# ------------------------------------------------------------------
def omit_check_proc(line):
    """Tell whether a line is free of all the skip words.

    Returns False when ``line`` contains any of the words listed below,
    True otherwise (despite the name, True means "keep this line").
    """
    skip_words = ("診療・検査医療機関", "実施内容", "電話番号", "医療機関名",
                  "(代表)", "のみ可", "検査", "ページ", ":")
    return not any(word in line for word in skip_words)
# ------------------------------------------------------------------
def line_proc(line, list_aa):
    """Parse one text line and append the resulting record to ``list_aa``.

    A line is ignored when it is empty or whitespace-only, when its first
    character is "【" or "0", or when omit_check_proc() rejects it.
    Otherwise the line is split on whitespace, the token list is echoed to
    stdout, and the dict built by convert_to_dict() is appended.

    Args:
        line: one line of the input text.
        list_aa: list that collects the records; modified in place.
    """
    # A blank line has no tokens; indexing it or converting [] would raise.
    if not line.strip():
        return
    if line.startswith(("【", "0")):
        return
    if not omit_check_proc(line):
        return
    ss = line.split()
    print(ss)
    list_aa.append(convert_to_dict(ss))
#
# ------------------------------------------------------------------
# Script body: read the text file named by argv[1], convert its rows and
# write the records as JSON to the file named by argv[2].
sys.stderr.write("*** 開始 ***\n")
# Both paths are required; give a usage message instead of an IndexError.
if len(sys.argv) < 3:
    sys.exit("Usage: medical_to_json.py <input.txt> <output.json>")
file_in = sys.argv[1]
file_json = sys.argv[2]
sys.stderr.write(file_in + "\n")
sys.stderr.write(file_json + "\n")
#
# "with" closes the file even if reading raises.
with open(file_in, encoding='utf-8') as fp_in:
    lines = fp_in.readlines()
#
list_aa = []
for line in lines:
    # Lines of 10 characters or fewer (newline included) are ignored.
    if 10 < len(line):
        line_proc(line, list_aa)
#
# Default json.dumps settings: non-ASCII text is emitted as \uXXXX escapes.
json_str = json.dumps(list_aa)
#
with open(file_json, mode='w', encoding='utf-8') as fp_out:
    fp_out.write(json_str)
sys.stderr.write("*** 終了 ***\n")
# ------------------------------------------------------------------