次で公表されている PDF を JSON に変換します。
診療・検査医療機関の公表について
小山地区のデータを取得します。
wget https://www.pref.tochigi.lg.jp/e04/documents/20220112225313.pdf
PDF の表示
evince 20220112225313.pdf
変換
pdftotext -layout 20220112225313.pdf
#
./medical_to_json.py 20220112225313.txt oyama_medical.json > tmp01
#
medical_to_json.py
#! /usr/bin/python
#
#	medical_to_json.py
#
#						Jan/24/2022
# ------------------------------------------------------------------
import sys
import json
# ------------------------------------------------------------------
def true_false_convert(ss_in):
	"""Translate a table cell into a boolean flag.

	Returns True only when the cell is exactly the circle mark '○';
	any other text yields False.
	"""
	return ss_in == '○'
# ------------------------------------------------------------------
def convert_to_dict(ss):
	"""Build one record dict from the whitespace-split tokens of a table row.

	Tokens are mapped by position:
	  0 name, 1 postal, 2 address, 3 phone           (kept as text)
	  4 consult, 5 inspect, 6 only, 7 others         (converted with
	                                                  true_false_convert)
	  8 comment                                      (kept as text)

	Only the positions actually present in ``ss`` produce a key, so a short
	row yields a partial record instead of raising IndexError.  An empty
	token list yields an empty dict.  Tokens beyond index 8 are ignored.

	Args:
		ss: list of string tokens for one row.

	Returns:
		dict with keys inserted in the positional order listed above.
	"""
	text_keys = ("name", "postal", "address", "phone")
	flag_keys = ("consult", "inspect", "only", "others")
	unit_aa = {}
#
	# zip() stops at the shorter input, which is what guards short rows.
	for key, token in zip(text_keys, ss):
		unit_aa[key] = token
	for key, token in zip(flag_keys, ss[4:]):
		unit_aa[key] = true_false_convert(token)
	if 8 < len(ss):
		unit_aa["comment"] = ss[8]
#
	return unit_aa
#
# ------------------------------------------------------------------
def omit_check_proc(line):
	"""Decide whether a line should be kept for conversion.

	Returns False when the line contains any of the marker substrings
	listed below, and True when it contains none of them.
	"""
	skip_markers = ("診療・検査医療機関","実施内容","電話番号","医療機関名",
			"(代表)","のみ可","検査","ページ",":")
	return not any(marker in line for marker in skip_markers)
# ------------------------------------------------------------------
def line_proc(line,list_aa):
	"""Convert one text line into a record and append it to ``list_aa``.

	The line is skipped (nothing printed, nothing appended) when:
	  * it is empty,
	  * its first character is "【" or "0",
	  * omit_check_proc() rejects it, or
	  * it contains no tokens after whitespace splitting.

	Otherwise the token list is printed to stdout as a trace, converted
	with convert_to_dict(), and the resulting dict is appended.

	Args:
		line: one line of the input text.
		list_aa: list that receives the record; modified in place.
	"""
	# An empty string has no first character to inspect.
	if not line or line.startswith(("【", "0")):
		return
	if not omit_check_proc(line):
		return
#
	ss = line.split()
	# A whitespace-only line splits to [] and carries no record.
	if not ss:
		return
#
	print(ss)
	list_aa.append(convert_to_dict(ss))
#
# ------------------------------------------------------------------
# Main routine: read the text file given as argv[1], convert the
# accepted lines to records, and write them as a JSON array to argv[2].
# Start/end markers and both file names are reported on stderr.
sys.stderr.write ("*** 開始 ***\n")
file_in = sys.argv[1]
file_json = sys.argv[2]
sys.stderr.write(file_in + "\n")
sys.stderr.write(file_json + "\n")
#
# The with-block closes the input file even if reading fails.
with open(file_in,encoding='utf-8') as fp_in:
	lines = fp_in.readlines()
#
list_aa = []
for line in lines:
	# Lines of 10 characters or fewer (line terminator included) are ignored.
	if 10 < len(line):
		line_proc(line,list_aa)
#
json_str = json.dumps(list_aa)
#
# The with-block closes the output file even if writing fails.
with open(file_json,mode='w',encoding='utf-8') as fp_out:
	fp_out.write(json_str)
sys.stderr.write ("*** 終了 ***\n")
# ------------------------------------------------------------------
