Web で閲覧できる次の表を作成します。
データのソースは厚労省です。
PDF の処理に必要なコマンドのインストール (Ubuntu 19.10)
sudo apt install pdftk-java poppler-utils
PDF を JSON へ変換
```bash
pdftk 000606477.pdf cat 4 output out01.pdf
pdftotext -layout out01.pdf
./capacity_parse.py out01.txt data_capacity_mar07.json
#
pdftotext -layout 000614570.pdf
./capacity_parse.py 000614570.txt data_capacity_mar24.json
```
capacity_parse.py
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
# capacity_parse.py
#
# Mar/29/2020
#
import sys
import json
#
# ------------------------------------------------------------------
def add_data_proc(dict_aa,key_in,name,num):
    """Store one record in *dict_aa* under a zero-padded key.

    The key is "p" followed by int(key_in) formatted with at least two
    digits, e.g. "1" -> "p01", "47" -> "p47".

    Args:
        dict_aa: dict that is updated in place.
        key_in: record number; anything int() accepts.
        name: value stored under 'name'.
        num: value stored under 'num', kept exactly as passed in.

    Raises:
        ValueError: if key_in cannot be converted by int().
    """
    key = "p%02d" % int(key_in)
    # An existing entry with the same key is replaced.
    dict_aa[key] = {'name': name, 'num': num}
#
# ------------------------------------------------------------------
# Script body: read the text file given as argv[1], pick up the table rows,
# and write them as JSON to the file given as argv[2].
sys.stderr.write ("*** 開始 ***\n")
file_in = sys.argv[1]
file_json = sys.argv[2]
sys.stderr.write(file_in + "\n")
sys.stderr.write(file_json + "\n")
#
# "with" closes the file even when reading or decoding fails.
with open(file_in,encoding='utf-8') as fp_in:
    lines = fp_in.readlines()
#
dict_aa = {}
for line in lines:
    # Lines of five characters or fewer (newline included) are ignored.
    if (5 < len(line)):
        # split() already discards the trailing newline; slicing off the
        # last character first would damage a final line without one.
        cols = line.split()
        llx = len(cols)
        if (llx == 6):
            # Six tokens: two (number, name, count) records on one line.
            add_data_proc(dict_aa,cols[0],cols[1],cols[2])
            add_data_proc(dict_aa,cols[3],cols[4],cols[5])
        elif (llx == 5):
            # Five tokens: only the first three are stored.
            # NOTE(review): presumably the last table row, whose right half
            # holds a total instead of a record -- confirm against the PDF.
            add_data_proc(dict_aa,cols[0],cols[1],cols[2])
#
# Re-insert in key order so the JSON is written sorted by key.
dict_bb = {key: dict_aa[key] for key in sorted(dict_aa)}
#
json_str = json.dumps(dict_bb)
#
with open(file_json,mode='w',encoding='utf-8') as fp_out:
    fp_out.write(json_str)
#
sys.stderr.write ("*** 終了 ***\n")
# ------------------------------------------------------------------
JSON のマージ
./json_merge.py data_capacity_mar07.json data_capacity_mar24.json data_capacity.json
json_merge.py
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
# json_merge.py
#
# Mar/29/2020
#
# ------------------------------------------------------------------
import sys
import os
import json
#
# --------------------------------------------------------------------
def file_to_str_proc(file_in):
    """Return the whole content of the UTF-8 text file *file_in*.

    Best effort: when the file cannot be opened or read, a diagnostic is
    written to stderr and an empty string is returned instead of raising.

    Args:
        file_in: path of the file to read.

    Returns:
        The file content as str, or "" on failure.
    """
    str_out = ""
    try:
        # "with" guarantees the handle is closed even if read() raises.
        with open(file_in,encoding='utf-8') as fp_in:
            str_out = fp_in.read()
    except Exception as ee:
        # Deliberately broad: the caller expects "" rather than an exception.
        sys.stderr.write("*** error *** file_to_str_proc ***\n")
        sys.stderr.write(str (ee))
    #
    return str_out
# --------------------------------------------------------------------
def file_write_proc(file_name,str_out):
    """Write *str_out* to *file_name* as UTF-8, replacing any existing file.

    Args:
        file_name: path of the file to create or overwrite.
        str_out: text to write.

    Raises:
        OSError: if the file cannot be opened or written.
    """
    # "with" closes the handle even when write() fails part-way.
    with open(file_name,mode='w',encoding='utf-8') as fp_out:
        fp_out.write(str_out)
#
# ------------------------------------------------------------------
def merge_proc(dict_aa,dict_bb):
    """Join two tables that share keys into one table with both counts.

    For every key of *dict_aa* the result holds
    {'name': <name from dict_aa>, 'mar07': <num from dict_aa>,
    'mar24': <num from dict_bb>}.

    Args:
        dict_aa: first table; each value has 'name' and 'num'.
        dict_bb: second table with the same layout.

    Returns:
        A new dict in dict_aa's key order. Keys that exist only in dict_bb
        are not carried over. Keys missing from dict_bb are reported on
        stderr and left out instead of raising KeyError.
    """
    dict_result = {}
    #
    for key, unit_aa in dict_aa.items():
        if key not in dict_bb:
            sys.stderr.write("*** error *** merge_proc *** missing key: %s\n" % key)
            continue
        unit_bb = dict_bb[key]
        # A differing name is only reported; the name from dict_aa is kept.
        if unit_aa['name'] != unit_bb['name']:
            sys.stderr.write("*** error ***\n")
        dict_result[key] = {
            'name': unit_aa['name'],
            'mar07': unit_aa['num'],
            'mar24': unit_bb['num'],
        }
    #
    return dict_result
# ------------------------------------------------------------------
# Script body: merge the two JSON tables named by argv[1] and argv[2]
# and write the combined table to the file named by argv[3].
sys.stderr.write("*** 開始 ***\n")
#
file_aa = sys.argv[1]
file_bb = sys.argv[2]
file_out = sys.argv[3]
# Echo the three paths, one per line, for the operator.
for path_name in (file_aa, file_bb, file_out):
    sys.stderr.write(path_name + "\n")
#
# Both files are read before either one is parsed.
json_str_aa = file_to_str_proc(file_aa)
json_str_bb = file_to_str_proc(file_bb)
#
dict_result = merge_proc(json.loads(json_str_aa), json.loads(json_str_bb))
#
file_write_proc(file_out, json.dumps(dict_result))
#
sys.stderr.write("*** 終了 ***\n")
# ------------------------------------------------------------------
ホームページ
capacity_pref.html
<!DOCTYPE html>
<!-- capacity_pref.html: page shell; the table itself is generated by capacity_pref.js -->
<html lang="ja">
<head>
<!-- Ask browsers not to cache the page -->
<meta http-equiv="Pragma" content="no-cache" />
<meta http-equiv="Cache-Control" content="no-cache" />
<meta http-equiv="CONTENT-TYPE" content="text/html; charset=utf-8" />
<script src="/js/jquery-3.4.1.min.js"></script>
<script src="capacity_pref.js"></script>
<link rel="stylesheet" href="capacity_pref.css">
<title>PCR検査の1日あたり実施可能件数(都道府県別)</title>
</head>
<body>
<blockquote>
	<h2>PCR検査の1日あたり実施可能件数(都道府県別)</h2><p />
	<blockquote>
	(3月7日時点)
	(3月24日時点)<p />
	</blockquote>
</blockquote>
<blockquote>
	<!-- capacity_pref.js replaces the content of this element with the table -->
	<div class="contents"></div>
</blockquote>
<!-- A stray closing blockquote tag with no matching opener was removed here -->
<hr />
データソース
<blockquote>
https://www.mhlw.go.jp/content/000606477.pdf<p />
https://www.mhlw.go.jp/content/10900000/000614570.pdf<p />
</blockquote>
<a href="../">Return</a><p />
Mar/28/2020 AM 07:00<p />
</body>
</html>
capacity_pref.css
/* -------------------------------------------------------------- */
/*
capacity_pref.css
Mar/29/2020
*/
/* -------------------------------------------------------------- */
/* Grey border and fixed height for tables of class "main" and for every
   td and th on the page.
   NOTE(review): the table built in capacity_pref.js carries no class, so
   the "table.main" part only matches if the markup adds class="main". */
table.main,
td,
th {
	table-layout: fixed;
	border: 1.5px #7e7e7e solid;
	border-collapse: collapse;
	height: 16px;
}

/* Header cells get a grey background */
th {
	background: #c6c6c6;
}

/* Thin green border for tables of class "tag" */
table.tag {
	border: 0.5px green solid;
}

/* Rows of class "cyan" get a pale green-grey background */
tr.cyan {
	background-color: #c7d7c7;
}

/* Red text */
.red {
	color: #ff0000;
}
/* -------------------------------------------------------------- */
capacity_pref.js
// -----------------------------------------------------------------------
// capacity_pref.js
//
// Feb/29/2019
//
// -----------------------------------------------------------------------
// Document-ready handler: load data_capacity.json and render it as a table
// inside the element of class "contents", with a totals row at the bottom.
jQuery (function ()
{
	// Escape a value taken from the JSON file before it is placed in markup.
	function escape_html_proc (value)
	{
		return jQuery("<div>").text (String (value)).html ()
	}

	// Convert a count to an integer; thousands separators are removed first
	// because parseInt stops at the first comma ("1,200" would become 1).
	function to_int_proc (value)
	{
		return parseInt (String (value).replace (/,/g, ""), 10)
	}

	jQuery("#outarea_aa").text ("*** capacity_pref *** start ***")

	const file_in = "./data_capacity.json"

	jQuery.getJSON (file_in,function (data_aa)
		{
		var str_out = ""
		str_out += "<table>"
		str_out += "<tr>"
		str_out += "<th>No</th>"
		str_out += "<th>Pref</th>"
		str_out += "<th>Mar/7</th>"
		str_out += "<th>Mar/24</th>"
		str_out += "</tr>"

		var sum_mar07 = 0
		var sum_mar24 = 0

		// One row per entry, in the order the keys appear in the JSON object
		for (var key in data_aa)
			{
			const unit_aa = data_aa[key]
			str_out += "<tr>"
			str_out += "<td>" + escape_html_proc (key) + "</td>"
			str_out += "<td>" + escape_html_proc (unit_aa.name) + "</td>"
			str_out += "<td>" + escape_html_proc (unit_aa.mar07) + "</td>"
			str_out += "<td>" + escape_html_proc (unit_aa.mar24) + "</td>"
			str_out += "</tr>"

			sum_mar07 += to_int_proc (unit_aa.mar07)
			sum_mar24 += to_int_proc (unit_aa.mar24)
			}

		// Totals row
		str_out += "<tr>"
		str_out += "<td></td>"
		str_out += "<th>計</th>"
		str_out += "<td>" + sum_mar07 + "</td>"
		str_out += "<td>" + sum_mar24 + "</td>"
		str_out += "</tr>"
		str_out += "</table>"

		jQuery(".contents").html (str_out)
		})

	// Runs right after the request is issued, not after the table is drawn,
	// because the getJSON callback is invoked later.
	jQuery("#outarea_hh").text ("*** capacity_pref *** end ***")
})
// -----------------------------------------------------------------------