Web で閲覧できる次の表を作成します。
データのソースは茨城県境町です。
ページをスクレイピングして JSON に変換し、陽性判明日の順にソートします。
./get_ibaraki.py \
http://www.town.sakai.ibaraki.jp/sp/page/page002165.html \
data_ibaraki_tmp.json
#
./ibaraki_sort.py data_ibaraki_tmp.json data_ibaraki.json
get_ibaraki.py
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
# get_ibaraki.py
#
# Apr/02/2020
#
# ------------------------------------------------------------------
import requests
import sys
import json
from bs4 import BeautifulSoup
#
# ------------------------------------------------------------------
sys.path.append('/var/www/data_base/common/python_common')
from file_io import file_write_proc
# ------------------------------------------------------------------
def parser(rows, target_pref="茨城県", stop_pref="埼玉県"):
    """Extract the patient records of one prefecture from the table rows.

    The table uses ``rowspan`` on the leading cell of a row to group rows by
    prefecture and, inside a prefecture, by city.  Two counters track how
    many rows the current prefecture cell and the current city cell still
    cover, so that rows lacking those cells inherit the right values.

    Args:
        rows: iterable of row objects providing ``findAll(['td', 'th'])``;
            each returned cell must provide ``get('rowspan')`` and
            ``get_text(strip=True)``.
        target_pref: only rows belonging to this prefecture are returned.
        stop_pref: parsing stops as soon as this prefecture is reached.

    Returns:
        A list of dicts with the keys ``pref``, ``place``, ``age`` and
        ``date`` (all stripped cell texts), in table order.
    """
    array_aa = []
    count_pref = 0      # rows still covered by the current prefecture cell
    count_city = 0      # rows still covered by the current city cell
    pref = "***"
    city = "***"
    for row in rows:
        tds = row.findAll(['td', 'th'])
        unit_aa = {}
        rowspan = tds[0].get('rowspan') if tds else None
        if not tds:
            # A row without cells carries no record.  It is still a table
            # row, so the span counters below are decremented as usual.
            sys.stderr.write("*** skip: row without cells ***\n")
        elif rowspan:
            sys.stderr.write("*** ppp count_pref = %d ***\n" % count_pref)
            sys.stderr.write(tds[0].get_text(strip=True) + "\n")
            if count_pref == 0:
                # No prefecture span is active: the leading cell starts a
                # new prefecture and the row holds all four columns.
                # NOTE(review): a rowspan on the city cell of this row is
                # not tracked -- confirm the source table never does that.
                count_pref = int(rowspan)
                sys.stderr.write("count_pref = %d\n" % count_pref)
                pref = tds[0].get_text(strip=True)
                city = tds[1].get_text(strip=True)
                sys.stderr.write("pref = %s\t" % pref)
                sys.stderr.write("city = %s\n" % city)
                unit_aa['pref'] = pref
                unit_aa['place'] = city
                unit_aa['age'] = tds[2].get_text(strip=True)
                unit_aa['date'] = tds[3].get_text(strip=True)
            elif count_city == 0:
                # Inside a prefecture span: the leading cell starts a city.
                count_city = int(rowspan)
                sys.stderr.write("count_city = %d\n" % count_city)
                city = tds[0].get_text(strip=True)
                sys.stderr.write("city = %s\n" % city)
                unit_aa['pref'] = pref
                unit_aa['place'] = city
                unit_aa['age'] = tds[1].get_text(strip=True)
                unit_aa['date'] = tds[2].get_text(strip=True)
            # Otherwise both spans are still active and the row yields no
            # record; unit_aa stays empty and is dropped below.
        else:
            unit_aa['pref'] = pref
            if count_city == 0:
                # No city span is active: the row carries its own city cell.
                unit_aa['place'] = tds[0].get_text(strip=True)
                unit_aa['age'] = tds[1].get_text(strip=True)
                unit_aa['date'] = tds[2].get_text(strip=True)
            else:
                # Row inside a city span: the city cell is inherited.
                unit_aa['place'] = city
                unit_aa['age'] = tds[0].get_text(strip=True)
                unit_aa['date'] = tds[1].get_text(strip=True)
        #
        # This row consumes one line of every active span.
        count_pref = max(count_pref - 1, 0)
        count_city = max(count_city - 1, 0)
        #
        if pref == stop_pref:
            break
        #
        # Empty records are never emitted: they would lack the 'date' and
        # 'age' keys that the sort step reads.
        if pref == target_pref and unit_aa:
            array_aa.append(unit_aa)
    #
    return array_aa
#
# ------------------------------------------------------------------
# Usage: get_ibaraki.py <url> <output JSON file>
url = sys.argv[1]
file_out = sys.argv[2]
#
sys.stderr.write("*** start ***\n")
#
headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0",}
#
array_aa = []
try:
    # A timeout keeps the script from hanging forever on a stalled server;
    # raise_for_status() stops an HTTP error page from being parsed as data.
    rr = requests.get(url, headers=headers, timeout=30)
    rr.raise_for_status()
except Exception as ee:
    sys.stderr.write("*** error *** in requests.get ***\n")
    sys.stderr.write(str(ee) + "\n")
else:
    try:
        soup = BeautifulSoup(rr.content, "html.parser")
        # The patient list is the first table of the page.
        table = soup.findAll("table")[0]
        sys.stderr.write("*** aaa ***\n")
        rows = table.findAll("tr")
        sys.stderr.write("*** bbb ***\n")
        sys.stderr.write("len(rows) = %d\n" % len(rows))
        array_aa = parser(rows)
    except Exception as ee:
        sys.stderr.write("*** error *** in BeautifulSoup ***\n")
        sys.stderr.write(str(ee) + "\n")
#
# The output file is written even after a failure; it then holds an empty
# JSON array.
out_str = json.dumps(array_aa)
file_write_proc(file_out,out_str)
#
sys.stderr.write("*** end ***\n")
# ------------------------------------------------------------------
ibaraki_sort.py
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
# ibaraki_sort.py
#
# Apr/02/2020
#
# ------------------------------------------------------------------
import sys
import json
from functools import cmp_to_key
#
sys.path.append('/var/www/data_base/common/python_common')
from file_io import file_to_str_proc
from file_io import file_write_proc
# ------------------------------------------------------------------
def date_to_int_proc(dd):
    """Convert a date string such as "3月17日" into the integer 317.

    The result is ``month * 100 + day``.  The string carries no year, so
    the value only orders dates correctly within a single calendar year.

    Args:
        dd: date text containing the month followed by "月" and the day,
            normally followed by "日".  Text after "日" is ignored.

    Returns:
        The month and day folded into one integer.

    Raises:
        ValueError: if "月" is missing or the month/day parts are not
            numbers.  The message names the offending text.
    """
    parts = dd.split("月")
    if len(parts) < 2:
        raise ValueError("date has no month marker: %r" % dd)
    month = parts[0]
    day = parts[1].split("日")[0]
    try:
        rvalue = int(month) * 100 + int(day)
    except ValueError:
        raise ValueError("date is not in '<month>月<day>日' form: %r" % dd)
    #
    return rvalue
# ------------------------------------------------------------------
def sort_proc(aa,bb):
    """Compare two records by their 'date' field.

    Both dates are converted with date_to_int_proc.  Returns -1, 0 or 1
    when the first record is earlier than, equal to, or later than the
    second one.
    """
    left = date_to_int_proc(aa['date'])
    right = date_to_int_proc(bb['date'])
    # Difference of two booleans yields the usual -1 / 0 / 1 result.
    return (left > right) - (left < right)
# ------------------------------------------------------------------
# Usage: ibaraki_sort.py <input JSON file> <output JSON file>
file_in = sys.argv[1]
file_out = sys.argv[2]
#
sys.stderr.write("*** start ***\n")
#
json_str = file_to_str_proc(file_in)
#
array_aa = json.loads(json_str)
array_bb = sorted(array_aa, key=cmp_to_key(sort_proc))
#
# Number the sorted records i001, i002, ... and split the combined
# "age・sex" text into separate 'age' and 'sex' fields.
dict_aa = {}
for count, unit_aa in enumerate(array_bb, 1):
    tt = unit_aa['age'].split("・")
    unit_aa['age'] = tt[0]
    # An entry without the separator has no sex part; store an empty
    # string instead of failing on the missing element.
    unit_aa['sex'] = tt[1] if len(tt) > 1 else ""
    dict_aa["i%03d" % count] = unit_aa
#
out_str = json.dumps(dict_aa)
file_write_proc(file_out,out_str)
#
sys.stderr.write("*** end ***\n")
# ------------------------------------------------------------------
ホームページ
ibaraki_patient.html
<!DOCTYPE html>
<html lang="ja">
<head>
<meta http-equiv="Pragma" content="no-cache" />
<meta http-equiv="Cache-Control" content="no-cache" />
<meta http-equiv="CONTENT-TYPE" content="text/html; charset=utf-8" />
<script src="/js/jquery-3.4.1.min.js"></script>
<script src="ibaraki_patient.js"></script>
<link rel="stylesheet" href="ibaraki_patient.css">
<title>茨城県の新型コロナウイルス感染症患者の発生状況</title>
</head>
<body>
<blockquote>
	<h2>茨城県の新型コロナウイルス感染症患者の発生状況</h2><p />
	<blockquote>
	(4月1日時点)<p />
	</blockquote>
</blockquote>
<!-- ibaraki_patient.js fills the element of class "contents" with the table -->
<blockquote>
	<div class="contents"></div>
</blockquote>
<!-- an unmatched closing blockquote tag that followed here was removed -->
<hr />
データソース
<blockquote>
	<a href="http://www.town.sakai.ibaraki.jp/sp/page/page002165.html">茨城県及び近県における新型コロナウイルス感染者について</a><p />
</blockquote>
<a href="../">Return</a><p />
Apr/02/2020 AM 07:00<p />
</body>
</html>
ibaraki_patient.css
/* -------------------------------------------------------------- */
/*
ibaraki_patient.css
Apr/02/2020
*/
/* -------------------------------------------------------------- */
/* Grid look shared by the main table and by every cell. */
table.main,
td,
th {
	table-layout: fixed;
	border: 1.5px solid #7e7e7e;
	border-collapse: collapse;
	height: 16px;
}

/* Header cells get a grey background. */
th {
	background: #c6c6c6;
}

/* Thin green frame for tables of class "tag". */
table.tag {
	border: 0.5px solid green;
}

/* Row highlight. */
tr.cyan {
	background-color: #c7d7c7;
}

/* Red text. */
.red {
	color: #ff0000;
}
/* -------------------------------------------------------------- */
ibaraki_patient.js
// -----------------------------------------------------------------------
// ibaraki_patient.js
//
// Apr/02/2020
//
// -----------------------------------------------------------------------
// Load data_ibaraki.json and render it as a table inside ".contents".
jQuery (function ()
{
	jQuery("#outarea_aa").text ("*** ibaraki_patient *** start ***")

	const file_in = "./data_ibaraki.json"
	const titles = ["No","陽性判明日","年代","性別","居住地"]
	// Record fields in the same order as the data columns after "No".
	const fields = ["date","age","sex","place"]

	// Create a cell whose content is set with .text(), so that values
	// scraped from an external page are inserted as plain text and can
	// never be interpreted as markup.
	function make_cell (tag,value)
	{
		// .text(undefined) would act as a getter, so always pass a string.
		const text = (value === undefined || value === null) ? "" : String (value)
		return jQuery ("<" + tag + ">").text (text)
	}

	jQuery.getJSON (file_in,function (data_aa)
	{
		// Class "main" makes the table.main rule of the stylesheet apply.
		const table = jQuery ("<table>").addClass ("main")

		const head = jQuery ("<tr>")
		titles.forEach (function (title)
		{
			head.append (make_cell ("th",title))
		})
		table.append (head)

		Object.keys (data_aa).forEach (function (key)
		{
			const unit_aa = data_aa[key]
			const row = jQuery ("<tr>")
			row.append (make_cell ("td",key))
			fields.forEach (function (field)
			{
				row.append (make_cell ("td",unit_aa[field]))
			})
			table.append (row)
		})

		jQuery(".contents").empty ().append (table)

		// Reported here because the data arrives asynchronously.
		jQuery("#outarea_hh").text ("*** ibaraki_patient *** end ***")
	})
	.fail (function (jqxhr,text_status,error)
	{
		jQuery("#outarea_hh").text ("*** error *** " + file_in + " *** " + text_status + " " + error)
	})
})
// -----------------------------------------------------------------------