0
0

More than 3 years have passed since last update.

茨城県の新型コロナウイルス感染症患者の発生状況の表を作成

Last updated at Posted at 2020-04-02

Web で閲覧できる次の表を作成します。

茨城県の新型コロナウイルス感染症患者の発生状況
ibaraki_apr03.png

データのソースは茨城県境町です。

茨城県及び近県における新型コロナウイルス感染者について

ページをスクレイピングしてJSONに変換

# Step 1: scrape the Sakai town page given as the first argument and write
# the result to the temporary JSON file given as the second argument.
./get_ibaraki.py \
    http://www.town.sakai.ibaraki.jp/sp/page/page002165.html \
    data_ibaraki_tmp.json
# Step 2: sort the temporary JSON by date and write data_ibaraki.json,
# the file that ibaraki_patient.js loads.
./ibaraki_sort.py data_ibaraki_tmp.json data_ibaraki.json
get_ibaraki.py
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
#   get_ibaraki.py
#
#                   Apr/02/2020
#
# ------------------------------------------------------------------
import requests
import sys
import json
from bs4 import BeautifulSoup
#
# ------------------------------------------------------------------
sys.path.append('/var/www/data_base/common/python_common')
from file_io import file_write_proc
# ------------------------------------------------------------------
def parser(rows):
    """Extract the Ibaraki patient records from the rows of the infection table.

    The table uses ``rowspan`` on the prefecture column and on the city
    column, so a row may start with a prefecture cell, with a city cell, or
    directly with the age cell.  The function tracks how many rows the
    current prefecture / city cell still covers and fills in the values that
    a row omits.

    Args:
        rows: Iterable of table-row elements.  Each row must provide
            ``findAll(['td', 'th'])`` and each cell ``get('rowspan')`` and
            ``get_text(strip=True)`` (the caller passes BeautifulSoup ``tr``
            tags).

    Returns:
        A list of dicts with the keys ``pref``, ``place``, ``age`` and
        ``date``, one per row whose prefecture is 茨城県.  Scanning stops at
        the first row that belongs to 埼玉県.

    Raises:
        IndexError: If a non-empty row has fewer cells than its position in
            the rowspan layout requires.
        ValueError: If a ``rowspan`` attribute is not an integer.
    """
    array_aa = []
    count_pref = 0      # rows still covered by the current prefecture cell
    count_city = 0      # rows still covered by the current city cell
    pref = "***"
    city = "***"
    for row in rows:
        tds = row.findAll(['td', 'th'])
        unit_aa = {}
        # A row without any cell produces no record, but it still consumes
        # one row of every open rowspan (handled by the decrements below).
        rowspan = tds[0].get('rowspan') if tds else None
        if rowspan:
            sys.stderr.write("*** ppp count_pref = %d ***\n" % count_pref)
            cell = tds[0].get_text(strip=True)
            sys.stderr.write(cell + "\n")
            if count_pref == 0:
                # First row of a prefecture: prefecture, city, age, date.
                count_pref = int(rowspan)
                sys.stderr.write("count_pref = %d\n" % count_pref)
                pref = cell
                city = tds[1].get_text(strip=True)
                # The first city of a prefecture may span several rows too;
                # without tracking it, the following rows (which then start
                # with the age cell) would be read with shifted columns.
                city_rowspan = tds[1].get('rowspan')
                if city_rowspan:
                    count_city = int(city_rowspan)
                sys.stderr.write("pref = %s\t" % pref)
                sys.stderr.write("city = %s\n" % city)
                unit_aa['pref'] = pref
                unit_aa['place'] = city
                unit_aa['age'] = tds[2].get_text(strip=True)
                unit_aa['date'] = tds[3].get_text(strip=True)
            elif count_city == 0:
                # First row of a multi-row city: city, age, date.
                count_city = int(rowspan)
                sys.stderr.write("count_city = %d\n" % count_city)
                city = cell
                sys.stderr.write("city = %s\n" % city)
                unit_aa['pref'] = pref
                unit_aa['place'] = city
                unit_aa['age'] = tds[1].get_text(strip=True)
                unit_aa['date'] = tds[2].get_text(strip=True)
        elif tds:
            unit_aa['pref'] = pref
            if count_city == 0:
                # Single-row city: city, age, date.
                unit_aa['place'] = tds[0].get_text(strip=True)
                unit_aa['age'] = tds[1].get_text(strip=True)
                unit_aa['date'] = tds[2].get_text(strip=True)
            else:
                # Continuation row of a multi-row city: age, date.
                unit_aa['place'] = city
                unit_aa['age'] = tds[0].get_text(strip=True)
                unit_aa['date'] = tds[1].get_text(strip=True)
#
#       This row uses up one row of each open rowspan (never below zero).
        count_pref = max(count_pref - 1, 0)
        count_city = max(count_city - 1, 0)
#
        if pref == "埼玉県":
            break
#
#       Skip rows that produced no record so that consumers never see an
#       empty dict without the 'date' / 'age' keys.
        if pref == "茨城県" and unit_aa:
            array_aa.append(unit_aa)
#
    return array_aa
#
# ------------------------------------------------------------------
# Command-line arguments: the page to scrape and the JSON file to write.
url = sys.argv[1]
file_out = sys.argv[2]
#
sys.stderr.write("*** start ***\n")
#
# The request is sent with a browser-like User-Agent header.
headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0",}
#
array_aa = []
html = None
try:
    # The timeout keeps the script from blocking forever on an unresponsive
    # server; raise_for_status() reports HTTP errors here instead of letting
    # an error page be parsed as if it were the data table.
    rr = requests.get(url, headers=headers, timeout=30)
    rr.raise_for_status()
    html = rr.content
except Exception as ee:
    sys.stderr.write("*** error *** in requests.get ***\n")
    sys.stderr.write(str(ee) + "\n")
#
if html is not None:
    try:
        soup = BeautifulSoup(html, "html.parser")
        # Only the first table of the page is parsed.
        table = soup.findAll("table")[0]
        sys.stderr.write("*** aaa ***\n")
        rows = table.findAll("tr")
        sys.stderr.write("*** bbb ***\n")
        sys.stderr.write("len(rows) = %d\n" % len(rows))

        array_aa = parser(rows)
    except Exception as ee:
        sys.stderr.write("*** error *** in BeautifulSoup ***\n")
        sys.stderr.write(str(ee) + "\n")
#
# Best effort: the output file is written even after an error, in which case
# it contains an empty list.
out_str = json.dumps(array_aa)
file_write_proc(file_out,out_str)
#
sys.stderr.write("*** end ***\n")
# ------------------------------------------------------------------
ibaraki_sort.py
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
#   ibaraki_sort.py
#
#                   Apr/02/2020
#
# ------------------------------------------------------------------
import sys
import json
from functools import cmp_to_key
#
sys.path.append('/var/www/data_base/common/python_common')
from file_io import file_to_str_proc
from file_io import file_write_proc
# ------------------------------------------------------------------
def date_to_int_proc(dd):
    """Turn a date string such as "3月17日" into the sortable integer 317.

    The text before the first "月" is the month; the text between that "月"
    and the following "日" is the day.  The result is month * 100 + day.
    """
    pieces = dd.split("月")
    # Extract the day text first, then convert month and day to integers.
    day_text = pieces[1].split("日")[0]
    return int(pieces[0]) * 100 + int(day_text)
# ------------------------------------------------------------------
def sort_proc(aa,bb):
    """Three-way comparison of two records by their 'date' field.

    Returns -1, 0 or 1 when aa's date is earlier than, equal to or later
    than bb's date, as expected by functools.cmp_to_key.
    """
    left = date_to_int_proc(aa['date'])
    right = date_to_int_proc(bb['date'])
    # Each comparison is a bool; their difference is exactly -1, 0 or 1.
    return (left > right) - (left < right)
# ------------------------------------------------------------------
# Command-line arguments: the JSON file to read and the JSON file to write.
file_in = sys.argv[1]
file_out = sys.argv[2]
#
sys.stderr.write("*** start ***\n")
#
# NOTE(review): file_to_str_proc presumably returns the file content as a
# string; confirm against the file_io module.
json_str = file_to_str_proc(file_in)
#
array_aa = json.loads(json_str)
# Order the records by date (month and day) in ascending order.
array_bb = sorted(array_aa, key=cmp_to_key(sort_proc))
#
# Re-key the sorted records as i001, i002, ... and split the combined
# "age・sex" field into separate 'age' and 'sex' entries.
dict_aa = {}
for count, unit_aa in enumerate(array_bb, start=1):
    key = "i%03d" % count
    tt = unit_aa['age'].split("・")
    unit_aa['age'] = tt[0]
    # A value without the "・" separator has no sex part; store an empty
    # string instead of failing with IndexError.
    unit_aa['sex'] = tt[1] if len(tt) > 1 else ""
    dict_aa[key] = unit_aa
#
out_str = json.dumps(dict_aa)
file_write_proc(file_out,out_str)
#
sys.stderr.write("*** end ***\n")
# ------------------------------------------------------------------

ホームページ

ibaraki_patient.html
<!DOCTYPE html>
<html lang="ja">
<head>
<meta http-equiv="Pragma" content="no-cache" />
<meta http-equiv="Cache-Control" content="no-cache" />
<meta http-equiv="CONTENT-TYPE" content="text/html; charset=utf-8" />
<script src="/js/jquery-3.4.1.min.js"></script>
<script src="ibaraki_patient.js"></script>
<link rel="stylesheet" href="ibaraki_patient.css">
<title>茨城県の新型コロナウイルス感染症患者の発生状況</title>
</head>
<body>
<blockquote>
<h2>茨城県の新型コロナウイルス感染症患者の発生状況</h2><p />
    <blockquote>
    (4月1日時点)<p />
    </blockquote>
</blockquote>
    <!-- ibaraki_patient.js inserts the patient table into div.contents -->
    <blockquote>
    <div class="contents"></div>
    </blockquote>
<hr />
データソース
    <blockquote>
    <a href="http://www.town.sakai.ibaraki.jp/sp/page/page002165.html">茨城県及び近県における新型コロナウイルス感染者について</a><p />
    </blockquote>

<a href="../">Return</a><p />
Apr/02/2020 AM 07:00<p />
</body>
</html>
ibaraki_patient.css
/* -------------------------------------------------------------- */
/*

    ibaraki_patient.css

                        Apr/02/2020

*/
/* -------------------------------------------------------------- */

/* Shared box rules for tables of class "main" and for every td / th cell */
table.main,
td,
th {
    table-layout: fixed;
    border: 1.5px #7e7e7e solid;
    border-collapse: collapse;
    height: 16px;
}

/* Grey background for header cells */
th {
    background: #c6c6c6;
}

/* Thin green border for tables of class "tag" */
table.tag {
    border: 0.5px green solid;
}

/* Background colour for rows of class "cyan" */
tr.cyan {
    background-color: #c7d7c7;
}

/* Red text */
.red {
    color: #ff0000;
}

/* -------------------------------------------------------------- */
ibaraki_patient.js
// -----------------------------------------------------------------------
//  ibaraki_patient.js
//
//                  Apr/02/2020
//
// -----------------------------------------------------------------------
// Runs once the DOM is ready: loads data_ibaraki.json and renders it as a
// table inside the element(s) of class "contents".
jQuery (function ()
{
    jQuery("#outarea_aa").text ("*** ibaraki_patient *** start ***")

    const file_in = "./data_ibaraki.json"
    const labels = ["No", "陽性判明日", "年代", "性別", "居住地"]

    jQuery.getJSON (file_in,function (data_aa)
        {
        // The values originate from a scraped web page, so every cell is
        // inserted with .text() instead of being concatenated into an HTML
        // string; markup inside the data is shown literally, never executed.
        const tbody = jQuery("<tbody>")

        const head_row = jQuery("<tr>")
        labels.forEach (function (label)
            {
            head_row.append (jQuery("<th>").text (label))
            })
        tbody.append (head_row)

        for (var key in data_aa)
            {
            const unit_aa = data_aa[key]
            const values = [key, unit_aa.date, unit_aa.age, unit_aa.sex, unit_aa.place]
            const row = jQuery("<tr>")
            values.forEach (function (value)
                {
                // String() is required: .text(undefined) would act as a
                // getter and the cell would be lost.
                row.append (jQuery("<td>").text (String (value)))
                })
            tbody.append (row)
            }

        jQuery(".contents").empty ().append (jQuery("<table>").append (tbody))
        })
        .fail (function (jqxhr, text_status)
            {
            // Make a failed load visible instead of leaving the page empty.
            jQuery(".contents").text ("*** error *** " + file_in + " *** " + text_status + " ***")
            })

    jQuery("#outarea_hh").text ("*** ibaraki_patient *** end ***")

})

// -----------------------------------------------------------------------
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0