LoginSignup
0
1

More than 3 years have passed since the last update.

滋賀県の新型コロナウイルス感染症患者の発生状況をCSVに変換

Posted at

最新の情報を抽出できるように更新日を追加

apt install python3-tk ghostscript
pip install camelot-py[cv]
import requests
from bs4 import BeautifulSoup

import pathlib
import re

from urllib.parse import urljoin

import pandas as pd

import camelot

# Default HTTP request headers used by fetch_soup: a browser-style User-Agent
# string is sent instead of the default one supplied by requests.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}


def fetch_soup(url, parser="html.parser", timeout=30):
    """Download a page and parse it with BeautifulSoup.

    Args:
        url: Address of the page to download.
        parser: Name of the BeautifulSoup parser to use.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The parsed BeautifulSoup document.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()

    # Hand over the raw bytes so BeautifulSoup works out the encoding itself.
    return BeautifulSoup(r.content, parser)


def fetch_file(url, dir=".", timeout=30):
    """Download ``url`` into ``dir`` unless the file is already there.

    The local file name is the last component of the URL *path*, so a query
    string or fragment (``report.pdf?ver=2``) does not end up in the name.

    Args:
        url: Address of the file to download.
        dir: Directory the file is stored in; created if missing.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        pathlib.Path of the local file (freshly downloaded or pre-existing).

    Raises:
        ValueError: If no file name can be derived from ``url``.
        requests.HTTPError: If the server answers with an error status.
    """
    from urllib.parse import urlsplit

    # Parse as a URL (POSIX-style path) rather than as an OS-specific path.
    name = pathlib.PurePosixPath(urlsplit(url).path).name
    if name in ("", ".."):
        raise ValueError("cannot derive a file name from URL: {!r}".format(url))

    p = pathlib.Path(dir, name)
    p.parent.mkdir(parents=True, exist_ok=True)

    # An existing file acts as a cache: it is returned without any request.
    if not p.exists():
        # Same headers as fetch_soup so both requests look alike to the server.
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()

        with p.open(mode="wb") as fw:
            fw.write(r.content)

    return p


def fetch_pdf(url, update=None):
    """Extract the patient tables from the PDF linked on a report page.

    The page at ``url`` is searched for the link whose label starts with
    "患者の発生について". The PDF behind it is fetched with fetch_file and read
    with camelot; every table whose first header cell is "例目" and that has
    11 or 12 columns is collected.

    Args:
        url: Address of the page that links to the PDF.
        update: Value stored in the "更新日" column. When omitted, the
            module-level ``update`` variable is used, which is what callers
            relied on before this parameter existed.

    Returns:
        A DataFrame with the rows of all matching tables plus a "更新日"
        column. It has no rows when no table matched.

    Raises:
        ValueError: If the page contains no matching PDF link.
    """
    if update is None:
        # Legacy path: the calling loop assigns a module-level ``update``.
        update = globals().get("update")

    soup = fetch_soup(url)

    label = soup.find("span", string=re.compile("^患者の発生について"))
    tag = label.find_parent("a") if label is not None else None
    if tag is None or not tag.get("href"):
        raise ValueError("no '患者の発生について' PDF link found on {}".format(url))

    link = urljoin(url, tag.get("href"))

    p = fetch_file(link)

    tables = camelot.read_pdf(
        str(p), pages="all", split_text=True, strip_text="\n", line_scale=40
    )

    dfs = []

    for table in tables:

        # Keep only tables whose first header cell is "例目".
        if table.data[0][0] == "例目":

            df_tmp = pd.DataFrame(table.data[1:], columns=table.data[0])

            # Only 11- or 12-column tables are accepted; other widths are
            # skipped (presumably mis-detected tables — TODO confirm).
            if 10 < df_tmp.shape[1] < 13:
                dfs.append(df_tmp)

    # pd.concat raises on an empty list, so fall back to an empty frame.
    df = pd.concat(dfs) if dfs else pd.DataFrame()

    df["更新日"] = update

    return df


# Shiga Prefecture page whose "患者の発生について" links lead to the report pages.
url = "https://www.pref.shiga.lg.jp/ippan/kenkouiryouhukushi/yakuzi/310735.html"

soup = fetch_soup(url)

dfs = []

for i in soup.find_all("a", string="患者の発生について"):

    # The label is taken from the span with class "first" that precedes the
    # span enclosing the link.
    # NOTE: ``update`` must remain a module-level name, because fetch_pdf
    # reads it to fill the "更新日" column.
    update = (
        i.find_parent("span")
        .find_previous_sibling("span", class_="first")
        .get_text(strip=True)
    )

    print(update)

    link = urljoin(url, i.get("href"))

    d = fetch_pdf(link)

    dfs.append(d)

# pd.concat raises an unhelpful error on an empty list; stop with a clear one.
if not dfs:
    raise SystemExit("no '患者の発生について' links found on {}".format(url))

# Each table keeps its own 0..n index, so index values repeat in the output.
df = pd.concat(dfs)

df.info()

df.to_csv("data.csv", encoding="utf_8_sig")
0
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
0
1