0
1

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

More than 3 years have passed since last update.

気象庁から全国の気象警報・注意報をスクレイピングして集計する

Last updated at Posted at 2021-01-11

気象庁の気象警報・注意報のページから
全国の気象警報・注意報をスクレイピングして集計する

※2021/02/20現在利用できません

import pathlib
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm

# Prefecture names listed in code order ("01" first, "47" last).
# Used later to put aggregated rows in a fixed order.
pref = [
    "北海道",
    "青森県",
    "岩手県",
    "宮城県",
    "秋田県",
    "山形県",
    "福島県",
    "茨城県",
    "栃木県",
    "群馬県",
    "埼玉県",
    "千葉県",
    "東京都",
    "神奈川県",
    "新潟県",
    "富山県",
    "石川県",
    "福井県",
    "山梨県",
    "長野県",
    "岐阜県",
    "静岡県",
    "愛知県",
    "三重県",
    "滋賀県",
    "京都府",
    "大阪府",
    "兵庫県",
    "奈良県",
    "和歌山県",
    "鳥取県",
    "島根県",
    "岡山県",
    "広島県",
    "山口県",
    "徳島県",
    "香川県",
    "愛媛県",
    "高知県",
    "福岡県",
    "佐賀県",
    "長崎県",
    "熊本県",
    "大分県",
    "宮崎県",
    "鹿児島県",
    "沖縄県",
]

# Mapping from zero-padded two-digit code ("01" .. "47") to prefecture name,
# derived from the ordered list above so the two can never disagree.
pref_code = {f"{number:02d}": name for number, name in enumerate(pref, start=1)}

スクレイピング

# Top page of the JMA weather warning/advisory site; area pages are linked from it.
url = "https://www.jma.go.jp/jp/warn/"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

# Seconds to wait for the server before giving up. Without a timeout,
# requests waits indefinitely and a stalled connection hangs the whole run.
REQUEST_TIMEOUT = 30

# Reuse one session (and its connection pool) for every page.
with requests.Session() as s:

    r = s.get(url, headers=headers, timeout=REQUEST_TIMEOUT)

    r.raise_for_status()

    base = BeautifulSoup(r.content, "html5lib")

    # One entry per downloaded area page: link text, absolute URL, local file path.
    htmls = []

    # The area links are read from the <noscript> fallback table of the top page.
    for tag in tqdm(base.select("div#title > noscript > table > tbody > tr > td > a")):

        area = tag.get_text(strip=True)
        link = urljoin(url, tag.get("href"))

        r = s.get(link, headers=headers, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()

        soup = BeautifulSoup(r.content, "html5lib")

        # Save under ./html/ using the last path component of the URL as file name.
        p = pathlib.Path("html", pathlib.PurePath(link).name)
        p.parent.mkdir(parents=True, exist_ok=True)

        # NOTE: written with the platform default encoding; fetch_warn reads the
        # file back the same way, so change both sides together if at all.
        with p.open(mode="w") as fw:
            fw.write(soup.prettify())

        htmls.append({"area": area, "url": link, "path": p})

        # Pause between requests to avoid hammering the server.
        time.sleep(3)
import pandas as pd

def fetch_warn(p, area):
    """Read one saved area page and return its warning table in long format.

    The table with ``id="WarnTableTable"`` is parsed from the file, its
    value columns are unpivoted, and the result is labelled with ``area``.

    Args:
        p: Path-like object exposing ``open(mode=...)`` that points at the
            saved HTML file.
        area: Name stored in the ``pref`` column of every returned row.

    Returns:
        pandas.DataFrame with the columns ``area1``, ``area2``, ``city``,
        ``level``, ``alert``, ``value`` and ``pref``.
    """

    # Close the file deterministically instead of leaking the handle.
    with p.open(mode="r") as fr:
        tmp = pd.read_html(fr, attrs={"id": "WarnTableTable"})[0]

    # The first three columns have no header text, so pandas names them
    # "Unnamed: ..."; they are kept as identifiers while the remaining
    # two-level header columns are unpivoted into rows.
    # thresh=5 keeps only rows with at least five non-missing values.
    df = tmp.melt(
        id_vars=[
            ("Unnamed: 0_level_0", "Unnamed: 0_level_1"),
            ("Unnamed: 1_level_0", "Unnamed: 1_level_1"),
            ("Unnamed: 2_level_0", "Unnamed: 2_level_1"),
        ]
    ).dropna(thresh=5)

    # Assign the labels directly: set_axis(..., inplace=True) no longer exists
    # in pandas 2.x, while plain assignment works on every version.
    df.columns = ["area1", "area2", "city", "level", "alert", "value"]

    df["pref"] = area

    return df


# Parse every downloaded page and stack the results into one frame.
dfs = [fetch_warn(html["path"], html["area"]) for html in htmls]

df = pd.concat(dfs).reset_index(drop=True)

# Normalize text to NFKC and remove all whitespace.
# The pattern is a raw string ("\s" is an invalid escape in a normal string)
# and regex=True is explicit because pandas 2.0 made literal matching the default.
for col in df.select_dtypes(include=object).columns:
    df[col] = df[col].str.normalize("NFKC").str.replace(r"\s", "", regex=True)

# Map the sub-regions of Hokkaido and Okinawa onto their prefecture names.
# Assign the result back instead of calling replace(inplace=True) on the
# selected column, which does not update df under pandas Copy-on-Write.
df["pref"] = df["pref"].replace(
    {
        "宗谷地方": "北海道",
        "上川・留萌地方": "北海道",
        "網走・北見・紋別地方": "北海道",
        "釧路・根室・十勝地方": "北海道",
        "胆振・日高地方": "北海道",
        "石狩・空知・後志地方": "北海道",
        "渡島・檜山地方": "北海道",
        "沖縄本島地方": "沖縄県",
        "大東島地方": "沖縄県",
        "宮古島地方": "沖縄県",
        "八重山地方": "沖縄県",
    }
)

# Convert the "●" marker to 1 and everything else to 0.
# (Comparing against an empty string never matched, so every value became 0.)
df["value"] = (df["value"] == "●").astype(int)

# Count warnings and advisories per prefecture, in prefecture-code order.
df_alert = df.pivot_table(
    index="pref", columns="level", values="value", aggfunc="sum"
).reindex(index=pref, columns=["警報", "注意報"])

# Notebook display of the aggregated table.
df_alert

# utf_8_sig adds a BOM to the CSV.
df.to_csv("alert.csv", encoding="utf_8_sig")
0
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
0
1

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?