Pandasで気象庁XMLから市町村別の気象特別警報・警報・注意報を抽出

Last updated at 2024-07-15 / Posted at 2024-07-15
namespace は、気象庁の「辞書及びスキーマ」の辞書一式（令和6年5月28日一部更新）に含まれる「#ns」シートを参照してください。
import pandas as pd
import requests
from lxml import etree


def fetch_atom(url):
    """Download an Atom feed and return its entries as a DataFrame.

    Args:
        url: URL of the Atom feed to download.

    Returns:
        A DataFrame with one row per ``<entry>`` element and the columns
        ``Title``, ``ID``, ``Updated``, ``Author``, ``Link`` and ``Content``.
        ``Updated`` is a timezone-naive datetime expressed in Asia/Tokyo
        wall-clock time; values that cannot be parsed become ``NaT``.
        Child elements that are absent from an entry yield ``None``.
        A feed without entries yields an empty DataFrame that still has
        all six columns.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within 30 seconds.
        lxml.etree.XMLSyntaxError: If the response body is not valid XML.
    """
    # Atom elements live in the default (prefix-less) namespace.
    ns = {
        None: "http://www.w3.org/2005/Atom",
    }
    columns = ["Title", "ID", "Updated", "Author", "Link", "Content"]

    def text_of(parent, path):
        """Return the text of the first element at *path*, or None if absent."""
        node = parent.find(path, ns)
        return None if node is None else node.text

    # Download the feed; the timeout keeps a stalled connection from
    # blocking forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Parse the XML payload.
    tree = etree.fromstring(response.content)

    # Collect one record per <entry>.
    data = []
    for entry in tree.findall("entry", ns):
        link = entry.find("link", ns)
        data.append(
            {
                "Title": text_of(entry, "title"),
                "ID": text_of(entry, "id"),
                "Updated": text_of(entry, "updated"),
                "Author": text_of(entry, "author/name"),
                "Link": None if link is None else link.get("href"),
                "Content": text_of(entry, "content"),
            }
        )

    # Passing the column list keeps the schema stable when the feed is empty,
    # so the "Updated" conversion below never hits a missing column.
    df = pd.DataFrame(data, columns=columns)

    # utc=True always produces a tz-aware column (even with mixed UTC
    # offsets), which .dt.tz_convert requires.
    df["Updated"] = pd.to_datetime(df["Updated"], errors="coerce", utc=True)
    df["Updated"] = df["Updated"].dt.tz_convert("Asia/Tokyo").dt.tz_localize(None)

    return df


def fetch_warning(url):
    """Download a JMA warning XML report and list its per-municipality warnings.

    Only ``Warning`` elements whose ``type`` attribute equals
    "気象警報・注意報（市町村等）" are read. One row is produced for every
    ``Kind`` element of every ``Item`` below them, and rows whose ``Status``
    is "解除" are dropped from the result.

    Args:
        url: URL of the XML report to download.

    Returns:
        A DataFrame with the columns ``Title``, ``InfoType``, ``Date``,
        ``Name``, ``Code``, ``Status``, ``Addition``, ``CityName`` and
        ``CityCode``. ``Date`` is the report's ``ReportDateTime`` with its
        timezone information removed. ``Addition`` joins the texts of all
        ``Addition/Note`` elements with "、". Elements missing from a
        ``Kind`` or ``Area`` yield ``None``. When nothing matches, an empty
        DataFrame with the same columns is returned.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within 30 seconds.
        lxml.etree.XMLSyntaxError: If the response body is not valid XML.
        IndexError: If the ``Head`` lacks ``Title``, ``InfoType`` or
            ``ReportDateTime``.
    """
    # Namespace prefixes used by the XPath expressions below.
    ns = {
        "jmx": "http://xml.kishou.go.jp/jmaxml1/",
        "jmx_ib": "http://xml.kishou.go.jp/jmaxml1/informationBasis1/",
        "jmx_eb": "http://xml.kishou.go.jp/jmaxml1/elementBasis1/",
        "jmx_mete": "http://xml.kishou.go.jp/jmaxml1/body/meteorology1/",
    }
    columns = ["Title", "InfoType", "Date", "Name", "Code", "Status", "Addition", "CityName", "CityCode"]

    def first_text(node, path):
        """Return the text of the first match of *path* under *node*, or None."""
        if node is None:
            return None
        matches = node.xpath(path, namespaces=ns)
        return matches[0].text if matches else None

    # Download the report; the timeout keeps a stalled connection from
    # blocking forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Parse the XML payload.
    tree = etree.fromstring(response.content)

    # Header fields are treated as mandatory: a document without them fails
    # loudly with IndexError.
    title = tree.xpath(".//jmx_ib:Head/jmx_ib:Title", namespaces=ns)[0].text
    info_type = tree.xpath(".//jmx_ib:Head/jmx_ib:InfoType", namespaces=ns)[0].text
    report_time = tree.xpath(".//jmx_ib:Head/jmx_ib:ReportDateTime", namespaces=ns)[0].text
    date_time = pd.to_datetime(report_time).tz_localize(None)

    # Collect one record per Kind element.
    data = []

    for warning in tree.xpath('.//jmx_mete:Warning[@type="気象警報・注意報（市町村等）"]', namespaces=ns):
        for item in warning.xpath(".//jmx_mete:Item", namespaces=ns):
            areas = item.xpath(".//jmx_mete:Area", namespaces=ns)
            area = areas[0] if areas else None

            area_name = first_text(area, "jmx_mete:Name")
            area_code = first_text(area, "jmx_mete:Code")

            for kind in item.xpath(".//jmx_mete:Kind", namespaces=ns):
                notes = kind.xpath(".//jmx_mete:Addition/jmx_mete:Note", namespaces=ns)
                data.append(
                    {
                        "Title": title,
                        "InfoType": info_type,
                        "Date": date_time,
                        # A Kind may lack Name/Code (presumably entries that
                        # only carry a Status - TODO confirm against the JMA
                        # specification); record None instead of raising.
                        "Name": first_text(kind, "jmx_mete:Name"),
                        "Code": first_text(kind, "jmx_mete:Code"),
                        "Status": first_text(kind, "jmx_mete:Status"),
                        # Skip Note elements without text so join() cannot
                        # fail on None.
                        "Addition": "、".join(note.text for note in notes if note.text is not None),
                        "CityName": area_name,
                        "CityCode": area_code,
                    }
                )

    # Passing the column list keeps the schema stable when nothing matched,
    # so the "Status" filter below never hits a missing column.
    df = pd.DataFrame(data, columns=columns)
    df = df[df["Status"] != "解除"]

    return df


# Load the JMA "extra" Atom feed into a DataFrame.
df_extra = fetch_atom("https://www.data.jma.go.jp/developer/xml/feed/extra.xml")
df_extra

# Example for Ehime Prefecture: additionally filter on the issuing office.
# df_pref = df_extra[(df_extra["Title"] == "気象特別警報・警報・注意報") & (df_extra["Author"] == "松山地方気象台")].copy()

# Keep only the entries titled "気象特別警報・警報・注意報".
is_warning_entry = df_extra["Title"] == "気象特別警報・警報・注意報"
df_pref = df_extra.loc[is_warning_entry].copy()
df_pref

# Use the link of the first matching entry.
# NOTE(review): this is the latest report only if the feed lists entries
# newest-first - confirm.
url = df_pref["Link"].iloc[0]
url

# Download that report and extract the per-municipality warnings.
df_warning = fetch_warning(url)
df_warning
参考

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme
What you can do with signing up